diff --git a/CHANGES.rst b/CHANGES.rst index 2b250f4e59..f3b8b076b4 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -149,6 +149,7 @@ mast and column descriptions in the column metadata. [#3588] - Added ``pass_id`` as an alias for the ``pass`` column in query functions for the Roman mission to avoid conflicts with the reserved Python keyword. [#3588] +- Adding in ability to read FITS and ASDF data products to memory from STScI's S3 open data bucket using ``Observations.read_product()`` function. [#3561] jplspec diff --git a/astroquery/mast/observations.py b/astroquery/mast/observations.py index 6dffd2d6d2..9d4bc4ec42 100644 --- a/astroquery/mast/observations.py +++ b/astroquery/mast/observations.py @@ -11,9 +11,11 @@ import time import os from urllib.parse import quote +import importlib.util import numpy as np import astropy.units as u +from astropy.io import fits import astropy.coordinates as coord from requests import HTTPError from astropy.table import Table, Row, vstack @@ -36,6 +38,13 @@ except ImportError: pass +try: + import asdf + import fsspec +except ImportError: + asdf = None + fsspec = None + __all__ = ['Observations', 'ObservationsClass', 'MastClass', 'Mast'] CLOUD_DISABLED_MESSAGE = ( @@ -1203,6 +1212,75 @@ def get_unique_product_list(self, observations, *, batch_size=500): log.info("To return all products, use `Observations.get_product_list`") return unique_products + # TODO: Need to inlcude way to parse if it is a MAST on prem URL and handle the streaming of that + def read_product(self, product_path, ignore_unrecognized=True, **kwargs): + """ + Read a product from Open S3 bucket to memory. Currently supports FITS and ASDF product types only. + + Parameters + ---------- + product_path: str + URI to the product in the STScI S3 open data bucket. + ignore_unrecognized: bool + Tells asdf.open() to include or ignore warnings from unrecognized asdf tags. Defaults to True + **kwargs + Additional keyword arguments passed to the underlying file reader: + - For FITS files: forwarded to ``astropy.io.fits.open``. + Common options include ``memmap``, ``mode``, etc. + - Ignored for ASDF files (except for future extension if needed). + + Returns + ------- + object + FITS or ASDF object for the given data product. + """ + # Checks if a path is empty or None. + if not product_path or not str(product_path).strip(): + raise ValueError("No product path provided") + + # Forces the path to be lowercase for the extension checks. This is only used for the checks + path = str(product_path).lower() + + # Checks users enviornment for fsspec, required for both fits and asdf + if fsspec is None: + raise ImportError('The "fsspec" package is required to read products directly from a URI. ' + 'Please install it with `pip install fsspec`.') + + # Logic for reading FITS files + if path.endswith((".fits", ".fits.gz")): + try: + data_product = fits.open(product_path, fsspec_kwargs={"anon": True}, **kwargs) + log.info(f"Loaded: {product_path}") + return data_product + except Exception as e: + raise RuntimeError(f"Failed to open FITS File: {product_path} {e}") + + # Logic for reading ASDF files + elif path.endswith(".asdf"): + # checks for asdf package and will raise and error if not installed as asdf is required + for pkg in ["asdf"]: + if importlib.util.find_spec(pkg) is None: + raise ImportError(f'The "{pkg}" package is required to read ASDF files containing {pkg} data. ' + f'Please install it with `pip install {pkg}`.') + + # Checks for gwcs and warns the user if it is not installed, this will not stop the function. + for pkg in ["gwcs"]: + if importlib.util.find_spec(pkg) is None: + warnings.warn(f'The "{pkg}" package is required to read ASDF files containing {pkg} data. ' + f'Please install it with `pip install {pkg}`.') + + # Attempts to open the asdf files + try: + f = fsspec.open(product_path, "rb", anon=True).open() + data_product = asdf.open(f, ignore_unrecognized_tag=ignore_unrecognized) + log.info(f"Loaded: {product_path}") + return data_product + except Exception as e: + raise RuntimeError(f"Failed to open ASDF File: {product_path} {e}") + + else: + raise ValueError(f"Unsupported product type: {product_path}") + @async_to_sync class MastClass(MastQueryWithLogin): diff --git a/astroquery/mast/tests/test_mast.py b/astroquery/mast/tests/test_mast.py index 5eb868da2b..85e5c88b19 100644 --- a/astroquery/mast/tests/test_mast.py +++ b/astroquery/mast/tests/test_mast.py @@ -1372,6 +1372,65 @@ def test_observations_disable_cloud_dataset(patch_boto3): assert Observations._cloud_enabled_explicitly is False +@pytest.fixture +def mock_fits_open(mocker): + """Mock fits.open to return a valid HDUList without network access.""" + return mocker.patch("astropy.io.fits.open", return_value=fits.HDUList([fits.PrimaryHDU()])) + + +@pytest.fixture +def mock_asdf_open(mocker): + return mocker.patch( + "asdf.open", + return_value=MagicMock(name="AsdfFile"), + ) + + +@pytest.fixture +def mock_fsspec_open(mocker): + fake = mocker.Mock() + fake.open.return_value = "mock_asdf_file_object" + return mocker.patch("fsspec.open", return_value=fake) + + +def test_observations_read_product_fits(mock_fits_open): + s3_fits_path = "s3://mock_fits_path.fits" + result = Observations.read_product(s3_fits_path) + + mock_fits_open(s3_fits_path, fsspec_kwargs={"anon": True}) + assert result is mock_fits_open.return_value + + +def test_observations_read_product_asdf(mock_asdf_open, mock_fsspec_open): + s3_asdf_path = "s3://fake_asdf_path.asdf" + result = Observations.read_product(s3_asdf_path) + + mock_asdf_open("mock_asdf_file_object") + assert result is mock_asdf_open.return_value + + +@pytest.mark.parametrize( + "product_path, expected_exception, match", + [ + ("", ValueError, "No product path provided"), + (" ", ValueError, "No product path provided"), + (None, ValueError, "No product path provided"), + ("unsupported_ex.txt", ValueError, "Unsupported product type"), + ], +) +def test_observations_read_product_invalid_inputs(product_path, expected_exception, match): + with pytest.raises(expected_exception, match=match): + Observations.read_product(product_path) + + +def test_observations_read_product_fsspec_missing(monkeypatch): + # Forces fsspec to be None + monkeypatch.setitem(Observations.read_product.__globals__, "fsspec", None) + + with pytest.raises(ImportError, match="fsspec"): + Observations.read_product("file.fits") + + ###################### # CatalogClass tests # ###################### diff --git a/astroquery/mast/tests/test_mast_remote.py b/astroquery/mast/tests/test_mast_remote.py index b6f55a05e2..63c23c9b4a 100644 --- a/astroquery/mast/tests/test_mast_remote.py +++ b/astroquery/mast/tests/test_mast_remote.py @@ -20,6 +20,11 @@ from ...exceptions import (InputWarning, InvalidQueryError, MaxResultsWarning, NoResultsWarning) +try: + import asdf +except ImportError: + asdf = None + @pytest.fixture(scope="module") def msa_product_table(): @@ -1048,6 +1053,25 @@ def test_observations_get_cloud_uris_no_duplicates(self, msa_product_table, rese uris = Observations.get_cloud_uris(products) assert len(uris) == 1 + @pytest.mark.remote_data + @pytest.mark.parametrize( + "product_path, expected_type", + [ + ( + "s3://stpubdata/hst/public/u24r/u24r0102t/u24r0102t_c1f.fits", + fits.HDUList, + ), + ( + "s3://stpubdata/roman/nexus/soc_simulations/tutorial_data/" + "r0003201001001001004_0001_wfi01_f106_cal.asdf", + asdf.AsdfFile, + ), + ], + ) + def test_observations_read_product(self, product_path, expected_type): + product = Observations.read_product(product_path) + assert isinstance(product, expected_type) + ###################### # CatalogClass tests # ###################### diff --git a/docs/mast/mast_obsquery.rst b/docs/mast/mast_obsquery.rst index 6ef2a65d47..36ecd372a7 100644 --- a/docs/mast/mast_obsquery.rst +++ b/docs/mast/mast_obsquery.rst @@ -629,3 +629,16 @@ remain fully cloud-based. COMPLETE COMPLETE COMPLETE + +Streaming Data Products from S3 to memory +----------------------------------------- + +If instead of downloading you would like to load an S3 URI directly to memory, you can use the `~astroquery.mast.ObservationsClass.read_product` method. +This function supports FITS and ASDF data products and will automatically parse the file for the suffix and load it to memory using `~astropy.io.fits.open` or `~asdf.open`. +For ASDF data products, additional packages may be required (e.g `~lz4` and `~roman-datamodels` for data from the Roman Space Telescope or `~gwcs` for common ASDF schema tags). + +.. doctest-remote-data:: + + >>> from astroquery.mast import Observations + >>> fits_product = Observations.read_product(product_path="s3://stpubdata/hst/public/u9o4/u9o40504m/u9o40504m_c3m.fits") + >>> asdf_product = Observations.read_product(product_path="s3://stpubdata/roman/nexus/soc_simulations/tutorial_data/r0003201001001001004_0001_wfi01_f106_cal.asdf", ignore_unrecognized=True) \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index bbe6c2c6cd..bf3a024491 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,17 +34,20 @@ Documentation = "https://astroquery.readthedocs.io" [project.optional-dependencies] test = [ + "asdf", + "gwcs", + "roman-datamodels", "pytest>=7.4", "pytest-doctestplus>=1.4", "pytest-timeout", "pytest-astropy", + "lz4", "matplotlib", # Temp workaround for https://github.com/RKrahl/pytest-dependency/issues/91 "pytest-dependency; platform_system != 'Windows'", "pytest-rerunfailures", - "fsspec[http]", - "moto[s3]", - "s3fs", + "fsspec[http,s3]", + "moto[s3]" ] docs = [ "sphinx", diff --git a/tox.ini b/tox.ini index ca7173cf23..fe742e1d1e 100644 --- a/tox.ini +++ b/tox.ini @@ -51,6 +51,8 @@ deps = oldestdeps-alldeps: mocpy==0.12 oldestdeps-alldeps: regions==0.5 oldestdeps-alldeps: astropy-healpix==0.7 + oldestdeps-alldeps: roman_datamodels==0.11 + oldestdeps-alldeps: gwcs==0.18 online: pytest-custom_exit_code