Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ mast
and column descriptions in the column metadata. [#3588]
- Added ``pass_id`` as an alias for the ``pass`` column in query functions for the Roman mission to avoid conflicts with
the reserved Python keyword. [#3588]
- Adding in ability to read FITS and ASDF data products to memory from STScI's S3 open data bucket using ``Observations.read_product()`` function. [#3561]


jplspec
Expand Down
78 changes: 78 additions & 0 deletions astroquery/mast/observations.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@
import time
import os
from urllib.parse import quote
import importlib.util

import numpy as np
import astropy.units as u
from astropy.io import fits
import astropy.coordinates as coord
from requests import HTTPError
from astropy.table import Table, Row, vstack
Expand All @@ -36,6 +38,13 @@
except ImportError:
pass

try:
import asdf
import fsspec
except ImportError:
asdf = None
fsspec = None

__all__ = ['Observations', 'ObservationsClass', 'MastClass', 'Mast']

CLOUD_DISABLED_MESSAGE = (
Expand Down Expand Up @@ -1203,6 +1212,75 @@ def get_unique_product_list(self, observations, *, batch_size=500):
log.info("To return all products, use `Observations.get_product_list`")
return unique_products

# TODO: Need to inlcude way to parse if it is a MAST on prem URL and handle the streaming of that
def read_product(self, product_path, ignore_unrecognized=True, **kwargs):
"""
Read a product from Open S3 bucket to memory. Currently supports FITS and ASDF product types only.

Parameters
----------
product_path: str
URI to the product in the STScI S3 open data bucket.
ignore_unrecognized: bool
Tells asdf.open() to include or ignore warnings from unrecognized asdf tags. Defaults to True
**kwargs
Additional keyword arguments passed to the underlying file reader:
- For FITS files: forwarded to ``astropy.io.fits.open``.
Common options include ``memmap``, ``mode``, etc.
- Ignored for ASDF files (except for future extension if needed).

Returns
-------
object
FITS or ASDF object for the given data product.
"""
# Checks if a path is empty or None.
if not product_path or not str(product_path).strip():
raise ValueError("No product path provided")

# Forces the path to be lowercase for the extension checks. This is only used for the checks
path = str(product_path).lower()

# Checks users enviornment for fsspec, required for both fits and asdf
if fsspec is None:
raise ImportError('The "fsspec" package is required to read products directly from a URI. '
'Please install it with `pip install fsspec`.')

# Logic for reading FITS files
if path.endswith((".fits", ".fits.gz")):
try:
data_product = fits.open(product_path, fsspec_kwargs={"anon": True}, **kwargs)
log.info(f"Loaded: {product_path}")
return data_product
except Exception as e:
raise RuntimeError(f"Failed to open FITS File: {product_path} {e}")

# Logic for reading ASDF files
elif path.endswith(".asdf"):
# checks for asdf package and will raise and error if not installed as asdf is required
for pkg in ["asdf"]:
if importlib.util.find_spec(pkg) is None:
raise ImportError(f'The "{pkg}" package is required to read ASDF files containing {pkg} data. '
f'Please install it with `pip install {pkg}`.')

# Checks for gwcs and warns the user if it is not installed, this will not stop the function.
for pkg in ["gwcs"]:
if importlib.util.find_spec(pkg) is None:
warnings.warn(f'The "{pkg}" package is required to read ASDF files containing {pkg} data. '
f'Please install it with `pip install {pkg}`.')

# Attempts to open the asdf files
try:
f = fsspec.open(product_path, "rb", anon=True).open()
data_product = asdf.open(f, ignore_unrecognized_tag=ignore_unrecognized)
log.info(f"Loaded: {product_path}")
return data_product
except Exception as e:
raise RuntimeError(f"Failed to open ASDF File: {product_path} {e}")

else:
raise ValueError(f"Unsupported product type: {product_path}")


@async_to_sync
class MastClass(MastQueryWithLogin):
Expand Down
59 changes: 59 additions & 0 deletions astroquery/mast/tests/test_mast.py
Original file line number Diff line number Diff line change
Expand Up @@ -1372,6 +1372,65 @@ def test_observations_disable_cloud_dataset(patch_boto3):
assert Observations._cloud_enabled_explicitly is False


@pytest.fixture
def mock_fits_open(mocker):
"""Mock fits.open to return a valid HDUList without network access."""
return mocker.patch("astropy.io.fits.open", return_value=fits.HDUList([fits.PrimaryHDU()]))


@pytest.fixture
def mock_asdf_open(mocker):
return mocker.patch(
"asdf.open",
return_value=MagicMock(name="AsdfFile"),
)


@pytest.fixture
def mock_fsspec_open(mocker):
fake = mocker.Mock()
fake.open.return_value = "mock_asdf_file_object"
return mocker.patch("fsspec.open", return_value=fake)


def test_observations_read_product_fits(mock_fits_open):
s3_fits_path = "s3://mock_fits_path.fits"
result = Observations.read_product(s3_fits_path)

mock_fits_open(s3_fits_path, fsspec_kwargs={"anon": True})
assert result is mock_fits_open.return_value


def test_observations_read_product_asdf(mock_asdf_open, mock_fsspec_open):
s3_asdf_path = "s3://fake_asdf_path.asdf"
result = Observations.read_product(s3_asdf_path)

mock_asdf_open("mock_asdf_file_object")
assert result is mock_asdf_open.return_value


@pytest.mark.parametrize(
"product_path, expected_exception, match",
[
("", ValueError, "No product path provided"),
(" ", ValueError, "No product path provided"),
(None, ValueError, "No product path provided"),
("unsupported_ex.txt", ValueError, "Unsupported product type"),
],
)
def test_observations_read_product_invalid_inputs(product_path, expected_exception, match):
with pytest.raises(expected_exception, match=match):
Observations.read_product(product_path)


def test_observations_read_product_fsspec_missing(monkeypatch):
# Forces fsspec to be None
monkeypatch.setitem(Observations.read_product.__globals__, "fsspec", None)

with pytest.raises(ImportError, match="fsspec"):
Observations.read_product("file.fits")


######################
# CatalogClass tests #
######################
Expand Down
24 changes: 24 additions & 0 deletions astroquery/mast/tests/test_mast_remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@
from ...exceptions import (InputWarning, InvalidQueryError, MaxResultsWarning,
NoResultsWarning)

try:
import asdf
except ImportError:
asdf = None


@pytest.fixture(scope="module")
def msa_product_table():
Expand Down Expand Up @@ -1048,6 +1053,25 @@ def test_observations_get_cloud_uris_no_duplicates(self, msa_product_table, rese
uris = Observations.get_cloud_uris(products)
assert len(uris) == 1

@pytest.mark.remote_data
@pytest.mark.parametrize(
"product_path, expected_type",
[
(
"s3://stpubdata/hst/public/u24r/u24r0102t/u24r0102t_c1f.fits",
fits.HDUList,
),
(
"s3://stpubdata/roman/nexus/soc_simulations/tutorial_data/"
"r0003201001001001004_0001_wfi01_f106_cal.asdf",
asdf.AsdfFile,
),
],
)
def test_observations_read_product(self, product_path, expected_type):
product = Observations.read_product(product_path)
assert isinstance(product, expected_type)

######################
# CatalogClass tests #
######################
Expand Down
13 changes: 13 additions & 0 deletions docs/mast/mast_obsquery.rst
Original file line number Diff line number Diff line change
Expand Up @@ -629,3 +629,16 @@ remain fully cloud-based.
COMPLETE
COMPLETE
COMPLETE

Streaming Data Products from S3 to memory
-----------------------------------------

If instead of downloading you would like to load an S3 URI directly to memory, you can use the `~astroquery.mast.ObservationsClass.read_product` method.
This function supports FITS and ASDF data products and will automatically parse the file for the suffix and load it to memory using `~astropy.io.fits.open` or `~asdf.open`.
For ASDF data products, additional packages may be required (e.g `~lz4` and `~roman-datamodels` for data from the Roman Space Telescope or `~gwcs` for common ASDF schema tags).

.. doctest-remote-data::

>>> from astroquery.mast import Observations
>>> fits_product = Observations.read_product(product_path="s3://stpubdata/hst/public/u9o4/u9o40504m/u9o40504m_c3m.fits")
>>> asdf_product = Observations.read_product(product_path="s3://stpubdata/roman/nexus/soc_simulations/tutorial_data/r0003201001001001004_0001_wfi01_f106_cal.asdf", ignore_unrecognized=True)
9 changes: 6 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,17 +34,20 @@ Documentation = "https://astroquery.readthedocs.io"

[project.optional-dependencies]
test = [
"asdf",
"gwcs",
"roman-datamodels",
"pytest>=7.4",
"pytest-doctestplus>=1.4",
"pytest-timeout",
"pytest-astropy",
"lz4",
"matplotlib",
# Temp workaround for https://github.com/RKrahl/pytest-dependency/issues/91
"pytest-dependency; platform_system != 'Windows'",
"pytest-rerunfailures",
"fsspec[http]",
"moto[s3]",
"s3fs",
"fsspec[http,s3]",
"moto[s3]"
]
docs = [
"sphinx",
Expand Down
2 changes: 2 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ deps =
oldestdeps-alldeps: mocpy==0.12
oldestdeps-alldeps: regions==0.5
oldestdeps-alldeps: astropy-healpix==0.7
oldestdeps-alldeps: roman_datamodels==0.11
oldestdeps-alldeps: gwcs==0.18

online: pytest-custom_exit_code

Expand Down
Loading