Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changes/3908.misc.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Reuse a constant `ArraySpec` during indexing when possible.
43 changes: 41 additions & 2 deletions src/zarr/core/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -5370,6 +5370,37 @@ def _get_chunk_spec(
)


def _get_default_chunk_spec(
    metadata: ArrayMetadata,
    chunk_grid: ChunkGrid,
    array_config: ArrayConfig,
    prototype: BufferPrototype,
) -> ArraySpec | None:
    """Return a single reusable ArraySpec for a regular chunk grid, else None.

    When the grid is regular, every (non-edge) chunk shares one shape, so one
    ArraySpec can be constructed up front and reused for each chunk instead of
    paying the per-chunk ``ChunkGrid.__getitem__`` lookup plus ``ArraySpec``
    construction cost.

    .. note::
        Ideally the per-chunk ArraySpec would not exist at all: dtype,
        fill_value, config, and prototype are constant across chunks —
        only the shape varies (and only for edge chunks). A cleaner
        design would pass a single ArraySpec plus a per-chunk shape
        override, which ChunkTransform.decode_chunk already supports
        via its ``chunk_shape`` parameter.
    """
    # Irregular grids have per-chunk shapes; callers must fall back to the
    # per-chunk _get_chunk_spec path in that case.
    if not chunk_grid.is_regular:
        return None
    return ArraySpec(
        shape=chunk_grid.chunk_shape,
        dtype=metadata.dtype,
        fill_value=metadata.fill_value,
        config=array_config,
        prototype=prototype,
    )


async def _get_selection(
store_path: StorePath,
metadata: ArrayMetadata,
Expand Down Expand Up @@ -5449,11 +5480,16 @@ async def _get_selection(

# reading chunks and decoding them
indexed_chunks = list(indexer)
# Pre-compute the default chunk spec for regular grids to avoid
# per-chunk ChunkGrid lookups and ArraySpec construction.
default_spec = _get_default_chunk_spec(metadata, chunk_grid, _config, prototype)
results = await codec_pipeline.read(
[
(
store_path / metadata.encode_chunk_key(chunk_coords),
_get_chunk_spec(metadata, chunk_grid, chunk_coords, _config, prototype),
default_spec
if default_spec is not None
else _get_chunk_spec(metadata, chunk_grid, chunk_coords, _config, prototype),
chunk_selection,
out_selection,
is_complete_chunk,
Expand Down Expand Up @@ -5792,11 +5828,14 @@ async def _set_selection(
_config = replace(_config, order=order)

# merging with existing data and encoding chunks
default_spec = _get_default_chunk_spec(metadata, chunk_grid, _config, prototype)
await codec_pipeline.write(
[
(
store_path / metadata.encode_chunk_key(chunk_coords),
_get_chunk_spec(metadata, chunk_grid, chunk_coords, _config, prototype),
default_spec
if default_spec is not None
else _get_chunk_spec(metadata, chunk_grid, chunk_coords, _config, prototype),
chunk_selection,
out_selection,
is_complete_chunk,
Expand Down
Loading