From b7958cd6b159089c888cb301788ca0035954914a Mon Sep 17 00:00:00 2001 From: Elihei2 Date: Mon, 1 Jun 2026 13:47:48 +0200 Subject: [PATCH] feat(io): optional z-coordinate passthrough (include_z, default False) Add an include_z preprocessor parameter (threaded through get_preprocessor); when enabled, the transcript z coordinate (Xenium `z_location`) is carried into the standardized output as `z`. Defaults to False so base behaviour is unchanged. Independent of the QV change. --- src/segger/io/fields.py | 2 ++ src/segger/io/preprocessor.py | 21 +++++++++++++++------ 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/src/segger/io/fields.py b/src/segger/io/fields.py index 8dc30a9..fec203e 100644 --- a/src/segger/io/fields.py +++ b/src/segger/io/fields.py @@ -8,6 +8,7 @@ class XeniumTranscriptFields: filename: str = 'transcripts.parquet' x: str = 'x_location' y: str = 'y_location' + z: str = 'z_location' feature: str = 'feature_name' cell_id: str = 'cell_id' null_cell_id: str = 'UNASSIGNED' @@ -107,6 +108,7 @@ class StandardTranscriptFields: row_index: str = 'row_index' x: str = 'x' y: str = 'y' + z: str = 'z' feature: str = 'feature_name' cell_id: str = 'cell_id' compartment: str = 'cell_compartment' diff --git a/src/segger/io/preprocessor.py b/src/segger/io/preprocessor.py index 9bbe62d..9e60d7e 100644 --- a/src/segger/io/preprocessor.py +++ b/src/segger/io/preprocessor.py @@ -63,7 +63,7 @@ class ISTPreprocessor(ABC): transcript and boundary GeoDataFrames for the given platform. 
""" - def __init__(self, data_dir: Path): + def __init__(self, data_dir: Path, include_z: bool = False): """ Parameters ---------- @@ -73,6 +73,7 @@ def __init__(self, data_dir: Path): data_dir = Path(data_dir) type(self)._validate_directory(data_dir) self.data_dir = data_dir + self.include_z = include_z @staticmethod @abstractmethod @@ -287,11 +288,10 @@ def transcripts(self) -> pl.DataFrame: .alias(std.cell_id) ) # Map to standard field names - .rename({raw.x: std.x, raw.y: std.y, raw.feature: std.feature}) + .rename(rename_map) # Subset to necessary fields - .select([std.row_index, std.x, std.y, std.feature, std.cell_id, - std.compartment]) + .select(select_cols) # Add numeric index .with_row_index() @@ -405,6 +405,14 @@ def transcripts(self) -> pl.DataFrame: raw = self.tx_fields std = StandardTranscriptFields() + rename_map = {raw.x: std.x, raw.y: std.y, raw.feature: std.feature} + select_cols = [ + std.row_index, std.x, std.y, std.feature, std.cell_id, std.compartment, + ] + if self.include_z: + rename_map[raw.z] = std.z + select_cols.append(std.z) + return ( # Read in lazily pl.scan_parquet( @@ -564,7 +572,8 @@ def _infer_platform(data_dir: Path) -> str: def get_preprocessor( data_dir: Path, - platform: str | None = None + platform: str | None = None, + include_z: bool = False, ) -> ISTPreprocessor: data_dir = Path(data_dir) if platform is None: @@ -575,4 +584,4 @@ def get_preprocessor( f"Available: {list(PREPROCESSORS)}" ) cls = PREPROCESSORS[platform.lower()] - return cls(data_dir) + return cls(data_dir, include_z=include_z)