From b7958cd6b159089c888cb301788ca0035954914a Mon Sep 17 00:00:00 2001
From: Elihei2 <eheidari@student.ethz.ch>
Date: Mon, 1 Jun 2026 13:47:48 +0200
Subject: [PATCH] feat(io): optional z-coordinate passthrough (include_z,
 default False)

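Add an `include_z` parameter to ISTPreprocessor.__init__ and thread it
through get_preprocessor(). When it is enabled, the raw transcript z
column is renamed to the standard `z` field and kept in the standardized
transcripts output. To support this, a `z` field is added to
XeniumTranscriptFields ('z_location') and StandardTranscriptFields ('z').
`include_z` defaults to False, so existing behaviour is unchanged.
Independent of the QV change.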
---
 src/segger/io/fields.py       |  2 ++
 src/segger/io/preprocessor.py | 21 +++++++++++++++------
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/src/segger/io/fields.py b/src/segger/io/fields.py
index 8dc30a9..fec203e 100644
--- a/src/segger/io/fields.py
+++ b/src/segger/io/fields.py
@@ -8,6 +8,7 @@ class XeniumTranscriptFields:
     filename: str = 'transcripts.parquet'
     x: str = 'x_location'
     y: str = 'y_location'
+    z: str = 'z_location'
     feature: str = 'feature_name'
     cell_id: str = 'cell_id'
     null_cell_id: str = 'UNASSIGNED'
@@ -107,6 +108,7 @@ class StandardTranscriptFields:
     row_index: str = 'row_index'
     x: str = 'x'
     y: str = 'y'
+    z: str = 'z'
     feature: str = 'feature_name'
     cell_id: str = 'cell_id'
     compartment: str = 'cell_compartment'
diff --git a/src/segger/io/preprocessor.py b/src/segger/io/preprocessor.py
index 9bbe62d..9e60d7e 100644
--- a/src/segger/io/preprocessor.py
+++ b/src/segger/io/preprocessor.py
@@ -63,7 +63,7 @@ class ISTPreprocessor(ABC):
     transcript and boundary GeoDataFrames for the given platform.
     """
 
-    def __init__(self, data_dir: Path):
+    def __init__(self, data_dir: Path, include_z: bool = False):
         """
         Parameters
         ----------
@@ -73,6 +73,7 @@ def __init__(self, data_dir: Path):
         data_dir = Path(data_dir)
         type(self)._validate_directory(data_dir)
         self.data_dir = data_dir
+        self.include_z = include_z
 
     @staticmethod
     @abstractmethod
@@ -287,11 +288,10 @@ def transcripts(self) -> pl.DataFrame:
                 .alias(std.cell_id)
             )
             # Map to standard field names
-            .rename({raw.x: std.x, raw.y: std.y, raw.feature: std.feature})
+            .rename(rename_map)
             
             # Subset to necessary fields 
-            .select([std.row_index, std.x, std.y, std.feature, std.cell_id, 
-                     std.compartment])
+            .select(select_cols)
 
             # Add numeric index
             .with_row_index()
@@ -405,6 +405,14 @@ def transcripts(self) -> pl.DataFrame:
         raw = self.tx_fields
         std = StandardTranscriptFields()
 
+        rename_map = {raw.x: std.x, raw.y: std.y, raw.feature: std.feature}
+        select_cols = [
+            std.row_index, std.x, std.y, std.feature, std.cell_id, std.compartment,
+        ]
+        if self.include_z:
+            rename_map[raw.z] = std.z
+            select_cols.append(std.z)
+
         return (
             # Read in lazily
             pl.scan_parquet(
@@ -564,7 +572,8 @@ def _infer_platform(data_dir: Path) -> str:
 
 def get_preprocessor(
     data_dir: Path,
-    platform: str | None = None
+    platform: str | None = None,
+    include_z: bool = False,
 ) -> ISTPreprocessor:
     data_dir = Path(data_dir)
     if platform is None:
@@ -575,4 +584,4 @@ def get_preprocessor(
             f"Available: {list(PREPROCESSORS)}"
         )
     cls = PREPROCESSORS[platform.lower()]
-    return cls(data_dir)
+    return cls(data_dir, include_z=include_z)