From c78667952071de052eb4a0fb410ab1b36b510f1e Mon Sep 17 00:00:00 2001
From: Bear Carlson <bearc@sdfiana008.sdf.slac.stanford.edu>
Date: Fri, 31 Jan 2025 10:13:20 -0800
Subject: [PATCH 1/7] Add flash match performance script

---
 spine/ana/script/__init__.py           |   1 +
 spine/ana/script/fmatch_performance.py | 208 +++++++++++++++++++++++++
 spine/ana/script/save.py               |   4 +-
 3 files changed, 211 insertions(+), 2 deletions(-)
 create mode 100644 spine/ana/script/fmatch_performance.py

diff --git a/spine/ana/script/__init__.py b/spine/ana/script/__init__.py
index e6e218909..9cffc8486 100644
--- a/spine/ana/script/__init__.py
+++ b/spine/ana/script/__init__.py
@@ -4,3 +4,4 @@
 from .event import *
 from .metrics import *
 from .colinear_tracks import *
+from .fmatch_performance import *
diff --git a/spine/ana/script/fmatch_performance.py b/spine/ana/script/fmatch_performance.py
new file mode 100644
index 000000000..aa8520c0a
--- /dev/null
+++ b/spine/ana/script/fmatch_performance.py
@@ -0,0 +1,208 @@
+"""Analysis module template.
+
+Evaluate performance of the fmatch algorithm for SBND.
+"""
+
+# Add the imports specific to this module here
+import numpy as np
+
+# Must import the analysis script base class
+from spine.ana.base import AnaBase
+
+# Must list the analysis script(s) here to be found by the factory.
+# You must also add it to the list of imported modules in the
+# `spine.ana.factories`!
+__all__ = ['FMatchPerformance']
+
+
+class FMatchPerformance(AnaBase):
+    """
+    Evaluate performance of the fmatch algorithm for SBND.
+    """
+
+    # Name of the analysis script (as specified in the configuration)
+    name = 'fmatch_performance'
+
+    def __init__(self, log_name, flash_tmin=-0.4, flash_tmax=1.5, flash_tolerance=0.5, **kwargs):
+        """Initialize the analysis script.
+
+        Parameters
+        ----------
+        log_name : str
+            name of the analysis script
+        flash_tmin : float, optional
+            Minimum flash time to be within beam window, by default -0.4 (us)
+        flash_tmax : float, optional
+            Maximum flash time to be within beam window, by default 1.5 (us)
+        flash_tolerance : float, optional
+            Tolerance for flash time, by default 0.5 (us)
+        """
+        # Initialize the parent class
+        super().__init__(**kwargs)
+
+        # Initialize the CSV writer(s) you want
+        self.log_name = log_name
+        self.initialize_writer(f'{log_name}_fmatch_performance')
+
+        # Add additional required data products
+        self.update_keys({'interaction_matches_t2r': True}) 
+        self.update_keys({'interaction_matches_t2r_overlap': True}) 
+        
+        # Initialize the flash time window
+        self.flash_tmin = flash_tmin
+        self.flash_tmax = flash_tmax
+        self.flash_tolerance = flash_tolerance
+
+    def process(self, data):
+        """Pass data products corresponding to one entry through the analysis.
+
+        Parameters
+        ----------
+        data : dict
+            Dictionary of data products
+        """
+        # Fetch the keys you want
+        interaction_matches_t2r = data['interaction_matches_t2r']
+        
+        # Loop over matched interactions (t2r)
+        for idx, (true_inter, reco_inter) in enumerate(interaction_matches_t2r):
+            
+            if true_inter == None: continue
+            
+            # Storage
+            row_dict = {}
+            
+            
+            # Basic info - reco
+            if reco_inter == None:
+                row_dict['reco_interaction_id'] = -1
+                row_dict['reco_is_contained'] = False
+                row_dict['reco_is_fiducial'] = False
+                row_dict['reco_vtx_x'] = -9999
+                row_dict['reco_vtx_y'] = -9999
+                row_dict['reco_vtx_z'] = -9999
+                row_dict['reco_particle_count0'] = -1
+                row_dict['reco_particle_count1'] = -1
+                row_dict['reco_particle_count2'] = -1
+                row_dict['reco_particle_count3'] = -1
+                row_dict['reco_particle_count4'] = -1
+                row_dict['reco_flash_volume_id0'] = -1
+                row_dict['reco_flash_time0'] = -9999
+                row_dict['reco_flash_id0'] = -1
+                row_dict['reco_flash_score0'] = -9999
+                row_dict['reco_flash_volume_id1'] = -1
+                row_dict['reco_flash_time1'] = -9999
+                row_dict['reco_flash_id1'] = -1
+                row_dict['reco_flash_score1'] = -9999
+                row_dict['reco_flash_total_pe'] = -1
+                row_dict['reco_flash_hypo_pe'] = -1
+                row_dict['reco_reduced_flash_score'] = -9999
+                row_dict['reco_calo_energy'] = -1
+                row_dict['reco_in_bnb'] = False
+            else:
+                row_dict['reco_interaction_id'] = reco_inter.id
+                row_dict['reco_is_contained'] = reco_inter.is_contained
+                row_dict['reco_is_fiducial'] = reco_inter.is_fiducial
+                row_dict['reco_vtx_x'] = reco_inter.vertex[0]
+                row_dict['reco_vtx_y'] = reco_inter.vertex[1]
+                row_dict['reco_vtx_z'] = reco_inter.vertex[2]
+                row_dict['reco_particle_count0'] = reco_inter.particle_counts[0]
+                row_dict['reco_particle_count1'] = reco_inter.particle_counts[1]
+                row_dict['reco_particle_count2'] = reco_inter.particle_counts[2]
+                row_dict['reco_particle_count3'] = reco_inter.particle_counts[3]
+                row_dict['reco_particle_count4'] = reco_inter.particle_counts[4]
+                
+                #Initialize flash info
+                for i in range(2): #2 volumes
+                    #Reco
+                    row_dict[f'reco_flash_volume_id{i}'] = -1
+                    row_dict[f'reco_flash_time{i}'] = -9999
+                    row_dict[f'reco_flash_id{i}'] = -1
+                    row_dict[f'reco_flash_score{i}'] = -9999
+                    
+                # Reco flash info
+                for i, fid in enumerate(reco_inter.flash_volume_ids):
+                    row_dict[f'reco_flash_volume_id{fid}'] = fid
+                    row_dict[f'reco_flash_time{fid}'] = reco_inter.flash_times[i]
+                    row_dict[f'reco_flash_id{fid}'] = reco_inter.flash_ids[i]
+                    row_dict[f'reco_flash_score{fid}'] = reco_inter.flash_scores[i]
+                
+                row_dict[f'reco_flash_total_pe'] = reco_inter.flash_total_pe
+                row_dict[f'reco_flash_hypo_pe'] = reco_inter.flash_hypo_pe
+                row_dict[f'reco_reduced_flash_score'] = (reco_inter.flash_total_pe - reco_inter.flash_hypo_pe) / reco_inter.flash_total_pe
+                
+                # Energy info - get KE of all reco particles
+                row_dict['reco_calo_energy'] = 0
+                for i,p in enumerate(reco_inter.particles):
+                    row_dict[f'reco_calo_energy'] += p.calo_ke
+                
+                if (row_dict['reco_flash_time0'] < (self.flash_tmax + self.flash_tolerance) and row_dict['reco_flash_time0'] > (self.flash_tmin - self.flash_tolerance))\
+                    or (row_dict['reco_flash_time1'] < (self.flash_tmax + self.flash_tolerance) and row_dict['reco_flash_time1'] > (self.flash_tmin - self.flash_tolerance)):
+                    row_dict['reco_in_bnb'] = True
+                else:    
+                    row_dict['reco_in_bnb'] = False
+                    
+                
+            
+            # Basic info - truth
+            row_dict['truth_interaction_id'] = true_inter.id
+            row_dict['truth_is_contained'] = true_inter.is_contained
+            row_dict['truth_is_fiducial'] = true_inter.is_fiducial
+            row_dict['truth_vtx_x'] = true_inter.vertex[0]
+            row_dict['truth_vtx_y'] = true_inter.vertex[1]
+            row_dict['truth_vtx_z'] = true_inter.vertex[2]
+            row_dict['truth_particle_count0'] = true_inter.particle_counts[0]
+            row_dict['truth_particle_count1'] = true_inter.particle_counts[1]
+            row_dict['truth_particle_count2'] = true_inter.particle_counts[2]
+            row_dict['truth_particle_count3'] = true_inter.particle_counts[3]
+            row_dict['truth_particle_count4'] = true_inter.particle_counts[4]
+            row_dict['truth_iscc'] = true_inter.current_type == 0
+            row_dict['truth_nu_id'] = true_inter.nu_id
+            
+            # Flash info
+            for i in range(2): #2 volumes
+                #Truth
+                row_dict[f'truth_flash_volume_id{i}'] = -1
+                row_dict[f'truth_flash_time{i}'] = -9999
+                row_dict[f'truth_flash_id{i}'] = -1
+                row_dict[f'truth_flash_score{i}'] = -9999
+            
+            # Truth flash info
+            for i, fid in enumerate(true_inter.flash_volume_ids):
+                row_dict[f'truth_flash_volume_id{fid}'] = fid
+                row_dict[f'truth_flash_time{fid}'] = true_inter.flash_times[i]
+                row_dict[f'truth_flash_id{fid}'] = true_inter.flash_ids[i]
+                #Check if the index for this score exists
+                if i < len(true_inter.flash_scores):
+                    row_dict[f'true_flash_score{fid}'] = true_inter.flash_scores[i]
+                row_dict[f'truth_flash_score{fid}'] = true_inter.flash_scores[i]
+            
+            row_dict[f'truth_flash_total_pe'] = true_inter.flash_total_pe
+            row_dict[f'truth_flash_hypo_pe'] = true_inter.flash_hypo_pe
+            row_dict[f'truth_reduced_flash_score'] = (true_inter.flash_total_pe - true_inter.flash_hypo_pe) / true_inter.flash_total_pe
+                
+            # Energy info - get KE of all true particles
+            row_dict['truth_calo_energy'] = 0
+            # Also get average time of particles
+            row_dict['truth_avg_time'] = 0
+            
+            nprim = 0
+            for i,p in enumerate(true_inter.particles):
+                row_dict[f'truth_calo_energy'] += p.calo_ke
+                if p.is_primary:
+                    row_dict[f'truth_avg_time'] += p.t*1e-3 #us
+                    nprim += 1
+            if nprim > 0:
+                row_dict['truth_avg_time'] /= nprim
+            
+            #Efficiency - is the flash time within the BNB window?
+            # Truth values get some slack because of the time resolution +- 0.2 us
+            if row_dict['truth_avg_time'] < (self.flash_tmax + self.flash_tolerance) and row_dict['truth_avg_time'] > (self.flash_tmin - self.flash_tolerance):
+                row_dict['truth_in_bnb'] = True
+            else:
+                row_dict['truth_in_bnb'] = False
+            
+            # Overlap
+            overlap = data['interaction_matches_t2r_overlap'][idx]
+            row_dict.update({'match_overlap': overlap})
+            self.append(f'{self.log_name}_fmatch_performance', **row_dict)
\ No newline at end of file
diff --git a/spine/ana/script/save.py b/spine/ana/script/save.py
index 33e5fb0d2..b4f4ce7c8 100644
--- a/spine/ana/script/save.py
+++ b/spine/ana/script/save.py
@@ -139,7 +139,9 @@ def process(self, data):
             prefix, obj_type = key.split('_')
             other = other_prefix[prefix]
             attrs = self.attrs[key]
+            attrs_other = self.attrs[f'{other}_{obj_type}']
             lengths = self.lengths
+            lengths_other = self.lengths
             if (self.match_mode is None or
                 self.match_mode == f'{other}_to_{prefix}'):
                 # If there is no matches, save objects by themselves
@@ -151,8 +153,6 @@ def process(self, data):
                 # match on a single row
                 match_suffix = f'{prefix[0]}2{other[0]}'
                 match_key = f'{obj_type[:-1]}_matches_{match_suffix}'
-                attrs_other = self.attrs[f'{other}_{obj_type}']
-                lengths_other = self.lengths # TODO
                 for idx, (obj_i, obj_j) in enumerate(data[match_key]):
                     src_dict = obj_i.scalar_dict(attrs, lengths)
                     if obj_j is not None:

From e36aa626058fcad522e21e235cf9a64e6af3c8af Mon Sep 17 00:00:00 2001
From: Bear Carlson <bearc@sdfiana008.sdf.slac.stanford.edu>
Date: Fri, 31 Jan 2025 10:13:35 -0800
Subject: [PATCH 2/7] Add flash match performance script

---
 spine/ana/base.py          |  79 +---------------
 spine/ana/diag/__init__.py |   2 -
 spine/ana/diag/track.py    | 189 -------------------------------------
 spine/ana/factories.py     |   4 +-
 4 files changed, 4 insertions(+), 270 deletions(-)
 delete mode 100644 spine/ana/diag/track.py

diff --git a/spine/ana/base.py b/spine/ana/base.py
index 0ad4a410c..db1d3a188 100644
--- a/spine/ana/base.py
+++ b/spine/ana/base.py
@@ -44,15 +44,8 @@ class AnaBase(ABC):
     # Valid run modes
     _run_modes = ('reco', 'truth', 'both', 'all')
 
-    # List of known point modes for true particles and their corresponding keys
-    _point_modes = (
-            ('points', 'points_label'),
-            ('points_adapt', 'points'),
-            ('points_g4', 'points_g4')
-    )
-
-    def __init__(self, obj_type=None, run_mode=None, truth_point_mode=None,
-                 append=False, overwrite=False, log_dir=None, prefix=None):
+    def __init__(self, obj_type=None, run_mode=None, append=False,
+                 overwrite=False, log_dir=None, prefix=None):
         """Initialize default anlysis script object properties.
 
         Parameters
@@ -63,10 +56,6 @@ def __init__(self, obj_type=None, run_mode=None, truth_point_mode=None,
             If specified, tells whether the analysis script must run on
             reconstructed ('reco'), true ('true') or both objects
             ('both' or 'all')
-        truth_point_mode : str, optional
-            If specified, tells which attribute of the :class:`TruthFragment`,
-            :class:`TruthParticle` or :class:`TruthInteraction` object to use
-            to fetch its point coordinates
         append : bool, default False
             If True, appends existing CSV files instead of creating new ones
         overwrite : bool, default False
@@ -125,14 +114,6 @@ def __init__(self, obj_type=None, run_mode=None, truth_point_mode=None,
         # Update underlying keys, if needed
         self.update_keys({k:True for k in self.obj_keys})
 
-        # If a truth point mode is specified, store it
-        if truth_point_mode is not None:
-            assert truth_point_mode in self.point_modes, (
-                     "The `truth_point_mode` argument must be one of "
-                    f"{self.point_modes.keys()}. Got `{truth_point_mode}` instead.")
-            self.truth_point_mode = truth_point_mode
-            self.truth_index_mode = truth_point_mode.replace('points', 'index')
-
         # Store the append flag
         self.append_file = append
         self.overwrite_file = overwrite
@@ -186,18 +167,6 @@ def keys(self, keys):
         """
         self._keys = tuple(keys.items())
 
-    @property
-    def point_modes(self):
-        """Dictionary which makes the correspondance between the name of a true
-        object point attribute with the underlying point tensor it points to.
-
-        Returns
-        -------
-        Dict[str, str]
-            Dictionary of (attribute, key) mapping for point coordinates
-        """
-        return dict(self._point_modes)
-
     def update_keys(self, update_dict):
         """Update the underlying set of keys and their necessity in place.
 
@@ -280,50 +249,6 @@ def __call__(self, data, entry=None):
         # Run the analysis script
         return self.process(data_filter)
 
-    def get_index(self, obj):
-        """Get a certain pre-defined index attribute of an object.
-
-        The :class:`TruthFragment`, :class:`TruthParticle` and
-        :class:`TruthInteraction` objects index are obtained using the
-        `truth_index_mode` attribute of the class.
-
-        Parameters
-        ----------
-        obj : Union[FragmentBase, ParticleBase, InteractionBase]
-            Fragment, Particle or Interaction object
-
-        Results
-        -------
-        np.ndarray
-           (N) Object index
-        """
-        if not obj.is_truth:
-            return obj.index
-        else:
-            return getattr(obj, self.truth_index_mode)
-
-    def get_points(self, obj):
-        """Get a certain pre-defined point attribute of an object.
-
-        The :class:`TruthFragment`, :class:`TruthParticle` and
-        :class:`TruthInteraction` objects points are obtained using the
-        `truth_point_mode` attribute of the class.
-
-        Parameters
-        ----------
-        obj : Union[FragmentBase, ParticleBase, InteractionBase]
-            Fragment, Particle or Interaction object
-
-        Results
-        -------
-        np.ndarray
-           (N, 3) Point coordinates
-        """
-        if not obj.is_truth:
-            return obj.points
-        else:
-            return getattr(obj, self.truth_point_mode)
-
     @abstractmethod
     def process(self, data):
         """Place-holder method to be defined in each analysis script.
diff --git a/spine/ana/diag/__init__.py b/spine/ana/diag/__init__.py
index 63f7c32c5..257eaec49 100644
--- a/spine/ana/diag/__init__.py
+++ b/spine/ana/diag/__init__.py
@@ -3,10 +3,8 @@
 This submodule is use to run basic diagnostics analyses such as:
 - Track dE/dx profile
 - Track energy reconstruction
-- Track completeness
 - Shower start dE/dx
 - ...
 '''
 
 from .shower import *
-from .track import *
diff --git a/spine/ana/diag/track.py b/spine/ana/diag/track.py
deleted file mode 100644
index 0bc721be0..000000000
--- a/spine/ana/diag/track.py
+++ /dev/null
@@ -1,189 +0,0 @@
-"""Module to evaluate diagnostic metrics on tracks."""
-
-import numpy as np
-from scipy.spatial.distance import cdist
-
-from spine.ana.base import AnaBase
-
-from spine.utils.globals import TRACK_SHP
-from spine.utils.numba_local import principal_components
-
-
-__all__ = ['TrackCompletenessAna']
-
-
-class TrackCompletenessAna(AnaBase):
-    """This analysis script identifies gaps in tracks and measures the
-    cumulative length of these gaps relative to the track length.
-
-    This is a useful diagnostic tool to evaluate the space-point efficiency
-    on tracks (good standard candal as track should have exactly no gap in
-    a perfectly efficient detector).
-    """
-
-    # Name of the analysis script (as specified in the configuration)
-    name = 'track_completeness'
-
-    def __init__(self, time_window=None, run_mode='both',
-                 truth_point_mode='points', **kwargs):
-        """Initialize the analysis script.
-
-        Parameters
-        ----------
-        time_window : List[float]
-            Time window within which to include particle (only works for `truth`)
-        **kwargs : dict, optional
-            Additional arguments to pass to :class:`AnaBase`
-        """
-        # Initialize the parent class
-        super().__init__('particle', run_mode, truth_point_mode, **kwargs)
-
-        # Store the time window
-        self.time_window = time_window
-        assert time_window is None or len(time_window) == 2, (
-                "Time window must be specified as an array of two values.")
-        assert time_window is None or run_mode == 'truth', (
-                "Time of reconstructed particle is unknown.")
-
-        # Make sure the metadata is provided (rasterization needed)
-        self.update_keys({'meta': True})
-
-        # Initialize the CSV writer(s) you want
-        for prefix in self.prefixes:
-            self.initialize_writer(prefix)
-
-    def process(self, data):
-        """Evaluate track completeness for tracks in one entry.
-
-        Parameters
-        ----------
-        data : dict
-            Dictionary of data products
-        """
-        # Fetch the pixel size in this image (assume cubic cells)
-        pixel_size = data['meta'].size[0]
-
-        # Loop over the types of particle data products
-        for key in self.obj_keys:
-            # Fetch the prefix ('reco' or 'truth')
-            prefix = key.split('_')[0]
-
-            # Loop over particle objects
-            for part in data[key]:
-                # Check that the particle is a track
-                if part.shape != TRACK_SHP:
-                    continue
-
-                # If needed, check on the particle time
-                if self.time_window is not None:
-                    if part.t < self.time_window[0] or part.t > self.time_window[1]:
-                        continue
-
-                # Initialize the particle dictionary
-                comp_dict = {'particle_id': part.id}
-
-                # Fetch the particle point coordinates
-                points = self.get_points(part)
-
-                # Find start/end points, collapse onto track cluster
-                start = points[np.argmin(cdist([part.start_point], points))]
-                end = points[np.argmin(cdist([part.end_point], points))]
-
-                # Add the direction of the track
-                vec = end - start
-                length = np.linalg.norm(vec)
-                if length:
-                    vec /= length
-
-                comp_dict['size'] = len(points)
-                comp_dict['length'] = length
-                comp_dict.update(
-                        {'dir_x': vec[0], 'dir_y': vec[1], 'dir_z': vec[2]})
-
-                # Chunk out the track along gaps, estimate gap length
-                chunk_labels = self.cluster_track_chunks(
-                        points, start, end, pixel_size)
-                gaps = self.sequential_cluster_distances(
-                        points, chunk_labels, start)
-
-                # Substract minimum gap distance due to rasterization
-                min_gap = pixel_size/np.max(np.abs(vec))
-                gaps -= min_gap
-
-                # Store gap information
-                comp_dict['num_gaps'] = len(gaps)
-                comp_dict['gap_length'] = np.sum(gaps)
-                comp_dict['gap_frac'] = np.sum(gaps)/length
-
-                # Append the dictionary to the CSV log
-                self.append(prefix, **comp_dict)
-
-    @staticmethod
-    def cluster_track_chunks(points, start_point, end_point, pixel_size):
-        """Find point where the track is broken, divide out the track
-        into self-contained chunks which are Linf connect (Moore neighbors).
-
-        Parameters
-        ----------
-        points : np.ndarray
-            (N, 3) List of track cluster point coordinates
-        start_point : np.ndarray
-            (3) Start point of the track cluster
-        end_point : np.ndarray
-            (3) End point of the track cluster
-        pixel_size : float
-            Dimension of one pixel, used to identify what is big enough to
-            constitute a break
-
-        Returns
-        -------
-        np.ndarray
-            (N) Track chunk labels
-        """
-        # Project and cluster on the projected axis
-        direction = (end_point-start_point)/np.linalg.norm(end_point-start_point)
-        projs = np.dot(points - start_point, direction)
-        perm = np.argsort(projs)
-        seps = projs[perm][1:] - projs[perm][:-1]
-        breaks = np.where(seps > pixel_size*1.1)[0] + 1
-        cluster_labels = np.empty(len(projs), dtype=int)
-        for i, index in enumerate(np.split(np.arange(len(projs)), breaks)):
-            cluster_labels[perm[index]] = i
-            
-        return cluster_labels
-
-    @staticmethod
-    def sequential_cluster_distances(points, labels, start_point):
-        """Order clusters in order of distance from a starting point, compute
-        the distances between successive clusters. 
-
-        Parameters
-        ----------
-        points : np.ndarray
-            (N, 3) List of track cluster point coordinates
-        labels : np.ndarray
-            (N) Track chunk labels
-        start_point : np.ndarray
-            (3) Start point of the track cluster
-        """
-        # If there's only one cluster, nothing to do here
-        unique_labels = np.unique(labels)
-        if len(unique_labels) < 2:
-            return np.empty(0, dtype=float), np.empty(0, dtype=float)
-        
-        # Order clusters
-        start_dist = cdist([start_point], points).flatten()
-        start_clust_dist = np.empty(len(unique_labels))
-        for i, c in enumerate(unique_labels):
-            start_clust_dist[i] = np.min(start_dist[labels == c])
-        ordered_labels = unique_labels[np.argsort(start_clust_dist)]
-        
-        # Compute the intercluster distance and relative angle
-        n_gaps = len(ordered_labels) - 1
-        dists = np.empty(n_gaps, dtype=float)
-        for i in range(n_gaps):
-            points_i = points[labels == ordered_labels[i]]
-            points_j = points[labels == ordered_labels[i + 1]]
-            dists[i] = np.min(cdist(points_i, points_j))
-            
-        return dists
diff --git a/spine/ana/factories.py b/spine/ana/factories.py
index 1bc6f45c8..f00b4625c 100644
--- a/spine/ana/factories.py
+++ b/spine/ana/factories.py
@@ -2,11 +2,11 @@
 
 from spine.utils.factory import module_dict, instantiate
 
-from . import diag, metric, script
+from . import metric, script
 
 # Build a dictionary of available calibration modules
 ANA_DICT = {}
-for module in [diag, metric, script]:
+for module in [metric, script]:
     ANA_DICT.update(**module_dict(module))
 
 

From 496d29f4b69c157642bd8b6a3d885a400b034dfd Mon Sep 17 00:00:00 2001
From: Bear Carlson <bcarlson1@ufl.edu>
Date: Fri, 31 Jan 2025 10:19:20 -0800
Subject: [PATCH 3/7] Update spine/ana/base.py

---
 spine/ana/base.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/spine/ana/base.py b/spine/ana/base.py
index db1d3a188..4387a08c5 100644
--- a/spine/ana/base.py
+++ b/spine/ana/base.py
@@ -44,8 +44,15 @@ class AnaBase(ABC):
     # Valid run modes
     _run_modes = ('reco', 'truth', 'both', 'all')
 
-    def __init__(self, obj_type=None, run_mode=None, append=False,
-                 overwrite=False, log_dir=None, prefix=None):
+    # List of known point modes for true particles and their corresponding keys
+    _point_modes = (
+            ('points', 'points_label'),
+            ('points_adapt', 'points'),
+            ('points_g4', 'points_g4')
+    )
+
+    def __init__(self, obj_type=None, run_mode=None, truth_point_mode=None,
+                 append=False, overwrite=False, log_dir=None, prefix=None):
         """Initialize default anlysis script object properties.
 
         Parameters

From 75e58daeb6a2a24de6cf3914870f40ef68a0a0d8 Mon Sep 17 00:00:00 2001
From: Bear Carlson <bcarlson1@ufl.edu>
Date: Fri, 31 Jan 2025 10:20:36 -0800
Subject: [PATCH 4/7] Update spine/ana/base.py

---
 spine/ana/base.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/spine/ana/base.py b/spine/ana/base.py
index 4387a08c5..663a596df 100644
--- a/spine/ana/base.py
+++ b/spine/ana/base.py
@@ -62,7 +62,10 @@ def __init__(self, obj_type=None, run_mode=None, truth_point_mode=None,
         run_mode : str, optional
             If specified, tells whether the analysis script must run on
             reconstructed ('reco'), true ('true') or both objects
-            ('both' or 'all')
+        truth_point_mode : str, optional
+            If specified, tells which attribute of the :class:`TruthFragment`,
+            :class:`TruthParticle` or :class:`TruthInteraction` object to use
+            to fetch its point coordinates
         append : bool, default False
             If True, appends existing CSV files instead of creating new ones
         overwrite : bool, default False

From eac30dbcf8977e82fcd97a79267899d524acf82a Mon Sep 17 00:00:00 2001
From: Bear Carlson <bcarlson1@ufl.edu>
Date: Fri, 31 Jan 2025 10:20:53 -0800
Subject: [PATCH 5/7] Update spine/ana/base.py

---
 spine/ana/base.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/spine/ana/base.py b/spine/ana/base.py
index 663a596df..71238a40e 100644
--- a/spine/ana/base.py
+++ b/spine/ana/base.py
@@ -62,6 +62,7 @@ def __init__(self, obj_type=None, run_mode=None, truth_point_mode=None,
         run_mode : str, optional
             If specified, tells whether the analysis script must run on
             reconstructed ('reco'), true ('true') or both objects
+            ('both' or 'all')
         truth_point_mode : str, optional
             If specified, tells which attribute of the :class:`TruthFragment`,
             :class:`TruthParticle` or :class:`TruthInteraction` object to use

From 438a4ea1e385bf2bc7491fe042269e7dcf8832bd Mon Sep 17 00:00:00 2001
From: Bear Carlson <bearc@sdfiana008.sdf.slac.stanford.edu>
Date: Fri, 31 Jan 2025 10:24:54 -0800
Subject: [PATCH 6/7] Keep changes from develop

---
 spine/ana/base.py          |  81 +++++++++++++++-
 spine/ana/diag/__init__.py |   2 +
 spine/ana/diag/track.py    | 189 +++++++++++++++++++++++++++++++++++++
 spine/ana/factories.py     |   6 +-
 spine/ana/script/save.py   |   6 +-
 5 files changed, 275 insertions(+), 9 deletions(-)
 create mode 100644 spine/ana/diag/track.py

diff --git a/spine/ana/base.py b/spine/ana/base.py
index db1d3a188..c36c56b2a 100644
--- a/spine/ana/base.py
+++ b/spine/ana/base.py
@@ -44,8 +44,15 @@ class AnaBase(ABC):
     # Valid run modes
     _run_modes = ('reco', 'truth', 'both', 'all')
 
-    def __init__(self, obj_type=None, run_mode=None, append=False,
-                 overwrite=False, log_dir=None, prefix=None):
+    # List of known point modes for true particles and their corresponding keys
+    _point_modes = (
+            ('points', 'points_label'),
+            ('points_adapt', 'points'),
+            ('points_g4', 'points_g4')
+    )
+
+    def __init__(self, obj_type=None, run_mode=None, truth_point_mode=None,
+                 append=False, overwrite=False, log_dir=None, prefix=None):
         """Initialize default anlysis script object properties.
 
         Parameters
@@ -56,6 +63,10 @@ def __init__(self, obj_type=None, run_mode=None, append=False,
             If specified, tells whether the analysis script must run on
             reconstructed ('reco'), true ('true') or both objects
             ('both' or 'all')
+        truth_point_mode : str, optional
+            If specified, tells which attribute of the :class:`TruthFragment`,
+            :class:`TruthParticle` or :class:`TruthInteraction` object to use
+            to fetch its point coordinates
         append : bool, default False
             If True, appends existing CSV files instead of creating new ones
         overwrite : bool, default False
@@ -114,6 +125,14 @@ def __init__(self, obj_type=None, run_mode=None, append=False,
         # Update underlying keys, if needed
         self.update_keys({k:True for k in self.obj_keys})
 
+        # If a truth point mode is specified, store it
+        if truth_point_mode is not None:
+            assert truth_point_mode in self.point_modes, (
+                     "The `truth_point_mode` argument must be one of "
+                    f"{self.point_modes.keys()}. Got `{truth_point_mode}` instead.")
+            self.truth_point_mode = truth_point_mode
+            self.truth_index_mode = truth_point_mode.replace('points', 'index')
+
         # Store the append flag
         self.append_file = append
         self.overwrite_file = overwrite
@@ -167,6 +186,18 @@ def keys(self, keys):
         """
         self._keys = tuple(keys.items())
 
+    @property
+    def point_modes(self):
+        """Dictionary which makes the correspondance between the name of a true
+        object point attribute with the underlying point tensor it points to.
+
+        Returns
+        -------
+        Dict[str, str]
+            Dictionary of (attribute, key) mapping for point coordinates
+        """
+        return dict(self._point_modes)
+
     def update_keys(self, update_dict):
         """Update the underlying set of keys and their necessity in place.
 
@@ -249,6 +280,50 @@ def __call__(self, data, entry=None):
         # Run the analysis script
         return self.process(data_filter)
 
+    def get_index(self, obj):
+        """Get a certain pre-defined index attribute of an object.
+
+        The :class:`TruthFragment`, :class:`TruthParticle` and
+        :class:`TruthInteraction` objects index are obtained using the
+        `truth_index_mode` attribute of the class.
+
+        Parameters
+        ----------
+        obj : Union[FragmentBase, ParticleBase, InteractionBase]
+            Fragment, Particle or Interaction object
+
+        Results
+        -------
+        np.ndarray
+           (N) Object index
+        """
+        if not obj.is_truth:
+            return obj.index
+        else:
+            return getattr(obj, self.truth_index_mode)
+
+    def get_points(self, obj):
+        """Get a certain pre-defined point attribute of an object.
+
+        The :class:`TruthFragment`, :class:`TruthParticle` and
+        :class:`TruthInteraction` objects points are obtained using the
+        `truth_point_mode` attribute of the class.
+
+        Parameters
+        ----------
+        obj : Union[FragmentBase, ParticleBase, InteractionBase]
+            Fragment, Particle or Interaction object
+
+        Results
+        -------
+        np.ndarray
+           (N, 3) Point coordinates
+        """
+        if not obj.is_truth:
+            return obj.points
+        else:
+            return getattr(obj, self.truth_point_mode)
+
     @abstractmethod
     def process(self, data):
         """Place-holder method to be defined in each analysis script.
@@ -258,4 +333,4 @@ def process(self, data):
         data : dict
             Filtered data dictionary for one entry
         """
-        raise NotImplementedError('Must define the `process` function')
+        raise NotImplementedError('Must define the `process` function')
\ No newline at end of file
diff --git a/spine/ana/diag/__init__.py b/spine/ana/diag/__init__.py
index 257eaec49..2f80173ae 100644
--- a/spine/ana/diag/__init__.py
+++ b/spine/ana/diag/__init__.py
@@ -3,8 +3,10 @@
 This submodule is use to run basic diagnostics analyses such as:
 - Track dE/dx profile
 - Track energy reconstruction
+- Track Completeness
 - Shower start dE/dx
 - ...
 '''
 
 from .shower import *
+from .track import *
diff --git a/spine/ana/diag/track.py b/spine/ana/diag/track.py
new file mode 100644
index 000000000..702489f58
--- /dev/null
+++ b/spine/ana/diag/track.py
@@ -0,0 +1,189 @@
+"""Module to evaluate diagnostic metrics on tracks."""
+
+import numpy as np
+from scipy.spatial.distance import cdist
+
+from spine.ana.base import AnaBase
+
+from spine.utils.globals import TRACK_SHP
+from spine.utils.numba_local import principal_components
+
+
+__all__ = ['TrackCompletenessAna']
+
+
+class TrackCompletenessAna(AnaBase):
+    """This analysis script identifies gaps in tracks and measures the
+    cumulative length of these gaps relative to the track length.
+
+    This is a useful diagnostic tool to evaluate the space-point efficiency
+    on tracks (good standard candal as track should have exactly no gap in
+    a perfectly efficient detector).
+    """
+
+    # Name of the analysis script (as specified in the configuration)
+    name = 'track_completeness'
+
+    def __init__(self, time_window=None, run_mode='both',
+                 truth_point_mode='points', **kwargs):
+        """Initialize the analysis script.
+
+        Parameters
+        ----------
+        time_window : List[float]
+            Time window within which to include particle (only works for `truth`)
+        **kwargs : dict, optional
+            Additional arguments to pass to :class:`AnaBase`
+        """
+        # Initialize the parent class
+        super().__init__('particle', run_mode, truth_point_mode, **kwargs)
+
+        # Store the time window
+        self.time_window = time_window
+        assert time_window is None or len(time_window) == 2, (
+                "Time window must be specified as an array of two values.")
+        assert time_window is None or run_mode == 'truth', (
+                "Time of reconstructed particle is unknown.")
+
+        # Make sure the metadata is provided (rasterization needed)
+        self.update_keys({'meta': True})
+
+        # Initialize the CSV writer(s) you want
+        for prefix in self.prefixes:
+            self.initialize_writer(prefix)
+
+    def process(self, data):
+        """Evaluate track completeness for tracks in one entry.
+
+        Parameters
+        ----------
+        data : dict
+            Dictionary of data products
+        """
+        # Fetch the pixel size in this image (assume cubic cells)
+        pixel_size = data['meta'].size[0]
+
+        # Loop over the types of particle data products
+        for key in self.obj_keys:
+            # Fetch the prefix ('reco' or 'truth')
+            prefix = key.split('_')[0]
+
+            # Loop over particle objects
+            for part in data[key]:
+                # Check that the particle is a track
+                if part.shape != TRACK_SHP:
+                    continue
+
+                # If needed, check on the particle time
+                if self.time_window is not None:
+                    if part.t < self.time_window[0] or part.t > self.time_window[1]:
+                        continue
+
+                # Initialize the particle dictionary
+                comp_dict = {'particle_id': part.id}
+
+                # Fetch the particle point coordinates
+                points = self.get_points(part)
+
+                # Find start/end points, collapse onto track cluster
+                start = points[np.argmin(cdist([part.start_point], points))]
+                end = points[np.argmin(cdist([part.end_point], points))]
+
+                # Add the direction of the track
+                vec = end - start
+                length = np.linalg.norm(vec)
+                if length:
+                    vec /= length
+
+                comp_dict['size'] = len(points)
+                comp_dict['length'] = length
+                comp_dict.update(
+                        {'dir_x': vec[0], 'dir_y': vec[1], 'dir_z': vec[2]})
+
+                # Chunk out the track along gaps, estimate gap length
+                chunk_labels = self.cluster_track_chunks(
+                        points, start, end, pixel_size)
+                gaps = self.sequential_cluster_distances(
+                        points, chunk_labels, start)
+
+                # Substract minimum gap distance due to rasterization
+                min_gap = pixel_size/np.max(np.abs(vec))
+                gaps -= min_gap
+
+                # Store gap information
+                comp_dict['num_gaps'] = len(gaps)
+                comp_dict['gap_length'] = np.sum(gaps)
+                comp_dict['gap_frac'] = np.sum(gaps)/length
+
+                # Append the dictionary to the CSV log
+                self.append(prefix, **comp_dict)
+
+    @staticmethod
+    def cluster_track_chunks(points, start_point, end_point, pixel_size):
+        """Find point where the track is broken, divide out the track
+        into self-contained chunks which are Linf connect (Moore neighbors).
+
+        Parameters
+        ----------
+        points : np.ndarray
+            (N, 3) List of track cluster point coordinates
+        start_point : np.ndarray
+            (3) Start point of the track cluster
+        end_point : np.ndarray
+            (3) End point of the track cluster
+        pixel_size : float
+            Dimension of one pixel, used to identify what is big enough to
+            constitute a break
+
+        Returns
+        -------
+        np.ndarray
+            (N) Track chunk labels
+        """
+        # Project and cluster on the projected axis
+        direction = (end_point-start_point)/np.linalg.norm(end_point-start_point)
+        projs = np.dot(points - start_point, direction)
+        perm = np.argsort(projs)
+        seps = projs[perm][1:] - projs[perm][:-1]
+        breaks = np.where(seps > pixel_size*1.1)[0] + 1
+        cluster_labels = np.empty(len(projs), dtype=int)
+        for i, index in enumerate(np.split(np.arange(len(projs)), breaks)):
+            cluster_labels[perm[index]] = i
+            
+        return cluster_labels
+
+    @staticmethod
+    def sequential_cluster_distances(points, labels, start_point):
+        """Order clusters in order of distance from a starting point, compute
+        the distances between successive clusters. 
+
+        Parameters
+        ----------
+        points : np.ndarray
+            (N, 3) List of track cluster point coordinates
+        labels : np.ndarray
+            (N) Track chunk labels
+        start_point : np.ndarray
+            (3) Start point of the track cluster
+        """
+        # If there's only one cluster, nothing to do here
+        unique_labels = np.unique(labels)
+        if len(unique_labels) < 2:
+            return np.empty(0, dtype=float), np.empty(0, dtype=float)
+        
+        # Order clusters
+        start_dist = cdist([start_point], points).flatten()
+        start_clust_dist = np.empty(len(unique_labels))
+        for i, c in enumerate(unique_labels):
+            start_clust_dist[i] = np.min(start_dist[labels == c])
+        ordered_labels = unique_labels[np.argsort(start_clust_dist)]
+        
+        # Compute the intercluster distance and relative angle
+        n_gaps = len(ordered_labels) - 1
+        dists = np.empty(n_gaps, dtype=float)
+        for i in range(n_gaps):
+            points_i = points[labels == ordered_labels[i]]
+            points_j = points[labels == ordered_labels[i + 1]]
+            dists[i] = np.min(cdist(points_i, points_j))
+            
+        return dists
\ No newline at end of file
diff --git a/spine/ana/factories.py b/spine/ana/factories.py
index f00b4625c..8ae50a4b4 100644
--- a/spine/ana/factories.py
+++ b/spine/ana/factories.py
@@ -2,11 +2,11 @@
 
 from spine.utils.factory import module_dict, instantiate
 
-from . import metric, script
+from . import diag, metric, script
 
 # Build a dictionary of available calibration modules
 ANA_DICT = {}
-for module in [metric, script]:
+for module in [diag, metric, script]:
     ANA_DICT.update(**module_dict(module))
 
 
@@ -44,4 +44,4 @@ def ana_script_factory(name, cfg, overwrite=None, log_dir=None, prefix=None):
                 ANA_DICT, cfg, overwrite=overwrite, log_dir=log_dir, prefix=prefix)
     else:
         return instantiate(
-                ANA_DICT, cfg, log_dir=log_dir, prefix=prefix)
+                ANA_DICT, cfg, log_dir=log_dir, prefix=prefix)
\ No newline at end of file
diff --git a/spine/ana/script/save.py b/spine/ana/script/save.py
index b4f4ce7c8..bbfdcc5e0 100644
--- a/spine/ana/script/save.py
+++ b/spine/ana/script/save.py
@@ -139,9 +139,7 @@ def process(self, data):
             prefix, obj_type = key.split('_')
             other = other_prefix[prefix]
             attrs = self.attrs[key]
-            attrs_other = self.attrs[f'{other}_{obj_type}']
             lengths = self.lengths
-            lengths_other = self.lengths
             if (self.match_mode is None or
                 self.match_mode == f'{other}_to_{prefix}'):
                 # If there is no matches, save objects by themselves
@@ -153,6 +151,8 @@ def process(self, data):
                 # match on a single row
                 match_suffix = f'{prefix[0]}2{other[0]}'
                 match_key = f'{obj_type[:-1]}_matches_{match_suffix}'
+                attrs_other = self.attrs[f'{other}_{obj_type}']
+                lengths_other = self.lengths # TODO
                 for idx, (obj_i, obj_j) in enumerate(data[match_key]):
                     src_dict = obj_i.scalar_dict(attrs, lengths)
                     if obj_j is not None:
@@ -167,4 +167,4 @@ def process(self, data):
 
                     row_dict = {**src_dict, **tgt_dict}
                     row_dict.update({'match_overlap': overlap})
-                    self.append(key, **row_dict)
+                    self.append(key, **row_dict)
\ No newline at end of file

From e7ef639c91c6207ce50dc25f1cddb0ddd0a041aa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Drielsma?= <francois.drielsma@gmail.com>
Date: Mon, 3 Mar 2025 15:53:42 -0800
Subject: [PATCH 7/7] Changes for flash matching

---
 spine/ana/script/fmatch_performance.py | 19 +++++++++++++++----
 spine/version.py                       |  2 +-
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/spine/ana/script/fmatch_performance.py b/spine/ana/script/fmatch_performance.py
index aa8520c0a..20a7a84c1 100644
--- a/spine/ana/script/fmatch_performance.py
+++ b/spine/ana/script/fmatch_performance.py
@@ -47,6 +47,9 @@ def __init__(self, log_name, flash_tmin=-0.4, flash_tmax=1.5, flash_tolerance=0.
         # Add additional required data products
         self.update_keys({'interaction_matches_t2r': True}) 
         self.update_keys({'interaction_matches_t2r_overlap': True}) 
+
+        self.update_keys({'interaction_matches_r2t': True}) 
+        self.update_keys({'interaction_matches_r2t_overlap': True}) 
         
         # Initialize the flash time window
         self.flash_tmin = flash_tmin
@@ -62,12 +65,14 @@ def process(self, data):
             Dictionary of data products
         """
         # Fetch the keys you want
+        interaction_matches_r2t = data['interaction_matches_r2t']
         interaction_matches_t2r = data['interaction_matches_t2r']
         
         # Loop over matched interactions (t2r)
         for idx, (true_inter, reco_inter) in enumerate(interaction_matches_t2r):
-            
-            if true_inter == None: continue
+        #for idx, (reco_inter, true_inter) in enumerate(interaction_matches_r2t):
+            #if true_inter == None: continue
+            #if reco_inter == None: continue
             
             # Storage
             row_dict = {}
@@ -181,6 +186,8 @@ def process(self, data):
             row_dict[f'truth_flash_hypo_pe'] = true_inter.flash_hypo_pe
             row_dict[f'truth_reduced_flash_score'] = (true_inter.flash_total_pe - true_inter.flash_hypo_pe) / true_inter.flash_total_pe
                 
+            #Energy info - get E of all true particles
+            row_dict['truth_energy_init'] = 0
             # Energy info - get KE of all true particles
             row_dict['truth_calo_energy'] = 0
             # Also get average time of particles
@@ -189,11 +196,14 @@ def process(self, data):
             nprim = 0
             for i,p in enumerate(true_inter.particles):
                 row_dict[f'truth_calo_energy'] += p.calo_ke
+                row_dict['truth_energy_init'] += p.energy_init
                 if p.is_primary:
                     row_dict[f'truth_avg_time'] += p.t*1e-3 #us
                     nprim += 1
             if nprim > 0:
                 row_dict['truth_avg_time'] /= nprim
+            else:
+                row_dict['truth_avg_time'] = -9999
             
             #Efficiency - is the flash time within the BNB window?
             # Truth values get some slack because of the time resolution +- 0.2 us
@@ -203,6 +213,7 @@ def process(self, data):
                 row_dict['truth_in_bnb'] = False
             
             # Overlap
-            overlap = data['interaction_matches_t2r_overlap'][idx]
-            row_dict.update({'match_overlap': overlap})
+            #overlap = data['interaction_matches_t2r_overlap'][idx]
+            #overlap = data['interaction_matches_r2t_overlap'][idx]
+            #row_dict.update({'match_overlap': overlap})
             self.append(f'{self.log_name}_fmatch_performance', **row_dict)
\ No newline at end of file
diff --git a/spine/version.py b/spine/version.py
index 4c0a99926..7f684ea71 100644
--- a/spine/version.py
+++ b/spine/version.py
@@ -1,3 +1,3 @@
 """Module which stores the current software version."""
 
-__version__ = '0.2.1'
+__version__ = '0.3.0'