From c78667952071de052eb4a0fb410ab1b36b510f1e Mon Sep 17 00:00:00 2001 From: Bear Carlson Date: Fri, 31 Jan 2025 10:13:20 -0800 Subject: [PATCH 1/7] Add flash match performance script --- spine/ana/script/__init__.py | 1 + spine/ana/script/fmatch_performance.py | 208 +++++++++++++++++++++++++ spine/ana/script/save.py | 4 +- 3 files changed, 211 insertions(+), 2 deletions(-) create mode 100644 spine/ana/script/fmatch_performance.py diff --git a/spine/ana/script/__init__.py b/spine/ana/script/__init__.py index e6e218909..9cffc8486 100644 --- a/spine/ana/script/__init__.py +++ b/spine/ana/script/__init__.py @@ -4,3 +4,4 @@ from .event import * from .metrics import * from .colinear_tracks import * +from .fmatch_performance import * diff --git a/spine/ana/script/fmatch_performance.py b/spine/ana/script/fmatch_performance.py new file mode 100644 index 000000000..aa8520c0a --- /dev/null +++ b/spine/ana/script/fmatch_performance.py @@ -0,0 +1,208 @@ +"""Analysis module template. + +Evaluate performance of the fmatch algorithm for SBND. +""" + +# Add the imports specific to this module here +import numpy as np + +# Must import the analysis script base class +from spine.ana.base import AnaBase + +# Must list the analysis script(s) here to be found by the factory. +# You must also add it to the list of imported modules in the +# `spine.ana.factories`! +__all__ = ['FMatchPerformance'] + + +class FMatchPerformance(AnaBase): + """ + Evaluate performance of the fmatch algorithm for SBND. + """ + + # Name of the analysis script (as specified in the configuration) + name = 'fmatch_performance' + + def __init__(self, log_name, flash_tmin=-0.4, flash_tmax=1.5, flash_tolerance=0.5, **kwargs): + """Initialize the analysis script. + + Parameters + ---------- + log_name : str + name of the analysis script + flash_tmin : float, optional + Minimum flash time to be within beam window, by default -0.4 (us) + flash_tmax : float, optional + Maximum flash time to be within beam window, by default 1.5 (us) + flash_tolerance : float, optional + Tolerance for flash time, by default 0.5 (us) + """ + # Initialize the parent class + super().__init__(**kwargs) + + # Initialize the CSV writer(s) you want + self.log_name = log_name + self.initialize_writer(f'{log_name}_fmatch_performance') + + # Add additional required data products + self.update_keys({'interaction_matches_t2r': True}) + self.update_keys({'interaction_matches_t2r_overlap': True}) + + # Initialize the flash time window + self.flash_tmin = flash_tmin + self.flash_tmax = flash_tmax + self.flash_tolerance = flash_tolerance + + def process(self, data): + """Pass data products corresponding to one entry through the analysis. + + Parameters + ---------- + data : dict + Dictionary of data products + """ + # Fetch the keys you want + interaction_matches_t2r = data['interaction_matches_t2r'] + + # Loop over matched interactions (t2r) + for idx, (true_inter, reco_inter) in enumerate(interaction_matches_t2r): + + if true_inter == None: continue + + # Storage + row_dict = {} + + + # Basic info - reco + if reco_inter == None: + row_dict['reco_interaction_id'] = -1 + row_dict['reco_is_contained'] = False + row_dict['reco_is_fiducial'] = False + row_dict['reco_vtx_x'] = -9999 + row_dict['reco_vtx_y'] = -9999 + row_dict['reco_vtx_z'] = -9999 + row_dict['reco_particle_count0'] = -1 + row_dict['reco_particle_count1'] = -1 + row_dict['reco_particle_count2'] = -1 + row_dict['reco_particle_count3'] = -1 + row_dict['reco_particle_count4'] = -1 + row_dict['reco_flash_volume_id0'] = -1 + row_dict['reco_flash_time0'] = -9999 + row_dict['reco_flash_id0'] = -1 + row_dict['reco_flash_score0'] = -9999 + row_dict['reco_flash_volume_id1'] = -1 + row_dict['reco_flash_time1'] = -9999 + row_dict['reco_flash_id1'] = -1 + row_dict['reco_flash_score1'] = -9999 + row_dict['reco_flash_total_pe'] = -1 + row_dict['reco_flash_hypo_pe'] = -1 + row_dict['reco_reduced_flash_score'] = -9999 + row_dict['reco_calo_energy'] = -1 + row_dict['reco_in_bnb'] = False + else: + row_dict['reco_interaction_id'] = reco_inter.id + row_dict['reco_is_contained'] = reco_inter.is_contained + row_dict['reco_is_fiducial'] = reco_inter.is_fiducial + row_dict['reco_vtx_x'] = reco_inter.vertex[0] + row_dict['reco_vtx_y'] = reco_inter.vertex[1] + row_dict['reco_vtx_z'] = reco_inter.vertex[2] + row_dict['reco_particle_count0'] = reco_inter.particle_counts[0] + row_dict['reco_particle_count1'] = reco_inter.particle_counts[1] + row_dict['reco_particle_count2'] = reco_inter.particle_counts[2] + row_dict['reco_particle_count3'] = reco_inter.particle_counts[3] + row_dict['reco_particle_count4'] = reco_inter.particle_counts[4] + + #Initialize flash info + for i in range(2): #2 volumes + #Reco + row_dict[f'reco_flash_volume_id{i}'] = -1 + row_dict[f'reco_flash_time{i}'] = -9999 + row_dict[f'reco_flash_id{i}'] = -1 + row_dict[f'reco_flash_score{i}'] = -9999 + + # Reco flash info + for i, fid in enumerate(reco_inter.flash_volume_ids): + row_dict[f'reco_flash_volume_id{fid}'] = fid + row_dict[f'reco_flash_time{fid}'] = reco_inter.flash_times[i] + row_dict[f'reco_flash_id{fid}'] = reco_inter.flash_ids[i] + row_dict[f'reco_flash_score{fid}'] = reco_inter.flash_scores[i] + + row_dict[f'reco_flash_total_pe'] = reco_inter.flash_total_pe + row_dict[f'reco_flash_hypo_pe'] = reco_inter.flash_hypo_pe + row_dict[f'reco_reduced_flash_score'] = (reco_inter.flash_total_pe - reco_inter.flash_hypo_pe) / reco_inter.flash_total_pe + + # Energy info - get KE of all reco particles + row_dict['reco_calo_energy'] = 0 + for i,p in enumerate(reco_inter.particles): + row_dict[f'reco_calo_energy'] += p.calo_ke + + if (row_dict['reco_flash_time0'] < (self.flash_tmax + self.flash_tolerance) and row_dict['reco_flash_time0'] > (self.flash_tmin - self.flash_tolerance))\ + or (row_dict['reco_flash_time1'] < (self.flash_tmax + self.flash_tolerance) and row_dict['reco_flash_time1'] > (self.flash_tmin - self.flash_tolerance)): + row_dict['reco_in_bnb'] = True + else: + row_dict['reco_in_bnb'] = False + + + + # Basic info - truth + row_dict['truth_interaction_id'] = true_inter.id + row_dict['truth_is_contained'] = true_inter.is_contained + row_dict['truth_is_fiducial'] = true_inter.is_fiducial + row_dict['truth_vtx_x'] = true_inter.vertex[0] + row_dict['truth_vtx_y'] = true_inter.vertex[1] + row_dict['truth_vtx_z'] = true_inter.vertex[2] + row_dict['truth_particle_count0'] = true_inter.particle_counts[0] + row_dict['truth_particle_count1'] = true_inter.particle_counts[1] + row_dict['truth_particle_count2'] = true_inter.particle_counts[2] + row_dict['truth_particle_count3'] = true_inter.particle_counts[3] + row_dict['truth_particle_count4'] = true_inter.particle_counts[4] + row_dict['truth_iscc'] = true_inter.current_type == 0 + row_dict['truth_nu_id'] = true_inter.nu_id + + # Flash info + for i in range(2): #2 volumes + #Truth + row_dict[f'truth_flash_volume_id{i}'] = -1 + row_dict[f'truth_flash_time{i}'] = -9999 + row_dict[f'truth_flash_id{i}'] = -1 + row_dict[f'truth_flash_score{i}'] = -9999 + + # Truth flash info + for i, fid in enumerate(true_inter.flash_volume_ids): + row_dict[f'truth_flash_volume_id{fid}'] = fid + row_dict[f'truth_flash_time{fid}'] = true_inter.flash_times[i] + row_dict[f'truth_flash_id{fid}'] = true_inter.flash_ids[i] + #Check if the index for this score exists + if i < len(true_inter.flash_scores): + row_dict[f'true_flash_score{fid}'] = true_inter.flash_scores[i] + row_dict[f'truth_flash_score{fid}'] = true_inter.flash_scores[i] + + row_dict[f'truth_flash_total_pe'] = true_inter.flash_total_pe + row_dict[f'truth_flash_hypo_pe'] = true_inter.flash_hypo_pe + row_dict[f'truth_reduced_flash_score'] = (true_inter.flash_total_pe - true_inter.flash_hypo_pe) / true_inter.flash_total_pe + + # Energy info - get KE of all true particles + row_dict['truth_calo_energy'] = 0 + # Also get average time of particles + row_dict['truth_avg_time'] = 0 + + nprim = 0 + for i,p in enumerate(true_inter.particles): + row_dict[f'truth_calo_energy'] += p.calo_ke + if p.is_primary: + row_dict[f'truth_avg_time'] += p.t*1e-3 #us + nprim += 1 + if nprim > 0: + row_dict['truth_avg_time'] /= nprim + + #Efficiency - is the flash time within the BNB window? + # Truth values get some slack because of the time resolution +- 0.2 us + if row_dict['truth_avg_time'] < (self.flash_tmax + self.flash_tolerance) and row_dict['truth_avg_time'] > (self.flash_tmin - self.flash_tolerance): + row_dict['truth_in_bnb'] = True + else: + row_dict['truth_in_bnb'] = False + + # Overlap + overlap = data['interaction_matches_t2r_overlap'][idx] + row_dict.update({'match_overlap': overlap}) + self.append(f'{self.log_name}_fmatch_performance', **row_dict) \ No newline at end of file diff --git a/spine/ana/script/save.py b/spine/ana/script/save.py index 33e5fb0d2..b4f4ce7c8 100644 --- a/spine/ana/script/save.py +++ b/spine/ana/script/save.py @@ -139,7 +139,9 @@ def process(self, data): prefix, obj_type = key.split('_') other = other_prefix[prefix] attrs = self.attrs[key] + attrs_other = self.attrs[f'{other}_{obj_type}'] lengths = self.lengths + lengths_other = self.lengths if (self.match_mode is None or self.match_mode == f'{other}_to_{prefix}'): # If there is no matches, save objects by themselves @@ -151,8 +153,6 @@ def process(self, data): # match on a single row match_suffix = f'{prefix[0]}2{other[0]}' match_key = f'{obj_type[:-1]}_matches_{match_suffix}' - attrs_other = self.attrs[f'{other}_{obj_type}'] - lengths_other = self.lengths # TODO for idx, (obj_i, obj_j) in enumerate(data[match_key]): src_dict = obj_i.scalar_dict(attrs, lengths) if obj_j is not None: From e36aa626058fcad522e21e235cf9a64e6af3c8af Mon Sep 17 00:00:00 2001 From: Bear Carlson Date: Fri, 31 Jan 2025 10:13:35 -0800 Subject: [PATCH 2/7] Add flash match performance script --- spine/ana/base.py | 79 +--------------- spine/ana/diag/__init__.py | 2 - spine/ana/diag/track.py | 189 ------------------------------------- spine/ana/factories.py | 4 +- 4 files changed, 4 insertions(+), 270 deletions(-) delete mode 100644 spine/ana/diag/track.py diff --git a/spine/ana/base.py b/spine/ana/base.py index 0ad4a410c..db1d3a188 100644 --- a/spine/ana/base.py +++ b/spine/ana/base.py @@ -44,15 +44,8 @@ class AnaBase(ABC): # Valid run modes _run_modes = ('reco', 'truth', 'both', 'all') - # List of known point modes for true particles and their corresponding keys - _point_modes = ( - ('points', 'points_label'), - ('points_adapt', 'points'), - ('points_g4', 'points_g4') - ) - - def __init__(self, obj_type=None, run_mode=None, truth_point_mode=None, - append=False, overwrite=False, log_dir=None, prefix=None): + def __init__(self, obj_type=None, run_mode=None, append=False, + overwrite=False, log_dir=None, prefix=None): """Initialize default anlysis script object properties. Parameters @@ -63,10 +56,6 @@ def __init__(self, obj_type=None, run_mode=None, truth_point_mode=None, If specified, tells whether the analysis script must run on reconstructed ('reco'), true ('true') or both objects ('both' or 'all') - truth_point_mode : str, optional - If specified, tells which attribute of the :class:`TruthFragment`, - :class:`TruthParticle` or :class:`TruthInteraction` object to use - to fetch its point coordinates append : bool, default False If True, appends existing CSV files instead of creating new ones overwrite : bool, default False @@ -125,14 +114,6 @@ def __init__(self, obj_type=None, run_mode=None, truth_point_mode=None, # Update underlying keys, if needed self.update_keys({k:True for k in self.obj_keys}) - # If a truth point mode is specified, store it - if truth_point_mode is not None: - assert truth_point_mode in self.point_modes, ( - "The `truth_point_mode` argument must be one of " - f"{self.point_modes.keys()}. Got `{truth_point_mode}` instead.") - self.truth_point_mode = truth_point_mode - self.truth_index_mode = truth_point_mode.replace('points', 'index') - # Store the append flag self.append_file = append self.overwrite_file = overwrite @@ -186,18 +167,6 @@ def keys(self, keys): """ self._keys = tuple(keys.items()) - @property - def point_modes(self): - """Dictionary which makes the correspondance between the name of a true - object point attribute with the underlying point tensor it points to. - - Returns - ------- - Dict[str, str] - Dictionary of (attribute, key) mapping for point coordinates - """ - return dict(self._point_modes) - def update_keys(self, update_dict): """Update the underlying set of keys and their necessity in place. @@ -280,50 +249,6 @@ def __call__(self, data, entry=None): # Run the analysis script return self.process(data_filter) - def get_index(self, obj): - """Get a certain pre-defined index attribute of an object. - - The :class:`TruthFragment`, :class:`TruthParticle` and - :class:`TruthInteraction` objects index are obtained using the - `truth_index_mode` attribute of the class. - - Parameters - ---------- - obj : Union[FragmentBase, ParticleBase, InteractionBase] - Fragment, Particle or Interaction object - - Results - ------- - np.ndarray - (N) Object index - """ - if not obj.is_truth: - return obj.index - else: - return getattr(obj, self.truth_index_mode) - - def get_points(self, obj): - """Get a certain pre-defined point attribute of an object. - - The :class:`TruthFragment`, :class:`TruthParticle` and - :class:`TruthInteraction` objects points are obtained using the - `truth_point_mode` attribute of the class. - - Parameters - ---------- - obj : Union[FragmentBase, ParticleBase, InteractionBase] - Fragment, Particle or Interaction object - - Results - ------- - np.ndarray - (N, 3) Point coordinates - """ - if not obj.is_truth: - return obj.points - else: - return getattr(obj, self.truth_point_mode) - @abstractmethod def process(self, data): """Place-holder method to be defined in each analysis script. diff --git a/spine/ana/diag/__init__.py b/spine/ana/diag/__init__.py index 63f7c32c5..257eaec49 100644 --- a/spine/ana/diag/__init__.py +++ b/spine/ana/diag/__init__.py @@ -3,10 +3,8 @@ This submodule is use to run basic diagnostics analyses such as: - Track dE/dx profile - Track energy reconstruction -- Track completeness - Shower start dE/dx - ... ''' from .shower import * -from .track import * diff --git a/spine/ana/diag/track.py b/spine/ana/diag/track.py deleted file mode 100644 index 0bc721be0..000000000 --- a/spine/ana/diag/track.py +++ /dev/null @@ -1,189 +0,0 @@ -"""Module to evaluate diagnostic metrics on tracks.""" - -import numpy as np -from scipy.spatial.distance import cdist - -from spine.ana.base import AnaBase - -from spine.utils.globals import TRACK_SHP -from spine.utils.numba_local import principal_components - - -__all__ = ['TrackCompletenessAna'] - - -class TrackCompletenessAna(AnaBase): - """This analysis script identifies gaps in tracks and measures the - cumulative length of these gaps relative to the track length. - - This is a useful diagnostic tool to evaluate the space-point efficiency - on tracks (good standard candal as track should have exactly no gap in - a perfectly efficient detector). - """ - - # Name of the analysis script (as specified in the configuration) - name = 'track_completeness' - - def __init__(self, time_window=None, run_mode='both', - truth_point_mode='points', **kwargs): - """Initialize the analysis script. - - Parameters - ---------- - time_window : List[float] - Time window within which to include particle (only works for `truth`) - **kwargs : dict, optional - Additional arguments to pass to :class:`AnaBase` - """ - # Initialize the parent class - super().__init__('particle', run_mode, truth_point_mode, **kwargs) - - # Store the time window - self.time_window = time_window - assert time_window is None or len(time_window) == 2, ( - "Time window must be specified as an array of two values.") - assert time_window is None or run_mode == 'truth', ( - "Time of reconstructed particle is unknown.") - - # Make sure the metadata is provided (rasterization needed) - self.update_keys({'meta': True}) - - # Initialize the CSV writer(s) you want - for prefix in self.prefixes: - self.initialize_writer(prefix) - - def process(self, data): - """Evaluate track completeness for tracks in one entry. - - Parameters - ---------- - data : dict - Dictionary of data products - """ - # Fetch the pixel size in this image (assume cubic cells) - pixel_size = data['meta'].size[0] - - # Loop over the types of particle data products - for key in self.obj_keys: - # Fetch the prefix ('reco' or 'truth') - prefix = key.split('_')[0] - - # Loop over particle objects - for part in data[key]: - # Check that the particle is a track - if part.shape != TRACK_SHP: - continue - - # If needed, check on the particle time - if self.time_window is not None: - if part.t < self.time_window[0] or part.t > self.time_window[1]: - continue - - # Initialize the particle dictionary - comp_dict = {'particle_id': part.id} - - # Fetch the particle point coordinates - points = self.get_points(part) - - # Find start/end points, collapse onto track cluster - start = points[np.argmin(cdist([part.start_point], points))] - end = points[np.argmin(cdist([part.end_point], points))] - - # Add the direction of the track - vec = end - start - length = np.linalg.norm(vec) - if length: - vec /= length - - comp_dict['size'] = len(points) - comp_dict['length'] = length - comp_dict.update( - {'dir_x': vec[0], 'dir_y': vec[1], 'dir_z': vec[2]}) - - # Chunk out the track along gaps, estimate gap length - chunk_labels = self.cluster_track_chunks( - points, start, end, pixel_size) - gaps = self.sequential_cluster_distances( - points, chunk_labels, start) - - # Substract minimum gap distance due to rasterization - min_gap = pixel_size/np.max(np.abs(vec)) - gaps -= min_gap - - # Store gap information - comp_dict['num_gaps'] = len(gaps) - comp_dict['gap_length'] = np.sum(gaps) - comp_dict['gap_frac'] = np.sum(gaps)/length - - # Append the dictionary to the CSV log - self.append(prefix, **comp_dict) - - @staticmethod - def cluster_track_chunks(points, start_point, end_point, pixel_size): - """Find point where the track is broken, divide out the track - into self-contained chunks which are Linf connect (Moore neighbors). - - Parameters - ---------- - points : np.ndarray - (N, 3) List of track cluster point coordinates - start_point : np.ndarray - (3) Start point of the track cluster - end_point : np.ndarray - (3) End point of the track cluster - pixel_size : float - Dimension of one pixel, used to identify what is big enough to - constitute a break - - Returns - ------- - np.ndarray - (N) Track chunk labels - """ - # Project and cluster on the projected axis - direction = (end_point-start_point)/np.linalg.norm(end_point-start_point) - projs = np.dot(points - start_point, direction) - perm = np.argsort(projs) - seps = projs[perm][1:] - projs[perm][:-1] - breaks = np.where(seps > pixel_size*1.1)[0] + 1 - cluster_labels = np.empty(len(projs), dtype=int) - for i, index in enumerate(np.split(np.arange(len(projs)), breaks)): - cluster_labels[perm[index]] = i - - return cluster_labels - - @staticmethod - def sequential_cluster_distances(points, labels, start_point): - """Order clusters in order of distance from a starting point, compute - the distances between successive clusters. - - Parameters - ---------- - points : np.ndarray - (N, 3) List of track cluster point coordinates - labels : np.ndarray - (N) Track chunk labels - start_point : np.ndarray - (3) Start point of the track cluster - """ - # If there's only one cluster, nothing to do here - unique_labels = np.unique(labels) - if len(unique_labels) < 2: - return np.empty(0, dtype=float), np.empty(0, dtype=float) - - # Order clusters - start_dist = cdist([start_point], points).flatten() - start_clust_dist = np.empty(len(unique_labels)) - for i, c in enumerate(unique_labels): - start_clust_dist[i] = np.min(start_dist[labels == c]) - ordered_labels = unique_labels[np.argsort(start_clust_dist)] - - # Compute the intercluster distance and relative angle - n_gaps = len(ordered_labels) - 1 - dists = np.empty(n_gaps, dtype=float) - for i in range(n_gaps): - points_i = points[labels == ordered_labels[i]] - points_j = points[labels == ordered_labels[i + 1]] - dists[i] = np.min(cdist(points_i, points_j)) - - return dists diff --git a/spine/ana/factories.py b/spine/ana/factories.py index 1bc6f45c8..f00b4625c 100644 --- a/spine/ana/factories.py +++ b/spine/ana/factories.py @@ -2,11 +2,11 @@ from spine.utils.factory import module_dict, instantiate -from . import diag, metric, script +from . import metric, script # Build a dictionary of available calibration modules ANA_DICT = {} -for module in [diag, metric, script]: +for module in [metric, script]: ANA_DICT.update(**module_dict(module)) From 496d29f4b69c157642bd8b6a3d885a400b034dfd Mon Sep 17 00:00:00 2001 From: Bear Carlson Date: Fri, 31 Jan 2025 10:19:20 -0800 Subject: [PATCH 3/7] Update spine/ana/base.py --- spine/ana/base.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/spine/ana/base.py b/spine/ana/base.py index db1d3a188..4387a08c5 100644 --- a/spine/ana/base.py +++ b/spine/ana/base.py @@ -44,8 +44,15 @@ class AnaBase(ABC): # Valid run modes _run_modes = ('reco', 'truth', 'both', 'all') - def __init__(self, obj_type=None, run_mode=None, append=False, - overwrite=False, log_dir=None, prefix=None): + # List of known point modes for true particles and their corresponding keys + _point_modes = ( + ('points', 'points_label'), + ('points_adapt', 'points'), + ('points_g4', 'points_g4') + ) + + def __init__(self, obj_type=None, run_mode=None, truth_point_mode=None, + append=False, overwrite=False, log_dir=None, prefix=None): """Initialize default anlysis script object properties. Parameters From 75e58daeb6a2a24de6cf3914870f40ef68a0a0d8 Mon Sep 17 00:00:00 2001 From: Bear Carlson Date: Fri, 31 Jan 2025 10:20:36 -0800 Subject: [PATCH 4/7] Update spine/ana/base.py --- spine/ana/base.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/spine/ana/base.py b/spine/ana/base.py index 4387a08c5..663a596df 100644 --- a/spine/ana/base.py +++ b/spine/ana/base.py @@ -62,7 +62,10 @@ def __init__(self, obj_type=None, run_mode=None, truth_point_mode=None, run_mode : str, optional If specified, tells whether the analysis script must run on reconstructed ('reco'), true ('true') or both objects - ('both' or 'all') + truth_point_mode : str, optional + If specified, tells which attribute of the :class:`TruthFragment`, + :class:`TruthParticle` or :class:`TruthInteraction` object to use + to fetch its point coordinates append : bool, default False If True, appends existing CSV files instead of creating new ones overwrite : bool, default False From eac30dbcf8977e82fcd97a79267899d524acf82a Mon Sep 17 00:00:00 2001 From: Bear Carlson Date: Fri, 31 Jan 2025 10:20:53 -0800 Subject: [PATCH 5/7] Update spine/ana/base.py --- spine/ana/base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/spine/ana/base.py b/spine/ana/base.py index 663a596df..71238a40e 100644 --- a/spine/ana/base.py +++ b/spine/ana/base.py @@ -62,6 +62,7 @@ def __init__(self, obj_type=None, run_mode=None, truth_point_mode=None, run_mode : str, optional If specified, tells whether the analysis script must run on reconstructed ('reco'), true ('true') or both objects + ('both' or 'all') truth_point_mode : str, optional If specified, tells which attribute of the :class:`TruthFragment`, :class:`TruthParticle` or :class:`TruthInteraction` object to use From 438a4ea1e385bf2bc7491fe042269e7dcf8832bd Mon Sep 17 00:00:00 2001 From: Bear Carlson Date: Fri, 31 Jan 2025 10:24:54 -0800 Subject: [PATCH 6/7] Keep changes from develop --- spine/ana/base.py | 81 +++++++++++++++- spine/ana/diag/__init__.py | 2 + spine/ana/diag/track.py | 189 +++++++++++++++++++++++++++++++++++++ spine/ana/factories.py | 6 +- spine/ana/script/save.py | 6 +- 5 files changed, 275 insertions(+), 9 deletions(-) create mode 100644 spine/ana/diag/track.py diff --git a/spine/ana/base.py b/spine/ana/base.py index db1d3a188..c36c56b2a 100644 --- a/spine/ana/base.py +++ b/spine/ana/base.py @@ -44,8 +44,15 @@ class AnaBase(ABC): # Valid run modes _run_modes = ('reco', 'truth', 'both', 'all') - def __init__(self, obj_type=None, run_mode=None, append=False, - overwrite=False, log_dir=None, prefix=None): + # List of known point modes for true particles and their corresponding keys + _point_modes = ( + ('points', 'points_label'), + ('points_adapt', 'points'), + ('points_g4', 'points_g4') + ) + + def __init__(self, obj_type=None, run_mode=None, truth_point_mode=None, + append=False, overwrite=False, log_dir=None, prefix=None): """Initialize default anlysis script object properties. Parameters @@ -56,6 +63,10 @@ def __init__(self, obj_type=None, run_mode=None, append=False, If specified, tells whether the analysis script must run on reconstructed ('reco'), true ('true') or both objects ('both' or 'all') + truth_point_mode : str, optional + If specified, tells which attribute of the :class:`TruthFragment`, + :class:`TruthParticle` or :class:`TruthInteraction` object to use + to fetch its point coordinates append : bool, default False If True, appends existing CSV files instead of creating new ones overwrite : bool, default False @@ -114,6 +125,14 @@ def __init__(self, obj_type=None, run_mode=None, append=False, # Update underlying keys, if needed self.update_keys({k:True for k in self.obj_keys}) + # If a truth point mode is specified, store it + if truth_point_mode is not None: + assert truth_point_mode in self.point_modes, ( + "The `truth_point_mode` argument must be one of " + f"{self.point_modes.keys()}. Got `{truth_point_mode}` instead.") + self.truth_point_mode = truth_point_mode + self.truth_index_mode = truth_point_mode.replace('points', 'index') + # Store the append flag self.append_file = append self.overwrite_file = overwrite @@ -167,6 +186,18 @@ def keys(self, keys): """ self._keys = tuple(keys.items()) + @property + def point_modes(self): + """Dictionary which makes the correspondance between the name of a true + object point attribute with the underlying point tensor it points to. + + Returns + ------- + Dict[str, str] + Dictionary of (attribute, key) mapping for point coordinates + """ + return dict(self._point_modes) + def update_keys(self, update_dict): """Update the underlying set of keys and their necessity in place. @@ -249,6 +280,50 @@ def __call__(self, data, entry=None): # Run the analysis script return self.process(data_filter) + def get_index(self, obj): + """Get a certain pre-defined index attribute of an object. + + The :class:`TruthFragment`, :class:`TruthParticle` and + :class:`TruthInteraction` objects index are obtained using the + `truth_index_mode` attribute of the class. + + Parameters + ---------- + obj : Union[FragmentBase, ParticleBase, InteractionBase] + Fragment, Particle or Interaction object + + Results + ------- + np.ndarray + (N) Object index + """ + if not obj.is_truth: + return obj.index + else: + return getattr(obj, self.truth_index_mode) + + def get_points(self, obj): + """Get a certain pre-defined point attribute of an object. + + The :class:`TruthFragment`, :class:`TruthParticle` and + :class:`TruthInteraction` objects points are obtained using the + `truth_point_mode` attribute of the class. + + Parameters + ---------- + obj : Union[FragmentBase, ParticleBase, InteractionBase] + Fragment, Particle or Interaction object + + Results + ------- + np.ndarray + (N, 3) Point coordinates + """ + if not obj.is_truth: + return obj.points + else: + return getattr(obj, self.truth_point_mode) + @abstractmethod def process(self, data): """Place-holder method to be defined in each analysis script. @@ -258,4 +333,4 @@ def process(self, data): data : dict Filtered data dictionary for one entry """ - raise NotImplementedError('Must define the `process` function') + raise NotImplementedError('Must define the `process` function') \ No newline at end of file diff --git a/spine/ana/diag/__init__.py b/spine/ana/diag/__init__.py index 257eaec49..2f80173ae 100644 --- a/spine/ana/diag/__init__.py +++ b/spine/ana/diag/__init__.py @@ -3,8 +3,10 @@ This submodule is use to run basic diagnostics analyses such as: - Track dE/dx profile - Track energy reconstruction +- Track Completeness - Shower start dE/dx - ... ''' from .shower import * +from .track import * diff --git a/spine/ana/diag/track.py b/spine/ana/diag/track.py new file mode 100644 index 000000000..702489f58 --- /dev/null +++ b/spine/ana/diag/track.py @@ -0,0 +1,189 @@ +"""Module to evaluate diagnostic metrics on tracks.""" + +import numpy as np +from scipy.spatial.distance import cdist + +from spine.ana.base import AnaBase + +from spine.utils.globals import TRACK_SHP +from spine.utils.numba_local import principal_components + + +__all__ = ['TrackCompletenessAna'] + + +class TrackCompletenessAna(AnaBase): + """This analysis script identifies gaps in tracks and measures the + cumulative length of these gaps relative to the track length. + + This is a useful diagnostic tool to evaluate the space-point efficiency + on tracks (good standard candal as track should have exactly no gap in + a perfectly efficient detector). + """ + + # Name of the analysis script (as specified in the configuration) + name = 'track_completeness' + + def __init__(self, time_window=None, run_mode='both', + truth_point_mode='points', **kwargs): + """Initialize the analysis script. + + Parameters + ---------- + time_window : List[float] + Time window within which to include particle (only works for `truth`) + **kwargs : dict, optional + Additional arguments to pass to :class:`AnaBase` + """ + # Initialize the parent class + super().__init__('particle', run_mode, truth_point_mode, **kwargs) + + # Store the time window + self.time_window = time_window + assert time_window is None or len(time_window) == 2, ( + "Time window must be specified as an array of two values.") + assert time_window is None or run_mode == 'truth', ( + "Time of reconstructed particle is unknown.") + + # Make sure the metadata is provided (rasterization needed) + self.update_keys({'meta': True}) + + # Initialize the CSV writer(s) you want + for prefix in self.prefixes: + self.initialize_writer(prefix) + + def process(self, data): + """Evaluate track completeness for tracks in one entry. + + Parameters + ---------- + data : dict + Dictionary of data products + """ + # Fetch the pixel size in this image (assume cubic cells) + pixel_size = data['meta'].size[0] + + # Loop over the types of particle data products + for key in self.obj_keys: + # Fetch the prefix ('reco' or 'truth') + prefix = key.split('_')[0] + + # Loop over particle objects + for part in data[key]: + # Check that the particle is a track + if part.shape != TRACK_SHP: + continue + + # If needed, check on the particle time + if self.time_window is not None: + if part.t < self.time_window[0] or part.t > self.time_window[1]: + continue + + # Initialize the particle dictionary + comp_dict = {'particle_id': part.id} + + # Fetch the particle point coordinates + points = self.get_points(part) + + # Find start/end points, collapse onto track cluster + start = points[np.argmin(cdist([part.start_point], points))] + end = points[np.argmin(cdist([part.end_point], points))] + + # Add the direction of the track + vec = end - start + length = np.linalg.norm(vec) + if length: + vec /= length + + comp_dict['size'] = len(points) + comp_dict['length'] = length + comp_dict.update( + {'dir_x': vec[0], 'dir_y': vec[1], 'dir_z': vec[2]}) + + # Chunk out the track along gaps, estimate gap length + chunk_labels = self.cluster_track_chunks( + points, start, end, pixel_size) + gaps = self.sequential_cluster_distances( + points, chunk_labels, start) + + # Substract minimum gap distance due to rasterization + min_gap = pixel_size/np.max(np.abs(vec)) + gaps -= min_gap + + # Store gap information + comp_dict['num_gaps'] = len(gaps) + comp_dict['gap_length'] = np.sum(gaps) + comp_dict['gap_frac'] = np.sum(gaps)/length + + # Append the dictionary to the CSV log + self.append(prefix, **comp_dict) + + @staticmethod + def cluster_track_chunks(points, start_point, end_point, pixel_size): + """Find point where the track is broken, divide out the track + into self-contained chunks which are Linf connect (Moore neighbors). + + Parameters + ---------- + points : np.ndarray + (N, 3) List of track cluster point coordinates + start_point : np.ndarray + (3) Start point of the track cluster + end_point : np.ndarray + (3) End point of the track cluster + pixel_size : float + Dimension of one pixel, used to identify what is big enough to + constitute a break + + Returns + ------- + np.ndarray + (N) Track chunk labels + """ + # Project and cluster on the projected axis + direction = (end_point-start_point)/np.linalg.norm(end_point-start_point) + projs = np.dot(points - start_point, direction) + perm = np.argsort(projs) + seps = projs[perm][1:] - projs[perm][:-1] + breaks = np.where(seps > pixel_size*1.1)[0] + 1 + cluster_labels = np.empty(len(projs), dtype=int) + for i, index in enumerate(np.split(np.arange(len(projs)), breaks)): + cluster_labels[perm[index]] = i + + return cluster_labels + + @staticmethod + def sequential_cluster_distances(points, labels, start_point): + """Order clusters in order of distance from a starting point, compute + the distances between successive clusters. + + Parameters + ---------- + points : np.ndarray + (N, 3) List of track cluster point coordinates + labels : np.ndarray + (N) Track chunk labels + start_point : np.ndarray + (3) Start point of the track cluster + """ + # If there's only one cluster, nothing to do here + unique_labels = np.unique(labels) + if len(unique_labels) < 2: + return np.empty(0, dtype=float), np.empty(0, dtype=float) + + # Order clusters + start_dist = cdist([start_point], points).flatten() + start_clust_dist = np.empty(len(unique_labels)) + for i, c in enumerate(unique_labels): + start_clust_dist[i] = np.min(start_dist[labels == c]) + ordered_labels = unique_labels[np.argsort(start_clust_dist)] + + # Compute the intercluster distance and relative angle + n_gaps = len(ordered_labels) - 1 + dists = np.empty(n_gaps, dtype=float) + for i in range(n_gaps): + points_i = points[labels == ordered_labels[i]] + points_j = points[labels == ordered_labels[i + 1]] + dists[i] = np.min(cdist(points_i, points_j)) + + return dists \ No newline at end of file diff --git a/spine/ana/factories.py b/spine/ana/factories.py index f00b4625c..8ae50a4b4 100644 --- a/spine/ana/factories.py +++ b/spine/ana/factories.py @@ -2,11 +2,11 @@ from spine.utils.factory import module_dict, instantiate -from . import metric, script +from . import diag, metric, script # Build a dictionary of available calibration modules ANA_DICT = {} -for module in [metric, script]: +for module in [diag, metric, script]: ANA_DICT.update(**module_dict(module)) @@ -44,4 +44,4 @@ def ana_script_factory(name, cfg, overwrite=None, log_dir=None, prefix=None): ANA_DICT, cfg, overwrite=overwrite, log_dir=log_dir, prefix=prefix) else: return instantiate( - ANA_DICT, cfg, log_dir=log_dir, prefix=prefix) + ANA_DICT, cfg, log_dir=log_dir, prefix=prefix) \ No newline at end of file diff --git a/spine/ana/script/save.py b/spine/ana/script/save.py index b4f4ce7c8..bbfdcc5e0 100644 --- a/spine/ana/script/save.py +++ b/spine/ana/script/save.py @@ -139,9 +139,7 @@ def process(self, data): prefix, obj_type = key.split('_') other = other_prefix[prefix] attrs = self.attrs[key] - attrs_other = self.attrs[f'{other}_{obj_type}'] lengths = self.lengths - lengths_other = self.lengths if (self.match_mode is None or self.match_mode == f'{other}_to_{prefix}'): # If there is no matches, save objects by themselves @@ -153,6 +151,8 @@ def process(self, data): # match on a single row match_suffix = f'{prefix[0]}2{other[0]}' match_key = f'{obj_type[:-1]}_matches_{match_suffix}' + attrs_other = self.attrs[f'{other}_{obj_type}'] + lengths_other = self.lengths # TODO for idx, (obj_i, obj_j) in enumerate(data[match_key]): src_dict = obj_i.scalar_dict(attrs, lengths) if obj_j is not None: @@ -167,4 +167,4 @@ def process(self, data): row_dict = {**src_dict, **tgt_dict} row_dict.update({'match_overlap': overlap}) - self.append(key, **row_dict) + self.append(key, **row_dict) \ No newline at end of file From e7ef639c91c6207ce50dc25f1cddb0ddd0a041aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Drielsma?= Date: Mon, 3 Mar 2025 15:53:42 -0800 Subject: [PATCH 7/7] Changes for flash matching --- spine/ana/script/fmatch_performance.py | 19 +++++++++++++++---- spine/version.py | 2 +- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/spine/ana/script/fmatch_performance.py b/spine/ana/script/fmatch_performance.py index aa8520c0a..20a7a84c1 100644 --- a/spine/ana/script/fmatch_performance.py +++ b/spine/ana/script/fmatch_performance.py @@ -47,6 +47,9 @@ def __init__(self, log_name, flash_tmin=-0.4, flash_tmax=1.5, flash_tolerance=0. # Add additional required data products self.update_keys({'interaction_matches_t2r': True}) self.update_keys({'interaction_matches_t2r_overlap': True}) + + self.update_keys({'interaction_matches_r2t': True}) + self.update_keys({'interaction_matches_r2t_overlap': True}) # Initialize the flash time window self.flash_tmin = flash_tmin @@ -62,12 +65,14 @@ def process(self, data): Dictionary of data products """ # Fetch the keys you want + interaction_matches_r2t = data['interaction_matches_r2t'] interaction_matches_t2r = data['interaction_matches_t2r'] # Loop over matched interactions (t2r) for idx, (true_inter, reco_inter) in enumerate(interaction_matches_t2r): - - if true_inter == None: continue + #for idx, (reco_inter, true_inter) in enumerate(interaction_matches_r2t): + #if true_inter == None: continue + #if reco_inter == None: continue # Storage row_dict = {} @@ -181,6 +186,8 @@ def process(self, data): row_dict[f'truth_flash_hypo_pe'] = true_inter.flash_hypo_pe row_dict[f'truth_reduced_flash_score'] = (true_inter.flash_total_pe - true_inter.flash_hypo_pe) / true_inter.flash_total_pe + #Energy info - get E of all true particles + row_dict['truth_energy_init'] = 0 # Energy info - get KE of all true particles row_dict['truth_calo_energy'] = 0 # Also get average time of particles @@ -189,11 +196,14 @@ def process(self, data): nprim = 0 for i,p in enumerate(true_inter.particles): row_dict[f'truth_calo_energy'] += p.calo_ke + row_dict['truth_energy_init'] += p.energy_init if p.is_primary: row_dict[f'truth_avg_time'] += p.t*1e-3 #us nprim += 1 if nprim > 0: row_dict['truth_avg_time'] /= nprim + else: + row_dict['truth_avg_time'] = -9999 #Efficiency - is the flash time within the BNB window? # Truth values get some slack because of the time resolution +- 0.2 us @@ -203,6 +213,7 @@ def process(self, data): row_dict['truth_in_bnb'] = False # Overlap - overlap = data['interaction_matches_t2r_overlap'][idx] - row_dict.update({'match_overlap': overlap}) + #overlap = data['interaction_matches_t2r_overlap'][idx] + #overlap = data['interaction_matches_r2t_overlap'][idx] + #row_dict.update({'match_overlap': overlap}) self.append(f'{self.log_name}_fmatch_performance', **row_dict) \ No newline at end of file diff --git a/spine/version.py b/spine/version.py index 4c0a99926..7f684ea71 100644 --- a/spine/version.py +++ b/spine/version.py @@ -1,3 +1,3 @@ """Module which stores the current software version.""" -__version__ = '0.2.1' +__version__ = '0.3.0'