emdgroup · maxscheurer · Jun 12, 2026 · Jun 18, 2026 · Jun 22, 2026 · Jun 23, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -9,6 +9,7 @@ on:
   pull_request:
     branches:
       - main
+      - develop
   workflow_dispatch:
 
 jobs:

diff --git a/config_templates/config.ini b/config_templates/config.ini
@@ -40,3 +40,10 @@ ANALYSIS_NAME = analysis
 RUN_DB_PATH = /Group Functions/mdfactory/runs
 ANALYSIS_DB_PATH = /Group Functions/mdfactory/analysis
 ARTIFACT_DB_PATH = /Group Functions/mdfactory/artifacts
+
+[slurm]
+; Optional manual overrides. When empty, values are autodiscovered via sinfo/sacctmgr.
+ACCOUNT =
+PARTITION_CPU =
+PARTITION_GPU =
+DEFAULT_QOS =
diff --git a/mdfactory/analysis/store.py b/mdfactory/analysis/store.py
@@ -40,6 +40,7 @@ def __init__(
         roots: list[Path | str] | Path | str,
         trajectory_file: str = "prod.xtc",
         structure_file: str = "system.pdb",
+        min_status: str = "production",
     ):
         """Initialize store with one or more root paths.
 
@@ -51,6 +52,9 @@ def __init__(
             Trajectory filename to discover
         structure_file : str
             Structure filename to discover
+        min_status : str
+            Minimum simulation status to include in discovery. One of:
+            "build", "equilibrated", "production", "completed".
 
         """
         # Normalize roots to list of Paths
@@ -61,6 +65,7 @@ def __init__(
 
         self.trajectory_file = trajectory_file
         self.structure_file = structure_file
+        self.min_status = min_status
 
         self._simulations: dict[str, Simulation] = {}  # hash -> Simulation
         self._discovery_df: pd.DataFrame | None = None
@@ -105,6 +110,7 @@ def discover(self, refresh: bool = False) -> pd.DataFrame:
                 root,
                 trajectory_file=self.trajectory_file,
                 structure_file=self.structure_file,
+                min_status=self.min_status,
             )
             dfs.append(df)
 
@@ -240,13 +246,137 @@ def build_metadata_table(
                 "path": path,
                 **flattened,
             }
+
+            # Merge tags into metadata row
+            if build_input.tags:
+                for tag_key, tag_val in build_input.tags.items():
+                    metadata_row[f"tag_{tag_key}"] = tag_val
+
             metadata_rows.append(metadata_row)
 
         metadata_df = pd.DataFrame(metadata_rows)
         logger.info(f"Built metadata table with {len(metadata_df)} rows")
 
         return metadata_df
 
+    def search(
+        self,
+        *,
+        simulation_type: str | None = None,
+        status: str | None = None,
+        hash_prefix: str | None = None,
+        tags: dict[str, str] | None = None,
+        smiles: str | None = None,
+    ) -> pd.DataFrame:
+        """Search and filter discovered simulations.
+
+        Applies all provided filters conjunctively (AND logic). Filters left
+        as ``None`` are not applied.
+
+        Parameters
+        ----------
+        simulation_type : str | None
+            Filter by simulation type (exact match).
+        status : str | None
+            Minimum status threshold: keep simulations whose status sits at
+            or after this one in ``STATUS_ORDER``. Simulations whose status
+            is not listed in ``STATUS_ORDER`` are excluded when this filter
+            is set.
+        hash_prefix : str | None
+            Filter by hash prefix (case-insensitive).
+        tags : dict[str, str] | None
+            Filter by tag key-value pairs (all must match). An empty dict
+            applies no tag filter.
+        smiles : str | None
+            Filter by SMILES substructure match against any species.
+
+        Returns
+        -------
+        pd.DataFrame
+            Filtered DataFrame with columns: hash, path, simulation_type,
+            status, tags (dict or None).
+
+        Raises
+        ------
+        ValueError
+            If ``status`` is given but is not in ``STATUS_ORDER``. Only
+            checked when at least one simulation was discovered.
+        ImportError
+            If ``smiles`` is given but the substructure-match helper cannot
+            be imported.
+
+        """
+        from loguru import logger
+
+        self._ensure_discovered()
+
+        columns = ["hash", "path", "simulation_type", "status", "tags"]
+
+        if len(self._discovery_df) == 0:
+            logger.info("No simulations to search")
+            return pd.DataFrame(columns=columns)
+
+        # Pre-validate and import dependencies before iterating
+        from .constants import STATUS_ORDER
+
+        # Resolve the status threshold once instead of once per row.
+        status_rank: dict[str, int] = {}
+        min_rank = -1
+        if status is not None:
+            if status not in STATUS_ORDER:
+                raise ValueError(f"Invalid status '{status}'. Must be one of: {STATUS_ORDER}")
+            status_rank = {name: rank for rank, name in enumerate(STATUS_ORDER)}
+            min_rank = status_rank[status]
+
+        smiles_substructure_match = None
+        if smiles is not None:
+            try:
+                from mdfactory.utils.chemistry_utilities import (
+                    smiles_substructure_match,
+                )
+            except ImportError as err:
+                # Chain explicitly so the underlying import failure stays visible.
+                raise ImportError(
+                    "RDKit is required for SMILES search. "
+                    "Install it via conda: conda install -c conda-forge rdkit"
+                ) from err
+
+        # Normalise the prefix once; the comparison below is case-insensitive.
+        prefix_upper = hash_prefix.upper() if hash_prefix is not None else None
+
+        # Build result rows from discovery
+        rows = []
+        for _, row in self._discovery_df.iterrows():
+            sim = row["simulation"]
+            bi = sim.build_input
+            sim_hash = row["hash"]
+            sim_path = row["path"]
+            sim_status = sim.status
+
+            # Filter: simulation_type
+            if simulation_type is not None and bi.simulation_type != simulation_type:
+                continue
+
+            # Filter: status (minimum threshold). A status missing from
+            # STATUS_ORDER ranks below every threshold instead of raising.
+            if status is not None and status_rank.get(sim_status, -1) < min_rank:
+                continue
+
+            # Filter: hash prefix
+            if prefix_upper is not None and not sim_hash.upper().startswith(prefix_upper):
+                continue
+
+            # Filter: tags. An empty filter has nothing to match, so it must
+            # not exclude simulations that carry no tags.
+            if tags:
+                if bi.tags is None:
+                    continue
+                if any(bi.tags.get(key) != value for key, value in tags.items()):
+                    continue
+
+            # Filter: SMILES substructure (any species may match)
+            if smiles is not None:
+                species_smiles_iter = (
+                    getattr(species, "smiles", None) for species in bi.system.species
+                )
+                if not any(
+                    candidate and smiles_substructure_match(smiles, candidate)
+                    for candidate in species_smiles_iter
+                ):
+                    continue
+
+            rows.append(
+                {
+                    "hash": sim_hash,
+                    "path": sim_path,
+                    "simulation_type": bi.simulation_type,
+                    "status": sim_status,
+                    "tags": bi.tags,
+                }
+            )
+
+        logger.info(f"Search returned {len(rows)} results")
+        return pd.DataFrame(rows, columns=columns)
+
     def load_analysis_with_metadata(
         self,
         analysis_name: str,

diff --git a/mdfactory/analysis/submit.py b/mdfactory/analysis/submit.py
@@ -6,7 +6,6 @@
 
 import os
 import shutil
-from dataclasses import dataclass
 from datetime import datetime
 from pathlib import Path
 from typing import Iterable
@@ -16,42 +15,15 @@
 from mdfactory.analysis.artifacts import ARTIFACT_REGISTRY
 from mdfactory.analysis.simulation import ANALYSIS_REGISTRY, Simulation
 
-
-@dataclass(frozen=True)
-class SlurmConfig:
-    """Configuration for submitit/SLURM execution."""
-
-    account: str
-    partition: str = "cpu"
-    time: str = "2h"
-    cpus_per_task: int = 4
-    mem_gb: int = 8
-    qos: str | None = None
-    constraint: str | None = None
-    job_name_prefix: str = "mdfactory-analysis"
-
-
-def normalize_slurm_time(value: str) -> str:
-    """Normalize SLURM time strings to accepted formats."""
-    raw = value.strip()
-    if ":" in raw:
-        return raw
-    lowered = raw.lower()
-    if lowered.endswith("d"):
-        days = int(lowered[:-1])
-        return f"{days}-00:00:00"
-    if lowered.endswith("h"):
-        hours = int(lowered[:-1])
-        return f"{hours:02d}:00:00"
-    if lowered.endswith("m"):
-        minutes = int(lowered[:-1])
-        hours, minutes = divmod(minutes, 60)
-        return f"{hours:02d}:{minutes:02d}:00"
-    if lowered.isdigit():
-        minutes = int(lowered)
-        hours, minutes = divmod(minutes, 60)
-        return f"{hours:02d}:{minutes:02d}:00"
-    return raw
+# SlurmConfig and normalize_slurm_time live in the performance package so that
+# every SLURM-facing backend (submitit, Parsl, Nextflow) can share them.
+# Re-exported here for backward compatibility:
+#   from mdfactory.analysis.submit import SlurmConfig        # still works
+#   from mdfactory.analysis.submit import normalize_slurm_time  # still works
+from mdfactory.performance.slurm_config import (  # noqa: F401  (re-export)
+    SlurmConfig,
+    normalize_slurm_time,
+)
 
 
 def resolve_simulation_paths(
-Original file line number
+Diff line change
@@ Expand Up / @@ -9,6 +9,7 @@ on: @@
       pull_request:
         branches:
           - main
+          - develop
       workflow_dispatch:
     jobs:
@@ Expand Down @@