From 1381f6f5b69ab8336adab02fd190475074bc174a Mon Sep 17 00:00:00 2001
From: Gui Castelao <guilherme@castelao.net>
Date: Sun, 12 Apr 2026 18:05:09 -0600
Subject: [PATCH 01/35] feat: Initializing transmission schema

---
 .../transmission/transmission_schema.json5    | 25 +++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 compass/extraction/transmission/transmission_schema.json5

diff --git a/compass/extraction/transmission/transmission_schema.json5 b/compass/extraction/transmission/transmission_schema.json5
new file mode 100644
index 000000000..30ff2a24c
--- /dev/null
+++ b/compass/extraction/transmission/transmission_schema.json5
@@ -0,0 +1,25 @@
+{
+  "title": "Transmission Ordinance Extraction Schema",
+  "description": "Single-shot structured extraction schema for energy transmission ordinances. This schema guides an LLM to extract all r    elevant features in one call and returns an outputs array where each object represents one row in the extracted long-form table.",
+  "version": "1.0.0",
+  "type": "object",
+  "required": ["outputs"],
+  "additionalProperties": false,
+  "properties": {
+    "outputs": {
+      "type": "array",
+      "description": "Sparse long-form extraction table. Include only features with an enacted, explicit requirement and emit **at most one row per feature**. Never infer, imply, or guess a requirement from related context.",
+      "items": {
+        "type": "object",
+        "required": [
+          "feature",
+          "value",
+          "units",
+          "section",
+          "summary"
+        ],
+        "additionalProperties": false,
+      }
+    }
+  }
+}

From f6adb03403cd859daa31cee978e2038c737b4632 Mon Sep 17 00:00:00 2001
From: Gui Castelao <guilherme@castelao.net>
Date: Sun, 12 Apr 2026 18:09:18 -0600
Subject: [PATCH 02/35] feat: Initializing some properties

---
 .../transmission/transmission_schema.json5           | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/compass/extraction/transmission/transmission_schema.json5 b/compass/extraction/transmission/transmission_schema.json5
index 30ff2a24c..9f1ce5371 100644
--- a/compass/extraction/transmission/transmission_schema.json5
+++ b/compass/extraction/transmission/transmission_schema.json5
@@ -19,6 +19,18 @@
           "summary"
         ],
         "additionalProperties": false,
+        "properties": {
+          "feature": {
+            "type": "string",
+            "description": "The ordinance feature being extracted. Must be one of the enumerated feature IDs.",
+            "enum": [
+              "noise",
+              "maximum height",
+              "color",
+              "lighting",
+            ]
+          }
+        }
       }
     }
   }

From a62b1d120aef5519e853159db132f1b5d6bc9d5f Mon Sep 17 00:00:00 2001
From: Gui Castelao <guilherme@castelao.net>
Date: Sun, 12 Apr 2026 18:13:52 -0600
Subject: [PATCH 03/35] Defining `value`

---
 .../extraction/transmission/transmission_schema.json5 | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/compass/extraction/transmission/transmission_schema.json5 b/compass/extraction/transmission/transmission_schema.json5
index 9f1ce5371..889dbd948 100644
--- a/compass/extraction/transmission/transmission_schema.json5
+++ b/compass/extraction/transmission/transmission_schema.json5
@@ -29,7 +29,16 @@
               "color",
               "lighting",
             ]
-          }
+          },
+          "value": {
+            "description": "The extracted ordinance value.",
+            "anyOf": [
+              {"type": "number"},
+              {"type": "string"},
+              {"type": "array", "items": {"type": "string"}, "additionalProperties": false},
+              {"type": "null"}
+            ]
+          },
         }
       }
     }

From 295b963863dfe78ebc6faaead1c09e6785a725a1 Mon Sep 17 00:00:00 2001
From: Gui Castelao <guilherme@castelao.net>
Date: Sun, 12 Apr 2026 19:53:22 -0600
Subject: [PATCH 04/35] Adding some qualitative_features

---
 compass/extraction/transmission/transmission_schema.json5 | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/compass/extraction/transmission/transmission_schema.json5 b/compass/extraction/transmission/transmission_schema.json5
index 889dbd948..94b564cb5 100644
--- a/compass/extraction/transmission/transmission_schema.json5
+++ b/compass/extraction/transmission/transmission_schema.json5
@@ -43,4 +43,10 @@
       }
     }
   }
+  "$qualitative_features": [
+      "color",
+      "decommissioning",
+      "lighting",
+      "signage",
+  ]
 }

From bd72ca60b72544e2bd84c59dce683f670ad9ef88 Mon Sep 17 00:00:00 2001
From: Gui Castelao <guilherme@castelao.net>
Date: Mon, 13 Apr 2026 19:43:54 -0600
Subject: [PATCH 05/35] typo: Remove stray whitespace in schema description

---
 compass/extraction/transmission/transmission_schema.json5 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compass/extraction/transmission/transmission_schema.json5 b/compass/extraction/transmission/transmission_schema.json5
index 94b564cb5..7894ae156 100644
--- a/compass/extraction/transmission/transmission_schema.json5
+++ b/compass/extraction/transmission/transmission_schema.json5
@@ -1,6 +1,6 @@
 {
   "title": "Transmission Ordinance Extraction Schema",
-  "description": "Single-shot structured extraction schema for energy transmission ordinances. This schema guides an LLM to extract all r    elevant features in one call and returns an outputs array where each object represents one row in the extracted long-form table.",
+  "description": "Single-shot structured extraction schema for energy transmission ordinances. This schema guides an LLM to extract all relevant features in one call and returns an outputs array where each object represents one row in the extracted long-form table.",
   "version": "1.0.0",
   "type": "object",
   "required": ["outputs"],

From 47221e99d4070498ca7dc9b978911d27d181b556 Mon Sep 17 00:00:00 2001
From: Gui Castelao <guilherme@castelao.net>
Date: Mon, 13 Apr 2026 19:58:35 -0600
Subject: [PATCH 06/35] Complete and improved required fields

---
 .../transmission/transmission_schema.json5    | 35 ++++++++++++++-----
 1 file changed, 27 insertions(+), 8 deletions(-)

diff --git a/compass/extraction/transmission/transmission_schema.json5 b/compass/extraction/transmission/transmission_schema.json5
index 7894ae156..66f6028f9 100644
--- a/compass/extraction/transmission/transmission_schema.json5
+++ b/compass/extraction/transmission/transmission_schema.json5
@@ -16,29 +16,48 @@
           "value",
           "units",
           "section",
-          "summary"
+          "summary",
+          "explanation"
         ],
         "additionalProperties": false,
         "properties": {
           "feature": {
             "type": "string",
-            "description": "The ordinance feature being extracted. Must be one of the enumerated feature IDs.",
+            "description": "The ordinance feature being extracted. Must be one of the enumerated feature IDs. Do not invent aliases, prefixes, or synonym variants.",
             "enum": [
-              "noise",
-              "maximum height",
-              "color",
-              "lighting",
+              "noise"
             ]
           },
           "value": {
-            "description": "The extracted ordinance value.",
+            "description": "The extracted ordinance value. For numerical setbacks/limits, use a number. For categorical outcomes, use a string. For fields that list multiple named items, use an array of strings. Use null only for qualitative features, and only when an enacted, explicit, enforceable ordinance requirement for that feature is present. Null must never be used to indicate absence. If a feature has no enacted, explicit requirement in the ordinance text, omit that feature from outputs.",
             "anyOf": [
               {"type": "number"},
               {"type": "string"},
-              {"type": "array", "items": {"type": "string"}, "additionalProperties": false},
+              {
+                "type": "array",
+                "items": {"type": "string"},
+                "additionalProperties": false
+              },
               {"type": "null"}
             ]
           },
+          "units": {
+            "type": ["string", "null"],
+            "description": "Units for the extracted value. Preserve the ordinance unit wording exactly whenever possible. For setbacks/depth, use linear units such as 'feet' or 'meters' as stated in text. For noise, use 'dBA' only if the ordinance says 'dBA' or 'dB(A)'; if it says 'dB' without A-weighting, keep 'dB'. Use null for qualitative fields without measurable units."
+          },
+          "section": {
+            "type": ["string", "null"],
+            "description": "The section title or number from the ordinance where this requirement appears. Include numerical labels if provided. Null if no section identifier is available."
+          },
+          "summary": {
+            "type": "string",
+            "description": "A short summary of the relevant ordinance requirement using direct text excerpts and quotes as much as possible. If multiple options exist and a selection was made, list all other options and their conditions in the summary. For qualitative restrictions, this is the primary output field containing the full extracted text. Can be null if no requirement found."
+            //"description": "A short summary with direct ordinance excerpts/quotes whenever possible. For qualitative features (definitions, permitting, screening, inspection, decommissioning, prohibitions), this is the primary output field and should contain a direct ordinance excerpt. For numeric features, summary must support the same requirement used to extract value and units. Must be a non-null, non-empty string. Do not output absence placeholders (for example, 'No explicit ... found'); omit the feature instead when no requirement is present."
+          },
+          "explanation": {
+            "type": "string",
+            "description": "Brief rationale explaining why this row matches the selected feature under this schema. Reference the specific evidence in summary and how it supports the extracted value and units or, for qualitative features, the inclusion criteria. Must be a non-null, non-empty string and must not use absence placeholders."
+          }
         }
       }
     }

From ca5a5f111eafcb18c722309f36c49f1626089463 Mon Sep 17 00:00:00 2001
From: Gui Castelao <guilherme@castelao.net>
Date: Mon, 13 Apr 2026 20:13:17 -0600
Subject: [PATCH 07/35] Simplified summary request

The summary field is sensitive to how its description is worded. Some
of the wording used for other technologies can cause the field to be
missed completely.
---
 compass/extraction/transmission/transmission_schema.json5 | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/compass/extraction/transmission/transmission_schema.json5 b/compass/extraction/transmission/transmission_schema.json5
index 66f6028f9..4f0c642ff 100644
--- a/compass/extraction/transmission/transmission_schema.json5
+++ b/compass/extraction/transmission/transmission_schema.json5
@@ -51,7 +51,12 @@
           },
           "summary": {
             "type": "string",
-            "description": "A short summary of the relevant ordinance requirement using direct text excerpts and quotes as much as possible. If multiple options exist and a selection was made, list all other options and their conditions in the summary. For qualitative restrictions, this is the primary output field containing the full extracted text. Can be null if no requirement found."
+            "description": "A short summary of the relevant ordinance requiremets."
+            /*
+            Byron's
+            "description": "A short summary with direct ordinance excerpts or quotes whenever possible. For qualitative features such as permitting, fencing, lighting, seismic monitoring, decommissioning, and prohibitions, this is the primary output field and should contain direct ordinance language. For numeric features, summary must support the same requirement used to extract value and units. Must be a non-null, non-empty string. Do not output absence placeholders such as 'No explicit requirement found'; omit the feature instead when no requirement is present."
+            */
+            // "description": "A short summary of the relevant ordinance requirement using direct text excerpts and quotes as much as possible. If multiple options exist and a selection was made, list all other options and their conditions in the summary. For qualitative restrictions, this is the primary output field containing the full extracted text. Can be null if no requirement found."
             //"description": "A short summary with direct ordinance excerpts/quotes whenever possible. For qualitative features (definitions, permitting, screening, inspection, decommissioning, prohibitions), this is the primary output field and should contain a direct ordinance excerpt. For numeric features, summary must support the same requirement used to extract value and units. Must be a non-null, non-empty string. Do not output absence placeholders (for example, 'No explicit ... found'); omit the feature instead when no requirement is present."
           },
           "explanation": {

From a7df8991599224cacdc08799adcba3a0711fc9a7 Mon Sep 17 00:00:00 2001
From: Gui Castelao <guilherme@castelao.net>
Date: Mon, 13 Apr 2026 22:03:34 -0600
Subject: [PATCH 08/35] Defining noise

---
 .../extraction/transmission/transmission_schema.json5  | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/compass/extraction/transmission/transmission_schema.json5 b/compass/extraction/transmission/transmission_schema.json5
index 4f0c642ff..0c0d45a8d 100644
--- a/compass/extraction/transmission/transmission_schema.json5
+++ b/compass/extraction/transmission/transmission_schema.json5
@@ -66,6 +66,16 @@
         }
       }
     }
+  },
+  "$definitions": {
+    "numerical_features": {
+      "description": "Non-setback numerical restriction features. Only extract if numerical values are explicitly given in the text.",
+      "properties": {
+        "noise": {
+          "description": "Extract maximum allowable operational noise for electric transmission lines and related facilities only when an explicit numeric limit is stated. Normalize A-weighted units to 'dBA' in units and preserve verbatim wording in summary. If the ordinance only references compliance with external standards or provides no numeric noise limit, omit this feature entirely."
+        }
+      }
+    }
   }
   "$qualitative_features": [
       "color",

From 74793d61362b683aa416b96903c3e36fadf209d7 Mon Sep 17 00:00:00 2001
From: Gui Castelao <guilherme@castelao.net>
Date: Mon, 13 Apr 2026 22:11:22 -0600
Subject: [PATCH 09/35] examples of noise

---
 .../transmission/transmission_schema.json5     | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/compass/extraction/transmission/transmission_schema.json5 b/compass/extraction/transmission/transmission_schema.json5
index 0c0d45a8d..f8c3f2f9e 100644
--- a/compass/extraction/transmission/transmission_schema.json5
+++ b/compass/extraction/transmission/transmission_schema.json5
@@ -76,11 +76,15 @@
         }
       }
     }
-  }
-  "$qualitative_features": [
-      "color",
-      "decommissioning",
-      "lighting",
-      "signage",
-  ]
+  },
+  "$examples": [
+    {
+      "feature": "noise",
+      "value": 50,
+      "units": "dBA",
+      "section": "SECTION 1308 – Performance / Construction Standards",
+      // Need some editing
+      "summary": "The transmission line and or facility shall not generate noise in excess of Fifty (50) decibel levels at ground level to the property lines or at the nearest residence. Such measurements shall be signed by a qualified engineer, stating that noise levels are being met, per ordinance."
+    }
+  ],
 }

From 9182cf36e875f61b5e6816d6fe031b83c34447f3 Mon Sep 17 00:00:00 2001
From: Gui Castelao <guilherme@castelao.net>
Date: Mon, 13 Apr 2026 22:15:04 -0600
Subject: [PATCH 10/35] Adding instructions - general

---
 compass/extraction/transmission/transmission_schema.json5 | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/compass/extraction/transmission/transmission_schema.json5 b/compass/extraction/transmission/transmission_schema.json5
index f8c3f2f9e..532da480d 100644
--- a/compass/extraction/transmission/transmission_schema.json5
+++ b/compass/extraction/transmission/transmission_schema.json5
@@ -87,4 +87,9 @@
       "summary": "The transmission line and or facility shall not generate noise in excess of Fifty (50) decibel levels at ground level to the property lines or at the nearest residence. Such measurements shall be signed by a qualified engineer, stating that noise levels are being met, per ordinance."
     }
   ],
+  "$instructions": {
+    "general": [
+      "Use direct text excerpts and quotes in summary whenever possible.",
+    ]
+  }
 }

From b37faee71f6feb230f2f9655d7d3d4680f15c782 Mon Sep 17 00:00:00 2001
From: Gui Castelao <guilherme@castelao.net>
Date: Mon, 13 Apr 2026 22:34:17 -0600
Subject: [PATCH 11/35] Adding decommissioning as an operational feature

---
 .../transmission/transmission_schema.json5     | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/compass/extraction/transmission/transmission_schema.json5 b/compass/extraction/transmission/transmission_schema.json5
index 532da480d..7ae89a961 100644
--- a/compass/extraction/transmission/transmission_schema.json5
+++ b/compass/extraction/transmission/transmission_schema.json5
@@ -25,7 +25,8 @@
             "type": "string",
             "description": "The ordinance feature being extracted. Must be one of the enumerated feature IDs. Do not invent aliases, prefixes, or synonym variants.",
             "enum": [
-              "noise"
+              "noise",
+              "decommissioning",
             ]
           },
           "value": {
@@ -75,7 +76,15 @@
           "description": "Extract maximum allowable operational noise for electric transmission lines and related facilities only when an explicit numeric limit is stated. Normalize A-weighted units to 'dBA' in units and preserve verbatim wording in summary. If the ordinance only references compliance with external standards or provides no numeric noise limit, omit this feature entirely."
         }
       }
-    }
+    },
+    "operational_features": {
+      "description": "Operational, licensing, permitting, inspection, and abandonment requirements. These features require text extraction only.",
+      "properties": {
+        "decommissioning": {
+          "description": "Extract requirements for abandonment, plugging, sealing, removal, and site restoration when systems are retired, fail, or wells are abandoned, including responsible party and timeline details when explicitly stated."
+        }
+      }
+    },
   },
   "$examples": [
     {
@@ -91,5 +100,8 @@
     "general": [
       "Use direct text excerpts and quotes in summary whenever possible.",
     ]
-  }
+  },
+  "$qualitative_features": [
+    "decommissioning",
+  ]
 }

From 1090cb5b36087f97bd7c367e8d9bee1f4e1e2bf3 Mon Sep 17 00:00:00 2001
From: Gui Castelao <guilherme@castelao.net>
Date: Tue, 14 Apr 2026 09:36:37 -0600
Subject: [PATCH 12/35] feat: Adding Maximum Height

Needs some improvements to avoid confusing tower height with facility height.
---
 compass/extraction/transmission/transmission_schema.json5 | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/compass/extraction/transmission/transmission_schema.json5 b/compass/extraction/transmission/transmission_schema.json5
index 7ae89a961..04683f781 100644
--- a/compass/extraction/transmission/transmission_schema.json5
+++ b/compass/extraction/transmission/transmission_schema.json5
@@ -25,6 +25,8 @@
             "type": "string",
             "description": "The ordinance feature being extracted. Must be one of the enumerated feature IDs. Do not invent aliases, prefixes, or synonym variants.",
             "enum": [
+              // Tower vs facility height. Do I want both?
+              "maximum height",
               "noise",
               "decommissioning",
             ]
@@ -74,6 +76,9 @@
       "properties": {
         "noise": {
           "description": "Extract maximum allowable operational noise for electric transmission lines and related facilities only when an explicit numeric limit is stated. Normalize A-weighted units to 'dBA' in units and preserve verbatim wording in summary. If the ordinance only references compliance with external standards or provides no numeric noise limit, omit this feature entirely."
+        },
+        "maximum-height": {
+          "description": "Extract maximum structure height allowed."
         }
       }
     },

From e9b4a9c867592dff4d2129cda4dfc8ccdc67944f Mon Sep 17 00:00:00 2001
From: Gui Castelao <guilherme@castelao.net>
Date: Tue, 14 Apr 2026 18:15:21 -0600
Subject: [PATCH 13/35] feat: Property line setback

---
 .../transmission/transmission_schema.json5        | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/compass/extraction/transmission/transmission_schema.json5 b/compass/extraction/transmission/transmission_schema.json5
index 04683f781..96703da90 100644
--- a/compass/extraction/transmission/transmission_schema.json5
+++ b/compass/extraction/transmission/transmission_schema.json5
@@ -25,6 +25,7 @@
             "type": "string",
             "description": "The ordinance feature being extracted. Must be one of the enumerated feature IDs. Do not invent aliases, prefixes, or synonym variants.",
             "enum": [
+              "property lines",
               // Tower vs facility height. Do I want both?
               "maximum height",
               "noise",
@@ -71,6 +72,14 @@
     }
   },
   "$definitions": {
+    "setback_features": {
+      "description": "Setback features for electrical transmission lines and related infrastructure. Treat each setback feature independently and do not cross-apply a setback unless the ordinance text explicitly states that it applies to multiple target types. When a single clause explicitly lists multiple target types and one shared numeric setback, emit one row per explicitly listed feature using the same numeric value and units and cite the same clause in summary. Apply the shared numeric prioritization rules in $core_principles when multiple numeric values explicitly apply to the same feature.",
+      "properties": {
+        "property lines distance": {
+          "description": "Minimum required separation from explicit front yard, side yard, rear yard, or similar yard-based setback areas. Extract this feature only when the ordinance states the electric transmission line and related facilities must meet a yard setback or be located within/outside a named yard area. Do not remap yard setbacks to 'property lines' unless the ordinance explicitly measures the requirement from the property line itself rather than from a yard classification. IGNORE: Do not respond based on generic property-line, road, or right-of-way setbacks unless the ordinance explicitly frames the requirement as a yard setback."
+        },
+      }
+    },
     "numerical_features": {
       "description": "Non-setback numerical restriction features. Only extract if numerical values are explicitly given in the text.",
       "properties": {
@@ -104,6 +113,12 @@
   "$instructions": {
     "general": [
       "Use direct text excerpts and quotes in summary whenever possible.",
+    ],
+    "setbacks": [
+      "Setbacks should be extracted as minimum separation distances.",
+      "Prefer numeric values with units ('feet', 'meters').",
+      "Setback rows must contain numeric value and non-null units; never emit qualitative-only setback rows.",
+      "Treat property-line, lot-line, and parcel-boundary setbacks as 'property lines' when the ordinance measures the setback from that boundary.",
     ]
   },
   "$qualitative_features": [

From cabab9f1a97132d469edd3dfa9c5b8ca7510847a Mon Sep 17 00:00:00 2001
From: Gui Castelao <guilherme@castelao.net>
Date: Wed, 15 Apr 2026 22:27:38 -0600
Subject: [PATCH 14/35] feat: core_principles

It's incomplete.
---
 .../transmission/transmission_schema.json5    | 20 +++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/compass/extraction/transmission/transmission_schema.json5 b/compass/extraction/transmission/transmission_schema.json5
index 96703da90..25cd64e14 100644
--- a/compass/extraction/transmission/transmission_schema.json5
+++ b/compass/extraction/transmission/transmission_schema.json5
@@ -71,6 +71,26 @@
       }
     }
   },
+  "$core_principles": {
+    /*
+    "scope_context": {
+    },
+    */
+    "strict_evidence_gate": {
+      "description": "Extract a feature only when the ordinance text explicitly states a requirement, definition, or prohibition for that same feature. Never infer, assume, extrapolate, or guess from related context, implications, headings, or nearby provisions. If the ordinance points to an outside document or standard without restating the controlling requirement in the ordinance text itself, do not import missing values from that outside source."
+    },
+    "data_omission": {
+      "description": "Emit only positively matched features. If a feature is not explicitly present, omit it entirely rather than returning placeholder text. For qualitative features, use value=null and units=null only when an enacted, explicit requirement or definition for that same feature is present. For numeric features, extract only when an explicit numeric threshold is stated in the ordinance text; otherwise omit the feature instead of returning null, empty, or qualitative-only values. Never emit absence placeholders such as 'not found', 'no explicit requirement', 'none', or similar text in any field."
+    },
+    "numeric_prioritization": {
+      // Review numeric features if I specify all conditions here.
+      "description": "When multiple numeric values apply to the same feature, keep one row and select the controlling most restrictive value for that feature. Restrictiveness rules: setbacks -> choose the largest minimum separation distance; tower height -> choose the highest maximum height; noise -> choose the lowest allowed noise limit. Keep condition-specific alternatives in summary only when the ordinance text explicitly shows they all apply to the same feature for GHP systems."
+    },
+    /*
+    "definition_v_rule_test": {
+    }
+    */
+  },
   "$definitions": {
     "setback_features": {
       "description": "Setback features for electrical transmission lines and related infrastructure. Treat each setback feature independently and do not cross-apply a setback unless the ordinance text explicitly states that it applies to multiple target types. When a single clause explicitly lists multiple target types and one shared numeric setback, emit one row per explicitly listed feature using the same numeric value and units and cite the same clause in summary. Apply the shared numeric prioritization rules in $core_principles when multiple numeric values explicitly apply to the same feature.",

From 2cfd2da1d1ac33a48288b02a730dc3cfdab46aaa Mon Sep 17 00:00:00 2001
From: Gui Castelao <guilherme@castelao.net>
Date: Thu, 16 Apr 2026 13:10:46 -0600
Subject: [PATCH 15/35] feat: Adding setbacks' properties

---
 .../transmission/transmission_schema.json5    | 22 +++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/compass/extraction/transmission/transmission_schema.json5 b/compass/extraction/transmission/transmission_schema.json5
index 25cd64e14..236dc4c6d 100644
--- a/compass/extraction/transmission/transmission_schema.json5
+++ b/compass/extraction/transmission/transmission_schema.json5
@@ -95,9 +95,27 @@
     "setback_features": {
       "description": "Setback features for electrical transmission lines and related infrastructure. Treat each setback feature independently and do not cross-apply a setback unless the ordinance text explicitly states that it applies to multiple target types. When a single clause explicitly lists multiple target types and one shared numeric setback, emit one row per explicitly listed feature using the same numeric value and units and cite the same clause in summary. Apply the shared numeric prioritization rules in $core_principles when multiple numeric values explicitly apply to the same feature.",
       "properties": {
-        "property lines distance": {
-          "description": "Minimum required separation from explicit front yard, side yard, rear yard, or similar yard-based setback areas. Extract this feature only when the ordinance states the electric transmission line and related facilities must meet a yard setback or be located within/outside a named yard area. Do not remap yard setbacks to 'property lines' unless the ordinance explicitly measures the requirement from the property line itself rather than from a yard classification. IGNORE: Do not respond based on generic property-line, road, or right-of-way setbacks unless the ordinance explicitly frames the requirement as a yard setback."
+        "structures": {
+          "description": "Minimum required separation from structures and buildings that do not match the definition of 'residential buildings'."
         },
+        "residential buildings": {
+          "description": "Minimum required separation from residential buildings, occupied dwellings, occupied buildings, residences, homes, residential receptors, or residential uses."
+        },
+        "property lines": {
+          "description": "Minimum required separation from property lines, lot lines, parcel boundaries, or lease boundaries when the ordinance explicitly states the distance is measured from that boundary. Do not remap property-line distances to roads or residential zones unless the text explicitly makes them equivalent for that requirement. Distances to official plan lines or specific plan lines for public highways do not belong here unless the ordinance expressly defines those lines as property boundaries for the same requirement."
+        },
+        "roads": {
+          "description": "Minimum required separation from public road rights-of-way. Property-line setbacks do NOT count for this feature unless the ordinance text explicitly states that the property line is the road right-of-way or otherwise makes them the same boundary for that requirement. IGNORE: Do not respond based on setbacks from other kinds of right-of-way such as utility easements, etc."
+        },
+        "airport": {
+          "description":"Minimum required separation from airport runways and heliports."
+        },
+        "OHWM": {
+          "description":"Minimum required separation from Ordinary High Water Mark (OHWM)."
+        },
+        "railroads": {
+          "description": "Minimum required separation from railroads, railroad rights-of-way, rail corridors, or active rail lines. Extract only when rail infrastructure is explicitly named."
+        }
       }
     },
     "numerical_features": {

From 129cdef39e5b52a4feaa0a5040ca1c5654c0665a Mon Sep 17 00:00:00 2001
From: Gui Castelao <guilherme@castelao.net>
Date: Thu, 16 Apr 2026 17:43:51 -0600
Subject: [PATCH 16/35] This is incomplete, so let's call it 0.0.1

---
 compass/extraction/transmission/transmission_schema.json5 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compass/extraction/transmission/transmission_schema.json5 b/compass/extraction/transmission/transmission_schema.json5
index 236dc4c6d..97ab1672e 100644
--- a/compass/extraction/transmission/transmission_schema.json5
+++ b/compass/extraction/transmission/transmission_schema.json5
@@ -1,7 +1,7 @@
 {
   "title": "Transmission Ordinance Extraction Schema",
   "description": "Single-shot structured extraction schema for energy transmission ordinances. This schema guides an LLM to extract all relevant features in one call and returns an outputs array where each object represents one row in the extracted long-form table.",
-  "version": "1.0.0",
+  "version": "0.0.1",
   "type": "object",
   "required": ["outputs"],
   "additionalProperties": false,

From 0a40d4cc5636d28f1fb7246efd9c7b15c918ce98 Mon Sep 17 00:00:00 2001
From: Gui Castelao <guilherme@castelao.net>
Date: Mon, 20 Apr 2026 23:02:46 -0600
Subject: [PATCH 17/35] Finishing operational and including prohibitions

---
 .../transmission/transmission_schema.json5    | 31 ++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/compass/extraction/transmission/transmission_schema.json5 b/compass/extraction/transmission/transmission_schema.json5
index 97ab1672e..cac429ca2 100644
--- a/compass/extraction/transmission/transmission_schema.json5
+++ b/compass/extraction/transmission/transmission_schema.json5
@@ -132,8 +132,37 @@
     "operational_features": {
       "description": "Operational, licensing, permitting, inspection, and abandonment requirements. These features require text extraction only.",
       "properties": {
+        "bond": {
+          "description": "Extract requirements for bonds, sureties, or financial assurance instruments related to construction, operation, maintenance, or decommissioning of electric transmission lines and related infrastructure. Include any specific conditions, amounts, or criteria for bond release when explicitly stated."
+        },
         "decommissioning": {
-          "description": "Extract requirements for abandonment, plugging, sealing, removal, and site restoration when systems are retired, fail, or wells are abandoned, including responsible party and timeline details when explicitly stated."
+          "description": "Extract requirements for abandonment, removal, and site restoration when systems are retired, fail, or towers are abandoned, including responsible party and timeline details when explicitly stated."
+        },
+        "permit": {
+          "description": "Extract requirements for conditional use permits, special use permits, or other discretionary land use approvals that apply to electric transmission lines and related infrastructure. Include any specific conditions or criteria for approval when explicitly stated."
+        },
+        "FAA": {
+          "description": "Extract requirements for compliance with Federal Aviation Administration (FAA) regulations and local approvals for transmission lines and related infrastructure near airports. Include any specific FAA standards or local approval processes when explicitly stated."
+        }
+      }
+    },
+    "prohibition_features": {
+      "description": "Prohibitions, bans, or moratoria on building, installing, siting, or otherwise deploying electric transmission lines and related infrastructure in certain areas or under certain conditions. These features require text extraction only.",
+      "properties": {
+        "rural zone": {
+          "description": "Extract prohibitions or moratoria on electric transmission lines and related infrastructure in rural zones, agricultural zones, or similar low-density land use areas when the ordinance explicitly states that the prohibition applies to transmission lines or related infrastructure."
+        },
+        "residential zone": {
+          "description": "Extract prohibitions or moratoria on electric transmission lines and related infrastructure in residential zones, residential districts, or similar land use areas when the ordinance explicitly states that the prohibition applies to transmission lines or related infrastructure."
+        },
+        "commercial zone": {
+          "description": "Extract prohibitions or moratoria on electric transmission lines and related infrastructure in commercial zones, commercial districts, or similar land use areas when the ordinance explicitly states that the prohibition applies to transmission lines or related infrastructure."
+        },
+        "light industrial zone": {
+          "description": "Extract prohibitions or moratoria on electric transmission lines and related infrastructure in light industrial zones, light industrial districts, or similar land use areas when the ordinance explicitly states that the prohibition applies to transmission lines or related infrastructure."
+        },
+        "agricultural zone": {
+          "description": "Extract prohibitions or moratoria on electric transmission lines and related infrastructure in agricultural zones, agricultural districts, or similar land use areas when the ordinance explicitly states that the prohibition applies to transmission lines or related infrastructure. Include prohibitions in irrigated farmland, ranch land, or similar agricultural land when the ordinance explicitly states that the prohibition applies to transmission lines or related infrastructure in those areas."
         }
       }
     },

From bd84fe6a1fe8ae2eec73a81bffe41c90aa0632cc Mon Sep 17 00:00:00 2001
From: Gui Castelao <guilherme@castelao.net>
Date: Thu, 23 Apr 2026 14:50:02 -0600
Subject: [PATCH 18/35] feat: match_labels()

Pair labels with outputs.
---
 compass/qc/core.py | 83 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 83 insertions(+)
 create mode 100644 compass/qc/core.py

diff --git a/compass/qc/core.py b/compass/qc/core.py
new file mode 100644
index 000000000..b5b9b86bc
--- /dev/null
+++ b/compass/qc/core.py
@@ -0,0 +1,83 @@
+"""Core functionalities to validate CSV outputs with manual labels"""
+
+from __future__ import annotations
+
+import logging
+from collections.abc import Generator
+
+import polars as pl
+
+from store import location_label
+
+logger = logging.getLogger(__name__)
+
+
+def match_labels(
+    truth: dict[str, dict],
+    lf: pl.LazyFrame,
+) -> Generator[tuple[dict, pl.DataFrame], None, None]:
+    """Pair ground-truth locations with matching run rows
+
+    Iterates over each location in *truth*, builds a
+    geographic filter (state, county, and subdivision when
+    defined), applies it to *lf*, and collects the result.
+    When the truth entry declares a FIPS code, the matched
+    rows are checked for agreement; mismatches are reported
+    via ``logging.error``.
+
+    Parameters
+    ----------
+    truth : dict[str, dict]
+        Ground-truth dict as returned by
+        ``store.load_truth()``, keyed by normalised
+        location string.
+    lf : pl.LazyFrame
+        Lazy representation of a run CSV, as produced by
+        ``load_run(path).lazy()``.
+
+    Yields
+    ------
+    tuple[dict, pl.DataFrame]
+        A pair ``(loc_data, loc_df)`` for each location in
+        *truth*:
+
+        loc_data
+            The truth dict for one location, containing
+            state, county, subdivision, FIPS, and features
+            with their check specs.
+        loc_df
+            Collected DataFrame with every run row that
+            matches the location geographically.  May be
+            empty when the run has no data for that
+            location.
+    """
+    for _loc_key, loc_data in truth.items():
+        mask = (
+            (pl.col("county") == loc_data["county"])
+            & (pl.col("state") == loc_data["state"])
+        )
+        subdiv = loc_data.get("subdivision")
+        if subdiv:
+            mask = mask & (pl.col("subdivision") == subdiv)
+        else:
+            mask = mask & pl.col("subdivision").is_null()
+
+        loc_df = lf.filter(mask).collect()
+
+        # Validate FIPS agreement
+        expected_fips = loc_data.get("FIPS")
+        if expected_fips is not None and not loc_df.is_empty():
+            run_fips = loc_df["FIPS"].unique().to_list()
+            mismatched = [
+                f for f in run_fips
+                if f is not None and f != expected_fips
+            ]
+            if mismatched:
+                loc_lbl = location_label(loc_data)
+                logger.error(
+                    "FIPS mismatch for %s: truth declares %s, "
+                    "run contains %s",
+                    loc_lbl, expected_fips, mismatched,
+                )
+
+        yield loc_data, loc_df

From 7c64ed437299eaa3f99d37996d477254909f819f Mon Sep 17 00:00:00 2001
From: Gui Castelao <guilherme@castelao.net>
Date: Thu, 23 Apr 2026 14:54:13 -0600
Subject: [PATCH 19/35] feat: Load and parse labeled data

---
 compass/qc/store.py | 284 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 284 insertions(+)
 create mode 100644 compass/qc/store.py

diff --git a/compass/qc/store.py b/compass/qc/store.py
new file mode 100644
index 000000000..f7b31c121
--- /dev/null
+++ b/compass/qc/store.py
@@ -0,0 +1,284 @@
+"""
+truth_store.py — Load, merge, and validate ground-truth YAML files.
+
+Handles single files, directories (recursive), and duplicate detection
+both within and across files.
+
+Location keys support two granularities:
+
+  County level  : "County, State"                → e.g. "Power, Idaho"
+  Township level: "Subdivision, County, State"    → e.g. "Springfield, Power, Idaho"
+
+load_truth() returns a dict grouped by location rather than a flat list,
+so downstream code can process and summarise per-location.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import yaml
+
+# ── Field definitions ────────────────────────────────────────────────────────
+# These define which CSV columns the ground-truth format understands.
+
+# Columns where exact string match is the natural comparison
+EXACT_FIELDS = ["value", "units", "adder", "min_dist", "max_dist", "year"]
+
+# Columns where text / keyword / not-null matching makes more sense
+TEXT_FIELDS = ["summary", "section", "source"]
+
+ALL_CHECK_FIELDS = EXACT_FIELDS + TEXT_FIELDS
+
+# ── Exceptions ───────────────────────────────────────────────────────────────
+
+
+class DuplicateLocationError(Exception):
+    """Raised when the same location key appears in more than one place."""
+
+
+# ── Location key parsing ────────────────────────────────────────────────────
+
+
+def parse_location_key(key: str) -> dict[str, str | None]:
+    """
+    Parse a YAML location key into its component parts.
+
+    Supports:
+      "County, State"                → county-level
+      "Subdivision, County, State"   → township-level
+
+    Returns a dict with keys: state, county, subdivision (None if county-level).
+    """
+    parts = [p.strip() for p in key.split(",")]
+
+    if len(parts) == 2:
+        return {
+            "state": parts[1].lower(),
+            "county": parts[0].lower(),
+            "subdivision": None,
+        }
+    elif len(parts) == 3:
+        return {
+            "state": parts[2].lower(),
+            "county": parts[1].lower(),
+            "subdivision": parts[0].lower(),
+        }
+    else:
+        raise ValueError(
+            f"Location key must have 2 parts (County, State) or "
+            f"3 parts (Subdivision, County, State), got {len(parts)}: '{key}'"
+        )
+
+
+def location_label(loc: dict[str, str | None]) -> str:
+    """
+    Build a human-readable label from parsed location components.
+
+    Returns "County, State" or "Subdivision, County, State".
+    """
+    parts = []
+    if loc.get("subdivision"):
+        parts.append(loc["subdivision"].title())
+    parts.append(loc["county"].title())
+    parts.append(loc["state"].title())
+    return ", ".join(parts)
+
+
+# ── File collection ──────────────────────────────────────────────────────────
+
+
+def collect_truth_files(path: str | Path) -> list[Path]:
+    """
+    Walk *path* and return every .yaml / .yml file found.
+
+    If *path* is a single file, return it in a one-element list.
+    If *path* is a directory, recurse into all sub-folders (sorted for
+    deterministic ordering).
+    """
+    p = Path(path)
+    if p.is_file():
+        return [p]
+    if p.is_dir():
+        files = sorted(p.rglob("*.yaml")) + sorted(p.rglob("*.yml"))
+        # Distinct paths (e.g. symlinks) can resolve to the same file; deduplicate
+        seen: set[Path] = set()
+        unique: list[Path] = []
+        for f in files:
+            resolved = f.resolve()
+            if resolved not in seen:
+                seen.add(resolved)
+                unique.append(f)
+        return unique
+    raise FileNotFoundError(f"Ground-truth path not found: {p}")
+
+
+# ── Merging & duplicate detection ────────────────────────────────────────────
+
+
+def merge_truth_dicts(files: list[Path]) -> dict:
+    """
+    Load every YAML file and merge into one dict.
+
+    Raises DuplicateLocationError if a top-level location key (e.g.
+    "Power, Idaho") appears more than once — whether across different
+    files or duplicated inside the same file.
+    """
+    merged: dict = {}
+    # Track where each key was first seen for the error message
+    origin: dict[str, Path] = {}
+
+    for fpath in files:
+        raw = yaml.safe_load(fpath.read_text())
+        if raw is None:
+            continue
+        if not isinstance(raw, dict):
+            raise ValueError(
+                f"Expected a YAML mapping at the top level of {fpath}, "
+                f"got {type(raw).__name__}"
+            )
+
+        # Check for intra-file duplicates.  PyYAML silently keeps the last
+        # occurrence when a key is repeated, so we do a quick text-level
+        # scan to catch that case before it's swallowed.
+        _check_intra_file_duplicates(fpath)
+
+        for key in raw:
+            norm = _normalise_location_key(key)
+            if norm in origin:
+                raise DuplicateLocationError(
+                    f"Duplicate location '{key}' — already defined in "
+                    f"{origin[norm]}, found again in {fpath}"
+                )
+            origin[norm] = fpath
+            merged[key] = raw[key]
+
+    return merged
+
+
+def _normalise_location_key(key: str) -> str:
+    """Lowercase + strip so 'Power, Idaho' and ' power , idaho ' collide."""
+    return ", ".join(p.strip().lower() for p in key.split(","))
+
+
+def _check_intra_file_duplicates(fpath: Path) -> None:
+    """
+    Detect duplicate top-level keys inside a single YAML file.
+
+    PyYAML's safe_load silently drops all-but-the-last duplicate key,
+    so we scan the raw text for top-level keys (lines that start at
+    column 0 and end with ':') and flag repeats.
+    """
+    seen: dict[str, int] = {}
+    for lineno, line in enumerate(fpath.read_text().splitlines(), start=1):
+        stripped = line.rstrip()
+        # Skip blank lines, comments, and indented lines
+        if not stripped or stripped.startswith("#") or line[0] in (" ", "\t"):
+            continue
+        # A top-level key line looks like  `"Power, Idaho":` or `Power, Idaho:`
+        if stripped.endswith(":"):
+            raw_key = stripped[:-1].strip().strip('"').strip("'")
+            norm = _normalise_location_key(raw_key)
+            if norm in seen:
+                raise DuplicateLocationError(
+                    f"Duplicate location '{raw_key}' inside {fpath} "
+                    f"(lines {seen[norm]} and {lineno})"
+                )
+            seen[norm] = lineno
+
+
+# ── Check-spec builder (internal) ───────────────────────────────────────────
+
+
+def _build_checks(field_checks: dict) -> dict[str, dict]:
+    """
+    Convert a raw YAML feature block into a checks dict.
+
+    Each key is a field name, each value is a dict describing the match mode.
+    """
+    checks: dict[str, dict] = {}
+    for fld, spec in field_checks.items():
+        if fld not in ALL_CHECK_FIELDS:
+            continue
+        if isinstance(spec, dict) and "keywords" in spec:
+            checks[fld] = {
+                "mode": "keywords",
+                "keywords": [str(k).lower() for k in spec["keywords"]],
+            }
+        elif spec == "not_null":
+            checks[fld] = {"mode": "not_null"}
+        elif spec == "absent":
+            checks[fld] = {"mode": "absent"}
+        else:
+            checks[fld] = {"mode": "exact", "expected": str(spec).strip().lower()}
+    return checks
+
+
+# ── Main loader ──────────────────────────────────────────────────────────────
+
+
+def load_truth(path: str | Path) -> dict[str, dict]:
+    """
+    Parse ground-truth YAML(s) into a dict grouped by location.
+
+    *path* can be:
+      - a single .yaml / .yml file
+      - a directory — every .yaml / .yml underneath is collected and merged
+
+    Raises DuplicateLocationError if any location key appears more than once.
+
+    Returns a dict keyed by normalised location string::
+
+        {
+            "power, idaho": {
+                "state": "idaho",
+                "county": "power",
+                "subdivision": None,
+                "FIPS": "16077",
+                "features": {
+                    "residential buildings": {
+                        "value": {"mode": "exact", "expected": "1500"},
+                        "units": {"mode": "exact", "expected": "feet"},
+                        "summary": {"mode": "keywords", "keywords": [...]},
+                        ...
+                    },
+                    "property lines": { ... },
+                }
+            },
+            "springfield, power, idaho": {
+                "state": "idaho",
+                "county": "power",
+                "subdivision": "springfield",
+                ...
+            },
+        }
+    """
+    files = collect_truth_files(path)
+    if not files:
+        raise FileNotFoundError(f"No .yaml / .yml files found under {path}")
+
+    raw = merge_truth_dicts(files)
+    result: dict[str, dict] = {}
+
+    for location_key, loc_data in raw.items():
+        loc = parse_location_key(location_key)
+        norm_key = _normalise_location_key(location_key)
+
+        fips = loc_data.get("FIPS")
+        raw_features = loc_data.get("features", {})
+
+        parsed_features: dict[str, dict] = {}
+        for feat_name, field_checks in raw_features.items():
+            if field_checks is None:
+                continue
+            checks = _build_checks(field_checks)
+            if checks:
+                parsed_features[feat_name.lower()] = checks
+
+        result[norm_key] = {
+            **loc,
+            "FIPS": str(fips) if fips is not None else None,
+            "features": parsed_features,
+        }
+
+    return result

From ab7a123be48249e4c3bf75083b82047b5441f835 Mon Sep 17 00:00:00 2001
From: Gui Castelao <guilherme@castelao.net>
Date: Mon, 18 May 2026 16:13:32 -0600
Subject: [PATCH 20/35] feat: find_missing_features()

---
 compass/qc/core.py | 53 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/compass/qc/core.py b/compass/qc/core.py
index b5b9b86bc..fd611cc31 100644
--- a/compass/qc/core.py
+++ b/compass/qc/core.py
@@ -81,3 +81,56 @@ def match_labels(
                 )
 
         yield loc_data, loc_df
+
+
+def find_missing_features(
+    loc_data: dict,
+    lf: pl.LazyFrame,
+) -> list[str]:
+    """
+    Find features declared in truth but absent from the run.
+
+    Compares the feature names listed in *loc_data* against
+    the distinct ``feature`` values present in *lf*.  Any
+    feature that appears in the truth but has no matching
+    row in the run is considered missing.
+
+    Designed to compose with :func:`match_labels`::
+
+        for loc_data, loc_df in match_labels(truth, run_lf):
+            missing = find_missing_features(
+                loc_data, loc_df.lazy(),
+            )
+
+    Parameters
+    ----------
+    loc_data : dict
+        Truth dict for a single location, as yielded by
+        :func:`match_labels`.  Must contain a ``features``
+        key mapping feature names to check specs (which
+        may be empty dicts for presence-only features).
+    lf : pl.LazyFrame
+        Lazy representation of the run rows already scoped
+        to this location.
+
+    Returns
+    -------
+    list[str]
+        Feature names present in the truth but not found
+        in the run, in the order they appear in
+        ``loc_data["features"]``.  Empty list when all
+        features are present.
+    """
+    expected = set(loc_data.get("features", {}).keys())
+    if not expected:
+        return []
+
+    present = set(
+        lf.select("feature")
+        .unique()
+        .collect()
+        .get_column("feature")
+        .to_list()
+    )
+
+    return [f for f in loc_data["features"] if f not in present]

From 60d58614c7d9b8feb0fb28afb80736559c7fea00 Mon Sep 17 00:00:00 2001
From: Gui Castelao <guilherme@castelao.net>
Date: Tue, 19 May 2026 07:14:11 -0600
Subject: [PATCH 21/35] refact: Renaming store.py to reference.py

---
 compass/qc/{store.py => reference.py} | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)
 rename compass/qc/{store.py => reference.py} (92%)

diff --git a/compass/qc/store.py b/compass/qc/reference.py
similarity index 92%
rename from compass/qc/store.py
rename to compass/qc/reference.py
index f7b31c121..dc6f0a11b 100644
--- a/compass/qc/store.py
+++ b/compass/qc/reference.py
@@ -1,5 +1,5 @@
 """
-truth_store.py — Load, merge, and validate ground-truth YAML files.
+reference.py — Load, merge, and validate reference YAML files.
 
 Handles single files, directories (recursive), and duplicate detection
 both within and across files.
@@ -9,7 +9,7 @@
   County level  : "County, State"                → e.g. "Power, Idaho"
   Township level: "Subdivision, County, State"    → e.g. "Springfield, Power, Idaho"
 
-load_truth() returns a dict grouped by location rather than a flat list,
+load_reference() returns a dict grouped by location rather than a flat list,
 so downstream code can process and summarise per-location.
 """
 
@@ -20,7 +20,7 @@
 import yaml
 
 # ── Field definitions ────────────────────────────────────────────────────────
-# These define which CSV columns the ground-truth format understands.
+# These define which CSV columns the reference format understands.
 
 # Columns where exact string match is the natural comparison
 EXACT_FIELDS = ["value", "units", "adder", "min_dist", "max_dist", "year"]
@@ -88,7 +88,7 @@ def location_label(loc: dict[str, str | None]) -> str:
 # ── File collection ──────────────────────────────────────────────────────────
 
 
-def collect_truth_files(path: str | Path) -> list[Path]:
+def collect_reference_files(path: str | Path) -> list[Path]:
     """
     Walk *path* and return every .yaml / .yml file found.
 
@@ -110,13 +110,13 @@ def collect_truth_files(path: str | Path) -> list[Path]:
                 seen.add(resolved)
                 unique.append(f)
         return unique
-    raise FileNotFoundError(f"Ground-truth path not found: {p}")
+    raise FileNotFoundError(f"Reference path not found: {p}")
 
 
 # ── Merging & duplicate detection ────────────────────────────────────────────
 
 
-def merge_truth_dicts(files: list[Path]) -> dict:
+def merge_reference_dicts(files: list[Path]) -> dict:
     """
     Load every YAML file and merge into one dict.
 
@@ -217,9 +217,9 @@ def _build_checks(field_checks: dict) -> dict[str, dict]:
 # ── Main loader ──────────────────────────────────────────────────────────────
 
 
-def load_truth(path: str | Path) -> dict[str, dict]:
+def load_reference(path: str | Path) -> dict[str, dict]:
     """
-    Parse ground-truth YAML(s) into a dict grouped by location.
+    Parse reference YAML(s) into a dict grouped by location.
 
     *path* can be:
       - a single .yaml / .yml file
@@ -253,11 +253,11 @@ def load_truth(path: str | Path) -> dict[str, dict]:
             },
         }
     """
-    files = collect_truth_files(path)
+    files = collect_reference_files(path)
     if not files:
         raise FileNotFoundError(f"No .yaml / .yml files found under {path}")
 
-    raw = merge_truth_dicts(files)
+    raw = merge_reference_dicts(files)
     result: dict[str, dict] = {}
 
     for location_key, loc_data in raw.items():
@@ -270,10 +270,12 @@ def load_truth(path: str | Path) -> dict[str, dict]:
         parsed_features: dict[str, dict] = {}
         for feat_name, field_checks in raw_features.items():
             if field_checks is None:
+                # Feature listed with no arguments — still
+                # track it so presence checks can catch it.
+                parsed_features[feat_name.lower()] = {}
                 continue
             checks = _build_checks(field_checks)
-            if checks:
-                parsed_features[feat_name.lower()] = checks
+            parsed_features[feat_name.lower()] = checks
 
         result[norm_key] = {
             **loc,

From db133f881d22e1a06d746de332c8fa0d53026539 Mon Sep 17 00:00:00 2001
From: Gui Castelao <guilherme@castelao.net>
Date: Tue, 19 May 2026 09:00:12 -0600
Subject: [PATCH 22/35] refact: Using 'reference' instead of 'truth'

---
 compass/qc/core.py | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/compass/qc/core.py b/compass/qc/core.py
index fd611cc31..5b24ebee7 100644
--- a/compass/qc/core.py
+++ b/compass/qc/core.py
@@ -7,29 +7,29 @@
 
 import polars as pl
 
-from store import location_label
+from reference import location_label
 
 logger = logging.getLogger(__name__)
 
 
 def match_labels(
-    truth: dict[str, dict],
+    ref: dict[str, dict],
     lf: pl.LazyFrame,
 ) -> Generator[tuple[dict, pl.DataFrame], None, None]:
     """Pair ground-truth locations with matching run rows
 
-    Iterates over each location in *truth*, builds a
+    Iterates over each location in *ref*, builds a
     geographic filter (state, county, and subdivision when
     defined), applies it to *lf*, and collects the result.
-    When the truth entry declares a FIPS code, the matched
+    When the reference entry declares a FIPS code, the matched
     rows are checked for agreement; mismatches are reported
     via ``logging.error``.
 
     Parameters
     ----------
-    truth : dict[str, dict]
-        Ground-truth dict as returned by
-        ``store.load_truth()``, keyed by normalised
+    ref : dict[str, dict]
+        Reference dict as returned by
+        ``reference.load_reference()``, keyed by normalised
         location string.
     lf : pl.LazyFrame
         Lazy representation of a run CSV, as produced by
@@ -39,10 +39,10 @@ def match_labels(
     ------
     tuple[dict, pl.DataFrame]
         A pair ``(loc_data, loc_df)`` for each location in
-        *truth*:
+        *ref*:
 
         loc_data
-            The truth dict for one location, containing
+            The reference dict for one location, containing
             state, county, subdivision, FIPS, and features
             with their check specs.
         loc_df
@@ -51,7 +51,7 @@ def match_labels(
             empty when the run has no data for that
             location.
     """
-    for _loc_key, loc_data in truth.items():
+    for _loc_key, loc_data in ref.items():
         mask = (
             (pl.col("county") == loc_data["county"])
             & (pl.col("state") == loc_data["state"])
@@ -75,7 +75,7 @@ def match_labels(
             if mismatched:
                 loc_lbl = location_label(loc_data)
                 logger.error(
-                    "FIPS mismatch for %s: truth declares %s, "
+                    "FIPS mismatch for %s: reference declares %s, "
                     "run contains %s",
                     loc_lbl, expected_fips, mismatched,
                 )
@@ -88,16 +88,16 @@ def find_missing_features(
     lf: pl.LazyFrame,
 ) -> list[str]:
     """
-    Find features declared in truth but absent from the run.
+    Find features declared in the reference but absent from the run.
 
     Compares the feature names listed in *loc_data* against
     the distinct ``feature`` values present in *lf*.  Any
-    feature that appears in the truth but has no matching
+    feature that appears in the reference but has no matching
     row in the run is considered missing.
 
     Designed to compose with :func:`match_labels`::
 
-        for loc_data, loc_df in match_labels(truth, run_lf):
+        for loc_data, loc_df in match_labels(ref, run_lf):
             missing = find_missing_features(
                 loc_data, loc_df.lazy(),
             )
@@ -116,7 +116,7 @@ def find_missing_features(
     Returns
     -------
     list[str]
-        Feature names present in the truth but not found
+        Feature names present in the reference but not found
         in the run, in the order they appear in
         ``loc_data["features"]``.  Empty list when all
         features are present.

From b28d495378400be82fe2548ec3292081446fc7e7 Mon Sep 17 00:00:00 2001
From: Gui Castelao <guilherme@castelao.net>
Date: Tue, 19 May 2026 10:00:34 -0600
Subject: [PATCH 23/35] feat: extract_locations + find_missing_locations

---
 compass/qc/core.py | 95 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 94 insertions(+), 1 deletion(-)

diff --git a/compass/qc/core.py b/compass/qc/core.py
index 5b24ebee7..ee9e22dc5 100644
--- a/compass/qc/core.py
+++ b/compass/qc/core.py
@@ -1,9 +1,12 @@
-"""Core functionalities to validate CSV outputs with manual labels"""
+"""
+core.py — Composable building blocks for matching and validation
+"""
 
 from __future__ import annotations
 
 import logging
 from collections.abc import Generator
+from dataclasses import dataclass
 
 import polars as pl
 
@@ -12,6 +15,96 @@
 logger = logging.getLogger(__name__)
 
 
+# ── Data types ───────────────────────────────────────────────────────
+
+
+@dataclass
+class CheckResult:
+    """Outcome of a single field-level check"""
+
+    field: str
+    mode: str
+    passed: bool
+    expected: str
+    actual: str
+    detail: str = ""
+
+
+# ── Location-level functions ─────────────────────────────────────────
+
+
+def extract_locations(
+    lf: pl.LazyFrame,
+) -> set[tuple[str, str, str | None]]:
+    """
+    Collect distinct locations from a LazyFrame
+
+    Each location is a tuple of ``(state, county, subdivision)``
+    where *subdivision* is ``None`` for county-level records.
+
+    Parameters
+    ----------
+    lf : pl.LazyFrame
+        Lazy representation of run data containing at least
+        the columns ``state``, ``county``, and
+        ``subdivision``.
+
+    Returns
+    -------
+    set[tuple[str, str, str | None]]
+        Unique location tuples found in the data.
+    """
+    rows = (
+        lf.select("state", "county", "subdivision")
+        .unique()
+        .collect()
+        .iter_rows()
+    )
+    return {(state, county, subdiv) for state, county, subdiv in rows}
+
+
+def find_missing_locations(
+    ref: dict[str, dict],
+    lf: pl.LazyFrame,
+) -> list[dict]:
+    """
+    Find reference locations absent from the target
+
+    Compares the locations declared in *ref* against the
+    distinct locations present in *lf*.  Returns the
+    reference entries whose geographic key (state, county,
+    subdivision) has no matching rows in the target.
+
+    Parameters
+    ----------
+    ref : dict[str, dict]
+        Reference dict as returned by
+        ``reference.load_reference()``.
+    lf : pl.LazyFrame
+        Lazy representation of the target run data.
+
+    Returns
+    -------
+    list[dict]
+        The ``loc_data`` dicts for each reference location
+        not found in the target, in the order they appear
+        in *ref*.  Empty list when all reference locations
+        are present.
+    """
+    target_locs = extract_locations(lf)
+
+    missing = []
+    for _loc_key, loc_data in ref.items():
+        loc_tuple = (
+            loc_data["state"],
+            loc_data["county"],
+            loc_data.get("subdivision"),
+        )
+        if loc_tuple not in target_locs:
+            missing.append(loc_data)
+    return missing
+
+
 def match_labels(
     ref: dict[str, dict],
     lf: pl.LazyFrame,

From c586fbe32b7a4cc2b9db45fd9fde52030791bfd2 Mon Sep 17 00:00:00 2001
From: Gui Castelao <guilherme@castelao.net>
Date: Tue, 19 May 2026 11:39:39 -0600
Subject: [PATCH 24/35] wip: CLI definition

---
 compass/qc/schema_eval.py | 513 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 513 insertions(+)
 create mode 100644 compass/qc/schema_eval.py

diff --git a/compass/qc/schema_eval.py b/compass/qc/schema_eval.py
new file mode 100644
index 000000000..94b847701
--- /dev/null
+++ b/compass/qc/schema_eval.py
@@ -0,0 +1,513 @@
+"""
+schema_eval.py — CLI entry point for extraction evaluation
+
+Subcommands
+-----------
+  init      Scaffold a reference YAML from an existing CSV run.
+  validate  Score one CSV run against reference.
+  compare   Diff two CSV runs; optionally score both against reference.
+
+Examples
+--------
+  python schema_eval.py init run1.csv -o reference.yaml
+  python schema_eval.py validate run1.csv -t reference.yaml
+  python schema_eval.py compare run1.csv run2.csv -t reference.yaml
+"""
+
+from __future__ import annotations
+
+import argparse
+from pathlib import Path
+from typing import Any
+
+import polars as pl
+
+from core import (
+    CheckResult,
+    extract_locations,
+    find_feature_row,
+    find_missing_features,
+    find_missing_locations,
+    match_labels,
+    run_checks,
+    score_run,
+    validate_formated,
+)
+from reference import (
+    ALL_CHECK_FIELDS,
+    EXACT_FIELDS,
+    TEXT_FIELDS,
+    load_reference,
+    location_label,
+)
+
+# ── Constants ────────────────────────────────────────────────────────
+
+KEY_COLS = ["county", "state", "subdivision", "feature"]
+
+# ── ANSI helpers ─────────────────────────────────────────────────────
+
+
+class C:
+    """Tiny ANSI colour helpers"""
+
+    GREEN  = "\033[92m"
+    RED    = "\033[91m"
+    YELLOW = "\033[93m"
+    CYAN   = "\033[96m"
+    BOLD   = "\033[1m"
+    DIM    = "\033[2m"
+    RESET  = "\033[0m"
+
+    @staticmethod
+    def ok(s: str)   -> str: return f"{C.GREEN}{s}{C.RESET}"
+    @staticmethod
+    def fail(s: str) -> str: return f"{C.RED}{s}{C.RESET}"
+    @staticmethod
+    def warn(s: str) -> str: return f"{C.YELLOW}{s}{C.RESET}"
+    @staticmethod
+    def bold(s: str) -> str: return f"{C.BOLD}{s}{C.RESET}"
+
+
+# ── Data loading ─────────────────────────────────────────────────────
+
+
+def load_run(path: str | Path) -> pl.DataFrame:
+    """Read a CSV run, normalise key columns to stripped lowercase"""
+    df = pl.read_csv(path, infer_schema_length=0)
+    if "subdivision" not in df.columns:
+        df = df.with_columns(
+            pl.lit(None).cast(pl.Utf8).alias("subdivision")
+        )
+    if "fips" in df.columns and "FIPS" not in df.columns:
+        df = df.rename({"fips": "FIPS"})
+    df = df.with_columns(
+        pl.col(c).str.strip_chars().str.to_lowercase()
+        for c in KEY_COLS if c in df.columns
+    )
+    for c in df.columns:
+        df = df.with_columns(
+            pl.when(pl.col(c).str.strip_chars() == "")
+            .then(None)
+            .otherwise(pl.col(c).str.strip_chars())
+            .alias(c)
+        )
+    return df
+
+
+# ── Formatting helpers ───────────────────────────────────────────────
+
+
+def _truncate(s: str | None, n: int) -> str:
+    """Shorten a string for display"""
+    if s is None:
+        return "(null)"
+    return s[:n] + "…" if len(s) > n else s
+
+
+def _pct_color(pct: float) -> callable:
+    """Pick a colour function based on percentage thresholds"""
+    if pct >= 90:
+        return C.ok
+    return C.warn if pct >= 70 else C.fail
+
+
+def _sortable_key(t: tuple) -> tuple:
+    """Replace None with '' so tuples are sortable"""
+    return tuple(v if v is not None else "" for v in t)
+
+
+# ── Validate subcommand ──────────────────────────────────────────────
+
+
+def cmd_validate(
+    run_path: str,
+    ref_path: str,
+    verbose: bool = False,
+    output_format: str = "text",
+):
+    df = load_run(run_path)
+    ref = load_reference(ref_path)
+
+    return validate_formated(
+        ref,
+        df.lazy(),
+        run_path,
+        ref_path,
+        output_format=output_format,
+        verbose=verbose,
+        style=C,
+    )
+
+
+# ── Compare subcommand ───────────────────────────────────────────────
+
+
+def cmd_compare(
+    run_a_path: str,
+    run_b_path: str,
+    ref_path: str | None = None,
+    verbose: bool = False,
+):
+    df_a = load_run(run_a_path)
+    df_b = load_run(run_b_path)
+
+    label_a = Path(run_a_path).stem
+    label_b = Path(run_b_path).stem
+
+    print(C.bold(f"\n{'='*70}"))
+    print(C.bold(f"  Comparison: {label_a}  vs  {label_b}"))
+    print(C.bold(f"{'='*70}\n"))
+
+    # Build key sets — tuples of (county, state, subdivision, feature)
+    def key_set(df: pl.DataFrame) -> set[tuple]:
+        return set(df.select(KEY_COLS).unique().iter_rows())
+
+    keys_a = key_set(df_a)
+    keys_b = key_set(df_b)
+
+    only_a = keys_a - keys_b
+    only_b = keys_b - keys_a
+    common = keys_a & keys_b
+
+    # ── Row-presence diff ────────────────────────────────────────
+    if only_a or only_b:
+        print(C.bold("  Row presence changes:"))
+        if only_a:
+            print(
+                f"\n  {C.fail(f'Removed in {label_b}')}"
+                f" ({len(only_a)}):"
+            )
+            for c, s, sd, f in sorted(only_a, key=_sortable_key):
+                loc = location_label(
+                    {"county": c, "state": s, "subdivision": sd}
+                )
+                print(f"    − {loc} → {f}")
+        if only_b:
+            print(
+                f"\n  {C.ok(f'Added in {label_b}')}"
+                f" ({len(only_b)}):"
+            )
+            for c, s, sd, f in sorted(only_b, key=_sortable_key):
+                loc = location_label(
+                    {"county": c, "state": s, "subdivision": sd}
+                )
+                print(f"    + {loc} → {f}")
+        print()
+
+    # ── Field-level diff on shared rows ──────────────────────────
+    compare_fields = [
+        f for f in ALL_CHECK_FIELDS
+        if f in df_a.columns and f in df_b.columns
+    ]
+    n_changed = 0
+    n_unchanged = 0
+
+    for key in sorted(common, key=_sortable_key):
+        county, state, subdiv, feature = key
+
+        def _filter(df, c, s, sd, f):
+            mask = (
+                (pl.col("county") == c)
+                & (pl.col("state") == s)
+                & (pl.col("feature") == f)
+            )
+            if sd:
+                mask &= pl.col("subdivision") == sd
+            else:
+                mask &= pl.col("subdivision").is_null()
+            return df.filter(mask)
+
+        row_a = _filter(
+            df_a, county, state, subdiv, feature
+        ).row(0, named=True)
+        row_b = _filter(
+            df_b, county, state, subdiv, feature
+        ).row(0, named=True)
+
+        diffs: list[tuple[str, str | None, str | None]] = []
+        for fld in compare_fields:
+            va = row_a.get(fld)
+            vb = row_b.get(fld)
+            na = va.strip().lower() if va else None
+            nb = vb.strip().lower() if vb else None
+            if na != nb:
+                diffs.append((fld, va, vb))
+
+        loc = location_label(
+            {"county": county, "state": state, "subdivision": subdiv}
+        )
+        label = f"{loc} → {feature}"
+
+        if diffs:
+            n_changed += 1
+            print(f"  {C.warn('CHANGED')}  {label}")
+            for fld, va, vb in diffs:
+                va_d = _truncate(va, 40) if va else "(null)"
+                vb_d = _truncate(vb, 40) if vb else "(null)"
+                print(
+                    f"           {C.DIM}├─{C.RESET} {fld}:"
+                    f" {C.fail(va_d)} → {C.ok(vb_d)}"
+                )
+        elif verbose:
+            n_unchanged += 1
+            print(f"  {C.DIM}SAME{C.RESET}     {label}")
+        else:
+            n_unchanged += 1
+
+    # ── Comparison summary ───────────────────────────────────────
+    print(C.bold(f"\n{'─'*70}"))
+    print(C.bold("  Comparison summary"))
+    print(f"{'─'*70}")
+    print(f"  Rows only in {label_a}: {len(only_a)}")
+    print(f"  Rows only in {label_b}: {len(only_b)}")
+    print(f"  Shared rows, changed : {C.warn(str(n_changed))}")
+    print(f"  Shared rows, same    : {n_unchanged}")
+    print()
+
+    # ── Optional: score both against reference ───────────────────
+    if ref_path:
+        _print_ref_scoring(
+            ref_path, df_a, df_b, label_a, label_b,
+        )
+
+
+def _print_ref_scoring(ref_path, df_a, df_b, label_a, label_b):
+    """Score both runs against reference and show divergences"""
+    ref = load_reference(ref_path)
+
+    print(C.bold(f"{'─'*70}"))
+    print(C.bold("  Reference scoring"))
+    print(f"{'─'*70}\n")
+
+    for label, df in [(label_a, df_a), (label_b, df_b)]:
+        passed, total = score_run(ref, df.lazy())
+        pct = (passed / total * 100) if total else 0
+        clr = _pct_color(pct)
+        print(f"  {label:.<40s} {clr(f'{passed}/{total}')} ({pct:.1f}%)")
+
+    print()
+
+    divergences = _find_divergences(ref, df_a, df_b)
+    if divergences:
+        print(f"  {C.bold('Divergent reference results')}:\n")
+        for d in divergences:
+            print(f"    {d['location']}  ·  {d['field']}")
+            sa = C.ok("✓") if d["a_pass"] else C.fail("✗")
+            sb = C.ok("✓") if d["b_pass"] else C.fail("✗")
+            print(
+                f"      {label_a}: {sa}  {label_b}: {sb}"
+                f"  — {d['detail']}"
+            )
+        print()
+
+
+def _find_divergences(
+    ref: dict[str, dict],
+    df_a: pl.DataFrame,
+    df_b: pl.DataFrame,
+) -> list[dict]:
+    """Find checks where two runs disagree against the reference"""
+    slices_a = {
+        location_label(loc_data): loc_df
+        for loc_data, loc_df in match_labels(ref, df_a.lazy())
+    }
+
+    divs = []
+    for loc_data, loc_df_b in match_labels(ref, df_b.lazy()):
+        loc_lbl = location_label(loc_data)
+        loc_df_a = slices_a.get(loc_lbl, pl.DataFrame())
+
+        for feat_name, checks in loc_data["features"].items():
+            feat_label = f"{loc_lbl} → {feat_name}"
+            row_a = find_feature_row(loc_df_a, feat_name)
+            row_b = find_feature_row(loc_df_b, feat_name)
+
+            for fld, check in checks.items():
+                res_a = (
+                    run_checks(row_a, {fld: check}) if row_a
+                    else [CheckResult(
+                        fld, check["mode"], False,
+                        "", "(missing)", "row missing",
+                    )]
+                )
+                res_b = (
+                    run_checks(row_b, {fld: check}) if row_b
+                    else [CheckResult(
+                        fld, check["mode"], False,
+                        "", "(missing)", "row missing",
+                    )]
+                )
+                if res_a[0].passed != res_b[0].passed:
+                    divs.append({
+                        "location": feat_label,
+                        "field": fld,
+                        "a_pass": res_a[0].passed,
+                        "b_pass": res_b[0].passed,
+                        "detail": (
+                            f"A: {res_a[0].actual[:50]}"
+                            f"  B: {res_b[0].actual[:50]}"
+                        ),
+                    })
+    return divs
+
+
+# ── Init subcommand ──────────────────────────────────────────────────
+
+
+def cmd_init(run_path: str, output_path: str):
+    """Generate a reference YAML template from an existing CSV run"""
+    df = load_run(run_path)
+
+    grouped: dict[str, dict[str, Any]] = {}
+    for row in df.iter_rows(named=True):
+        county  = row["county"] or "unknown"
+        state   = row["state"]  or "unknown"
+        subdiv  = row.get("subdivision")
+        feature = row["feature"] or "unknown"
+        fips    = row.get("FIPS", "")
+
+        loc_key = location_label({
+            "county": county, "state": state,
+            "subdivision": subdiv,
+        })
+
+        if loc_key not in grouped:
+            grouped[loc_key] = {"FIPS": fips, "features": {}}
+
+        feat_entry: dict[str, Any] = {}
+        for fld in EXACT_FIELDS:
+            v = row.get(fld)
+            if v:
+                feat_entry[fld] = v
+        for fld in TEXT_FIELDS:
+            v = row.get(fld)
+            if v:
+                feat_entry[fld] = "not_null"
+
+        grouped[loc_key]["features"][feature] = (
+            feat_entry if feat_entry else None
+        )
+
+    out = Path(output_path)
+    lines = [
+        "# Reference template — generated from: "
+        + Path(run_path).name,
+        "# Review each entry and adjust match modes:",
+        '#   exact value  →  value: "1500"',
+        "#   keywords     →  summary:",
+        "#                     keywords: [word1, word2]",
+        "#   not_null     →  section: not_null",
+        "#   absent       →  adder: absent",
+        "#   remove line  →  field won't be checked",
+        "#",
+        "# Location keys:",
+        '#   County level   →  "County, State"',
+        '#   Township level →  "Subdivision, County, State"',
+        "",
+    ]
+
+    for loc_key in sorted(grouped):
+        data = grouped[loc_key]
+        lines.append(f'"{loc_key}":')
+        if data["FIPS"]:
+            lines.append(f'  FIPS: "{data["FIPS"]}"')
+        lines.append("  features:")
+        for feat_name in sorted(data["features"]):
+            lines.append("")
+            lines.append(f"    {feat_name}:")
+            feat = data["features"][feat_name]
+            if feat is None:
+                lines.append("      # (no fields extracted)")
+                continue
+            for fld, val in feat.items():
+                if val == "not_null":
+                    lines.append(f"      {fld}: not_null")
+                else:
+                    lines.append(f'      {fld}: "{val}"')
+        lines.append("")
+
+    out.write_text("\n".join(lines))
+    print(f"\n  {C.ok('✓')} Template written to {C.bold(str(out))}")
+    print(
+        f"  {C.DIM}Edit the file to set expected values"
+        f" and match modes.{C.RESET}\n"
+    )
+
+
+# ── CLI ──────────────────────────────────────────────────────────────
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description=(
+            "Evaluate and compare LLM extraction runs "
+            "against reference."
+        ),
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog=__doc__,
+    )
+    sub = parser.add_subparsers(dest="command", required=True)
+
+    p_init = sub.add_parser(
+        "init",
+        help="Scaffold reference YAML from a CSV run",
+    )
+    p_init.add_argument("run", help="Path to the CSV run file")
+    p_init.add_argument(
+        "-o", "--output",
+        default="ground_truth.yaml", help="Output YAML path",
+    )
+
+    p_val = sub.add_parser(
+        "validate",
+        help="Validate a CSV run against reference",
+    )
+    p_val.add_argument("run", help="Path to the CSV run file")
+    p_val.add_argument(
+        "-t", "--ref", required=True,
+        help="Path to reference YAML file or directory",
+    )
+    p_val.add_argument(
+        "-v", "--verbose", action="store_true",
+        help="Show passing checks too",
+    )
+    p_val.add_argument(
+        "-f", "--format",
+        choices=["text", "json"],
+        default="text",
+        help="Output format for validation report",
+    )
+
+    p_cmp = sub.add_parser(
+        "compare", help="Compare two CSV runs",
+    )
+    p_cmp.add_argument(
+        "run_a", help="Path to the first (baseline) CSV run",
+    )
+    p_cmp.add_argument(
+        "run_b", help="Path to the second (new) CSV run",
+    )
+    p_cmp.add_argument(
+        "-t", "--ref", default=None,
+        help="Optional reference YAML file or directory",
+    )
+    p_cmp.add_argument(
+        "-v", "--verbose", action="store_true",
+        help="Show unchanged rows too",
+    )
+
+    args = parser.parse_args()
+
+    if args.command == "init":
+        cmd_init(args.run, args.output)
+    elif args.command == "validate":
+        print(cmd_validate(
+            args.run, args.ref, args.verbose, args.format
+        ))
+    elif args.command == "compare":
+        cmd_compare(args.run_a, args.run_b, args.ref, args.verbose)
+
+
+if __name__ == "__main__":
+    main()

From 05c7ce3ca5e959d7bc2ca1794ba6301fe1450c8e Mon Sep 17 00:00:00 2001
From: Gui Castelao <guilherme@castelao.net>
Date: Tue, 19 May 2026 11:48:37 -0600
Subject: [PATCH 25/35] refact: Moving load_run to core.py

---
 compass/qc/schema_eval.py | 56 +++++++++++++++++----------------------
 1 file changed, 25 insertions(+), 31 deletions(-)

diff --git a/compass/qc/schema_eval.py b/compass/qc/schema_eval.py
index 94b847701..1c0f4d9f2 100644
--- a/compass/qc/schema_eval.py
+++ b/compass/qc/schema_eval.py
@@ -28,6 +28,7 @@
     find_feature_row,
     find_missing_features,
     find_missing_locations,
+    load_run,
     match_labels,
     run_checks,
     score_run,
@@ -69,32 +70,6 @@ def warn(s: str) -> str: return f"{C.YELLOW}{s}{C.RESET}"
     def bold(s: str) -> str: return f"{C.BOLD}{s}{C.RESET}"
 
 
-# ── Data loading ─────────────────────────────────────────────────────
-
-
-def load_run(path: str | Path) -> pl.DataFrame:
-    """Read a CSV run, normalise key columns to stripped lowercase"""
-    df = pl.read_csv(path, infer_schema_length=0)
-    if "subdivision" not in df.columns:
-        df = df.with_columns(
-            pl.lit(None).cast(pl.Utf8).alias("subdivision")
-        )
-    if "fips" in df.columns and "FIPS" not in df.columns:
-        df = df.rename({"fips": "FIPS"})
-    df = df.with_columns(
-        pl.col(c).str.strip_chars().str.to_lowercase()
-        for c in KEY_COLS if c in df.columns
-    )
-    for c in df.columns:
-        df = df.with_columns(
-            pl.when(pl.col(c).str.strip_chars() == "")
-            .then(None)
-            .otherwise(pl.col(c).str.strip_chars())
-            .alias(c)
-        )
-    return df
-
-
 # ── Formatting helpers ───────────────────────────────────────────────
 
 
@@ -126,12 +101,31 @@ def cmd_validate(
     verbose: bool = False,
     output_format: str = "text",
 ):
-    df = load_run(run_path)
+    """Validate a run against reference and return formatted output
+
+    Parameters
+    ----------
+    run_path : str
+        Path to the CSV run file to validate.
+    ref_path : str
+        Path to the reference YAML file or directory.
+    verbose : bool, default=False
+        Include passing checks in text output. By default, False.
+    output_format : str, default="text"
+        Output format to render. Supported values are ``"text"``
+        and ``"json"``. By default, text.
+
+    Returns
+    -------
+    str
+        Rendered validation report as text or JSON string.
+    """
+    lf = load_run(run_path)
     ref = load_reference(ref_path)
 
     return validate_formated(
         ref,
-        df.lazy(),
+        lf,
         run_path,
         ref_path,
         output_format=output_format,
@@ -149,8 +143,8 @@ def cmd_compare(
     ref_path: str | None = None,
     verbose: bool = False,
 ):
-    df_a = load_run(run_a_path)
-    df_b = load_run(run_b_path)
+    df_a = load_run(run_a_path).collect()
+    df_b = load_run(run_b_path).collect()
 
     label_a = Path(run_a_path).stem
     label_b = Path(run_b_path).stem
@@ -357,7 +351,7 @@ def _find_divergences(
 
 def cmd_init(run_path: str, output_path: str):
     """Generate a reference YAML template from an existing CSV run"""
-    df = load_run(run_path)
+    df = load_run(run_path).collect()
 
     grouped: dict[str, dict[str, Any]] = {}
     for row in df.iter_rows(named=True):

From 7ba8a052c1051d1fd2998410e8cfc198501a22eb Mon Sep 17 00:00:00 2001
From: Gui Castelao <guilherme@castelao.net>
Date: Tue, 19 May 2026 14:49:17 -0600
Subject: [PATCH 26/35] refact: Moving to Click instead of Argparse

---
 compass/qc/schema_eval.py | 152 +++++++++++++++++++++-----------------
 1 file changed, 83 insertions(+), 69 deletions(-)

diff --git a/compass/qc/schema_eval.py b/compass/qc/schema_eval.py
index 1c0f4d9f2..3ee7549c7 100644
--- a/compass/qc/schema_eval.py
+++ b/compass/qc/schema_eval.py
@@ -16,10 +16,10 @@
 
 from __future__ import annotations
 
-import argparse
 from pathlib import Path
 from typing import Any
 
+import click
 import polars as pl
 
 from core import (
@@ -432,75 +432,89 @@ def cmd_init(run_path: str, output_path: str):
 # ── CLI ──────────────────────────────────────────────────────────────
 
 
+@click.group(
+    epilog=__doc__,
+    context_settings={"help_option_names": ["-h", "--help"]},
+)
 def main():
-    parser = argparse.ArgumentParser(
-        description=(
-            "Evaluate and compare LLM extraction runs "
-            "against reference."
-        ),
-        formatter_class=argparse.RawDescriptionHelpFormatter,
-        epilog=__doc__,
-    )
-    sub = parser.add_subparsers(dest="command", required=True)
-
-    p_init = sub.add_parser(
-        "init",
-        help="Scaffold reference YAML from a CSV run",
-    )
-    p_init.add_argument("run", help="Path to the CSV run file")
-    p_init.add_argument(
-        "-o", "--output",
-        default="ground_truth.yaml", help="Output YAML path",
-    )
-
-    p_val = sub.add_parser(
-        "validate",
-        help="Validate a CSV run against reference",
-    )
-    p_val.add_argument("run", help="Path to the CSV run file")
-    p_val.add_argument(
-        "-t", "--ref", required=True,
-        help="Path to reference YAML file or directory",
-    )
-    p_val.add_argument(
-        "-v", "--verbose", action="store_true",
-        help="Show passing checks too",
-    )
-    p_val.add_argument(
-        "-f", "--format",
-        choices=["text", "json"],
-        default="text",
-        help="Output format for validation report",
-    )
-
-    p_cmp = sub.add_parser(
-        "compare", help="Compare two CSV runs",
-    )
-    p_cmp.add_argument(
-        "run_a", help="Path to the first (baseline) CSV run",
-    )
-    p_cmp.add_argument(
-        "run_b", help="Path to the second (new) CSV run",
-    )
-    p_cmp.add_argument(
-        "-t", "--ref", default=None,
-        help="Optional reference YAML file or directory",
-    )
-    p_cmp.add_argument(
-        "-v", "--verbose", action="store_true",
-        help="Show unchanged rows too",
-    )
-
-    args = parser.parse_args()
-
-    if args.command == "init":
-        cmd_init(args.run, args.output)
-    elif args.command == "validate":
-        print(cmd_validate(
-            args.run, args.ref, args.verbose, args.format
-        ))
-    elif args.command == "compare":
-        cmd_compare(args.run_a, args.run_b, args.ref, args.verbose)
+    """Evaluate and compare LLM extraction runs against reference"""
+
+
+@main.command("init")
+@click.argument("run")
+@click.option(
+    "-o",
+    "--output",
+    "output_path",
+    default="ground_truth.yaml",
+    show_default=True,
+    help="Output YAML path",
+)
+def init_command(run: str, output_path: str):
+    """Scaffold reference YAML from a CSV run"""
+    cmd_init(run, output_path)
+
+
+@main.command("validate")
+@click.argument("run")
+@click.option(
+    "-t",
+    "--ref",
+    "ref_path",
+    required=True,
+    help="Path to reference YAML file or directory",
+)
+@click.option(
+    "-v",
+    "--verbose",
+    is_flag=True,
+    default=False,
+    help="Show passing checks too",
+)
+@click.option(
+    "-f",
+    "--format",
+    "output_format",
+    type=click.Choice(["text", "json"]),
+    default="text",
+    show_default=True,
+    help="Output format for validation report",
+)
+def validate_command(
+    run: str,
+    ref_path: str,
+    verbose: bool,
+    output_format: str,
+):
+    """Validate a CSV run against reference"""
+    print(cmd_validate(run, ref_path, verbose, output_format))
+
+
+@main.command("compare")
+@click.argument("run_a")
+@click.argument("run_b")
+@click.option(
+    "-t",
+    "--ref",
+    "ref_path",
+    default=None,
+    help="Optional reference YAML file or directory",
+)
+@click.option(
+    "-v",
+    "--verbose",
+    is_flag=True,
+    default=False,
+    help="Show unchanged rows too",
+)
+def compare_command(
+    run_a: str,
+    run_b: str,
+    ref_path: str | None,
+    verbose: bool,
+):
+    """Compare two CSV runs"""
+    cmd_compare(run_a, run_b, ref_path, verbose)
 
 
 if __name__ == "__main__":

From da7817a9182b521b5799360aa3e08c3201e7de22 Mon Sep 17 00:00:00 2001
From: Gui Castelao <guilherme@castelao.net>
Date: Mon, 25 May 2026 19:09:17 -0600
Subject: [PATCH 27/35] Initializing transmission plugin config

---
 .../transmission/transmission_plugin_config.yaml  | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 compass/extraction/transmission/transmission_plugin_config.yaml

diff --git a/compass/extraction/transmission/transmission_plugin_config.yaml b/compass/extraction/transmission/transmission_plugin_config.yaml
new file mode 100644
index 000000000..8e272db22
--- /dev/null
+++ b/compass/extraction/transmission/transmission_plugin_config.yaml
@@ -0,0 +1,15 @@
+schema: ./transmission_schema.json5
+
+data_type_short_desc: high-voltage transmission lines ordinance
+
+query_templates:
+  - "filetype:pdf {jurisdiction} electric transmission line ordinance"
+
+website_keywords:
+  pdf: 92160
+
+heuristic_keywords: true
+
+collection_prompts: true
+
+cache_llm_generated_content: true

From 17caef4dc15ae4e23ae1e67c97d4ded04b0c053f Mon Sep 17 00:00:00 2001
From: Gui Castelao <guilherme@castelao.net>
Date: Mon, 25 May 2026 20:41:48 -0600
Subject: [PATCH 28/35] Updating query_templates

---
 .../extraction/transmission/transmission_plugin_config.yaml   | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/compass/extraction/transmission/transmission_plugin_config.yaml b/compass/extraction/transmission/transmission_plugin_config.yaml
index 8e272db22..d15281303 100644
--- a/compass/extraction/transmission/transmission_plugin_config.yaml
+++ b/compass/extraction/transmission/transmission_plugin_config.yaml
@@ -4,6 +4,10 @@ data_type_short_desc: high-voltage transmission lines ordinance
 
 query_templates:
   - "filetype:pdf {jurisdiction} electric transmission line ordinance"
+  - "{jurisdiction} high voltage transmission line zoning ordinance"
+  - "{jurisdiction} electric transmission line siting code"
+  - "{jurisdiction} overhead electric utility line ordinance"
+  - "{jurisdiction} transmission line right-of-way regulation"
 
 website_keywords:
   pdf: 92160

From 04798e288bf55e81a47bf7ab562d9be1bc3353af Mon Sep 17 00:00:00 2001
From: Gui Castelao <guilherme@castelao.net>
Date: Mon, 25 May 2026 22:13:39 -0600
Subject: [PATCH 29/35] Updating website_keyword

---
 .../transmission_plugin_config.yaml           | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/compass/extraction/transmission/transmission_plugin_config.yaml b/compass/extraction/transmission/transmission_plugin_config.yaml
index d15281303..341da9fa4 100644
--- a/compass/extraction/transmission/transmission_plugin_config.yaml
+++ b/compass/extraction/transmission/transmission_plugin_config.yaml
@@ -11,6 +11,25 @@ query_templates:
 
 website_keywords:
   pdf: 92160
+  transmission: 46080
+  ordinance: 23040
+  zoning: 11520
+  utility: 5760
+  electric: 5760
+  right-of-way: 1440
+  overhead: 720
+  permit: 720
+  land use: 720
+  municipal: 360
+  county: 360
+  code of ordinances: 360
+  ordinance code: 360
+  land use code: 360
+  conditional use permit: 180
+  special use permit: 180
+  statute: 180
+  administrative code: 180
+  government: 180
 
 heuristic_keywords: true
 

From 96ef47dd8bfc7db6316e571ca91bb90827757810 Mon Sep 17 00:00:00 2001
From: Gui Castelao <guilherme@castelao.net>
Date: Mon, 25 May 2026 22:30:38 -0600
Subject: [PATCH 30/35] refact: Moving back to plugin_config.yaml

---
 .../{transmission_plugin_config.yaml => plugin_config.yaml}       | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename compass/extraction/transmission/{transmission_plugin_config.yaml => plugin_config.yaml} (100%)

diff --git a/compass/extraction/transmission/transmission_plugin_config.yaml b/compass/extraction/transmission/plugin_config.yaml
similarity index 100%
rename from compass/extraction/transmission/transmission_plugin_config.yaml
rename to compass/extraction/transmission/plugin_config.yaml

From 17d5996e3943e865cce5da737657e62b663c7d92 Mon Sep 17 00:00:00 2001
From: Gui Castelao <guilherme@castelao.net>
Date: Wed, 3 Jun 2026 08:09:11 -0600
Subject: [PATCH 31/35] Registering transmission plugin

---
 compass/extraction/__init__.py              |  1 +
 compass/extraction/transmission/__init__.py | 12 ++++++++++++
 2 files changed, 13 insertions(+)
 create mode 100644 compass/extraction/transmission/__init__.py

diff --git a/compass/extraction/__init__.py b/compass/extraction/__init__.py
index fed7f61fe..172fcfc85 100644
--- a/compass/extraction/__init__.py
+++ b/compass/extraction/__init__.py
@@ -13,5 +13,6 @@
 from .ghp import COMPASSGeoHeatPumpExtractor
 from .small_wind import COMPASSSmallWindExtractor
 from .solar import COMPASSSolarExtractor
+from .transmission import COMPASSTransmissionExtractor
 from .water import TexasWaterRightsExtractor
 from .wind import COMPASSWindExtractor
diff --git a/compass/extraction/transmission/__init__.py b/compass/extraction/transmission/__init__.py
new file mode 100644
index 000000000..e0c458fc0
--- /dev/null
+++ b/compass/extraction/transmission/__init__.py
@@ -0,0 +1,12 @@
+"""COMPASS transmission extraction plugin"""
+
+import importlib.resources
+
+from compass.plugin import create_schema_based_one_shot_extraction_plugin
+
+
+COMPASSTransmissionExtractor = create_schema_based_one_shot_extraction_plugin(
+    importlib.resources.files("compass.extraction.transmission")
+    / "plugin_config.yaml",
+    tech="transmission",
+)

From 5a721cc15d7a293f0ff8e14210a2269402477216 Mon Sep 17 00:00:00 2001
From: Gui Castelao <guilherme@castelao.net>
Date: Wed, 3 Jun 2026 08:22:58 -0600
Subject: [PATCH 32/35] clean: Removing commented-out summary descriptions from transmission schema

---
 compass/extraction/transmission/transmission_schema.json5 | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/compass/extraction/transmission/transmission_schema.json5 b/compass/extraction/transmission/transmission_schema.json5
index cac429ca2..fc919663f 100644
--- a/compass/extraction/transmission/transmission_schema.json5
+++ b/compass/extraction/transmission/transmission_schema.json5
@@ -56,12 +56,6 @@
           "summary": {
             "type": "string",
             "description": "A short summary of the relevant ordinance requiremets."
-            /*
-            Byron's
-            "description": "A short summary with direct ordinance excerpts or quotes whenever possible. For qualitative features such as permitting, fencing, lighting, seismic monitoring, decommissioning, and prohibitions, this is the primary output field and should contain direct ordinance language. For numeric features, summary must support the same requirement used to extract value and units. Must be a non-null, non-empty string. Do not output absence placeholders such as 'No explicit requirement found'; omit the feature instead when no requirement is present."
-            */
-            // "description": "A short summary of the relevant ordinance requirement using direct text excerpts and quotes as much as possible. If multiple options exist and a selection was made, list all other options and their conditions in the summary. For qualitative restrictions, this is the primary output field containing the full extracted text. Can be null if no requirement found."
-            //"description": "A short summary with direct ordinance excerpts/quotes whenever possible. For qualitative features (definitions, permitting, screening, inspection, decommissioning, prohibitions), this is the primary output field and should contain a direct ordinance excerpt. For numeric features, summary must support the same requirement used to extract value and units. Must be a non-null, non-empty string. Do not output absence placeholders (for example, 'No explicit ... found'); omit the feature instead when no requirement is present."
           },
           "explanation": {
             "type": "string",

From a0a1040d40687cb5ed802ae2144ced731b955cc1 Mon Sep 17 00:00:00 2001
From: Gui Castelao <guilherme@castelao.net>
Date: Wed, 3 Jun 2026 08:30:50 -0600
Subject: [PATCH 33/35] style: Normalize assignment and operator spacing in schema_eval.py

---
 compass/qc/schema_eval.py | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/compass/qc/schema_eval.py b/compass/qc/schema_eval.py
index 3ee7549c7..2635dc338 100644
--- a/compass/qc/schema_eval.py
+++ b/compass/qc/schema_eval.py
@@ -52,13 +52,13 @@
 class C:
     """Tiny ANSI colour helpers"""
 
-    GREEN  = "\033[92m"
-    RED    = "\033[91m"
+    GREEN = "\033[92m"
+    RED = "\033[91m"
     YELLOW = "\033[93m"
-    CYAN   = "\033[96m"
-    BOLD   = "\033[1m"
-    DIM    = "\033[2m"
-    RESET  = "\033[0m"
+    CYAN = "\033[96m"
+    BOLD = "\033[1m"
+    DIM = "\033[2m"
+    RESET = "\033[0m"
 
     @staticmethod
     def ok(s: str)   -> str: return f"{C.GREEN}{s}{C.RESET}"
@@ -149,9 +149,9 @@ def cmd_compare(
     label_a = Path(run_a_path).stem
     label_b = Path(run_b_path).stem
 
-    print(C.bold(f"\n{'='*70}"))
+    print(C.bold(f"\n{'=' * 70}"))
     print(C.bold(f"  Comparison: {label_a}  vs  {label_b}"))
-    print(C.bold(f"{'='*70}\n"))
+    print(C.bold(f"{'=' * 70}\n"))
 
     # Build key sets — tuples of (county, state, subdivision, feature)
     def key_set(df: pl.DataFrame) -> set[tuple]:
@@ -250,9 +250,9 @@ def _filter(df, c, s, sd, f):
             n_unchanged += 1
 
     # ── Comparison summary ───────────────────────────────────────
-    print(C.bold(f"\n{'─'*70}"))
+    print(C.bold(f"\n{'─' * 70}"))
     print(C.bold("  Comparison summary"))
-    print(f"{'─'*70}")
+    print(f"{'─' * 70}")
     print(f"  Rows only in {label_a}: {len(only_a)}")
     print(f"  Rows only in {label_b}: {len(only_b)}")
     print(f"  Shared rows, changed : {C.warn(str(n_changed))}")
@@ -270,9 +270,9 @@ def _print_ref_scoring(ref_path, df_a, df_b, label_a, label_b):
     """Score both runs against reference and show divergences"""
     ref = load_reference(ref_path)
 
-    print(C.bold(f"{'─'*70}"))
+    print(C.bold(f"{'─' * 70}"))
     print(C.bold("  Reference scoring"))
-    print(f"{'─'*70}\n")
+    print(f"{'─' * 70}\n")
 
     for label, df in [(label_a, df_a), (label_b, df_b)]:
         passed, total = score_run(ref, df.lazy())
@@ -355,11 +355,11 @@ def cmd_init(run_path: str, output_path: str):
 
     grouped: dict[str, dict[str, Any]] = {}
     for row in df.iter_rows(named=True):
-        county  = row["county"] or "unknown"
-        state   = row["state"]  or "unknown"
-        subdiv  = row.get("subdivision")
+        county = row["county"] or "unknown"
+        state = row["state"] or "unknown"
+        subdiv = row.get("subdivision")
         feature = row["feature"] or "unknown"
-        fips    = row.get("FIPS", "")
+        fips = row.get("FIPS", "")
 
         loc_key = location_label({
             "county": county, "state": state,
@@ -380,7 +380,7 @@ def cmd_init(run_path: str, output_path: str):
                 feat_entry[fld] = "not_null"
 
         grouped[loc_key]["features"][feature] = (
-            feat_entry if feat_entry else None
+            feat_entry or None
         )
 
     out = Path(output_path)

From dc77faa29e7b0bebd3e4654169e9236967441d1d Mon Sep 17 00:00:00 2001
From: Gui Castelao <guilherme@castelao.net>
Date: Wed, 3 Jun 2026 08:32:06 -0600
Subject: [PATCH 34/35] style: Expand one-line methods and re-wrap long calls in schema_eval.py

---
 compass/qc/schema_eval.py | 125 ++++++++++++++++++++++----------------
 1 file changed, 71 insertions(+), 54 deletions(-)

diff --git a/compass/qc/schema_eval.py b/compass/qc/schema_eval.py
index 2635dc338..58972fd45 100644
--- a/compass/qc/schema_eval.py
+++ b/compass/qc/schema_eval.py
@@ -61,13 +61,20 @@ class C:
     RESET = "\033[0m"
 
     @staticmethod
-    def ok(s: str)   -> str: return f"{C.GREEN}{s}{C.RESET}"
+    def ok(s: str) -> str:
+        return f"{C.GREEN}{s}{C.RESET}"
+
     @staticmethod
-    def fail(s: str) -> str: return f"{C.RED}{s}{C.RESET}"
+    def fail(s: str) -> str:
+        return f"{C.RED}{s}{C.RESET}"
+
     @staticmethod
-    def warn(s: str) -> str: return f"{C.YELLOW}{s}{C.RESET}"
+    def warn(s: str) -> str:
+        return f"{C.YELLOW}{s}{C.RESET}"
+
     @staticmethod
-    def bold(s: str) -> str: return f"{C.BOLD}{s}{C.RESET}"
+    def bold(s: str) -> str:
+        return f"{C.BOLD}{s}{C.RESET}"
 
 
 # ── Formatting helpers ───────────────────────────────────────────────
@@ -168,20 +175,14 @@ def key_set(df: pl.DataFrame) -> set[tuple]:
     if only_a or only_b:
         print(C.bold("  Row presence changes:"))
         if only_a:
-            print(
-                f"\n  {C.fail(f'Removed in {label_b}')}"
-                f" ({len(only_a)}):"
-            )
+            print(f"\n  {C.fail(f'Removed in {label_b}')} ({len(only_a)}):")
             for c, s, sd, f in sorted(only_a, key=_sortable_key):
                 loc = location_label(
                     {"county": c, "state": s, "subdivision": sd}
                 )
                 print(f"    − {loc} → {f}")
         if only_b:
-            print(
-                f"\n  {C.ok(f'Added in {label_b}')}"
-                f" ({len(only_b)}):"
-            )
+            print(f"\n  {C.ok(f'Added in {label_b}')} ({len(only_b)}):")
             for c, s, sd, f in sorted(only_b, key=_sortable_key):
                 loc = location_label(
                     {"county": c, "state": s, "subdivision": sd}
@@ -191,8 +192,7 @@ def key_set(df: pl.DataFrame) -> set[tuple]:
 
     # ── Field-level diff on shared rows ──────────────────────────
     compare_fields = [
-        f for f in ALL_CHECK_FIELDS
-        if f in df_a.columns and f in df_b.columns
+        f for f in ALL_CHECK_FIELDS if f in df_a.columns and f in df_b.columns
     ]
     n_changed = 0
     n_unchanged = 0
@@ -212,12 +212,12 @@ def _filter(df, c, s, sd, f):
                 mask &= pl.col("subdivision").is_null()
             return df.filter(mask)
 
-        row_a = _filter(
-            df_a, county, state, subdiv, feature
-        ).row(0, named=True)
-        row_b = _filter(
-            df_b, county, state, subdiv, feature
-        ).row(0, named=True)
+        row_a = _filter(df_a, county, state, subdiv, feature).row(
+            0, named=True
+        )
+        row_b = _filter(df_b, county, state, subdiv, feature).row(
+            0, named=True
+        )
 
         diffs: list[tuple[str, str | None, str | None]] = []
         for fld in compare_fields:
@@ -262,7 +262,11 @@ def _filter(df, c, s, sd, f):
     # ── Optional: score both against reference ───────────────────
     if ref_path:
         _print_ref_scoring(
-            ref_path, df_a, df_b, label_a, label_b,
+            ref_path,
+            df_a,
+            df_b,
+            label_a,
+            label_b,
         )
 
 
@@ -289,10 +293,7 @@ def _print_ref_scoring(ref_path, df_a, df_b, label_a, label_b):
             print(f"    {d['location']}  ·  {d['field']}")
             sa = C.ok("✓") if d["a_pass"] else C.fail("✗")
             sb = C.ok("✓") if d["b_pass"] else C.fail("✗")
-            print(
-                f"      {label_a}: {sa}  {label_b}: {sb}"
-                f"  — {d['detail']}"
-            )
+            print(f"      {label_a}: {sa}  {label_b}: {sb}  — {d['detail']}")
         print()
 
 
@@ -319,30 +320,46 @@ def _find_divergences(
 
             for fld, check in checks.items():
                 res_a = (
-                    run_checks(row_a, {fld: check}) if row_a
-                    else [CheckResult(
-                        fld, check["mode"], False,
-                        "", "(missing)", "row missing",
-                    )]
+                    run_checks(row_a, {fld: check})
+                    if row_a
+                    else [
+                        CheckResult(
+                            fld,
+                            check["mode"],
+                            False,
+                            "",
+                            "(missing)",
+                            "row missing",
+                        )
+                    ]
                 )
                 res_b = (
-                    run_checks(row_b, {fld: check}) if row_b
-                    else [CheckResult(
-                        fld, check["mode"], False,
-                        "", "(missing)", "row missing",
-                    )]
+                    run_checks(row_b, {fld: check})
+                    if row_b
+                    else [
+                        CheckResult(
+                            fld,
+                            check["mode"],
+                            False,
+                            "",
+                            "(missing)",
+                            "row missing",
+                        )
+                    ]
                 )
                 if res_a[0].passed != res_b[0].passed:
-                    divs.append({
-                        "location": feat_label,
-                        "field": fld,
-                        "a_pass": res_a[0].passed,
-                        "b_pass": res_b[0].passed,
-                        "detail": (
-                            f"A: {res_a[0].actual[:50]}"
-                            f"  B: {res_b[0].actual[:50]}"
-                        ),
-                    })
+                    divs.append(
+                        {
+                            "location": feat_label,
+                            "field": fld,
+                            "a_pass": res_a[0].passed,
+                            "b_pass": res_b[0].passed,
+                            "detail": (
+                                f"A: {res_a[0].actual[:50]}"
+                                f"  B: {res_b[0].actual[:50]}"
+                            ),
+                        }
+                    )
     return divs
 
 
@@ -361,10 +378,13 @@ def cmd_init(run_path: str, output_path: str):
         feature = row["feature"] or "unknown"
         fips = row.get("FIPS", "")
 
-        loc_key = location_label({
-            "county": county, "state": state,
-            "subdivision": subdiv,
-        })
+        loc_key = location_label(
+            {
+                "county": county,
+                "state": state,
+                "subdivision": subdiv,
+            }
+        )
 
         if loc_key not in grouped:
             grouped[loc_key] = {"FIPS": fips, "features": {}}
@@ -379,14 +399,11 @@ def cmd_init(run_path: str, output_path: str):
             if v:
                 feat_entry[fld] = "not_null"
 
-        grouped[loc_key]["features"][feature] = (
-            feat_entry or None
-        )
+        grouped[loc_key]["features"][feature] = feat_entry or None
 
     out = Path(output_path)
     lines = [
-        "# Reference template — generated from: "
-        + Path(run_path).name,
+        "# Reference template — generated from: " + Path(run_path).name,
         "# Review each entry and adjust match modes:",
         '#   exact value  →  value: "1500"',
         "#   keywords     →  summary:",

From 3551bf3b44f1e38ff0f3b3076c6322301fd45e6a Mon Sep 17 00:00:00 2001
From: Gui Castelao <guilherme@castelao.net>
Date: Wed, 3 Jun 2026 08:37:51 -0600
Subject: [PATCH 35/35] Moving QC module outside core COMPASS

A new, separate package will be created to hold these QC support utilities.
---
 compass/qc/core.py        | 229 ----------------
 compass/qc/reference.py   | 286 --------------------
 compass/qc/schema_eval.py | 538 --------------------------------------
 3 files changed, 1053 deletions(-)
 delete mode 100644 compass/qc/core.py
 delete mode 100644 compass/qc/reference.py
 delete mode 100644 compass/qc/schema_eval.py

diff --git a/compass/qc/core.py b/compass/qc/core.py
deleted file mode 100644
index ee9e22dc5..000000000
--- a/compass/qc/core.py
+++ /dev/null
@@ -1,229 +0,0 @@
-"""
-core.py — Composable building blocks for matching and validation
-"""
-
-from __future__ import annotations
-
-import logging
-from collections.abc import Generator
-from dataclasses import dataclass
-
-import polars as pl
-
-from reference import location_label
-
-logger = logging.getLogger(__name__)
-
-
-# ── Data types ───────────────────────────────────────────────────────
-
-
-@dataclass
-class CheckResult:
-    """Outcome of a single field-level check"""
-
-    field: str
-    mode: str
-    passed: bool
-    expected: str
-    actual: str
-    detail: str = ""
-
-
-# ── Location-level functions ─────────────────────────────────────────
-
-
-def extract_locations(
-    lf: pl.LazyFrame,
-) -> set[tuple[str, str, str | None]]:
-    """
-    Collect distinct locations from a LazyFrame
-
-    Each location is a tuple of ``(state, county, subdivision)``
-    where *subdivision* is ``None`` for county-level records.
-
-    Parameters
-    ----------
-    lf : pl.LazyFrame
-        Lazy representation of run data containing at least
-        the columns ``state``, ``county``, and
-        ``subdivision``.
-
-    Returns
-    -------
-    set[tuple[str, str, str | None]]
-        Unique location tuples found in the data.
-    """
-    rows = (
-        lf.select("state", "county", "subdivision")
-        .unique()
-        .collect()
-        .iter_rows()
-    )
-    return {(state, county, subdiv) for state, county, subdiv in rows}
-
-
-def find_missing_locations(
-    ref: dict[str, dict],
-    lf: pl.LazyFrame,
-) -> list[dict]:
-    """
-    Find reference locations absent from the target
-
-    Compares the locations declared in *ref* against the
-    distinct locations present in *lf*.  Returns the
-    reference entries whose geographic key (state, county,
-    subdivision) has no matching rows in the target.
-
-    Parameters
-    ----------
-    ref : dict[str, dict]
-        Reference dict as returned by
-        ``reference.load_reference()``.
-    lf : pl.LazyFrame
-        Lazy representation of the target run data.
-
-    Returns
-    -------
-    list[dict]
-        The ``loc_data`` dicts for each reference location
-        not found in the target, in the order they appear
-        in *ref*.  Empty list when all reference locations
-        are present.
-    """
-    target_locs = extract_locations(lf)
-
-    missing = []
-    for _loc_key, loc_data in ref.items():
-        loc_tuple = (
-            loc_data["state"],
-            loc_data["county"],
-            loc_data.get("subdivision"),
-        )
-        if loc_tuple not in target_locs:
-            missing.append(loc_data)
-    return missing
-
-
-def match_labels(
-    ref: dict[str, dict],
-    lf: pl.LazyFrame,
-) -> Generator[tuple[dict, pl.DataFrame], None, None]:
-    """Pair ground-truth locations with matching run rows
-
-    Iterates over each location in *ref*, builds a
-    geographic filter (state, county, and subdivision when
-    defined), applies it to *lf*, and collects the result.
-    When the reference entry declares a FIPS code, the matched
-    rows are checked for agreement; mismatches are reported
-    via ``logging.error``.
-
-    Parameters
-    ----------
-    ref : dict[str, dict]
-        Reference dict as returned by
-        ``reference.load_reference()``, keyed by normalised
-        location string.
-    lf : pl.LazyFrame
-        Lazy representation of a run CSV, as produced by
-        ``load_run(path).lazy()``.
-
-    Yields
-    ------
-    tuple[dict, pl.DataFrame]
-        A pair ``(loc_data, loc_df)`` for each location in
-        *ref*:
-
-        loc_data
-            The reference dict for one location, containing
-            state, county, subdivision, FIPS, and features
-            with their check specs.
-        loc_df
-            Collected DataFrame with every run row that
-            matches the location geographically.  May be
-            empty when the run has no data for that
-            location.
-    """
-    for _loc_key, loc_data in ref.items():
-        mask = (
-            (pl.col("county") == loc_data["county"])
-            & (pl.col("state") == loc_data["state"])
-        )
-        subdiv = loc_data.get("subdivision")
-        if subdiv:
-            mask = mask & (pl.col("subdivision") == subdiv)
-        else:
-            mask = mask & pl.col("subdivision").is_null()
-
-        loc_df = lf.filter(mask).collect()
-
-        # Validate FIPS agreement
-        expected_fips = loc_data.get("FIPS")
-        if expected_fips is not None and not loc_df.is_empty():
-            run_fips = loc_df["FIPS"].unique().to_list()
-            mismatched = [
-                f for f in run_fips
-                if f is not None and f != expected_fips
-            ]
-            if mismatched:
-                loc_lbl = location_label(loc_data)
-                logger.error(
-                    "FIPS mismatch for %s: reference declares %s, "
-                    "run contains %s",
-                    loc_lbl, expected_fips, mismatched,
-                )
-
-        yield loc_data, loc_df
-
-
-def find_missing_features(
-    loc_data: dict,
-    lf: pl.LazyFrame,
-) -> list[str]:
-    """
-    Find features declared in the reference but absent from the run.
-
-    Compares the feature names listed in *loc_data* against
-    the distinct ``feature`` values present in *lf*.  Any
-    feature that appears in the reference but has no matching
-    row in the run is considered missing.
-
-    Designed to compose with :func:`match_labels`::
-
-        for loc_data, loc_df in match_labels(ref, run_lf):
-            missing = find_missing_features(
-                loc_data, loc_df.lazy(),
-            )
-
-    Parameters
-    ----------
-    loc_data : dict
-        Truth dict for a single location, as yielded by
-        :func:`match_labels`.  Must contain a ``features``
-        key mapping feature names to check specs (which
-        may be empty dicts for presence-only features).
-    lf : pl.LazyFrame
-        Lazy representation of the run rows already scoped
-        to this location.
-
-    Returns
-    -------
-    list[str]
-        Feature names present in the reference but not found
-        in the run, in the order they appear in
-        ``loc_data["features"]``.  Empty list when all
-        features are present.
-    """
-    expected = set(loc_data.get("features", {}).keys())
-    if not expected:
-        return []
-
-    present = set(
-        lf.select("feature")
-        .unique()
-        .collect()
-        .get_column("feature")
-        .to_list()
-    )
-
-    return [f for f in loc_data["features"] if f not in present]
diff --git a/compass/qc/reference.py b/compass/qc/reference.py
deleted file mode 100644
index dc6f0a11b..000000000
--- a/compass/qc/reference.py
+++ /dev/null
@@ -1,286 +0,0 @@
-"""
-reference.py — Load, merge, and validate reference YAML files.
-
-Handles single files, directories (recursive), and duplicate detection
-both within and across files.
-
-Location keys support two granularities:
-
-  County level  : "County, State"                → e.g. "Power, Idaho"
-  Township level: "Subdivision, County, State"    → e.g. "Springfield, Power, Idaho"
-
-load_reference() returns a dict grouped by location rather than a flat list,
-so downstream code can process and summarise per-location.
-"""
-
-from __future__ import annotations
-
-from pathlib import Path
-
-import yaml
-
-# ── Field definitions ────────────────────────────────────────────────────────
-# These define which CSV columns the reference format understands.
-
-# Columns where exact string match is the natural comparison
-EXACT_FIELDS = ["value", "units", "adder", "min_dist", "max_dist", "year"]
-
-# Columns where text / keyword / not-null matching makes more sense
-TEXT_FIELDS = ["summary", "section", "source"]
-
-ALL_CHECK_FIELDS = EXACT_FIELDS + TEXT_FIELDS
-
-# ── Exceptions ───────────────────────────────────────────────────────────────
-
-
-class DuplicateLocationError(Exception):
-    """Raised when the same location key appears in more than one place."""
-
-
-# ── Location key parsing ────────────────────────────────────────────────────
-
-
-def parse_location_key(key: str) -> dict[str, str | None]:
-    """
-    Parse a YAML location key into its component parts.
-
-    Supports:
-      "County, State"                → county-level
-      "Subdivision, County, State"   → township-level
-
-    Returns a dict with keys: state, county, subdivision (None if county-level).
-    """
-    parts = [p.strip() for p in key.split(",")]
-
-    if len(parts) == 2:
-        return {
-            "state": parts[1].lower(),
-            "county": parts[0].lower(),
-            "subdivision": None,
-        }
-    elif len(parts) == 3:
-        return {
-            "state": parts[2].lower(),
-            "county": parts[1].lower(),
-            "subdivision": parts[0].lower(),
-        }
-    else:
-        raise ValueError(
-            f"Location key must have 2 parts (County, State) or "
-            f"3 parts (Subdivision, County, State), got {len(parts)}: '{key}'"
-        )
-
-
-def location_label(loc: dict[str, str | None]) -> str:
-    """
-    Build a human-readable label from parsed location components.
-
-    Returns "County, State" or "Subdivision, County, State".
-    """
-    parts = []
-    if loc.get("subdivision"):
-        parts.append(loc["subdivision"].title())
-    parts.append(loc["county"].title())
-    parts.append(loc["state"].title())
-    return ", ".join(parts)
-
-
-# ── File collection ──────────────────────────────────────────────────────────
-
-
-def collect_reference_files(path: str | Path) -> list[Path]:
-    """
-    Walk *path* and return every .yaml / .yml file found.
-
-    If *path* is a single file, return it in a one-element list.
-    If *path* is a directory, recurse into all sub-folders (sorted for
-    deterministic ordering).
-    """
-    p = Path(path)
-    if p.is_file():
-        return [p]
-    if p.is_dir():
-        files = sorted(p.rglob("*.yaml")) + sorted(p.rglob("*.yml"))
-        # rglob may return .yml files that also matched .yaml; deduplicate
-        seen: set[Path] = set()
-        unique: list[Path] = []
-        for f in files:
-            resolved = f.resolve()
-            if resolved not in seen:
-                seen.add(resolved)
-                unique.append(f)
-        return unique
-    raise FileNotFoundError(f"Reference path not found: {p}")
-
-
-# ── Merging & duplicate detection ────────────────────────────────────────────
-
-
-def merge_reference_dicts(files: list[Path]) -> dict:
-    """
-    Load every YAML file and merge into one dict.
-
-    Raises DuplicateLocationError if a top-level location key (e.g.
-    "Power, Idaho") appears more than once — whether across different
-    files or duplicated inside the same file.
-    """
-    merged: dict = {}
-    # Track where each key was first seen for the error message
-    origin: dict[str, Path] = {}
-
-    for fpath in files:
-        raw = yaml.safe_load(fpath.read_text())
-        if raw is None:
-            continue
-        if not isinstance(raw, dict):
-            raise ValueError(
-                f"Expected a YAML mapping at the top level of {fpath}, "
-                f"got {type(raw).__name__}"
-            )
-
-        # Check for intra-file duplicates.  PyYAML silently keeps the last
-        # occurrence when a key is repeated, so we do a quick text-level
-        # scan to catch that case before it's swallowed.
-        _check_intra_file_duplicates(fpath)
-
-        for key in raw:
-            norm = _normalise_location_key(key)
-            if norm in origin:
-                raise DuplicateLocationError(
-                    f"Duplicate location '{key}' — already defined in "
-                    f"{origin[norm]}, found again in {fpath}"
-                )
-            origin[norm] = fpath
-            merged[key] = raw[key]
-
-    return merged
-
-
-def _normalise_location_key(key: str) -> str:
-    """Lowercase + strip so 'Power, Idaho' and ' power , idaho ' collide."""
-    return ", ".join(p.strip().lower() for p in key.split(","))
-
-
-def _check_intra_file_duplicates(fpath: Path) -> None:
-    """
-    Detect duplicate top-level keys inside a single YAML file.
-
-    PyYAML's safe_load silently drops all-but-the-last duplicate key,
-    so we scan the raw text for top-level keys (lines that start at
-    column 0 and end with ':') and flag repeats.
-    """
-    seen: dict[str, int] = {}
-    for lineno, line in enumerate(fpath.read_text().splitlines(), start=1):
-        stripped = line.rstrip()
-        # Skip blank lines, comments, and indented lines
-        if not stripped or stripped.startswith("#") or line[0] in (" ", "\t"):
-            continue
-        # A top-level key line looks like  `"Power, Idaho":` or `Power, Idaho:`
-        if stripped.endswith(":"):
-            raw_key = stripped[:-1].strip().strip('"').strip("'")
-            norm = _normalise_location_key(raw_key)
-            if norm in seen:
-                raise DuplicateLocationError(
-                    f"Duplicate location '{raw_key}' inside {fpath} "
-                    f"(lines {seen[norm]} and {lineno})"
-                )
-            seen[norm] = lineno
-
-
-# ── Check-spec builder (internal) ───────────────────────────────────────────
-
-
-def _build_checks(field_checks: dict) -> dict[str, dict]:
-    """
-    Convert a raw YAML feature block into a checks dict.
-
-    Each key is a field name, each value is a dict describing the match mode.
-    """
-    checks: dict[str, dict] = {}
-    for fld, spec in field_checks.items():
-        if fld not in ALL_CHECK_FIELDS:
-            continue
-        if isinstance(spec, dict) and "keywords" in spec:
-            checks[fld] = {
-                "mode": "keywords",
-                "keywords": [str(k).lower() for k in spec["keywords"]],
-            }
-        elif spec == "not_null":
-            checks[fld] = {"mode": "not_null"}
-        elif spec == "absent":
-            checks[fld] = {"mode": "absent"}
-        else:
-            checks[fld] = {"mode": "exact", "expected": str(spec).strip().lower()}
-    return checks
-
-
-# ── Main loader ──────────────────────────────────────────────────────────────
-
-
-def load_reference(path: str | Path) -> dict[str, dict]:
-    """
-    Parse reference YAML(s) into a dict grouped by location.
-
-    *path* can be:
-      - a single .yaml / .yml file
-      - a directory — every .yaml / .yml underneath is collected and merged
-
-    Raises DuplicateLocationError if any location key appears more than once.
-
-    Returns a dict keyed by normalised location string::
-
-        {
-            "power, idaho": {
-                "state": "idaho",
-                "county": "power",
-                "subdivision": None,
-                "FIPS": "16077",
-                "features": {
-                    "residential buildings": {
-                        "value": {"mode": "exact", "expected": "1500"},
-                        "units": {"mode": "exact", "expected": "feet"},
-                        "summary": {"mode": "keywords", "keywords": [...]},
-                        ...
-                    },
-                    "property lines": { ... },
-                }
-            },
-            "springfield, power, idaho": {
-                "state": "idaho",
-                "county": "power",
-                "subdivision": "springfield",
-                ...
-            },
-        }
-    """
-    files = collect_reference_files(path)
-    if not files:
-        raise FileNotFoundError(f"No .yaml / .yml files found under {path}")
-
-    raw = merge_reference_dicts(files)
-    result: dict[str, dict] = {}
-
-    for location_key, loc_data in raw.items():
-        loc = parse_location_key(location_key)
-        norm_key = _normalise_location_key(location_key)
-
-        fips = loc_data.get("FIPS")
-        raw_features = loc_data.get("features", {})
-
-        parsed_features: dict[str, dict] = {}
-        for feat_name, field_checks in raw_features.items():
-            if field_checks is None:
-                # Feature listed with no arguments — still
-                # track it so presence checks can catch it.
-                parsed_features[feat_name.lower()] = {}
-                continue
-            checks = _build_checks(field_checks)
-            parsed_features[feat_name.lower()] = checks
-
-        result[norm_key] = {
-            **loc,
-            "FIPS": str(fips) if fips is not None else None,
-            "features": parsed_features,
-        }
-
-    return result
diff --git a/compass/qc/schema_eval.py b/compass/qc/schema_eval.py
deleted file mode 100644
index 58972fd45..000000000
--- a/compass/qc/schema_eval.py
+++ /dev/null
@@ -1,538 +0,0 @@
-"""
-schema_eval.py — CLI entry point for extraction evaluation
-
-Subcommands
------------
-  init      Scaffold a reference YAML from an existing CSV run.
-  validate  Score one CSV run against reference.
-  compare   Diff two CSV runs; optionally score both against reference.
-
-Examples
---------
-  python schema_eval.py init run1.csv -o reference.yaml
-  python schema_eval.py validate run1.csv -t reference.yaml
-  python schema_eval.py compare run1.csv run2.csv -t reference.yaml
-"""
-
-from __future__ import annotations
-
-from pathlib import Path
-from typing import Any
-
-import click
-import polars as pl
-
-from core import (
-    CheckResult,
-    extract_locations,
-    find_feature_row,
-    find_missing_features,
-    find_missing_locations,
-    load_run,
-    match_labels,
-    run_checks,
-    score_run,
-    validate_formated,
-)
-from reference import (
-    ALL_CHECK_FIELDS,
-    EXACT_FIELDS,
-    TEXT_FIELDS,
-    load_reference,
-    location_label,
-)
-
-# ── Constants ────────────────────────────────────────────────────────
-
-KEY_COLS = ["county", "state", "subdivision", "feature"]
-
-# ── ANSI helpers ─────────────────────────────────────────────────────
-
-
-class C:
-    """Tiny ANSI colour helpers"""
-
-    GREEN = "\033[92m"
-    RED = "\033[91m"
-    YELLOW = "\033[93m"
-    CYAN = "\033[96m"
-    BOLD = "\033[1m"
-    DIM = "\033[2m"
-    RESET = "\033[0m"
-
-    @staticmethod
-    def ok(s: str) -> str:
-        return f"{C.GREEN}{s}{C.RESET}"
-
-    @staticmethod
-    def fail(s: str) -> str:
-        return f"{C.RED}{s}{C.RESET}"
-
-    @staticmethod
-    def warn(s: str) -> str:
-        return f"{C.YELLOW}{s}{C.RESET}"
-
-    @staticmethod
-    def bold(s: str) -> str:
-        return f"{C.BOLD}{s}{C.RESET}"
-
-
-# ── Formatting helpers ───────────────────────────────────────────────
-
-
-def _truncate(s: str | None, n: int) -> str:
-    """Shorten a string for display"""
-    if s is None:
-        return "(null)"
-    return s[:n] + "…" if len(s) > n else s
-
-
-def _pct_color(pct: float) -> callable:
-    """Pick a colour function based on percentage thresholds"""
-    if pct >= 90:
-        return C.ok
-    return C.warn if pct >= 70 else C.fail
-
-
-def _sortable_key(t: tuple) -> tuple:
-    """Replace None with '' so tuples are sortable"""
-    return tuple(v if v is not None else "" for v in t)
-
-
-# ── Validate subcommand ──────────────────────────────────────────────
-
-
-def cmd_validate(
-    run_path: str,
-    ref_path: str,
-    verbose: bool = False,
-    output_format: str = "text",
-):
-    """Validate a run against reference and return formatted output
-
-    Parameters
-    ----------
-    run_path : str
-        Path to the CSV run file to validate.
-    ref_path : str
-        Path to the reference YAML file or directory.
-    verbose : bool, default=False
-        Include passing checks in text output. By default, False.
-    output_format : str, default="text"
-        Output format to render. Supported values are ``"text"``
-        and ``"json"``. By default, text.
-
-    Returns
-    -------
-    str
-        Rendered validation report as text or JSON string.
-    """
-    lf = load_run(run_path)
-    ref = load_reference(ref_path)
-
-    return validate_formated(
-        ref,
-        lf,
-        run_path,
-        ref_path,
-        output_format=output_format,
-        verbose=verbose,
-        style=C,
-    )
-
-
-# ── Compare subcommand ───────────────────────────────────────────────
-
-
-def cmd_compare(
-    run_a_path: str,
-    run_b_path: str,
-    ref_path: str | None = None,
-    verbose: bool = False,
-):
-    df_a = load_run(run_a_path).collect()
-    df_b = load_run(run_b_path).collect()
-
-    label_a = Path(run_a_path).stem
-    label_b = Path(run_b_path).stem
-
-    print(C.bold(f"\n{'=' * 70}"))
-    print(C.bold(f"  Comparison: {label_a}  vs  {label_b}"))
-    print(C.bold(f"{'=' * 70}\n"))
-
-    # Build key sets — tuples of (county, state, subdivision, feature)
-    def key_set(df: pl.DataFrame) -> set[tuple]:
-        return set(df.select(KEY_COLS).unique().iter_rows())
-
-    keys_a = key_set(df_a)
-    keys_b = key_set(df_b)
-
-    only_a = keys_a - keys_b
-    only_b = keys_b - keys_a
-    common = keys_a & keys_b
-
-    # ── Row-presence diff ────────────────────────────────────────
-    if only_a or only_b:
-        print(C.bold("  Row presence changes:"))
-        if only_a:
-            print(f"\n  {C.fail(f'Removed in {label_b}')} ({len(only_a)}):")
-            for c, s, sd, f in sorted(only_a, key=_sortable_key):
-                loc = location_label(
-                    {"county": c, "state": s, "subdivision": sd}
-                )
-                print(f"    − {loc} → {f}")
-        if only_b:
-            print(f"\n  {C.ok(f'Added in {label_b}')} ({len(only_b)}):")
-            for c, s, sd, f in sorted(only_b, key=_sortable_key):
-                loc = location_label(
-                    {"county": c, "state": s, "subdivision": sd}
-                )
-                print(f"    + {loc} → {f}")
-        print()
-
-    # ── Field-level diff on shared rows ──────────────────────────
-    compare_fields = [
-        f for f in ALL_CHECK_FIELDS if f in df_a.columns and f in df_b.columns
-    ]
-    n_changed = 0
-    n_unchanged = 0
-
-    for key in sorted(common, key=_sortable_key):
-        county, state, subdiv, feature = key
-
-        def _filter(df, c, s, sd, f):
-            mask = (
-                (pl.col("county") == c)
-                & (pl.col("state") == s)
-                & (pl.col("feature") == f)
-            )
-            if sd:
-                mask &= pl.col("subdivision") == sd
-            else:
-                mask &= pl.col("subdivision").is_null()
-            return df.filter(mask)
-
-        row_a = _filter(df_a, county, state, subdiv, feature).row(
-            0, named=True
-        )
-        row_b = _filter(df_b, county, state, subdiv, feature).row(
-            0, named=True
-        )
-
-        diffs: list[tuple[str, str | None, str | None]] = []
-        for fld in compare_fields:
-            va = row_a.get(fld)
-            vb = row_b.get(fld)
-            na = va.strip().lower() if va else None
-            nb = vb.strip().lower() if vb else None
-            if na != nb:
-                diffs.append((fld, va, vb))
-
-        loc = location_label(
-            {"county": county, "state": state, "subdivision": subdiv}
-        )
-        label = f"{loc} → {feature}"
-
-        if diffs:
-            n_changed += 1
-            print(f"  {C.warn('CHANGED')}  {label}")
-            for fld, va, vb in diffs:
-                va_d = _truncate(va, 40) if va else "(null)"
-                vb_d = _truncate(vb, 40) if vb else "(null)"
-                print(
-                    f"           {C.DIM}├─{C.RESET} {fld}:"
-                    f" {C.fail(va_d)} → {C.ok(vb_d)}"
-                )
-        elif verbose:
-            n_unchanged += 1
-            print(f"  {C.DIM}SAME{C.RESET}     {label}")
-        else:
-            n_unchanged += 1
-
-    # ── Comparison summary ───────────────────────────────────────
-    print(C.bold(f"\n{'─' * 70}"))
-    print(C.bold("  Comparison summary"))
-    print(f"{'─' * 70}")
-    print(f"  Rows only in {label_a}: {len(only_a)}")
-    print(f"  Rows only in {label_b}: {len(only_b)}")
-    print(f"  Shared rows, changed : {C.warn(str(n_changed))}")
-    print(f"  Shared rows, same    : {n_unchanged}")
-    print()
-
-    # ── Optional: score both against reference ───────────────────
-    if ref_path:
-        _print_ref_scoring(
-            ref_path,
-            df_a,
-            df_b,
-            label_a,
-            label_b,
-        )
-
-
-def _print_ref_scoring(ref_path, df_a, df_b, label_a, label_b):
-    """Score both runs against reference and show divergences"""
-    ref = load_reference(ref_path)
-
-    print(C.bold(f"{'─' * 70}"))
-    print(C.bold("  Reference scoring"))
-    print(f"{'─' * 70}\n")
-
-    for label, df in [(label_a, df_a), (label_b, df_b)]:
-        passed, total = score_run(ref, df.lazy())
-        pct = (passed / total * 100) if total else 0
-        clr = _pct_color(pct)
-        print(f"  {label:.<40s} {clr(f'{passed}/{total}')} ({pct:.1f}%)")
-
-    print()
-
-    divergences = _find_divergences(ref, df_a, df_b)
-    if divergences:
-        print(f"  {C.bold('Divergent reference results')}:\n")
-        for d in divergences:
-            print(f"    {d['location']}  ·  {d['field']}")
-            sa = C.ok("✓") if d["a_pass"] else C.fail("✗")
-            sb = C.ok("✓") if d["b_pass"] else C.fail("✗")
-            print(f"      {label_a}: {sa}  {label_b}: {sb}  — {d['detail']}")
-        print()
-
-
-def _find_divergences(
-    ref: dict[str, dict],
-    df_a: pl.DataFrame,
-    df_b: pl.DataFrame,
-) -> list[dict]:
-    """Find checks where two runs disagree against the reference"""
-    slices_a = {
-        location_label(loc_data): loc_df
-        for loc_data, loc_df in match_labels(ref, df_a.lazy())
-    }
-
-    divs = []
-    for loc_data, loc_df_b in match_labels(ref, df_b.lazy()):
-        loc_lbl = location_label(loc_data)
-        loc_df_a = slices_a.get(loc_lbl, pl.DataFrame())
-
-        for feat_name, checks in loc_data["features"].items():
-            feat_label = f"{loc_lbl} → {feat_name}"
-            row_a = find_feature_row(loc_df_a, feat_name)
-            row_b = find_feature_row(loc_df_b, feat_name)
-
-            for fld, check in checks.items():
-                res_a = (
-                    run_checks(row_a, {fld: check})
-                    if row_a
-                    else [
-                        CheckResult(
-                            fld,
-                            check["mode"],
-                            False,
-                            "",
-                            "(missing)",
-                            "row missing",
-                        )
-                    ]
-                )
-                res_b = (
-                    run_checks(row_b, {fld: check})
-                    if row_b
-                    else [
-                        CheckResult(
-                            fld,
-                            check["mode"],
-                            False,
-                            "",
-                            "(missing)",
-                            "row missing",
-                        )
-                    ]
-                )
-                if res_a[0].passed != res_b[0].passed:
-                    divs.append(
-                        {
-                            "location": feat_label,
-                            "field": fld,
-                            "a_pass": res_a[0].passed,
-                            "b_pass": res_b[0].passed,
-                            "detail": (
-                                f"A: {res_a[0].actual[:50]}"
-                                f"  B: {res_b[0].actual[:50]}"
-                            ),
-                        }
-                    )
-    return divs
-
-
-# ── Init subcommand ──────────────────────────────────────────────────
-
-
-def cmd_init(run_path: str, output_path: str):
-    """Generate a reference YAML template from an existing CSV run"""
-    df = load_run(run_path).collect()
-
-    grouped: dict[str, dict[str, Any]] = {}
-    for row in df.iter_rows(named=True):
-        county = row["county"] or "unknown"
-        state = row["state"] or "unknown"
-        subdiv = row.get("subdivision")
-        feature = row["feature"] or "unknown"
-        fips = row.get("FIPS", "")
-
-        loc_key = location_label(
-            {
-                "county": county,
-                "state": state,
-                "subdivision": subdiv,
-            }
-        )
-
-        if loc_key not in grouped:
-            grouped[loc_key] = {"FIPS": fips, "features": {}}
-
-        feat_entry: dict[str, Any] = {}
-        for fld in EXACT_FIELDS:
-            v = row.get(fld)
-            if v:
-                feat_entry[fld] = v
-        for fld in TEXT_FIELDS:
-            v = row.get(fld)
-            if v:
-                feat_entry[fld] = "not_null"
-
-        grouped[loc_key]["features"][feature] = feat_entry or None
-
-    out = Path(output_path)
-    lines = [
-        "# Reference template — generated from: " + Path(run_path).name,
-        "# Review each entry and adjust match modes:",
-        '#   exact value  →  value: "1500"',
-        "#   keywords     →  summary:",
-        "#                     keywords: [word1, word2]",
-        "#   not_null     →  section: not_null",
-        "#   absent       →  adder: absent",
-        "#   remove line  →  field won't be checked",
-        "#",
-        "# Location keys:",
-        '#   County level   →  "County, State"',
-        '#   Township level →  "Subdivision, County, State"',
-        "",
-    ]
-
-    for loc_key in sorted(grouped):
-        data = grouped[loc_key]
-        lines.append(f'"{loc_key}":')
-        if data["FIPS"]:
-            lines.append(f'  FIPS: "{data["FIPS"]}"')
-        lines.append("  features:")
-        for feat_name in sorted(data["features"]):
-            lines.append("")
-            lines.append(f"    {feat_name}:")
-            feat = data["features"][feat_name]
-            if feat is None:
-                lines.append("      # (no fields extracted)")
-                continue
-            for fld, val in feat.items():
-                if val == "not_null":
-                    lines.append(f"      {fld}: not_null")
-                else:
-                    lines.append(f'      {fld}: "{val}"')
-        lines.append("")
-
-    out.write_text("\n".join(lines))
-    print(f"\n  {C.ok('✓')} Template written to {C.bold(str(out))}")
-    print(
-        f"  {C.DIM}Edit the file to set expected values"
-        f" and match modes.{C.RESET}\n"
-    )
-
-
-# ── CLI ──────────────────────────────────────────────────────────────
-
-
-@click.group(
-    epilog=__doc__,
-    context_settings={"help_option_names": ["-h", "--help"]},
-)
-def main():
-    """Evaluate and compare LLM extraction runs against reference"""
-
-
-@main.command("init")
-@click.argument("run")
-@click.option(
-    "-o",
-    "--output",
-    "output_path",
-    default="ground_truth.yaml",
-    show_default=True,
-    help="Output YAML path",
-)
-def init_command(run: str, output_path: str):
-    """Scaffold reference YAML from a CSV run"""
-    cmd_init(run, output_path)
-
-
-@main.command("validate")
-@click.argument("run")
-@click.option(
-    "-t",
-    "--ref",
-    "ref_path",
-    required=True,
-    help="Path to reference YAML file or directory",
-)
-@click.option(
-    "-v",
-    "--verbose",
-    is_flag=True,
-    default=False,
-    help="Show passing checks too",
-)
-@click.option(
-    "-f",
-    "--format",
-    "output_format",
-    type=click.Choice(["text", "json"]),
-    default="text",
-    show_default=True,
-    help="Output format for validation report",
-)
-def validate_command(
-    run: str,
-    ref_path: str,
-    verbose: bool,
-    output_format: str,
-):
-    """Validate a CSV run against reference"""
-    print(cmd_validate(run, ref_path, verbose, output_format))
-
-
-@main.command("compare")
-@click.argument("run_a")
-@click.argument("run_b")
-@click.option(
-    "-t",
-    "--ref",
-    "ref_path",
-    default=None,
-    help="Optional reference YAML file or directory",
-)
-@click.option(
-    "-v",
-    "--verbose",
-    is_flag=True,
-    default=False,
-    help="Show unchanged rows too",
-)
-def compare_command(
-    run_a: str,
-    run_b: str,
-    ref_path: str | None,
-    verbose: bool,
-):
-    """Compare two CSV runs"""
-    cmd_compare(run_a, run_b, ref_path, verbose)
-
-
-if __name__ == "__main__":
-    main()