Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
"units",
"section",
"summary",
"ord_text",
"explanation"
],
"additionalProperties": false,
Expand Down Expand Up @@ -88,6 +89,10 @@
"type": "string",
"description": "A short summary with direct ordinance excerpts or quotes whenever possible. For qualitative features such as permitting, fencing, lighting, seismic monitoring, decommissioning, and prohibitions, this is the primary output field and should contain direct ordinance language. For numeric features, summary must support the same requirement used to extract value and units. Must be a non-null, non-empty string. Do not output absence placeholders such as 'No explicit requirement found'; omit the feature instead when no requirement is present."
},
"ord_text": {
"type": "string",
"description": "The first sentence in the source document corresponding to the ordinance being extracted, copied and pasted directly from the source. For quantitative features, this is the full sentence from the source that contains the value being extracted. For qualitative features, this is the first full sentence of the text that relates to the ordinance being extracted. Reproduce this single contiguous sentence verbatim from the source text with no paraphrasing, summarizing, normalization, ellipses, added context, or commentary. Must be a non-null, non-empty string."
},
"explanation": {
"type": "string",
"description": "Brief rationale explaining why this row matches the selected feature under this schema. Reference the specific evidence in summary and how it supports the extracted value and units or, for qualitative features, the inclusion criteria. Must be a non-null, non-empty string and must not use absence placeholders."
Expand Down Expand Up @@ -250,6 +255,7 @@
"units": "feet",
"section": "Section 8.4 - Setbacks",
"summary": "'Geothermal production wells and associated facilities shall be set back at least 500 feet from all property lines.'",
"ord_text": "Geothermal production wells and associated facilities shall be set back at least 500 feet from all property lines.",
"explanation": "The excerpt states an explicit numeric minimum separation from property lines, so it maps directly to 'property lines distance' with value 500 feet."
},
{
Expand All @@ -261,6 +267,7 @@
"units": null,
"section": "Table 4 - Allowed Uses",
"summary": "'Geothermal power plants may be approved as conditional uses in the Industrial and Agricultural districts subject to county review.'",
"ord_text": "Geothermal power plants may be approved as conditional uses in the Industrial and Agricultural districts subject to county review.",
"explanation": "The ordinance explicitly lists two districts where geothermal power plants are allowed only through conditional-use approval, so this belongs under 'special use districts' with the district names preserved as an array."
},
{
Expand All @@ -272,6 +279,7 @@
"units": null,
"section": "Section 5.2 - Approval Process",
"summary": "'A conditional use permit and county drilling permit shall be obtained prior to the construction or operation of any geothermal power plant or exploratory well.'",
"ord_text": "A conditional use permit and county drilling permit shall be obtained prior to the construction or operation of any geothermal power plant or exploratory well.",
"explanation": "The clause explicitly requires two project approvals before geothermal development can start, so it belongs under 'required permits' with both permit names preserved as an array."
},
{
Expand All @@ -280,6 +288,7 @@
"units": "HH:MM (24-hour)",
"section": "Section 6.7 - Drilling Operations",
"summary": "'Routine geothermal drilling activities may occur only between 7:00 a.m. and 7:00 p.m., Monday through Saturday, and shall not occur on Sundays or legal holidays except in an emergency.'",
"ord_text": "Routine geothermal drilling activities may occur only between 7:00 a.m. and 7:00 p.m., Monday through Saturday, and shall not occur on Sundays or legal holidays except in an emergency.",
"explanation": "The ordinance gives an explicit drilling window beginning at 7:00 a.m., normalized to 24-hour time as 07:00."
},
{
Expand All @@ -288,6 +297,7 @@
"units": "HH:MM (24-hour)",
"section": "Section 6.7 - Drilling Operations",
"summary": "'Routine geothermal drilling activities may occur only between 7:00 a.m. and 7:00 p.m., Monday through Saturday, and shall not occur on Sundays or legal holidays except in an emergency.'",
"ord_text": "Routine geothermal drilling activities may occur only between 7:00 a.m. and 7:00 p.m., Monday through Saturday, and shall not occur on Sundays or legal holidays except in an emergency.",
"explanation": "The ordinance gives an explicit drilling window ending at 7:00 p.m., normalized to 24-hour time as 19:00."
},
{
Expand All @@ -296,6 +306,7 @@
"units": null,
"section": "Section 9.3 - Financial Assurance",
"summary": "'Prior to permit issuance, the operator shall provide financial assurance in a form acceptable to the state oil, gas, and geothermal agency in an amount sufficient to cover plugging, abandonment, reclamation, and decommissioning costs as determined by the agency engineer.'",
"ord_text": "Prior to permit issuance, the operator shall provide financial assurance in a form acceptable to the state oil, gas, and geothermal agency in an amount sufficient to cover plugging, abandonment, reclamation, and decommissioning costs as determined by the agency engineer.",
"explanation": "The clause imposes an enforceable financial assurance requirement but leaves the amount to an agency-determined formula, so it fits 'bond requirement' with value and units set to null."
}
]
Expand All @@ -309,6 +320,7 @@
"For any numeric feature, the summary must support the same requirement that produced value and units for that row. Never pair a numeric value from one clause with qualitative-only language from another clause that has no numeric threshold.",
"Standardize units in the units field using this schema's canonical vocabulary, while preserving ordinance-specific wording in summary.",
"Summary is the primary data carrier for all features in this schema; every row must have a non-null, non-empty string for summary.",
"Every row must include an ord_text that is the first full sentence from the source document corresponding to the ordinance being extracted, copied verbatim. For quantitative features, ord_text is the full source sentence that contains the value being extracted; for qualitative features, ord_text is the first full sentence of the source text that relates to the ordinance being extracted. Unlike summary, ord_text must be a single contiguous sentence reproduced exactly as written, with no paraphrasing, normalization, ellipses, added context, or commentary.",
"Every row must include an explanation that briefly justifies why the cited summary evidence matches the selected feature under this schema's rules.",
"Emit only positively matched features. Never emit a row to explain why a feature does not apply.",
"The outputs array is a sparse long-form extraction table and does not need to contain every enumerated feature.",
Expand Down
9 changes: 9 additions & 0 deletions compass/extraction/ghp/geothermal_heat_pump_schema.json5
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
"units",
"section",
"summary",
"ord_text",
"explanation"
],
"additionalProperties": false,
Expand Down Expand Up @@ -81,6 +82,10 @@
"type": "string",
"description": "A short summary with direct ordinance excerpts/quotes whenever possible. For qualitative features (definitions, permitting, screening, inspection, decommissioning, prohibitions), this is the primary output field and should contain a direct ordinance excerpt. For numeric features, summary must support the same requirement used to extract value and units. Must be a non-null, non-empty string. Do not output absence placeholders (for example, 'No explicit ... found'); omit the feature instead when no requirement is present."
},
"ord_text": {
"type": "string",
"description": "The first sentence in the source document corresponding to the ordinance being extracted, copied and pasted directly from the source. For quantitative features, this is the full sentence from the source that contains the value being extracted. For qualitative features, this is the first full sentence of the text that relates to the ordinance being extracted. Reproduce this single contiguous sentence verbatim from the source text with no paraphrasing, summarizing, normalization, ellipses, added context, or commentary. Must be a non-null, non-empty string."
},
"explanation": {
"type": "string",
"description": "Brief rationale explaining why this row matches the selected feature under this schema. Reference the specific evidence in summary and how it supports the extracted value and units (or, for qualitative features, the inclusion criteria) while avoiding excluded cases. Must be a non-null, non-empty string and must not use absence placeholders."
Expand Down Expand Up @@ -241,6 +246,7 @@
"units": "feet",
"section": "Section 8.3 - Well Siting Standards",
"summary": "'No geothermal borehole shall be located within one hundred (100) feet of any private drinking water well.'",
"ord_text": "No geothermal borehole shall be located within one hundred (100) feet of any private drinking water well.",
"explanation": "The excerpt states an explicit numeric minimum separation from private drinking water wells, so it maps directly to 'private water' with value 100 feet."
},
{
Expand All @@ -249,6 +255,7 @@
"units": null,
"section": "Section 6.1 - Contractor Qualifications",
"summary": "'All geothermal wells shall be drilled by a state-licensed well driller in good standing.'",
"ord_text": "All geothermal wells shall be drilled by a state-licensed well driller in good standing.",
"explanation": "The clause explicitly ties a licensing credential to the drilling activity itself, which matches 'licensed driller'."
},
{
Expand All @@ -257,6 +264,7 @@
"units": null,
"section": "Section 12.4 - GEOTHERMAL ENERGY SYSTEM STANDARDS",
"summary": "'Only closed loop ground source heat pump systems utilizing heat transfer fluids as defined in this Zoning Code are permitted. Open loop ground source heat pump systems are not permitted.'",
"ord_text": "Open loop ground source heat pump systems are not permitted.",
"explanation": "The text states an explicit current ban on open-loop systems, so it matches 'prohibitions ol'."
}
]
Expand All @@ -270,6 +278,7 @@
"Feature disambiguation for wastewater vs line infrastructure: sewage/wastewater disposal structures (for example septic tanks, cesspools, leach fields, seepage pits, sludge/septage sites) map only to 'wastewater'; explicit sewer or sewage line wording maps only to 'sewer line'; explicit water service line wording maps only to 'water line'. If one clause explicitly names both sewer/sewage line and water line/service piping with a shared numeric setback, emit both line features with that same value and units. Phrases such as 'water and sewer lines' or 'sewage and water line' count as explicitly naming both line features when the shared noun 'line' or 'lines' applies to both utilities.",
"Numeric features in this schema are all setback features (driveways, property lines, yards, private water, public water, building foundation, wastewater, water line, sewer line, animal enclosures, roads, row, above ground fuel, below ground fuel, subsurface drains, wetlands, pools, hmat) plus noise, minimum well depth, and maximum well depth.",
"Summary is the primary data carrier for all features in this schema; every row must have a non-null, non-empty string for summary.",
"Every row must include an ord_text that is the first full sentence from the source document corresponding to the ordinance being extracted, copied verbatim. For quantitative features, ord_text is the full source sentence that contains the value being extracted; for qualitative features, ord_text is the first full sentence of the source text that relates to the ordinance being extracted. Unlike summary, ord_text must be a single contiguous sentence reproduced exactly as written, with no paraphrasing, normalization, ellipses, added context, or commentary.",
"Every row must include an explanation that briefly justifies why the cited summary evidence matches the selected feature under this schema's rules.",
"Emit only positively matched features. Never emit a row to explain why a feature does not apply.",
"The outputs array is a sparse long-form extraction table and does not need to contain every enumerated feature.",
Expand Down
1 change: 1 addition & 0 deletions compass/plugin/one_shot/components.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,7 @@ def _to_dataframe(self, data):
"value",
"units",
"summary",
"ord_text",
"year",
"section",
"source",
Expand Down
3 changes: 2 additions & 1 deletion compass/utilities/finalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,15 @@
"min_dist",
"max_dist",
"summary",
"ord_text",
"year",
"section",
"source",
"quantitative",
]
QUANT_OUT_COLS = _PARSED_COLS[:-1]
"""Output columns in quantitative ordinance file"""
QUAL_OUT_COLS = _PARSED_COLS[:6] + _PARSED_COLS[-5:-1]
QUAL_OUT_COLS = _PARSED_COLS[:6] + _PARSED_COLS[-6:-1]
"""Output columns in qualitative ordinance file"""


Expand Down
19 changes: 14 additions & 5 deletions examples/water_rights_demo/one-shot/water_rights_schema.json5
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
"type": "array",
"items": {
"type": "object",
"required": ["feature", "value", "units", "section", "summary", "source"],
"required": ["feature", "value", "units", "section", "summary", "ord_text", "source"],
"additionalProperties": false,
"properties": {
"feature": {
Expand Down Expand Up @@ -70,6 +70,10 @@
"description": "Short requirement summary using direct excerpts/quotes whenever possible. Include key conditions, exemptions, qualifiers, application scope (permit/well/aquifer), and explanation for selected value. Null when no requirement exists.",
"default": null
},
"ord_text": {
"type": "string",
"description": "Copy-and-paste directly from the source document the first sentence corresponding to the requirement being extracted. For quantitative features, this is the full sentence from the source that contains the value being extracted; for qualitative features, this is the first full sentence of the source text that relates to the requirement being extracted. Unlike summary, reproduce this single contiguous sentence verbatim from the source, with no paraphrasing, normalization, ellipses, added context, or commentary. Must be a non-null, non-empty string."
},
"source": {
"type": ["number", "null"],
"description": "Integer indicating the source index from which this information was pulled. If not applicable or unavailable, use null.",
Expand Down Expand Up @@ -175,34 +179,39 @@
"value": true,
"units": null,
"section": "Rule 3.2 - Permit Required",
"summary": "'No person may drill a non-exempt well without first obtaining a permit from the District.' Exempt domestic wells are listed separately under Rule 3.5."
"summary": "'No person may drill a non-exempt well without first obtaining a permit from the District.' Exempt domestic wells are listed separately under Rule 3.5.",
"ord_text": "No person may drill a non-exempt well without first obtaining a permit from the District."
},
{
"feature": "annual extraction limits",
"value": 2,
"units": "acre-feet/year",
"section": "Rule 8.1 - Production Limits",
"summary": "The district sets an explicit annual production cap of '2 acre-feet per acre per year' for this permit class; text also notes permit-specific adjustments by aquifer conditions."
"summary": "The district sets an explicit annual production cap of '2 acre-feet per acre per year' for this permit class; text also notes permit-specific adjustments by aquifer conditions.",
"ord_text": "Each non-exempt permit is limited to an annual production of 2 acre-feet per acre per year."
},
{
"feature": "well spacing",
"value": 500,
"units": "feet",
"section": "Rule 8.4 - Well Spacing",
"summary": "'A new non-exempt well must be at least 500 feet from any existing non-exempt well.' Reduced spacing may be allowed for low-capacity wells below stated gpm threshold."
"summary": "'A new non-exempt well must be at least 500 feet from any existing non-exempt well.' Reduced spacing may be allowed for low-capacity wells below stated gpm threshold.",
"ord_text": "A new non-exempt well must be at least 500 feet from any existing non-exempt well."
},
{
"feature": "production cost",
"value": "permit specific",
"units": "dollars/acre-foot",
"section": "Fee Schedule - Production Fees",
"summary": "Production fees are assessed by permit class and use category rather than one universal rate; schedule provides tiered dollar-per-acre-foot charges."
"summary": "Production fees are assessed by permit class and use category rather than one universal rate; schedule provides tiered dollar-per-acre-foot charges.",
"ord_text": "Production fees shall be assessed in accordance with the tiered dollar-per-acre-foot rates set out in the Fee Schedule for each permit class and use category."
}
],
"$instructions": {
"general": [
"Extract only enacted district requirements, not proposed language or general background text.",
"Use direct excerpts/quotes in summary whenever possible.",
"Whenever a feature row is emitted, ord_text must be the first full sentence from the source document corresponding to the requirement being extracted, copied verbatim: for quantitative features the full source sentence containing the extracted value, and for qualitative features the first full source sentence relating to the requirement. Unlike summary, ord_text must be a single contiguous sentence reproduced exactly as written, with no paraphrasing, normalization, ellipses, added context, or commentary. If a feature has no requirement, omit the feature row entirely rather than emitting a row with an empty ord_text.",
"If a feature has no requirement, omit the feature row entirely; do not emit a row with value, units, section, and summary set to null, because every emitted row must carry a non-empty verbatim ord_text.",
"When multiple values exist for one feature, choose the value applicable to the primary general rule and describe alternatives/conditions in summary.",
"Preserve distinctions between drilling permits, extraction permits, transfer permits, and reporting obligations.",
Expand Down
7 changes: 7 additions & 0 deletions tests/python/unit/utilities/test_utilities_finalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,7 @@ def test_save_db_writes_csvs(tmp_path):
"value": 100,
"units": "ft",
"summary": "Maximum height",
"ord_text": "Buildings shall not exceed 100 ft.",
"year": 2020,
"source": "http://source",
"quantitative": True,
Expand All @@ -470,6 +471,7 @@ def test_save_db_writes_csvs(tmp_path):
{
"feature": "Setback",
"summary": "Setback distance",
"ord_text": "Turbines shall be set back from property lines.",
"quantitative": False,
}
)
Expand All @@ -490,6 +492,11 @@ def test_save_db_writes_csvs(tmp_path):
assert len(qual) == 1
assert quant.iloc[0]["feature"] == "Height"
assert qual.iloc[0]["feature"] == "Setback"
assert quant.iloc[0]["ord_text"] == "Buildings shall not exceed 100 ft."
assert (
qual.iloc[0]["ord_text"]
== "Turbines shall be set back from property lines."
)


def test_save_db_with_empty_df(tmp_path):
Expand Down
Loading