diff --git a/compass/extraction/__init__.py b/compass/extraction/__init__.py index fed7f61fe..172fcfc85 100644 --- a/compass/extraction/__init__.py +++ b/compass/extraction/__init__.py @@ -13,5 +13,6 @@ from .ghp import COMPASSGeoHeatPumpExtractor from .small_wind import COMPASSSmallWindExtractor from .solar import COMPASSSolarExtractor +from .transmission import COMPASSTransmissionExtractor from .water import TexasWaterRightsExtractor from .wind import COMPASSWindExtractor diff --git a/compass/extraction/transmission/__init__.py b/compass/extraction/transmission/__init__.py new file mode 100644 index 000000000..e0c458fc0 --- /dev/null +++ b/compass/extraction/transmission/__init__.py @@ -0,0 +1,12 @@ +"""COMPASS transmission extraction plugin""" + +import importlib.resources + +from compass.plugin import create_schema_based_one_shot_extraction_plugin + + +COMPASSTransmissionExtractor = create_schema_based_one_shot_extraction_plugin( + importlib.resources.files("compass.extraction.transmission") + / "plugin_config.yaml", + tech="transmission", +) diff --git a/compass/extraction/transmission/plugin_config.yaml b/compass/extraction/transmission/plugin_config.yaml new file mode 100644 index 000000000..341da9fa4 --- /dev/null +++ b/compass/extraction/transmission/plugin_config.yaml @@ -0,0 +1,38 @@ +schema: ./transmission_schema.json5 + +data_type_short_desc: high-voltage transmission lines ordinance + +query_templates: + - "filetype:pdf {jurisdiction} electric transmission line ordinance" + - "{jurisdiction} high voltage transmission line zoning ordinance" + - "{jurisdiction} electric transmission line siting code" + - "{jurisdiction} overhead electric utility line ordinance" + - "{jurisdiction} transmission line right-of-way regulation" + +website_keywords: + pdf: 92160 + transmission: 46080 + ordinance: 23040 + zoning: 11520 + utility: 5760 + electric: 5760 + right-of-way: 1440 + overhead: 720 + permit: 720 + land use: 720 + municipal: 360 + county: 360 + code of 
ordinances: 360 + ordinance code: 360 + land use code: 360 + conditional use permit: 180 + special use permit: 180 + statute: 180 + administrative code: 180 + government: 180 + +heuristic_keywords: true + +collection_prompts: true + +cache_llm_generated_content: true diff --git a/compass/extraction/transmission/transmission_schema.json5 b/compass/extraction/transmission/transmission_schema.json5 new file mode 100644 index 000000000..fc919663f --- /dev/null +++ b/compass/extraction/transmission/transmission_schema.json5 @@ -0,0 +1,188 @@ +{ + "title": "Transmission Ordinance Extraction Schema", + "description": "Single-shot structured extraction schema for energy transmission ordinances. This schema guides an LLM to extract all relevant features in one call and returns an outputs array where each object represents one row in the extracted long-form table.", + "version": "0.0.1", + "type": "object", + "required": ["outputs"], + "additionalProperties": false, + "properties": { + "outputs": { + "type": "array", + "description": "Sparse long-form extraction table. Include only features with an enacted, explicit requirement and emit **at most one row per feature**. Never infer, imply, or guess a requirement from related context.", + "items": { + "type": "object", + "required": [ + "feature", + "value", + "units", + "section", + "summary", + "explanation" + ], + "additionalProperties": false, + "properties": { + "feature": { + "type": "string", + "description": "The ordinance feature being extracted. Must be one of the enumerated feature IDs. Do not invent aliases, prefixes, or synonym variants.", + "enum": [ + "property lines", + // Tower vs facility height. Do I want both? + "maximum height", + "noise", + "decommissioning", + ] + }, + "value": { + "description": "The extracted ordinance value. For numerical setbacks/limits, use a number. For categorical outcomes, use a string. For fields that list multiple named items, use an array of strings. 
Use null only for qualitative features, and only when an enacted, explicit, enforceable ordinance requirement for that feature is present. Null must never be used to indicate absence. If a feature has no enacted, explicit requirement in the ordinance text, omit that feature from outputs.", + "anyOf": [ + {"type": "number"}, + {"type": "string"}, + { + "type": "array", + "items": {"type": "string"}, + "additionalProperties": false + }, + {"type": "null"} + ] + }, + "units": { + "type": ["string", "null"], + "description": "Units for the extracted value. Preserve the ordinance unit wording exactly whenever possible. For setbacks/depth, use linear units such as 'feet' or 'meters' as stated in text. For noise, use 'dBA' only if the ordinance says 'dBA' or 'dB(A)'; if it says 'dB' without A-weighting, keep 'dB'. Use null for qualitative fields without measurable units." + }, + "section": { + "type": ["string", "null"], + "description": "The section title or number from the ordinance where this requirement appears. Include numerical labels if provided. Null if no section identifier is available." + }, + "summary": { + "type": "string", + "description": "A short summary of the relevant ordinance requirements." + }, + "explanation": { + "type": "string", + "description": "Brief rationale explaining why this row matches the selected feature under this schema. Reference the specific evidence in summary and how it supports the extracted value and units or, for qualitative features, the inclusion criteria. Must be a non-null, non-empty string and must not use absence placeholders." + } + } + } + } + }, + "$core_principles": { + /* + "scope_context": { + }, + */ + "strict_evidence_gate": { + "description": "Extract a feature only when the ordinance text explicitly states a requirement, definition, or prohibition for that same feature. Never infer, assume, extrapolate, or guess from related context, implications, headings, or nearby provisions. 
If the ordinance points to an outside document or standard without restating the controlling requirement in the ordinance text itself, do not import missing values from that outside source." + }, + "data_omission": { + "description": "Emit only positively matched features. If a feature is not explicitly present, omit it entirely rather than returning placeholder text. For qualitative features, use value=null and units=null only when an enacted, explicit requirement or definition for that same feature is present. For numeric features, extract only when an explicit numeric threshold is stated in the ordinance text; otherwise omit the feature instead of returning null, empty, or qualitative-only values. Never emit absence placeholders such as 'not found', 'no explicit requirement', 'none', or similar text in any field." + }, + "numeric_prioritization": { + // Review numeric features if I specify all conditions here. + "description": "When multiple numeric values apply to the same feature, keep one row and select the controlling most restrictive value for that feature. Restrictiveness rules: setbacks -> choose the largest minimum separation distance; tower height -> choose the highest maximum height; noise -> choose the lowest allowed noise limit. Keep condition-specific alternatives in summary only when the ordinance text explicitly shows they all apply to the same feature for electric transmission lines and related infrastructure." + }, + /* + "definition_v_rule_test": { + } + */ + }, + "$definitions": { + "setback_features": { + "description": "Setback features for electrical transmission lines and related infrastructure. Treat each setback feature independently and do not cross-apply a setback unless the ordinance text explicitly states that it applies to multiple target types. When a single clause explicitly lists multiple target types and one shared numeric setback, emit one row per explicitly listed feature using the same numeric value and units and cite the same clause in summary. 
Apply the shared numeric prioritization rules in $core_principles when multiple numeric values explicitly apply to the same feature.", + "properties": { + "structures": { + "description": "Minimum required separation from structures and buildings that do not match the definition of 'residential buildings'." + }, + "residential buildings": { + "description": "Minimum required separation from residential buildings, occupied dwellings, occupied buildings, residences, homes, residential receptors, or residential uses." + }, + "property lines": { + "description": "Minimum required separation from property lines, lot lines, parcel boundaries, or lease boundaries when the ordinance explicitly states the distance is measured from that boundary. Do not remap property-line distances to roads or residential zones unless the text explicitly makes them equivalent for that requirement. Distances to official plan lines or specific plan lines for public highways do not belong here unless the ordinance expressly defines those lines as property boundaries for the same requirement." + }, + "roads": { + "description": "Minimum required separation from public road rights-of-way. Property-line setbacks do NOT count for this feature unless the ordinance text explicitly states that the property line is the road right-of-way or otherwise makes them the same boundary for that requirement. IGNORE: Do not respond based on setbacks from other kinds of right-of-way such as utility easements, etc." + }, + "airport": { + "description":"Minimum required separation from airport runways and heliports." + }, + "OHWM": { + "description":"Minimum required separation from Ordinary High Water Mark (OHWM)." + }, + "railroads": { + "description": "Minimum required separation from railroads, railroad rights-of-way, rail corridors, or active rail lines. Extract only when rail infrastructure is explicitly named." 
+ } + } + }, + "numerical_features": { + "description": "Non-setback numerical restriction features. Only extract if numerical values are explicitly given in the text.", + "properties": { + "noise": { + "description": "Extract maximum allowable operational noise for electric transmission lines and related facilities only when an explicit numeric limit is stated. Normalize A-weighted units to 'dBA' in units and preserve verbatim wording in summary. If the ordinance only references compliance with external standards or provides no numeric noise limit, omit this feature entirely." + }, + "maximum height": { + "description": "Extract maximum structure height allowed." + } + } + }, + "operational_features": { + "description": "Operational, licensing, permitting, inspection, and abandonment requirements. These features require text extraction only.", + "properties": { + "bond": { + "description": "Extract requirements for bonds, sureties, or financial assurance instruments related to construction, operation, maintenance, or decommissioning of electric transmission lines and related infrastructure. Include any specific conditions, amounts, or criteria for bond release when explicitly stated." + }, + "decommissioning": { + "description": "Extract requirements for abandonment, removal, and site restoration when systems are retired, fail, or towers are abandoned, including responsible party and timeline details when explicitly stated." + }, + "permit": { + "description": "Extract requirements for conditional use permits, special use permits, or other discretionary land use approvals that apply to electric transmission lines and related infrastructure. Include any specific conditions or criteria for approval when explicitly stated." + }, + "FAA": { + "description": "Extract requirements for compliance with Federal Aviation Administration (FAA) regulations and local approvals for transmission lines and related infrastructure near airports. 
Include any specific FAA standards or local approval processes when explicitly stated." + } + } + }, + "prohibition_features": { + "description": "Prohibitions, bans, or moratoria on building, installing, siting, or otherwise deploying electric transmission lines and related infrastructure in certain areas or under certain conditions. These features require text extraction only.", + "properties": { + "rural zone": { + "description": "Extract prohibitions or moratoria on electric transmission lines and related infrastructure in rural zones, agricultural zones, or similar low-density land use areas when the ordinance explicitly states that the prohibition applies to transmission lines or related infrastructure." + }, + "residential zone": { + "description": "Extract prohibitions or moratoria on electric transmission lines and related infrastructure in residential zones, residential districts, or similar land use areas when the ordinance explicitly states that the prohibition applies to transmission lines or related infrastructure." + }, + "commercial zone": { + "description": "Extract prohibitions or moratoria on electric transmission lines and related infrastructure in commercial zones, commercial districts, or similar land use areas when the ordinance explicitly states that the prohibition applies to transmission lines or related infrastructure." + }, + "light industrial zone": { + "description": "Extract prohibitions or moratoria on electric transmission lines and related infrastructure in light industrial zones, light industrial districts, or similar land use areas when the ordinance explicitly states that the prohibition applies to transmission lines or related infrastructure." 
+ }, + "agricultural zone": { + "description": "Extract prohibitions or moratoria on electric transmission lines and related infrastructure in agricultural zones, agricultural districts, or similar land use areas when the ordinance explicitly states that the prohibition applies to transmission lines or related infrastructure. Include prohibitions in irrigated farmland, ranch land, or similar agricultural land when the ordinance explicitly states that the prohibition applies to transmission lines or related infrastructure in those areas." + } + } + }, + }, + "$examples": [ + { + "feature": "noise", + "value": 50, + "units": "dBA", + "section": "SECTION 1308 – Performance / Construction Standards", + // Need some editing + "summary": "The transmission line and or facility shall not generate noise in excess of Fifty (50) decibel levels at ground level to the property lines or at the nearest residence. Such measurements shall be signed by a qualified engineer, stating that noise levels are being met, per ordinance." + } + ], + "$instructions": { + "general": [ + "Use direct text excerpts and quotes in summary whenever possible.", + ], + "setbacks": [ + "Setbacks should be extracted as minimum separation distances.", + "Prefer numeric values with units ('feet', 'meters').", + "Setback rows must contain numeric value and non-null units; never emit qualitative-only setback rows.", + "Treat property-line, lot-line, and parcel-boundary setbacks as 'property lines' when the ordinance measures the setback from that boundary.", + ] + }, + "$qualitative_features": [ + "decommissioning", + ] +}