Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions compass/__init__.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,21 @@
"""Ordinance document download and structured data extraction"""

from dotenv import load_dotenv

from ._version import __version__
from .utilities.logs import setup_logging_levels, COMPASS_DEBUG_LEVEL

# Temporarily import to register plugins
# Can drop once plugins register themselves
from .extraction import (
COMPASSGeoHeatPumpExtractor,
COMPASSGeoElectricityExtractor,
COMPASSGeoRMPExtractor,
COMPASSSmallWindExtractor,
COMPASSSolarExtractor,
COMPASSWindExtractor,
TexasWaterRightsExtractor,
)

# Import-time setup: read variables from a ``.env`` file (if one is found)
# into the process environment, then run the logging-level setup imported
# from ``.utilities.logs``.
load_dotenv()
setup_logging_levels()
10 changes: 6 additions & 4 deletions compass/_cli/finalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from rich.theme import Theme
from rich.console import Console

from compass.plugin.registry import PLUGIN_REGISTRY
from compass.utilities import Directories
from compass.utilities.io import load_config
from compass.utilities.jurisdictions import Jurisdiction
Expand Down Expand Up @@ -66,7 +67,7 @@ def finalize(config):
console.print("Compiling databases...")
jurisdictions = jurisdictions.get("jurisdictions", [])

_compile_db(jurisdictions, dirs)
_compile_db(jurisdictions, dirs, tech)

console.print("Saving meta info...")
num_jurisdictions_searched = len(jurisdictions)
Expand All @@ -88,7 +89,7 @@ def finalize(config):
console.print(f"✅ Finalized COMPASS run in {dirs.out!s}!")


def _compile_db(jurisdictions, dirs):
def _compile_db(jurisdictions, dirs, tech):
"""Merge all jurisdiction dbs into one"""
all_doc_infos = []
for jur_info in jurisdictions:
Expand Down Expand Up @@ -117,5 +118,6 @@ def _compile_db(jurisdictions, dirs):
{"ord_db_fp": ord_db_fp, "jurisdiction": jurisdiction}
)

db, __ = doc_infos_to_db(all_doc_infos)
save_db(db, dirs.out)
out_cols = PLUGIN_REGISTRY[tech].OUTPUT_COLUMNS
db, __ = doc_infos_to_db(all_doc_infos, out_cols)
save_db(db, dirs.out, out_cols)
336 changes: 336 additions & 0 deletions compass/data/rmp_jurisdictions.csv

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions compass/extraction/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
# Temporarily import to register plugins
# Can drop once plugins register themselves
from .ghp import COMPASSGeoHeatPumpExtractor
from .geothermal_electricity import COMPASSGeoElectricityExtractor
from .rmp import COMPASSGeoRMPExtractor
from .small_wind import COMPASSSmallWindExtractor
from .solar import COMPASSSolarExtractor
from .water import TexasWaterRightsExtractor
Expand Down
14 changes: 14 additions & 0 deletions compass/extraction/geothermal_electricity/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
"""COMPASS Geothermal Electricity plugin"""

import importlib.resources

from compass.plugin import create_schema_based_one_shot_extraction_plugin


# YAML file describing this plugin; resolved through the package resources
# so that it is found regardless of where the package is installed.
_PLUGIN_CONFIG_FP = (
    importlib.resources.files("compass.extraction.geothermal_electricity")
    / "plugin_config.yaml"
)

# Plugin class generated from the configuration file above.
COMPASSGeoElectricityExtractor = (
    create_schema_based_one_shot_extraction_plugin(
        _PLUGIN_CONFIG_FP, tech="geothermal"
    )
)
Original file line number Diff line number Diff line change
Expand Up @@ -141,4 +141,4 @@ heuristic_keywords:
- "cannabis cultivation"
- "commercial cannabis"

collection_prompts: True
collection_prompts: True
37 changes: 37 additions & 0 deletions compass/extraction/rmp/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
"""COMPASS Geothermal Resource Management Plan plugin"""

import importlib.resources
from pathlib import Path

from compass.plugin import create_schema_based_one_shot_extraction_plugin, OutputColumn
from compass.utilities import doc_infos_to_db, save_db


# Build the RMP extractor class from the YAML plugin configuration that
# ships alongside this module.
COMPASSGeoRMPExtractor = create_schema_based_one_shot_extraction_plugin(
    importlib.resources.files("compass.extraction.rmp") / "plugin_config.yaml",
    tech="geo_rmp",
)

# Make sure the RMP-specific columns are part of the plugin's output.
# Work on a copy and assign it back to the class: appending in place would
# be a no-op when ``OUTPUT_COLUMNS`` is missing (the ``[]`` fallback is a
# throwaway list) and would leak the extra columns into other plugins if
# the list object is inherited from a shared base class.
_out_cols = list(getattr(COMPASSGeoRMPExtractor, "OUTPUT_COLUMNS", []))
_out_cols += [
    OutputColumn(name)
    for name in ("restriction_level", "document_name")
    if not any(col.name == name for col in _out_cols)
]
COMPASSGeoRMPExtractor.OUTPUT_COLUMNS = _out_cols


@classmethod
def _save_structured_data(cls, doc_infos, out_dir):
    """Save RMP extraction results with a ``document_name`` column

    The database is built with ``doc_infos_to_db`` and written with
    ``save_db``; both receive the class's ``OUTPUT_COLUMNS`` (an empty
    list if the class has no such attribute). When the database is not
    empty, ``document_name`` is set to the final path component of each
    row's ``source`` value, or ``None`` when that value is not a
    non-empty string.

    Parameters
    ----------
    doc_infos
        Document info entries, forwarded unchanged to
        ``doc_infos_to_db``.
    out_dir
        Output location, forwarded unchanged to ``save_db``.

    Returns
    -------
    The second value returned by ``doc_infos_to_db`` (bound to
    ``num_docs_found`` here).
    """

    def _file_name(src):
        # Anything that is not a non-empty string (None, NaN, "")
        # gets no document name
        if not isinstance(src, str) or not src:
            return None
        return Path(src).name

    columns = getattr(cls, "OUTPUT_COLUMNS", [])
    db, num_docs_found = doc_infos_to_db(doc_infos, columns)

    if not db.empty:
        db["document_name"] = db["source"].apply(_file_name)

    save_db(db, out_dir, columns)
    return num_docs_found


# Attach the RMP-specific saver to the plugin class. The function is already
# wrapped in ``classmethod``, so it receives the class as ``cls`` when called
# through ``COMPASSGeoRMPExtractor``.
COMPASSGeoRMPExtractor.save_structured_data = _save_structured_data
101 changes: 101 additions & 0 deletions compass/extraction/rmp/plugin_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# One-shot plugin configuration for geothermal restriction extraction from
# BLM Resource Management Plan (RMP) documents.
#
# Usage:
# compass process -c config.json5 -p plugin_config.yaml
#
# Or with pixi:
# pixi run --manifest-path "<path-to-COMPASS>/pixi.toml" \
# compass process -c config.json5 -p plugin_config.yaml

schema: ./rmp_schema.json

data_type_short_desc: geothermal leasing restriction in BLM Resource Management Plan

# Keyword-based heuristic for quickly filtering text chunks before LLM calls.
heuristic_keywords:
good_tech_keywords:
- "geothermal"
- "leasing"
- "stipulation"
- "closure"
- "drilling"
- "mineral"
- "nso"
- "energy"
- "map"
good_tech_acronyms:
- "nso"
- "rmp"
- "blm"
- "tlm"
- "esa"
good_tech_phrases:
- "no surface occupancy"
- "surface disturbance"
- "geothermal leasing"
- "geothermal development"
- "geothermal exploration"
- "mineral leasing"
- "no surface occupancy"
- "surface-disturbing activities"
- "timing limitation"
- "lease stipulation"
- "closed to leasing"
- "open to leasing"
- "geothermal resources"
- "resource management plan"
- "land withdrawal"
- "mineral rights"
- "energy development"
- "minerals and energy"
- "fluid mineral"
- "fluid minerals"
- "fluid mineral leasing"
- "fluid mineral development"
- "fluid leasable minerals"
not_tech_words:
- "grazing allotment"
- "livestock"
- "recreation permit"
- "campground"
- "trail maintenance"
- "off-highway vehicle"
- "ohv route"
- "land disposal"
- "land sale"
- "residential"
- "wildfire"
- "prescribed burn"
- "hunting permit"
- "fishing"

# Collection prompts filter individual text chunks extracted from the PDF.
# Chunks that pass the filter are forwarded to the text extraction step.
# Setting to True auto-generates prompts from the schema.
collection_prompts: True

# System prompt used during the final structured extraction LLM call.
extraction_system_prompt: |-
You are an expert analyst specializing in BLM Resource Management Plans (RMPs)
and federal mineral leasing regulations. Your task is to extract structured data
about geothermal leasing restrictions from RMP document text.
Follow all instructions in the schema field descriptions carefully.
Only extract restrictions that directly constrain geothermal leasing, exploration,
drilling, or facility construction on BLM lands. Each distinct restriction should
be its own row in the outputs array. Use direct text excerpts and quotes in the
summary field wherever possible.
IMPORTANT: "Fluid minerals" is the BLM/Mineral Leasing Act regulatory term that
collectively covers oil, gas, AND geothermal resources. RMPs very frequently use
"fluid minerals" or "fluid mineral leasing/development" instead of spelling out
"oil, gas, and geothermal" — treat any restriction, closure, or stipulation on
fluid minerals as encompassing geothermal (geothermal_applicability = 'broad')
exactly as you would for an "oil, gas, and geothermal" or "mineral leasing" group
closure, unless the document explicitly excludes geothermal from the fluid
minerals category.

cache_llm_generated_content: true

# Allows a single jurisdiction entry to span multiple PDF documents,
# which is typical for RMPs that are split across many files.
doc_selection_method: multi-doc-all
Loading