diff --git a/lib/crewai/src/crewai/governance/__init__.py b/lib/crewai/src/crewai/governance/__init__.py new file mode 100644 index 0000000000..e505d92508 --- /dev/null +++ b/lib/crewai/src/crewai/governance/__init__.py @@ -0,0 +1,3 @@ +from crewai.governance.governance_decision import GovernanceDecision, GovernanceOutcome + +__all__ = ["GovernanceDecision", "GovernanceOutcome"] diff --git a/lib/crewai/src/crewai/governance/governance_decision.py b/lib/crewai/src/crewai/governance/governance_decision.py new file mode 100644 index 0000000000..314ece89a0 --- /dev/null +++ b/lib/crewai/src/crewai/governance/governance_decision.py @@ -0,0 +1,119 @@ +""" +GovernanceDecision — Vendor-neutral governance hook return type for CrewAI. + +This module defines the serialized contract that crew-level governance hooks +(before_tool_call / after_tool_call) can optionally return. External governance +engines (TealTiger, Neura Relay, Vaara, etc.) implement this contract without +requiring CrewAI to depend on any vendor package. + +The GovernanceDecision is the pre-execution authorization record. +The GovernanceOutcome is the post-execution result record, linked back +to the decision via decision_id. + +Vendor-specific evidence (signed receipts, Merkle proofs, etc.) lives +under the `extensions` dict and is never validated by CrewAI core. +""" + +from __future__ import annotations + +from typing import Any, Literal, TypedDict + + +class GovernanceDecision(TypedDict, total=False): + """Pre-execution authorization record returned by a governance hook. + + All fields are optional (total=False) to allow governance engines to + populate only the fields they support. The minimum useful decision is + {decision, reason}. + + Extensions are pass-through: CrewAI will serialize/deserialize them + without validation, allowing any governance engine to attach its own + evidence format (e.g., extensions["teec"], extensions["neura"]). + """ + + # Identity + decision_id: str + """Unique identifier for this decision. Used by GovernanceOutcome to link back.""" + + agent_id: str + """Identifier of the agent requesting the tool call.""" + + agent_role: str + """Role of the agent (e.g., 'Researcher', 'Admin').""" + + # Action context + tool: str + """Name of the tool being invoked.""" + + request_id: str + """Unique identifier for the specific tool call request.""" + + params_hash: str + """SHA-256 hash of the canonicalized (JCS) tool call parameters.""" + + target: str + """Target resource or entity the tool operates on, if known.""" + + # Policy evaluation + policy_refs: list[str] + """List of policy rule identifiers that were evaluated.""" + + decision: Literal["allow", "deny", "require_approval", "revise"] + """The governance verdict for this tool call.""" + + reason: str + """Human-readable explanation of why this decision was made.""" + + # Lifecycle + issued_at: str + """ISO 8601 timestamp of when this decision was issued.""" + + expires_at: str | None + """ISO 8601 timestamp after which this decision is invalid (fail-closed to deny).""" + + supersedes: str | None + """decision_id of a prior decision that this one explicitly overrides.""" + + revalidate_if: list[str] + """Conditions that require re-evaluation before execution (e.g., 'policy_updated', 'budget_changed').""" + + # Evidence + evidence_refs: list[str] + """References to external evidence artifacts (URIs, hashes, receipt IDs).""" + + extensions: dict[str, Any] + """Vendor-specific evidence. CrewAI passes this through without validation. + + Examples: + extensions["teec"] = {"receipt_id": "...", "evidence_hash": "...", "prev_hash": "..."} + extensions["neura"] = {"relay_id": "...", "action_card": "..."} + """ + + +class GovernanceOutcome(TypedDict, total=False): + """Post-execution result record linked to a GovernanceDecision. + + Emitted after the tool call completes (or fails). The decision_id + links this outcome back to the authorization record that preceded it. + """ + + decision_id: str + """Links back to the GovernanceDecision that authorized this execution.""" + + outcome: Literal["executed", "blocked", "error", "timeout"] + """What actually happened after the governance decision.""" + + tool_output_hash: str | None + """SHA-256 hash of the tool output (not the raw output itself).""" + + error_type: str | None + """Error class name if outcome is 'error'.""" + + error_message: str | None + """Error message if outcome is 'error'.""" + + completed_at: str + """ISO 8601 timestamp of when execution completed.""" + + extensions: dict[str, Any] + """Vendor-specific post-execution evidence.""" diff --git a/lib/crewai/tests/test_governance_decision_contract.py b/lib/crewai/tests/test_governance_decision_contract.py new file mode 100644 index 0000000000..4290974f36 --- /dev/null +++ b/lib/crewai/tests/test_governance_decision_contract.py @@ -0,0 +1,219 @@ +""" +Contract tests for GovernanceDecision and GovernanceOutcome. + +These tests validate that: +1. All four decision routes produce valid GovernanceDecision dicts +2. Extensions round-trip through JSON without validation failures +3. GovernanceOutcome links back to a decision via decision_id +4. Unknown extension payloads are preserved without modification +5. Error outcomes carry error_type and error_message + +No vendor imports. No external dependencies beyond stdlib. +""" + +import json +from typing import Any + +from crewai.governance.governance_decision import GovernanceDecision, GovernanceOutcome + + +# --- Contract Test Fixtures --- + +FIXTURE_ALLOW: GovernanceDecision = { + "decision_id": "d-001", + "agent_id": "support-bot", + "agent_role": "Support Agent", + "tool": "search_docs", + "request_id": "req-abc-001", + "params_hash": "sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + "policy_refs": ["allow-read-tools-v1"], + "decision": "allow", + "reason": "Tool is in the agent's read allowlist", + "issued_at": "2026-06-03T14:00:00Z", +} + +FIXTURE_DENY: GovernanceDecision = { + "decision_id": "d-002", + "agent_id": "finance-agent", + "agent_role": "Finance Analyst", + "tool": "delete_customer", + "request_id": "req-abc-002", + "params_hash": "sha256:a8f3c91e4b2d7f6a1e9c3b5d8f2a4c6e0b7d9f1a3c5e7b9d1f3a5c7e9b0d2f4a", + "policy_refs": ["deny-destructive-v1"], + "decision": "deny", + "reason": "Tool not in allowlist for Finance Analyst role", + "issued_at": "2026-06-03T14:01:00Z", +} + +FIXTURE_REQUIRE_APPROVAL: GovernanceDecision = { + "decision_id": "d-003", + "agent_id": "admin-agent", + "agent_role": "Admin", + "tool": "export_data", + "request_id": "req-abc-003", + "target": "customer_database", + "policy_refs": ["require-approval-exports-v1"], + "decision": "require_approval", + "reason": "Data export requires human sign-off", + "issued_at": "2026-06-03T14:05:00Z", + "expires_at": "2026-06-03T14:10:00Z", +} + +FIXTURE_ALLOW_WITH_EXTENSION: GovernanceDecision = { + "decision_id": "d-004", + "agent_id": "ops-agent", + "agent_role": "Operations", + "tool": "deploy_service", + "request_id": "req-abc-004", + "params_hash": "sha256:b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b1c2d3e4f5a6b7c8d9e0f1a2b3c4", + "policy_refs": ["allow-deploy-with-evidence-v1"], + "decision": "allow", + "reason": "Policy: scoped token and audit receipt present", + "issued_at": "2026-06-03T14:10:00Z", + "evidence_refs": ["teec-receipt-004"], + "extensions": { + "teec": { + "receipt_id": "teec-004", + "evidence_hash": "sha256:b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8", + "prev_hash": "sha256:f7a8b9c0d1e2f3a4b5c6d7e8f9a0b1c2", + "verifier_contract_version": "1.0.0", + } + }, +} + +FIXTURE_REVISE: GovernanceDecision = { + "decision_id": "d-005", + "agent_id": "finance-agent", + "agent_role": "Finance Analyst", + "tool": "stripe.refund", + "request_id": "req-abc-005", + "params_hash": "sha256:c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b1c2d3e4f5a6b7c8d9e0f1a2b3c4d5", + "target": "payment_pmt_123", + "policy_refs": ["refund-limit-v1"], + "decision": "revise", + "reason": "Refund amount exceeds $1000 limit. Reduce amount below $1000 and re-submit.", + "issued_at": "2026-06-03T14:15:00Z", + "revalidate_if": ["amount_changed"], +} + +FIXTURE_OUTCOME: GovernanceOutcome = { + "decision_id": "d-004", + "outcome": "executed", + "tool_output_hash": "sha256:d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0c1d2e3f4a5b6c7d8e9f0a1b2c3d4e5", + "completed_at": "2026-06-03T14:10:02Z", +} + +FIXTURE_OUTCOME_ERROR: GovernanceOutcome = { + "decision_id": "d-002", + "outcome": "error", + "error_type": "ToolExecutionError", + "error_message": "Connection refused: database host unreachable", + "completed_at": "2026-06-03T14:01:03Z", +} + +FIXTURE_UNKNOWN_EXTENSION: GovernanceDecision = { + "decision_id": "d-006", + "agent_id": "test-agent", + "tool": "any_tool", + "decision": "allow", + "reason": "Testing unknown extension round-trip", + "issued_at": "2026-06-03T14:20:00Z", + "extensions": { + "custom_vendor": { + "arbitrary_field": True, + "nested": {"deep": [1, 2, 3]}, + "unicode": "日本語テスト", + } + }, +} + + +# --- Contract Tests --- + + +def test_allow_fixture_is_valid_governance_decision() -> None: + """ALLOW decision contains minimum required fields.""" + assert FIXTURE_ALLOW["decision"] == "allow" + assert "decision_id" in FIXTURE_ALLOW + assert "agent_id" in FIXTURE_ALLOW + assert "tool" in FIXTURE_ALLOW + assert "reason" in FIXTURE_ALLOW + assert "issued_at" in FIXTURE_ALLOW + + +def test_deny_fixture_is_valid_governance_decision() -> None: + """DENY decision contains policy reference explaining the denial.""" + assert FIXTURE_DENY["decision"] == "deny" + assert len(FIXTURE_DENY["policy_refs"]) > 0 + assert "reason" in FIXTURE_DENY + + +def test_require_approval_fixture_has_expiry() -> None: + """REQUIRE_APPROVAL decision includes expires_at for time-bound approval.""" + assert FIXTURE_REQUIRE_APPROVAL["decision"] == "require_approval" + assert FIXTURE_REQUIRE_APPROVAL["expires_at"] is not None + + +def test_revise_fixture_has_revalidate_if() -> None: + """REVISE decision includes revalidate_if conditions.""" + assert FIXTURE_REVISE["decision"] == "revise" + assert len(FIXTURE_REVISE["revalidate_if"]) > 0 + + +def test_extension_round_trips_through_json() -> None: + """Extensions serialize to JSON and deserialize without data loss.""" + original = FIXTURE_ALLOW_WITH_EXTENSION + serialized = json.dumps(original) + deserialized = json.loads(serialized) + + assert deserialized["extensions"]["teec"]["receipt_id"] == "teec-004" + assert deserialized["extensions"]["teec"]["evidence_hash"] == original["extensions"]["teec"]["evidence_hash"] + assert deserialized["extensions"]["teec"]["prev_hash"] == original["extensions"]["teec"]["prev_hash"] + + +def test_unknown_extension_round_trips_without_validation_failure() -> None: + """Unknown vendor extensions pass through JSON round-trip unchanged. + + This proves the contract is vendor-neutral: CrewAI does not validate, + strip, or modify extension payloads it doesn't recognize. + """ + original = FIXTURE_UNKNOWN_EXTENSION + serialized = json.dumps(original) + deserialized = json.loads(serialized) + + assert deserialized["extensions"]["custom_vendor"]["arbitrary_field"] is True + assert deserialized["extensions"]["custom_vendor"]["nested"]["deep"] == [1, 2, 3] + assert deserialized["extensions"]["custom_vendor"]["unicode"] == "日本語テスト" + + +def test_outcome_links_back_to_decision() -> None: + """GovernanceOutcome references the authorizing decision via decision_id.""" + assert FIXTURE_OUTCOME["decision_id"] == FIXTURE_ALLOW_WITH_EXTENSION["decision_id"] + assert FIXTURE_OUTCOME["outcome"] == "executed" + assert "completed_at" in FIXTURE_OUTCOME + + +def test_error_outcome_has_error_fields() -> None: + """Error outcome carries error_type and error_message.""" + assert FIXTURE_OUTCOME_ERROR["outcome"] == "error" + assert FIXTURE_OUTCOME_ERROR["error_type"] is not None + assert FIXTURE_OUTCOME_ERROR["error_message"] is not None + assert FIXTURE_OUTCOME_ERROR["decision_id"] == FIXTURE_DENY["decision_id"] + + +def test_all_fixtures_json_serializable() -> None: + """Every fixture round-trips through JSON without error.""" + fixtures: list[dict[str, Any]] = [ + FIXTURE_ALLOW, + FIXTURE_DENY, + FIXTURE_REQUIRE_APPROVAL, + FIXTURE_ALLOW_WITH_EXTENSION, + FIXTURE_REVISE, + FIXTURE_OUTCOME, + FIXTURE_OUTCOME_ERROR, + FIXTURE_UNKNOWN_EXTENSION, + ] + for fixture in fixtures: + serialized = json.dumps(fixture) + deserialized = json.loads(serialized) + assert deserialized == fixture