diff --git a/.github/scripts/build_cloud_run_image.sh b/.github/scripts/build_cloud_run_image.sh new file mode 100755 index 000000000..27eeb7345 --- /dev/null +++ b/.github/scripts/build_cloud_run_image.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +# Build the Cloud Run container image and push it to Artifact Registry. +# With CLOUD_RUN_DRY_RUN=1 the commands are printed instead of executed. + +set -euo pipefail + +source .github/scripts/cloud_run_env.sh +cloud_run_set_defaults + +if [[ "${CLOUD_RUN_DRY_RUN:-0}" == "1" ]]; then + cloud_run_run gcloud services enable run.googleapis.com artifactregistry.googleapis.com cloudbuild.googleapis.com --project "${CLOUD_RUN_PROJECT}" + cloud_run_run gcloud artifacts repositories describe "${CLOUD_RUN_ARTIFACT_REPOSITORY}" --project "${CLOUD_RUN_PROJECT}" --location "${CLOUD_RUN_REGION}" + cloud_run_run gcloud artifacts repositories create "${CLOUD_RUN_ARTIFACT_REPOSITORY}" --project "${CLOUD_RUN_PROJECT}" --repository-format docker --location "${CLOUD_RUN_REGION}" --description "Docker repository for PolicyEngine API Cloud Run" + cloud_run_run gcloud auth configure-docker "${CLOUD_RUN_REGION}-docker.pkg.dev" --quiet + cloud_run_run docker build -f gcp/cloud_run/Dockerfile -t "${CLOUD_RUN_IMAGE_URI}" . + cloud_run_run docker push "${CLOUD_RUN_IMAGE_URI}" + exit 0 +fi + +gcloud services enable run.googleapis.com artifactregistry.googleapis.com cloudbuild.googleapis.com --project "${CLOUD_RUN_PROJECT}" + +# Create the Artifact Registry repository only when describe fails; pass +# --project to create as well so both commands target the same project. +if ! gcloud artifacts repositories describe "${CLOUD_RUN_ARTIFACT_REPOSITORY}" \ + --project "${CLOUD_RUN_PROJECT}" \ + --location "${CLOUD_RUN_REGION}" >/dev/null 2>&1; then + gcloud artifacts repositories create "${CLOUD_RUN_ARTIFACT_REPOSITORY}" \ + --project "${CLOUD_RUN_PROJECT}" \ + --repository-format docker \ + --location "${CLOUD_RUN_REGION}" \ + --description "Docker repository for PolicyEngine API Cloud Run" +fi + +gcloud auth configure-docker "${CLOUD_RUN_REGION}-docker.pkg.dev" --quiet +docker build -f gcp/cloud_run/Dockerfile -t "${CLOUD_RUN_IMAGE_URI}" . 
+docker push "${CLOUD_RUN_IMAGE_URI}" diff --git a/.github/scripts/check_changelog_fragment.sh b/.github/scripts/check_changelog_fragment.sh new file mode 100644 index 000000000..280e9eb0e --- /dev/null +++ b/.github/scripts/check_changelog_fragment.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +# Fail when changelog.d/ holds no fragment file other than .gitkeep. + +set -euo pipefail + +fragments="$(find changelog.d -type f ! -name '.gitkeep' | wc -l)" +if [[ "${fragments}" -eq 0 ]]; then + echo "::error::No changelog fragment found in changelog.d/" + echo "Add one with: echo 'Description.' > changelog.d/\$(git branch --show-current).<type>.md" + echo "Types: added, changed, fixed, removed, breaking" + exit 1 +fi diff --git a/.github/scripts/cloud_run_env.sh b/.github/scripts/cloud_run_env.sh new file mode 100755 index 000000000..66ca3c5f1 --- /dev/null +++ b/.github/scripts/cloud_run_env.sh @@ -0,0 +1,84 @@ +#!/usr/bin/env bash + +# Fill every CLOUD_RUN_* setting that is unset or empty with its default, +# then export all of them for child processes. +cloud_run_set_defaults() { + CLOUD_RUN_PROJECT="${CLOUD_RUN_PROJECT:-policyengine-api}" + CLOUD_RUN_REGION="${CLOUD_RUN_REGION:-us-central1}" + CLOUD_RUN_SERVICE="${CLOUD_RUN_SERVICE:-policyengine-api}" + CLOUD_RUN_ARTIFACT_REPOSITORY="${CLOUD_RUN_ARTIFACT_REPOSITORY:-policyengine-api}" + CLOUD_RUN_RUNTIME_SERVICE_ACCOUNT="${CLOUD_RUN_RUNTIME_SERVICE_ACCOUNT:-policyengine-api-cr-runtime@policyengine-api.iam.gserviceaccount.com}" + CLOUD_RUN_CLOUD_SQL_INSTANCE="${CLOUD_RUN_CLOUD_SQL_INSTANCE:-policyengine-api:us-central1:policyengine-api-data}" + CLOUD_RUN_CPU="${CLOUD_RUN_CPU:-4}" + CLOUD_RUN_MEMORY="${CLOUD_RUN_MEMORY:-16Gi}" + CLOUD_RUN_TIMEOUT="${CLOUD_RUN_TIMEOUT:-300}" + CLOUD_RUN_MIN_INSTANCES="${CLOUD_RUN_MIN_INSTANCES:-0}" + CLOUD_RUN_MAX_INSTANCES="${CLOUD_RUN_MAX_INSTANCES:-1}" + CLOUD_RUN_PORT="${CLOUD_RUN_PORT:-8080}" + CLOUD_RUN_POLICYENGINE_DB_PASSWORD_SECRET="${CLOUD_RUN_POLICYENGINE_DB_PASSWORD_SECRET:-policyengine-api-prod-db-password:latest}" + CLOUD_RUN_GITHUB_MICRODATA_TOKEN_SECRET="${CLOUD_RUN_GITHUB_MICRODATA_TOKEN_SECRET:-policyengine-api-prod-github-microdata-token:latest}" + 
CLOUD_RUN_ANTHROPIC_API_KEY_SECRET="${CLOUD_RUN_ANTHROPIC_API_KEY_SECRET:-policyengine-api-prod-anthropic-api-key:latest}" + CLOUD_RUN_OPENAI_API_KEY_SECRET="${CLOUD_RUN_OPENAI_API_KEY_SECRET:-policyengine-api-prod-openai-api-key:latest}" + CLOUD_RUN_HUGGING_FACE_TOKEN_SECRET="${CLOUD_RUN_HUGGING_FACE_TOKEN_SECRET:-policyengine-api-prod-hugging-face-token:latest}" + + local sha + sha="${GITHUB_SHA:-local}" + CLOUD_RUN_IMAGE_TAG="${CLOUD_RUN_IMAGE_TAG:-${sha}}" + CLOUD_RUN_IMAGE_URI="${CLOUD_RUN_IMAGE_URI:-${CLOUD_RUN_REGION}-docker.pkg.dev/${CLOUD_RUN_PROJECT}/${CLOUD_RUN_ARTIFACT_REPOSITORY}/${CLOUD_RUN_SERVICE}:${CLOUD_RUN_IMAGE_TAG}}" + + local short_sha + short_sha="${sha:0:7}" + CLOUD_RUN_TAG="${CLOUD_RUN_TAG:-stage3-${GITHUB_RUN_NUMBER:-local}-${short_sha}}" + + export CLOUD_RUN_PROJECT + export CLOUD_RUN_REGION + export CLOUD_RUN_SERVICE + export CLOUD_RUN_ARTIFACT_REPOSITORY + export CLOUD_RUN_RUNTIME_SERVICE_ACCOUNT + export CLOUD_RUN_CLOUD_SQL_INSTANCE + export CLOUD_RUN_CPU + export CLOUD_RUN_MEMORY + export CLOUD_RUN_TIMEOUT + export CLOUD_RUN_MIN_INSTANCES + export CLOUD_RUN_MAX_INSTANCES + export CLOUD_RUN_PORT + export CLOUD_RUN_POLICYENGINE_DB_PASSWORD_SECRET + export CLOUD_RUN_GITHUB_MICRODATA_TOKEN_SECRET + export CLOUD_RUN_ANTHROPIC_API_KEY_SECRET + export CLOUD_RUN_OPENAI_API_KEY_SECRET + export CLOUD_RUN_HUGGING_FACE_TOKEN_SECRET + export CLOUD_RUN_IMAGE_TAG + export CLOUD_RUN_IMAGE_URI + export CLOUD_RUN_TAG +} + +# Check that each named variable is set and non-empty; otherwise list the +# missing names on stderr and return 1. +cloud_run_require_env() { + local missing=() + local name + + for name in "$@"; do + if [[ -z "${!name:-}" ]]; then + missing+=("${name}") + fi + done + + if (( ${#missing[@]} > 0 )); then + echo "Missing required Cloud Run deployment configuration: ${missing[*]}" >&2 + return 1 + fi +} + +# Run the given command. When CLOUD_RUN_DRY_RUN is 1, print it shell-quoted +# (prefixed with a plus sign) and return 0 without running it. +cloud_run_run() { + if [[ "${CLOUD_RUN_DRY_RUN:-0}" == "1" ]]; then + printf '+' + printf ' %q' "$@" + printf '\n' + return 0 + fi + + "$@" +} diff --git a/.github/scripts/deploy_cloud_run_candidate.sh 
b/.github/scripts/deploy_cloud_run_candidate.sh new file mode 100755 index 000000000..f97dd9a29 --- /dev/null +++ b/.github/scripts/deploy_cloud_run_candidate.sh @@ -0,0 +1,55 @@ +#!/usr/bin/env bash + +set -euo pipefail + +source .github/scripts/cloud_run_env.sh +cloud_run_set_defaults + +bash .github/scripts/validate_cloud_run_deploy_env.sh + +env_vars=( + "POLICYENGINE_DB_INSTANCE_CONNECTION_NAME=${CLOUD_RUN_CLOUD_SQL_INSTANCE}" + "POLICYENGINE_DB_USER=${POLICYENGINE_DB_USER:-policyengine}" + "POLICYENGINE_DB_NAME=${POLICYENGINE_DB_NAME:-policyengine}" + "SIMULATION_API_URL=${SIMULATION_API_URL}" + "GATEWAY_AUTH_REQUIRED=1" + "GATEWAY_AUTH_ISSUER=${GATEWAY_AUTH_ISSUER}" + "GATEWAY_AUTH_AUDIENCE=${GATEWAY_AUTH_AUDIENCE}" + "GATEWAY_AUTH_CLIENT_ID=${GATEWAY_AUTH_CLIENT_ID}" + "GATEWAY_AUTH_CLIENT_SECRET_RESOURCE=${GATEWAY_AUTH_CLIENT_SECRET_RESOURCE}" + "API_HOST_BACKEND=cloud_run" + "SIM_FRONT_DOOR=old_gateway_direct" + "SIM_COMPUTE_ECONOMY=old_gateway" + "CLOUD_RUN_REVISION_TAG=${CLOUD_RUN_TAG}" +) + +secret_vars=( + "POLICYENGINE_DB_PASSWORD=${CLOUD_RUN_POLICYENGINE_DB_PASSWORD_SECRET}" + "POLICYENGINE_GITHUB_MICRODATA_AUTH_TOKEN=${CLOUD_RUN_GITHUB_MICRODATA_TOKEN_SECRET}" + "ANTHROPIC_API_KEY=${CLOUD_RUN_ANTHROPIC_API_KEY_SECRET}" + "OPENAI_API_KEY=${CLOUD_RUN_OPENAI_API_KEY_SECRET}" + "HUGGING_FACE_TOKEN=${CLOUD_RUN_HUGGING_FACE_TOKEN_SECRET}" +) + +set_env_vars="$(IFS='|'; echo "^|^${env_vars[*]}")" +set_secret_vars="$(IFS='|'; echo "^|^${secret_vars[*]}")" + +cloud_run_run gcloud run deploy "${CLOUD_RUN_SERVICE}" \ + --project "${CLOUD_RUN_PROJECT}" \ + --region "${CLOUD_RUN_REGION}" \ + --platform managed \ + --image "${CLOUD_RUN_IMAGE_URI}" \ + --tag "${CLOUD_RUN_TAG}" \ + --no-traffic \ + --allow-unauthenticated \ + --execution-environment gen2 \ + --service-account "${CLOUD_RUN_RUNTIME_SERVICE_ACCOUNT}" \ + --add-cloudsql-instances "${CLOUD_RUN_CLOUD_SQL_INSTANCE}" \ + --port "${CLOUD_RUN_PORT}" \ + --cpu "${CLOUD_RUN_CPU}" \ + --memory 
"${CLOUD_RUN_MEMORY}" \ + --timeout "${CLOUD_RUN_TIMEOUT}" \ + --min-instances "${CLOUD_RUN_MIN_INSTANCES}" \ + --max-instances "${CLOUD_RUN_MAX_INSTANCES}" \ + --set-env-vars "${set_env_vars}" \ + --set-secrets "${set_secret_vars}" diff --git a/.github/scripts/get_cloud_run_service_url.sh b/.github/scripts/get_cloud_run_service_url.sh new file mode 100644 index 000000000..1dc193cc3 --- /dev/null +++ b/.github/scripts/get_cloud_run_service_url.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + +set -euo pipefail + +source .github/scripts/cloud_run_env.sh +cloud_run_set_defaults + +if [[ "${CLOUD_RUN_DRY_RUN:-0}" == "1" ]]; then + echo "https://${CLOUD_RUN_SERVICE}-dry-run.a.run.app" + exit 0 +fi + +gcloud run services describe "${CLOUD_RUN_SERVICE}" \ + --project "${CLOUD_RUN_PROJECT}" \ + --region "${CLOUD_RUN_REGION}" \ + --platform managed \ + --format 'value(status.url)' diff --git a/.github/scripts/get_cloud_run_tag_url.sh b/.github/scripts/get_cloud_run_tag_url.sh new file mode 100755 index 000000000..e91d91462 --- /dev/null +++ b/.github/scripts/get_cloud_run_tag_url.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash + +set -euo pipefail + +source .github/scripts/cloud_run_env.sh +cloud_run_set_defaults + +if [[ "${CLOUD_RUN_DRY_RUN:-0}" == "1" ]]; then + echo "https://${CLOUD_RUN_TAG}---${CLOUD_RUN_SERVICE}-dry-run.a.run.app" + exit 0 +fi + +gcloud run services describe "${CLOUD_RUN_SERVICE}" \ + --project "${CLOUD_RUN_PROJECT}" \ + --region "${CLOUD_RUN_REGION}" \ + --platform managed \ + --format json | python -c ' +import json +import os +import sys + +service = json.load(sys.stdin) +tag = os.environ["CLOUD_RUN_TAG"] +for traffic_target in service.get("status", {}).get("traffic", []): + if traffic_target.get("tag") == tag and traffic_target.get("url"): + print(traffic_target["url"]) + raise SystemExit(0) + +print(f"Failed to determine Cloud Run URL for tag {tag}", file=sys.stderr) +raise SystemExit(1) +' diff --git a/.github/scripts/promote_cloud_run_tag.sh 
b/.github/scripts/promote_cloud_run_tag.sh new file mode 100644 index 000000000..751cc6b26 --- /dev/null +++ b/.github/scripts/promote_cloud_run_tag.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +set -euo pipefail + +source .github/scripts/cloud_run_env.sh +cloud_run_set_defaults + +cloud_run_require_env \ + CLOUD_RUN_PROJECT \ + CLOUD_RUN_REGION \ + CLOUD_RUN_SERVICE \ + CLOUD_RUN_TAG + +cloud_run_run gcloud run services update-traffic "${CLOUD_RUN_SERVICE}" \ + --project "${CLOUD_RUN_PROJECT}" \ + --region "${CLOUD_RUN_REGION}" \ + --platform managed \ + --to-tags "${CLOUD_RUN_TAG}=100" diff --git a/.github/scripts/sync_cloud_run_secrets.sh b/.github/scripts/sync_cloud_run_secrets.sh new file mode 100644 index 000000000..75f3325e6 --- /dev/null +++ b/.github/scripts/sync_cloud_run_secrets.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash + +set -euo pipefail +set +x + +CLOUD_RUN_PROJECT="${CLOUD_RUN_PROJECT:-policyengine-api}" + +require_env() { + local env_name="$1" + if [[ -z "${!env_name:-}" ]]; then + echo "::error::Missing required workflow environment ${env_name}." + exit 1 + fi +} + +sync_secret() { + local env_name="$1" + local secret_name="$2" + local secret_value="${!env_name:-}" + + if [[ -z "${secret_value}" ]]; then + echo "::error::Missing required GitHub secret ${env_name}." + exit 1 + fi + + if ! gcloud secrets describe "${secret_name}" \ + --project "${CLOUD_RUN_PROJECT}" >/dev/null 2>&1; then + gcloud secrets create "${secret_name}" \ + --project "${CLOUD_RUN_PROJECT}" \ + --replication-policy automatic + fi + + printf '%s' "${secret_value}" | gcloud secrets versions add \ + "${secret_name}" \ + --project "${CLOUD_RUN_PROJECT}" \ + --data-file=- >/dev/null + + gcloud secrets add-iam-policy-binding "${secret_name}" \ + --project "${CLOUD_RUN_PROJECT}" \ + --member "serviceAccount:${CLOUD_RUN_RUNTIME_SERVICE_ACCOUNT}" \ + --role roles/secretmanager.secretAccessor >/dev/null + + echo "Synced ${env_name} to Secret Manager secret ${secret_name}." 
+} + +require_env CLOUD_RUN_RUNTIME_SERVICE_ACCOUNT + +sync_secret POLICYENGINE_DB_PASSWORD policyengine-api-prod-db-password +sync_secret POLICYENGINE_GITHUB_MICRODATA_AUTH_TOKEN policyengine-api-prod-github-microdata-token +sync_secret ANTHROPIC_API_KEY policyengine-api-prod-anthropic-api-key +sync_secret OPENAI_API_KEY policyengine-api-prod-openai-api-key +sync_secret HUGGING_FACE_TOKEN policyengine-api-prod-hugging-face-token diff --git a/.github/scripts/validate_cloud_run_deploy_env.sh b/.github/scripts/validate_cloud_run_deploy_env.sh new file mode 100755 index 000000000..fe549db34 --- /dev/null +++ b/.github/scripts/validate_cloud_run_deploy_env.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash + +set -euo pipefail + +source .github/scripts/cloud_run_env.sh +cloud_run_set_defaults + +cloud_run_require_env \ + CLOUD_RUN_PROJECT \ + CLOUD_RUN_REGION \ + CLOUD_RUN_SERVICE \ + CLOUD_RUN_ARTIFACT_REPOSITORY \ + CLOUD_RUN_IMAGE_URI \ + CLOUD_RUN_TAG \ + CLOUD_RUN_RUNTIME_SERVICE_ACCOUNT \ + CLOUD_RUN_CLOUD_SQL_INSTANCE \ + CLOUD_RUN_POLICYENGINE_DB_PASSWORD_SECRET \ + CLOUD_RUN_GITHUB_MICRODATA_TOKEN_SECRET \ + CLOUD_RUN_ANTHROPIC_API_KEY_SECRET \ + CLOUD_RUN_OPENAI_API_KEY_SECRET \ + CLOUD_RUN_HUGGING_FACE_TOKEN_SECRET \ + SIMULATION_API_URL \ + GATEWAY_AUTH_ISSUER \ + GATEWAY_AUTH_AUDIENCE \ + GATEWAY_AUTH_CLIENT_ID \ + GATEWAY_AUTH_CLIENT_SECRET_RESOURCE diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml index 72269106e..32800b53e 100644 --- a/.github/workflows/pr.yml +++ b/.github/workflows/pr.yml @@ -36,14 +36,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Check for changelog fragment - run: | - FRAGMENTS=$(find changelog.d -type f ! -name '.gitkeep' | wc -l) - if [ "$FRAGMENTS" -eq 0 ]; then - echo "::error::No changelog fragment found in changelog.d/" - echo "Add one with: echo 'Description.' 
> changelog.d/\$(git branch --show-current)..md" - echo "Types: added, changed, fixed, removed, breaking" - exit 1 - fi + run: bash .github/scripts/check_changelog_fragment.sh test_container_builds: name: Docker runs-on: ubuntu-latest @@ -61,6 +54,16 @@ jobs: password: ${{ secrets.GITHUB_TOKEN }} - name: Build container run: docker build -t ghcr.io/policyengine/policyengine docker + test_cloud_run_container_builds: + name: Cloud Run container + runs-on: ubuntu-latest + permissions: + contents: read + steps: + - name: Checkout repo + uses: actions/checkout@v4 + - name: Build Cloud Run container + run: docker build -f gcp/cloud_run/Dockerfile -t policyengine-api-cloud-run:test . test_env_vars: name: Test environment variables runs-on: ubuntu-latest diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index cb5289d74..6255e276b 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -209,8 +209,68 @@ jobs: - name: Wait for staging version health run: bash .github/scripts/health_check.sh "${{ steps.version_url.outputs.url }}/readiness-check" + deploy-cloud-run-staging: + name: Deploy staging Cloud Run candidate + runs-on: ubuntu-latest + needs: + - ensure-staging-model-version-aligns-with-sim-api + - publish-git-tag + if: | + (github.repository == 'PolicyEngine/policyengine-api') + && (github.event.head_commit.message == 'Update PolicyEngine API') + environment: staging + permissions: + contents: read + id-token: write + outputs: + tag: ${{ steps.cloud_run.outputs.revision_tag }} + url: ${{ steps.cloud_run_url.outputs.url }} + steps: + - name: Checkout repo + uses: actions/checkout@v4 + - name: GCP authentication + uses: "google-github-actions/auth@v2" + with: + workload_identity_provider: "${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }}" + service_account: "${{ secrets.GCP_DEPLOY_SERVICE_ACCOUNT }}" + - name: Set up GCloud + uses: "google-github-actions/setup-gcloud@v2" + - name: Compute Cloud Run staging metadata + id: cloud_run + 
run: | + echo "image_tag=${GITHUB_SHA}" >> "$GITHUB_OUTPUT" + echo "revision_tag=stage3-staging-${GITHUB_RUN_NUMBER}-${GITHUB_SHA::7}" >> "$GITHUB_OUTPUT" + - name: Build and push Cloud Run image + run: bash .github/scripts/build_cloud_run_image.sh + env: + CLOUD_RUN_IMAGE_TAG: ${{ steps.cloud_run.outputs.image_tag }} + CLOUD_RUN_TAG: ${{ steps.cloud_run.outputs.revision_tag }} + - name: Deploy tagged Cloud Run staging candidate + run: bash .github/scripts/deploy_cloud_run_candidate.sh + env: + CLOUD_RUN_IMAGE_TAG: ${{ steps.cloud_run.outputs.image_tag }} + CLOUD_RUN_TAG: ${{ steps.cloud_run.outputs.revision_tag }} + CLOUD_RUN_RUNTIME_SERVICE_ACCOUNT: ${{ secrets.GCP_CLOUD_RUN_RUNTIME_SERVICE_ACCOUNT }} + POLICYENGINE_DB_USER: ${{ vars.POLICYENGINE_DB_USER }} + POLICYENGINE_DB_NAME: ${{ vars.POLICYENGINE_DB_NAME }} + SIMULATION_API_URL: ${{ secrets.SIMULATION_API_URL }} + GATEWAY_AUTH_ISSUER: ${{ secrets.GATEWAY_AUTH_ISSUER }} + GATEWAY_AUTH_AUDIENCE: ${{ secrets.GATEWAY_AUTH_AUDIENCE }} + GATEWAY_AUTH_CLIENT_ID: ${{ secrets.GATEWAY_AUTH_CLIENT_ID }} + GATEWAY_AUTH_CLIENT_SECRET_RESOURCE: ${{ secrets.GATEWAY_AUTH_CLIENT_SECRET_RESOURCE }} + - name: Resolve Cloud Run staging URL + id: cloud_run_url + run: | + url="$(bash .github/scripts/get_cloud_run_tag_url.sh)" + echo "url=${url}" >> "$GITHUB_OUTPUT" + echo "Cloud Run staging URL: ${url}" + env: + CLOUD_RUN_TAG: ${{ steps.cloud_run.outputs.revision_tag }} + - name: Wait for Cloud Run staging health + run: bash .github/scripts/health_check.sh "${{ steps.cloud_run_url.outputs.url }}/readiness-check" + integration-tests-staging: - name: Run staging integration tests + name: Run App Engine staging integration tests runs-on: ubuntu-latest needs: deploy-staging if: | @@ -231,10 +291,73 @@ jobs: API_BASE_URL: ${{ needs.deploy-staging.outputs.url }} STAGING_API_TEST_PROBE_ID: ${{ needs.deploy-staging.outputs.version }} + integration-tests-staging-cloud-run: + name: Run Cloud Run staging integration tests + runs-on: 
ubuntu-latest + needs: deploy-cloud-run-staging + if: | + (github.repository == 'PolicyEngine/policyengine-api') + && (github.event.head_commit.message == 'Update PolicyEngine API') + steps: + - name: Checkout repo + uses: actions/checkout@v4 + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + - name: Install staging test dependencies + run: pip install pytest httpx + - name: Run staging smoke test + run: python -m pytest tests/integration/test_live_calculate.py tests/integration/test_live_economy.py tests/integration/test_live_budget_window_cache.py -v + env: + API_BASE_URL: ${{ needs.deploy-cloud-run-staging.outputs.url }} + STAGING_API_TEST_PROBE_ID: cloud-run-${{ needs.deploy-cloud-run-staging.outputs.tag }} + + promote-cloud-run-staging: + name: Promote staging Cloud Run traffic + runs-on: ubuntu-latest + needs: + - deploy-cloud-run-staging + - integration-tests-staging + - integration-tests-staging-cloud-run + if: | + (github.repository == 'PolicyEngine/policyengine-api') + && (github.event.head_commit.message == 'Update PolicyEngine API') + environment: staging + permissions: + contents: read + id-token: write + outputs: + url: ${{ steps.cloud_run_service_url.outputs.url }} + steps: + - name: Checkout repo + uses: actions/checkout@v4 + - name: GCP authentication + uses: "google-github-actions/auth@v2" + with: + workload_identity_provider: "${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }}" + service_account: "${{ secrets.GCP_DEPLOY_SERVICE_ACCOUNT }}" + - name: Set up GCloud + uses: "google-github-actions/setup-gcloud@v2" + - name: Promote Cloud Run staging candidate + run: bash .github/scripts/promote_cloud_run_tag.sh + env: + CLOUD_RUN_TAG: ${{ needs.deploy-cloud-run-staging.outputs.tag }} + - name: Resolve Cloud Run staging service URL + id: cloud_run_service_url + run: | + url="$(bash .github/scripts/get_cloud_run_service_url.sh)" + echo "url=${url}" >> "$GITHUB_OUTPUT" + echo "Cloud Run staging service URL: ${url}" + - 
name: Wait for Cloud Run staging service health + run: bash .github/scripts/health_check.sh "${{ steps.cloud_run_service_url.outputs.url }}/readiness-check" + ensure-production-model-version-aligns-with-sim-api: name: Ensure production model version aligns with simulation API runs-on: ubuntu-latest - needs: integration-tests-staging + needs: + - integration-tests-staging + - promote-cloud-run-staging if: | (github.repository == 'PolicyEngine/policyengine-api') && (github.event.head_commit.message == 'Update PolicyEngine API') @@ -257,10 +380,10 @@ jobs: env: SIMULATION_API_URL: ${{ secrets.SIMULATION_API_URL }} - deploy-production: - name: Deploy production App Engine version + deploy-production-candidate: + name: Deploy production App Engine candidate runs-on: ubuntu-latest - needs: ensure-production-model-version-aligns-with-sim-api + needs: deploy-staging if: | (github.repository == 'PolicyEngine/policyengine-api') && (github.event.head_commit.message == 'Update PolicyEngine API') @@ -268,6 +391,9 @@ jobs: permissions: contents: read id-token: write + outputs: + version: ${{ steps.version.outputs.version }} + url: ${{ steps.version_url.outputs.url }} steps: - name: Checkout repo uses: actions/checkout@v4 @@ -324,15 +450,106 @@ jobs: APP_ENGINE_VERSION: ${{ steps.version.outputs.version }} - name: Wait for production version health run: bash .github/scripts/health_check.sh "${{ steps.version_url.outputs.url }}/readiness-check" + + promote-production: + name: Promote production App Engine candidate + runs-on: ubuntu-latest + needs: + - deploy-production-candidate + - ensure-production-model-version-aligns-with-sim-api + if: | + (github.repository == 'PolicyEngine/policyengine-api') + && (github.event.head_commit.message == 'Update PolicyEngine API') + environment: production + permissions: + contents: read + id-token: write + steps: + - name: Checkout repo + uses: actions/checkout@v4 + - name: GCP authentication + uses: "google-github-actions/auth@v2" + with: + 
workload_identity_provider: "${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }}" + service_account: "${{ secrets.GCP_DEPLOY_SERVICE_ACCOUNT }}" + - name: Set up GCloud + uses: "google-github-actions/setup-gcloud@v2" - name: Promote production version run: bash .github/scripts/promote_app_engine_version.sh env: - APP_ENGINE_VERSION: ${{ steps.version.outputs.version }} + APP_ENGINE_VERSION: ${{ needs.deploy-production-candidate.outputs.version }} + + deploy-cloud-run-candidate: + name: Deploy production Cloud Run candidate + runs-on: ubuntu-latest + needs: ensure-production-model-version-aligns-with-sim-api + if: | + (github.repository == 'PolicyEngine/policyengine-api') + && (github.event.head_commit.message == 'Update PolicyEngine API') + environment: production + permissions: + contents: read + id-token: write + steps: + - name: Checkout repo + uses: actions/checkout@v4 + - name: GCP authentication + uses: "google-github-actions/auth@v2" + with: + workload_identity_provider: "${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }}" + service_account: "${{ secrets.GCP_DEPLOY_SERVICE_ACCOUNT }}" + - name: Set up GCloud + uses: "google-github-actions/setup-gcloud@v2" + - name: Compute Cloud Run candidate metadata + id: cloud_run + run: | + echo "image_tag=${GITHUB_SHA}" >> "$GITHUB_OUTPUT" + echo "revision_tag=stage3-prod-${GITHUB_RUN_NUMBER}-${GITHUB_SHA::7}" >> "$GITHUB_OUTPUT" + - name: Deploy tagged Cloud Run candidate + run: bash .github/scripts/deploy_cloud_run_candidate.sh + env: + CLOUD_RUN_IMAGE_TAG: ${{ steps.cloud_run.outputs.image_tag }} + CLOUD_RUN_TAG: ${{ steps.cloud_run.outputs.revision_tag }} + CLOUD_RUN_RUNTIME_SERVICE_ACCOUNT: ${{ secrets.GCP_CLOUD_RUN_RUNTIME_SERVICE_ACCOUNT }} + POLICYENGINE_DB_USER: ${{ vars.POLICYENGINE_DB_USER }} + POLICYENGINE_DB_NAME: ${{ vars.POLICYENGINE_DB_NAME }} + SIMULATION_API_URL: ${{ secrets.SIMULATION_API_URL }} + GATEWAY_AUTH_ISSUER: ${{ secrets.GATEWAY_AUTH_ISSUER }} + GATEWAY_AUTH_AUDIENCE: ${{ secrets.GATEWAY_AUTH_AUDIENCE 
}} + GATEWAY_AUTH_CLIENT_ID: ${{ secrets.GATEWAY_AUTH_CLIENT_ID }} + GATEWAY_AUTH_CLIENT_SECRET_RESOURCE: ${{ secrets.GATEWAY_AUTH_CLIENT_SECRET_RESOURCE }} + - name: Resolve Cloud Run candidate URL + id: cloud_run_url + run: | + url="$(bash .github/scripts/get_cloud_run_tag_url.sh)" + echo "url=${url}" >> "$GITHUB_OUTPUT" + echo "Cloud Run candidate URL: ${url}" + env: + CLOUD_RUN_TAG: ${{ steps.cloud_run.outputs.revision_tag }} + - name: Install Cloud Run smoke test dependencies + run: pip install pytest httpx + - name: Run Cloud Run candidate smoke tests + run: python -m pytest tests/integration/test_cloud_run_candidate.py -v + env: + API_BASE_URL: ${{ steps.cloud_run_url.outputs.url }} + STAGING_API_TEST_PROBE_ID: cloud-run-${{ steps.cloud_run.outputs.revision_tag }} + - name: Promote Cloud Run production candidate + run: bash .github/scripts/promote_cloud_run_tag.sh + env: + CLOUD_RUN_TAG: ${{ steps.cloud_run.outputs.revision_tag }} + - name: Resolve Cloud Run production service URL + id: cloud_run_service_url + run: | + url="$(bash .github/scripts/get_cloud_run_service_url.sh)" + echo "url=${url}" >> "$GITHUB_OUTPUT" + echo "Cloud Run production service URL: ${url}" + - name: Wait for Cloud Run production service health + run: bash .github/scripts/health_check.sh "${{ steps.cloud_run_service_url.outputs.url }}/readiness-check" docker: name: Docker runs-on: ubuntu-latest - needs: deploy-production + needs: promote-production if: | (github.repository == 'PolicyEngine/policyengine-api') && (github.event.head_commit.message == 'Update PolicyEngine API') diff --git a/.github/workflows/sync-cloud-run-secrets.yml b/.github/workflows/sync-cloud-run-secrets.yml new file mode 100644 index 000000000..9a6be0717 --- /dev/null +++ b/.github/workflows/sync-cloud-run-secrets.yml @@ -0,0 +1,41 @@ +name: Sync Cloud Run secrets + +on: + workflow_dispatch: + +concurrency: + group: cloud-run-secret-sync + +jobs: + sync-cloud-run-secrets: + name: Sync GitHub secrets to Secret 
Manager + runs-on: ubuntu-latest + environment: production + permissions: + contents: read + id-token: write + steps: + - name: Require master branch + if: github.ref != 'refs/heads/master' + run: | + echo "::error::Cloud Run secret sync must run from master." + exit 1 + - name: Checkout repo + uses: actions/checkout@v4 + - name: GCP authentication + uses: "google-github-actions/auth@v2" + with: + workload_identity_provider: "${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }}" + service_account: "${{ secrets.GCP_DEPLOY_SERVICE_ACCOUNT }}" + - name: Set up GCloud + uses: "google-github-actions/setup-gcloud@v2" + - name: Sync runtime secrets + env: + CLOUD_RUN_PROJECT: policyengine-api + CLOUD_RUN_RUNTIME_SERVICE_ACCOUNT: ${{ secrets.GCP_CLOUD_RUN_RUNTIME_SERVICE_ACCOUNT }} + POLICYENGINE_DB_PASSWORD: ${{ secrets.POLICYENGINE_DB_PASSWORD }} + POLICYENGINE_GITHUB_MICRODATA_AUTH_TOKEN: ${{ secrets.POLICYENGINE_GITHUB_MICRODATA_AUTH_TOKEN }} + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }} + run: bash .github/scripts/sync_cloud_run_secrets.sh diff --git a/changelog.d/migration-pr3-cloud-run-candidate.added.md b/changelog.d/migration-pr3-cloud-run-candidate.added.md new file mode 100644 index 000000000..4e77c96f0 --- /dev/null +++ b/changelog.d/migration-pr3-cloud-run-candidate.added.md @@ -0,0 +1 @@ +Added a no-traffic Cloud Run candidate deployment path for the FastAPI shell. diff --git a/docs/engineering/skills/github-prs.md b/docs/engineering/skills/github-prs.md index 94b6cce60..ddbc83d9a 100644 --- a/docs/engineering/skills/github-prs.md +++ b/docs/engineering/skills/github-prs.md @@ -24,3 +24,17 @@ For migration work, identify: - what is newly prepared for FastAPI, SQLAlchemy/Alembic, Supabase, Cloud Run, or Modal migration; - which user-visible API contract changes are intentionally introduced. 
+ +## Commit Hygiene + +AI agents must run formatting and lint checks before committing their own code +changes: + +```bash +make format +ruff check +``` + +Commit only after formatting succeeds and changed Python files pass lint. If a +broader repo-wide lint command fails on unrelated pre-existing issues, include +that result in the handoff instead of hiding it. diff --git a/docs/engineering/skills/testing.md b/docs/engineering/skills/testing.md index 83b4b21a9..caa332527 100644 --- a/docs/engineering/skills/testing.md +++ b/docs/engineering/skills/testing.md @@ -41,5 +41,44 @@ Regenerate and review `docs/engineering/generated/migration_contracts.md` when route inventory, migration registry flags, or v1 contract expectations change. FastAPI shell-only fallback changes should not change the route catalog. -Run `ruff format --check` and `ruff check` on changed Python files before -handoff. +For PR 3 Cloud Run candidate deployment changes, verify the command-building +guards, workflow track structure, ASGI compatibility, and container build: + +```bash +python -m pytest tests/unit/test_cloud_run_deploy_scripts.py tests/unit/test_asgi_factory.py -q +docker build -f gcp/cloud_run/Dockerfile -t policyengine-api-cloud-run:test . +``` + +If the Cloud Run container startup script changes, keep the script syntax and +child-process supervision assertions in `tests/unit/test_cloud_run_deploy_scripts.py` +updated. The tier 1 Redis path keeps Redis local to the container, so tests +should verify the bash entrypoint, explicit Redis/Uvicorn PID tracking, and +fail-fast behavior rather than any managed Redis integration. + +Staging deployment checks should run the same live integration suite against +both the App Engine staging URL and the tagged Cloud Run staging URL before +promoting the tested Cloud Run tag to the service URL. 
App Engine production +candidate deploys may run before the staging integration jobs finish, but must +use `APP_ENGINE_PROMOTE=0`; the traffic promotion job must remain gated on the +staging checks and production model-version alignment. Production Cloud Run +promotion should happen only after tagged candidate smoke tests pass, and should +health-check the Cloud Run service URL after promotion. Live Cloud Run candidate +checks must be explicit deployed probes. Production candidate smoke tests +require `API_BASE_URL` and should not run as part of ordinary local test +commands. These checks should stay read-only and avoid depending on specific +production data fixtures: + +```bash +API_BASE_URL=https://candidate-url python -m pytest tests/integration/test_cloud_run_candidate.py -v +``` + +Before committing AI-authored code changes, run repository formatting and lint: + +```bash +make format +ruff check +``` + +Commit only after formatting succeeds and changed Python files pass lint. If a +broader repo-wide lint command fails on unrelated pre-existing issues, include +that result in the handoff instead of hiding it. diff --git a/docs/migration-pr3-cloud-run-candidate-runbook.md b/docs/migration-pr3-cloud-run-candidate-runbook.md new file mode 100644 index 000000000..9aac27f3c --- /dev/null +++ b/docs/migration-pr3-cloud-run-candidate-runbook.md @@ -0,0 +1,187 @@ +# PR 3 Cloud Run Candidate Runbook + +PR 3 adds a production-configured Cloud Run candidate for the FastAPI ASGI +shell. It makes the Cloud Run service URL live after staged validation, but it +does not migrate the public App Engine/custom API URL. + +## Included + +- Cloud Run Docker runtime for `policyengine_api.asgi:app`. +- Tagged no-traffic Cloud Run revisions deployed on both the staging and + production tracks, then promoted to the Cloud Run service URL after tests. +- Runtime environment configuration for the production Cloud SQL instance and + the existing simulation gateway. 
+- Secret Manager-backed Cloud Run runtime credentials, synced manually from + existing GitHub Actions secrets. +- A dedicated Cloud Run runtime service account, separate from the GitHub deploy + service account used to run `gcloud`. +- The same live staging integration suite against both the App Engine staging + URL and the tagged Cloud Run staging URL. +- Production smoke tests against the tagged Cloud Run URL, including the public + simulation-gateway health probe. +- Tier 1 Cloud Run startup supervision: the container still runs local Redis, + but the bash startup script tracks Redis and Uvicorn child PIDs explicitly and + exits if either process dies. + +## Not Included + +- No public App Engine/custom API host traffic shift. +- No percent-based Cloud Run traffic ramp; the tested tag is promoted to 100% + of the Cloud Run service URL. +- No native FastAPI route migration beyond `/health`. +- No Supabase, Alembic, SQLAlchemy model, or Modal compute migration. +- No managed Redis, Redis Memorystore, or API v2-alpha-style cache replacement. +- No App Engine secret-handling migration; App Engine deploys still use the + existing transitional bundle path. +- No App Engine retirement. 
+ +## Resource Defaults + +- Project: `policyengine-api` +- Region: `us-central1` +- Service: `policyengine-api` +- Artifact Registry repository: `policyengine-api` +- Cloud Run runtime service account: + `policyengine-api-cr-runtime@policyengine-api.iam.gserviceaccount.com` +- Cloud SQL instance: `policyengine-api:us-central1:policyengine-api-data` +- Staging revision tag: `stage3-staging-${GITHUB_RUN_NUMBER}-${GITHUB_SHA::7}` +- Production revision tag: `stage3-prod-${GITHUB_RUN_NUMBER}-${GITHUB_SHA::7}` +- Secret Manager secrets: + - `policyengine-api-prod-db-password` + - `policyengine-api-prod-github-microdata-token` + - `policyengine-api-prod-anthropic-api-key` + - `policyengine-api-prod-openai-api-key` + - `policyengine-api-prod-hugging-face-token` + +## Required Runtime IAM + +GitHub Actions still authenticates as `${{ secrets.GCP_DEPLOY_SERVICE_ACCOUNT }}` +to deploy App Engine and Cloud Run. Cloud Run itself runs as +`${{ secrets.GCP_CLOUD_RUN_RUNTIME_SERVICE_ACCOUNT }}`. + +The runtime service account must be: + +- granted Cloud SQL client access for + `policyengine-api:us-central1:policyengine-api-data`; +- allowed to read the five Cloud Run runtime secrets listed above; +- allowed to read the Secret Manager secret referenced by + `GATEWAY_AUTH_CLIENT_SECRET_RESOURCE`; +- usable by the GitHub deploy service account through the Service Account User + role, so the workflow can deploy revisions using that runtime identity. + +The manual `Sync Cloud Run secrets` workflow authenticates through Workload +Identity Federation as `${{ secrets.GCP_DEPLOY_SERVICE_ACCOUNT }}`. That deploy +service account must be able to create the five secrets if missing, add secret +versions, and grant the Cloud Run runtime service account Secret Manager access +on those secrets. 
+ +## Secret Sync + +Run `.github/workflows/sync-cloud-run-secrets.yml` manually from `master` before +the first Cloud Run deployment that uses Secret Manager references, and again +whenever one of the source GitHub secrets is rotated. + +The workflow copies these existing GitHub secrets into Secret Manager: + +- `POLICYENGINE_DB_PASSWORD` +- `POLICYENGINE_GITHUB_MICRODATA_AUTH_TOKEN` +- `ANTHROPIC_API_KEY` +- `OPENAI_API_KEY` +- `HUGGING_FACE_TOKEN` + +The workflow writes secret payloads to `gcloud secrets versions add` through +stdin and does not print secret values. GitHub Actions remains the temporary +source of truth in PR 3. The long-term target is to create, rotate, and manage +these credentials directly in Secret Manager, with GitHub Actions only deploying +Secret Manager references. + +## Post-Merge Flow + +The `Push` workflow now uses two deployment tracks. + +Staging: + +1. Deploy an App Engine staging version. +2. Build and deploy a tagged Cloud Run staging revision with no traffic. +3. Run the same live integration suite against both URLs in parallel: + +```bash +python -m pytest \ + tests/integration/test_live_calculate.py \ + tests/integration/test_live_economy.py \ + tests/integration/test_live_budget_window_cache.py \ + -v +``` +4. Promote the tested Cloud Run staging tag to 100% of the Cloud Run service + URL and health-check that service URL. + +Production: + +1. After the App Engine staging version is healthy, deploy the App Engine + production candidate with `APP_ENGINE_PROMOTE=0` and health-check its version + URL. This version must not receive production traffic yet. +2. In parallel, run the staging integration jobs and promote the tested Cloud + Run staging tag to the Cloud Run service URL. +3. After the staging gates pass, run the production model-version alignment + check. +4. Promote the already-deployed App Engine production candidate to receive + public production traffic. +5. 
Deploy a tagged Cloud Run production revision with no traffic. +6. Smoke-test the tagged Cloud Run production URL. +7. Promote the tested production tag to 100% of the Cloud Run service URL and + health-check that service URL. + +The Cloud Run deploy command still uses: + +```bash +gcloud run deploy policyengine-api \ + --tag "$CLOUD_RUN_TAG" \ + --no-traffic +``` + +The production Cloud Run job resolves the tagged URL and runs: + +```bash +python -m pytest tests/integration/test_cloud_run_candidate.py -v +``` + +Then it assigns Cloud Run service traffic to the tested tag: + +```bash +gcloud run services update-traffic policyengine-api \ + --to-tags "$CLOUD_RUN_TAG=100" +``` + +Failure marks the deployment workflow red. App Engine remains the public +production traffic target because the public URL is not migrated to Cloud Run. +Smoke tests against the production candidate must be read-only. + +## Manual Smoke + +After GitHub Actions prints the candidate URL: + +```bash +curl -i "$CLOUD_RUN_CANDIDATE_URL/health" +curl -i "$CLOUD_RUN_CANDIDATE_URL/readiness-check" +curl -i "$CLOUD_RUN_CANDIDATE_URL/liveness-check" +curl -i "$CLOUD_RUN_CANDIDATE_URL/simulation-gateway-check" +curl -i "$CLOUD_RUN_CANDIDATE_URL/us/metadata" +``` + +Expected behavior: + +- `/health` returns FastAPI JSON: `{"status":"healthy"}`. +- `/simulation-gateway-check` returns FastAPI JSON confirming the existing + simulation gateway client can initialize and reach the gateway health check. +- `/readiness-check` and `/liveness-check` return existing Flask text `OK`. +- `/us/metadata` returns the existing v1 metadata contract from Cloud SQL. + +## Rollback + +The public App Engine/custom API URL is not routed to the Cloud Run candidate in +this PR. If the staging Cloud Run track fails, production deployment is blocked. +If the production Cloud Run candidate fails before promotion, leave App Engine +as production-primary and fix the Cloud Run deploy path in a follow-up commit. 
+If the production Cloud Run service URL is promoted and later regresses, deploy +a fixed tagged revision and promote that tag, or manually shift the Cloud Run +service URL back to a prior healthy revision. diff --git a/gcp/cloud_run/Dockerfile b/gcp/cloud_run/Dockerfile new file mode 100644 index 000000000..f2a38279c --- /dev/null +++ b/gcp/cloud_run/Dockerfile @@ -0,0 +1,22 @@ +FROM python:3.12-slim + +RUN apt-get update \ + && apt-get install -y --no-install-recommends build-essential redis-server \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /app + +COPY pyproject.toml README.md ./ +COPY policyengine_api ./policyengine_api +COPY gcp/cloud_run/start.sh ./start.sh + +RUN chmod +x ./start.sh \ + && pip install --no-cache-dir --upgrade pip \ + && pip install --no-cache-dir -e . + +ENV GATEWAY_AUTH_REQUIRED=1 +ENV CACHE_REDIS_HOST=127.0.0.1 +ENV CACHE_REDIS_PORT=6379 +ENV CACHE_REDIS_DB=0 + +CMD ["/bin/bash", "/app/start.sh"] diff --git a/gcp/cloud_run/Dockerfile.dockerignore b/gcp/cloud_run/Dockerfile.dockerignore new file mode 100644 index 000000000..b14b5958e --- /dev/null +++ b/gcp/cloud_run/Dockerfile.dockerignore @@ -0,0 +1,17 @@ +# Cloud Run builds from the repository root, but the runtime image only needs +# the package and the entrypoint script. 
+* + +!README.md +!pyproject.toml +!policyengine_api/ +!policyengine_api/** +!gcp/ +!gcp/cloud_run/ +!gcp/cloud_run/start.sh + +**/__pycache__/ +**/*.py[cod] +**/.DS_Store +policyengine_api/data/*.db +policyengine_api/data/*.db-journal diff --git a/gcp/cloud_run/start.sh b/gcp/cloud_run/start.sh new file mode 100755 index 000000000..35a27a455 --- /dev/null +++ b/gcp/cloud_run/start.sh @@ -0,0 +1,88 @@ +#!/usr/bin/env bash +set -euo pipefail + +PORT="${PORT:-8080}" +CACHE_REDIS_HOST="${CACHE_REDIS_HOST:-127.0.0.1}" +CACHE_REDIS_PORT="${CACHE_REDIS_PORT:-6379}" +CACHE_REDIS_DB="${CACHE_REDIS_DB:-0}" +WEB_CONCURRENCY="${WEB_CONCURRENCY:-1}" +REDIS_READY_MAX_ATTEMPTS="${REDIS_READY_MAX_ATTEMPTS:-30}" +export CACHE_REDIS_HOST CACHE_REDIS_PORT CACHE_REDIS_DB + +redis_pid="" +uvicorn_pid="" + +shutdown() { + trap - INT TERM + + if [ -n "$uvicorn_pid" ] && kill -0 "$uvicorn_pid" 2>/dev/null; then + kill "$uvicorn_pid" 2>/dev/null || true + fi + + if [ -n "$redis_pid" ] && kill -0 "$redis_pid" 2>/dev/null; then + kill "$redis_pid" 2>/dev/null || true + fi + + if [ -n "$uvicorn_pid" ]; then + wait "$uvicorn_pid" 2>/dev/null || true + fi + + if [ -n "$redis_pid" ]; then + wait "$redis_pid" 2>/dev/null || true + fi +} + +trap 'shutdown; exit 143' INT TERM + +redis-server --bind "$CACHE_REDIS_HOST" \ + --port "$CACHE_REDIS_PORT" \ + --protected-mode yes \ + --maxclients 10000 \ + --timeout 0 & +redis_pid="$!" + +redis_ready_attempts=0 +until redis-cli -h "$CACHE_REDIS_HOST" -p "$CACHE_REDIS_PORT" ping >/dev/null 2>&1; do + redis_ready_attempts=$((redis_ready_attempts + 1)) + if ! 
kill -0 "$redis_pid" 2>/dev/null; then + echo "Redis exited before becoming ready" >&2 + shutdown + exit 1 + fi + + if [ "$redis_ready_attempts" -ge "$REDIS_READY_MAX_ATTEMPTS" ]; then + echo "Redis did not become ready after $redis_ready_attempts attempts" >&2 + shutdown + exit 1 + fi + sleep 1 +done + +uvicorn policyengine_api.asgi:app \ + --host 0.0.0.0 \ + --port "$PORT" \ + --workers "$WEB_CONCURRENCY" \ + --proxy-headers \ + --forwarded-allow-ips '*' & +uvicorn_pid="$!" + +set +e +wait -n "$redis_pid" "$uvicorn_pid" +status="$?" +set -e + +if ! kill -0 "$redis_pid" 2>/dev/null; then + echo "Redis exited; stopping Cloud Run container" >&2 +elif ! kill -0 "$uvicorn_pid" 2>/dev/null; then + echo "Uvicorn exited; stopping Cloud Run container" >&2 +else + echo "A supervised Cloud Run process exited; stopping container" >&2 +fi + +shutdown + +if [ "$status" -eq 0 ]; then + exit 1 +fi + +exit "$status" diff --git a/policyengine_api/asgi_factory.py b/policyengine_api/asgi_factory.py index b94838eaa..e81588aaa 100644 --- a/policyengine_api/asgi_factory.py +++ b/policyengine_api/asgi_factory.py @@ -2,19 +2,35 @@ from __future__ import annotations +import time +import uuid from typing import Literal from a2wsgi import WSGIMiddleware -from fastapi import FastAPI +from fastapi import FastAPI, HTTPException from pydantic import BaseModel from policyengine_api.constants import VERSION +from policyengine_api.migration_logging import log_migration_request + + +FASTAPI_NATIVE_LOGGED_PATHS = frozenset( + { + "/health", + "/simulation-gateway-check", + } +) class HealthResponse(BaseModel): status: Literal["healthy"] +class SimulationGatewayHealthResponse(BaseModel): + status: Literal["healthy"] + simulation_gateway: Literal["healthy"] + + def _add_vary_origin(response) -> None: vary = response.headers.get("Vary") if vary is None: @@ -37,16 +53,56 @@ def create_asgi_app(wsgi_app) -> FastAPI: @app.middleware("http") async def add_cors_for_native_routes(request, call_next): + 
started_at = time.time() + request_id = request.headers.get("X-Request-ID") or uuid.uuid4().hex response = await call_next(request) origin = request.headers.get("origin") if origin and "access-control-allow-origin" not in response.headers: response.headers["Access-Control-Allow-Origin"] = origin _add_vary_origin(response) + if request.url.path in FASTAPI_NATIVE_LOGGED_PATHS: + try: + log_migration_request( + request_id=request_id, + method=request.method, + path=request.url.path, + status_code=response.status_code, + started_at=started_at, + ) + except Exception: + pass return response @app.get("/health", response_model=HealthResponse) def health() -> HealthResponse: return HealthResponse(status="healthy") + @app.get( + "/simulation-gateway-check", + response_model=SimulationGatewayHealthResponse, + include_in_schema=False, + ) + def simulation_gateway_health() -> SimulationGatewayHealthResponse: + from policyengine_api.libs.simulation_api_modal import SimulationAPIModal + + try: + gateway_healthy = SimulationAPIModal().health_check() + except Exception as error: + raise HTTPException( + status_code=503, + detail="Simulation gateway client initialization failed", + ) from error + + if not gateway_healthy: + raise HTTPException( + status_code=503, + detail="Simulation gateway health check failed", + ) + + return SimulationGatewayHealthResponse( + status="healthy", + simulation_gateway="healthy", + ) + app.mount("/", WSGIMiddleware(wsgi_app)) return app diff --git a/policyengine_api/data/data.py b/policyengine_api/data/data.py index 6b16e713e..958021985 100644 --- a/policyengine_api/data/data.py +++ b/policyengine_api/data/data.py @@ -12,6 +12,23 @@ load_dotenv() +DEFAULT_REMOTE_DB_INSTANCE_CONNECTION_NAME = ( + "policyengine-api:us-central1:policyengine-api-data" +) +DEFAULT_REMOTE_DB_USER = "policyengine" +DEFAULT_REMOTE_DB_NAME = "policyengine" + + +def get_remote_database_config() -> dict[str, str]: + return { + "instance_connection_name": os.environ.get( + 
"POLICYENGINE_DB_INSTANCE_CONNECTION_NAME", + DEFAULT_REMOTE_DB_INSTANCE_CONNECTION_NAME, + ), + "db_user": os.environ.get("POLICYENGINE_DB_USER", DEFAULT_REMOTE_DB_USER), + "db_name": os.environ.get("POLICYENGINE_DB_NAME", DEFAULT_REMOTE_DB_NAME), + } + class _ResultProxy: """Lightweight wrapper that eagerly fetches results from a @@ -97,19 +114,17 @@ def __init__( self.initialize() def _create_pool(self): - instance_connection_name = "policyengine-api:us-central1:policyengine-api-data" + db_config = get_remote_database_config() self.connector = Connector() - db_user = "policyengine" db_pass = os.environ["POLICYENGINE_DB_PASSWORD"] if db_pass == ".dbpw": with open(".dbpw") as f: db_pass = f.read().strip() - db_name = "policyengine" conn = self.connector.connect( - instance_connection_string=instance_connection_name, + instance_connection_string=db_config["instance_connection_name"], driver="pymysql", - db=db_name, - user=db_user, + db=db_config["db_name"], + user=db_config["db_user"], password=db_pass, ) self.pool = sqlalchemy.create_engine( diff --git a/policyengine_api/migration_flags.py b/policyengine_api/migration_flags.py index c67ddda70..0d659eb36 100644 --- a/policyengine_api/migration_flags.py +++ b/policyengine_api/migration_flags.py @@ -69,7 +69,12 @@ def infer_route_group(path: str) -> str: """Infer a migration route group from a request path.""" if path in {"/", ""}: return "home" - if path in {"/liveness-check", "/readiness-check"}: + if path in { + "/health", + "/simulation-gateway-check", + "/liveness-check", + "/readiness-check", + }: return "health" if path == "/specification": return "specification" diff --git a/policyengine_api/migration_logging.py b/policyengine_api/migration_logging.py index 8bbb31dc5..5230502d8 100644 --- a/policyengine_api/migration_logging.py +++ b/policyengine_api/migration_logging.py @@ -27,27 +27,15 @@ def set_request_migration_context(): @app.after_request def log_request_migration_context(response): try: - route_group 
= infer_route_group(flask.request.path) - migration_context = get_migration_log_context(route_group) - elapsed_ms = None - started_at = getattr(flask.g, "request_started_at", None) - if started_at is not None: - elapsed_ms = round((time.time() - started_at) * 1000, 2) - - logger.log_struct( - { - "message": "API request served", - "request_id": getattr(flask.g, "request_id", None), - "method": flask.request.method, - "path": flask.request.path, - "status_code": response.status_code, - "latency_ms": elapsed_ms, - "country_id": flask.request.view_args.get("country_id") - if flask.request.view_args - else None, - "migration": migration_context, - }, - severity="INFO" if response.status_code < 500 else "ERROR", + log_migration_request( + request_id=getattr(flask.g, "request_id", None), + method=flask.request.method, + path=flask.request.path, + status_code=response.status_code, + started_at=getattr(flask.g, "request_started_at", None), + country_id=flask.request.view_args.get("country_id") + if flask.request.view_args + else None, ) except Exception: try: @@ -55,3 +43,36 @@ def log_request_migration_context(response): except Exception: pass return response + + +def log_migration_request( + *, + request_id: str | None, + method: str, + path: str, + status_code: int, + started_at: float | None, + country_id: str | None = None, +) -> None: + """Log a migration-aware API request in the shared structured format.""" + + elapsed_ms = None + if started_at is not None: + elapsed_ms = round((time.time() - started_at) * 1000, 2) + + route_group = infer_route_group(path) + migration_context = get_migration_log_context(route_group) + + logger.log_struct( + { + "message": "API request served", + "request_id": request_id, + "method": method, + "path": path, + "status_code": status_code, + "latency_ms": elapsed_ms, + "country_id": country_id, + "migration": migration_context, + }, + severity="INFO" if status_code < 500 else "ERROR", + ) diff --git 
a/tests/integration/test_cloud_run_candidate.py b/tests/integration/test_cloud_run_candidate.py new file mode 100644 index 000000000..401a7bea3 --- /dev/null +++ b/tests/integration/test_cloud_run_candidate.py @@ -0,0 +1,36 @@ +def test_cloud_run_candidate_health_routes(api_client): + health_response = api_client.get("/health") + assert health_response.status_code == 200, health_response.text + assert health_response.json() == {"status": "healthy"} + + liveness_response = api_client.get("/liveness-check") + assert liveness_response.status_code == 200, liveness_response.text + assert liveness_response.text == "OK" + + readiness_response = api_client.get("/readiness-check") + assert readiness_response.status_code == 200, readiness_response.text + assert readiness_response.text == "OK" + + simulation_gateway_response = api_client.get("/simulation-gateway-check") + assert simulation_gateway_response.status_code == 200, ( + simulation_gateway_response.text + ) + assert simulation_gateway_response.json() == { + "status": "healthy", + "simulation_gateway": "healthy", + } + + +def test_cloud_run_candidate_metadata_and_policy( + api_client, +): + metadata_response = api_client.get("/us/metadata") + assert metadata_response.status_code == 200, metadata_response.text + metadata = metadata_response.json()["result"] + current_law_id = metadata["current_law_id"] + + policy_response = api_client.get(f"/us/policy/{current_law_id}") + assert policy_response.status_code == 200, policy_response.text + policy_payload = policy_response.json() + assert policy_payload["status"] == "ok" + assert policy_payload["result"]["id"] == current_law_id diff --git a/tests/integration/test_live_economy.py b/tests/integration/test_live_economy.py index c204c34d3..ac1f76d8f 100644 --- a/tests/integration/test_live_economy.py +++ b/tests/integration/test_live_economy.py @@ -14,13 +14,6 @@ def _load_reform_payload(filename: str) -> dict: ) -def _pick_region(metadata: dict) -> str: - for region in 
metadata["economy_options"]["region"]: - if region["name"] == "us": - return "us" - return metadata["economy_options"]["region"][0]["name"] - - def _pick_time_period(metadata: dict) -> str: period_names = [ str(period["name"]) for period in metadata["economy_options"]["time_period"] @@ -44,20 +37,12 @@ def _pick_time_period(metadata: dict) -> str: return period_names[0] -def test_live_economy_smoke(api_client, integration_probe_id, poll_live_endpoint): - liveness_response = api_client.get("/liveness-check") - assert liveness_response.status_code == 200 - - readiness_response = api_client.get("/readiness-check") - assert readiness_response.status_code == 200 - +def test_live_utah_macro_reform(api_client, integration_probe_id, poll_live_endpoint): metadata_response = api_client.get("/us/metadata") metadata_response.raise_for_status() metadata = metadata_response.json()["result"] - current_law_id = metadata["current_law_id"] - region = _pick_region(metadata) - time_period = _pick_time_period(metadata) + test_year = _pick_time_period(metadata) policy_response = api_client.post( "/us/policy", @@ -69,36 +54,6 @@ def test_live_economy_smoke(api_client, integration_probe_id, poll_live_endpoint payload = poll_live_endpoint( api_client, f"/us/economy/{policy_id}/over/{current_law_id}", - { - "region": region, - "time_period": time_period, - "staging_probe": f"{integration_probe_id}-smoke", - }, - route_name="economy", - ) - - assert payload["status"] == "ok", payload - assert payload["result"] is not None, payload - assert "budget" in payload["result"], payload - - -def test_live_utah_macro_reform(api_client, integration_probe_id, poll_live_endpoint): - default_policy_id = 2 - - metadata_response = api_client.get("/us/metadata") - metadata_response.raise_for_status() - test_year = _pick_time_period(metadata_response.json()["result"]) - - policy_response = api_client.post( - "/us/policy", - json=_load_reform_payload("utah_reform.json"), - ) - assert 
policy_response.status_code in (200, 201) - policy_id = policy_response.json()["result"]["policy_id"] - - payload = poll_live_endpoint( - api_client, - f"/us/economy/{policy_id}/over/{default_policy_id}", { "region": "ut", "time_period": test_year, diff --git a/tests/unit/data/test_remote_database_config.py b/tests/unit/data/test_remote_database_config.py new file mode 100644 index 000000000..33ce245a4 --- /dev/null +++ b/tests/unit/data/test_remote_database_config.py @@ -0,0 +1,34 @@ +import os + +os.environ.setdefault("FLASK_DEBUG", "1") + +from policyengine_api.data.data import get_remote_database_config + + +def test_remote_database_config_defaults_to_current_production_values(monkeypatch): + monkeypatch.delenv("POLICYENGINE_DB_INSTANCE_CONNECTION_NAME", raising=False) + monkeypatch.delenv("POLICYENGINE_DB_USER", raising=False) + monkeypatch.delenv("POLICYENGINE_DB_NAME", raising=False) + + assert get_remote_database_config() == { + "instance_connection_name": "policyengine-api:us-central1:policyengine-api-data", + "db_user": "policyengine", + "db_name": "policyengine", + } + + +def test_remote_database_config_can_target_non_production_db(monkeypatch): + monkeypatch.setenv( + "POLICYENGINE_DB_INSTANCE_CONNECTION_NAME", + "policyengine-api-staging:us-central1:policyengine-api-data-staging", + ) + monkeypatch.setenv("POLICYENGINE_DB_USER", "policyengine_staging") + monkeypatch.setenv("POLICYENGINE_DB_NAME", "policyengine_staging") + + assert get_remote_database_config() == { + "instance_connection_name": ( + "policyengine-api-staging:us-central1:policyengine-api-data-staging" + ), + "db_user": "policyengine_staging", + "db_name": "policyengine_staging", + } diff --git a/tests/unit/routes/test_migration_context_logging.py b/tests/unit/routes/test_migration_context_logging.py index 204b284eb..29f32a72d 100644 --- a/tests/unit/routes/test_migration_context_logging.py +++ b/tests/unit/routes/test_migration_context_logging.py @@ -19,6 +19,17 @@ def readiness_check(): 
return app +def _app_without_migration_logging(): + app = Flask(__name__) + app.config["TESTING"] = True + + @app.route("/fallback") + def fallback(): + return Response("fallback", status=200, mimetype="text/plain") + + return app + + def test_request_logging_includes_migration_context(): with patch("policyengine_api.migration_logging.logger") as mock_logger: response = _app().test_client().get("/readiness-check") @@ -53,3 +64,47 @@ def test_request_logging_runs_for_asgi_fallback_routes(): log_payload = mock_logger.log_struct.call_args.args[0] assert log_payload["path"] == "/readiness-check" assert log_payload["migration"]["route_group"] == "health" + + +def test_request_logging_runs_for_fastapi_native_health_routes(monkeypatch): + monkeypatch.setenv("API_HOST_BACKEND", "cloud_run") + + with patch("policyengine_api.migration_logging.logger") as mock_logger: + response = TestClient(create_asgi_app(_app())).get( + "/health", + headers={"X-Request-ID": "request-123"}, + ) + + assert response.status_code == 200 + assert response.json() == {"status": "healthy"} + log_payload = mock_logger.log_struct.call_args.args[0] + assert log_payload["message"] == "API request served" + assert log_payload["request_id"] == "request-123" + assert log_payload["path"] == "/health" + assert log_payload["status_code"] == 200 + assert log_payload["country_id"] is None + assert log_payload["migration"]["route_group"] == "health" + assert log_payload["migration"]["api_host_backend"] == "cloud_run" + assert log_payload["migration"]["route_impl"] == "flask_fallback" + + +def test_fastapi_native_logging_failure_does_not_change_response(): + with patch( + "policyengine_api.migration_logging.logger.log_struct", + side_effect=RuntimeError("logging failed"), + ): + response = TestClient(create_asgi_app(_app())).get("/health") + + assert response.status_code == 200 + assert response.json() == {"status": "healthy"} + + +def test_asgi_shell_does_not_log_unregistered_flask_fallback_routes(): + with 
patch("policyengine_api.migration_logging.logger") as mock_logger: + response = TestClient(create_asgi_app(_app_without_migration_logging())).get( + "/fallback" + ) + + assert response.status_code == 200 + assert response.content == b"fallback" + mock_logger.log_struct.assert_not_called() diff --git a/tests/unit/test_asgi_factory.py b/tests/unit/test_asgi_factory.py index 015b62ae7..35503edbe 100644 --- a/tests/unit/test_asgi_factory.py +++ b/tests/unit/test_asgi_factory.py @@ -1,6 +1,7 @@ import importlib import json import sys +from unittest.mock import patch import pytest from fastapi.testclient import TestClient @@ -161,6 +162,38 @@ def test_health_route_uses_same_reflected_cors_policy(): assert response.headers["vary"] == "Origin" +def test_public_simulation_gateway_health_probe_checks_gateway(): + client = TestClient(create_asgi_app(create_test_wsgi_app())) + + with patch( + "policyengine_api.libs.simulation_api_modal.SimulationAPIModal" + ) as simulation_api: + simulation_api.return_value.health_check.return_value = True + + response = client.get("/simulation-gateway-check") + + assert response.status_code == 200 + assert response.json() == { + "status": "healthy", + "simulation_gateway": "healthy", + } + simulation_api.assert_called_once_with() + simulation_api.return_value.health_check.assert_called_once_with() + + +def test_public_simulation_gateway_health_probe_reports_failure(): + client = TestClient(create_asgi_app(create_test_wsgi_app())) + + with patch( + "policyengine_api.libs.simulation_api_modal.SimulationAPIModal" + ) as simulation_api: + simulation_api.return_value.health_check.return_value = False + + response = client.get("/simulation-gateway-check") + + assert response.status_code == 503 + + def test_existing_health_and_specification_paths_fall_back_to_flask(): client = TestClient(create_asgi_app(create_test_wsgi_app())) diff --git a/tests/unit/test_cloud_run_deploy_scripts.py b/tests/unit/test_cloud_run_deploy_scripts.py new file mode 100644 
index 000000000..fcfeaa075 --- /dev/null +++ b/tests/unit/test_cloud_run_deploy_scripts.py @@ -0,0 +1,449 @@ +from __future__ import annotations + +import os +import re +import subprocess +from pathlib import Path + + +REPO = Path(__file__).resolve().parents[2] +PRODUCTION_CLOUD_SQL_INSTANCE = "policyengine-api:us-central1:policyengine-api-data" +DEDICATED_CLOUD_RUN_RUNTIME_SERVICE_ACCOUNT = ( + "policyengine-api-cr-runtime@policyengine-api.iam.gserviceaccount.com" +) +CLOUD_RUN_SECRET_MAPPINGS = { + "POLICYENGINE_DB_PASSWORD": "policyengine-api-prod-db-password:latest", + "POLICYENGINE_GITHUB_MICRODATA_AUTH_TOKEN": ( + "policyengine-api-prod-github-microdata-token:latest" + ), + "ANTHROPIC_API_KEY": "policyengine-api-prod-anthropic-api-key:latest", + "OPENAI_API_KEY": "policyengine-api-prod-openai-api-key:latest", + "HUGGING_FACE_TOKEN": "policyengine-api-prod-hugging-face-token:latest", +} +RAW_CLOUD_RUN_SECRET_VALUES = ( + "raw-db-secret-value", + "raw-github-secret-value", + "raw-anthropic-secret-value", + "raw-openai-secret-value", + "raw-hf-secret-value", +) + + +def _script_env(**overrides: str) -> dict[str, str]: + env = { + "HOME": os.environ.get("HOME", ""), + "PATH": os.environ["PATH"], + "CLOUD_RUN_DRY_RUN": "1", + } + env.update(overrides) + return env + + +def _required_runtime_env() -> dict[str, str]: + return { + "POLICYENGINE_DB_PASSWORD": "raw-db-secret-value", + "POLICYENGINE_GITHUB_MICRODATA_AUTH_TOKEN": ("raw-github-secret-value"), + "ANTHROPIC_API_KEY": "raw-anthropic-secret-value", + "OPENAI_API_KEY": "raw-openai-secret-value", + "HUGGING_FACE_TOKEN": "raw-hf-secret-value", + "SIMULATION_API_URL": "https://simulation.example.test", + "GATEWAY_AUTH_ISSUER": "https://issuer.example.test", + "GATEWAY_AUTH_AUDIENCE": "simulation-gateway", + "GATEWAY_AUTH_CLIENT_ID": "client-id", + "GATEWAY_AUTH_CLIENT_SECRET_RESOURCE": ( + "projects/policyengine-api/secrets/gateway-client-secret/versions/latest" + ), + } + + +def _run_script(path: str, env: 
dict[str, str]) -> subprocess.CompletedProcess[str]: + return subprocess.run( + ["bash", path], + cwd=REPO, + env=env, + text=True, + capture_output=True, + check=False, + ) + + +def _push_workflow() -> str: + return (REPO / ".github/workflows/push.yml").read_text(encoding="utf-8") + + +def _sync_secrets_workflow() -> str: + return (REPO / ".github/workflows/sync-cloud-run-secrets.yml").read_text( + encoding="utf-8" + ) + + +def _sync_secrets_script() -> str: + return (REPO / ".github/scripts/sync_cloud_run_secrets.sh").read_text( + encoding="utf-8" + ) + + +def _workflow_job_block(workflow: str, job_name: str) -> str: + match = re.search( + rf"^ {re.escape(job_name)}:\n(?P.*?)(?=^ [a-zA-Z0-9_-]+:|\Z)", + workflow, + flags=re.MULTILINE | re.DOTALL, + ) + assert match is not None, f"Missing workflow job {job_name}" + return match.group("body") + + +def _multiline_run_block_lengths(workflow_path: Path) -> list[tuple[int, int]]: + lines = workflow_path.read_text(encoding="utf-8").splitlines() + blocks: list[tuple[int, int]] = [] + + for line_index, line in enumerate(lines): + match = re.match(r"^(\s*)run: \|", line) + if match is None: + continue + + indent = len(match.group(1)) + body_lines = 0 + for body_line in lines[line_index + 1 :]: + if body_line.strip() and len(body_line) - len(body_line.lstrip()) <= indent: + break + if body_line.strip(): + body_lines += 1 + blocks.append((line_index + 1, body_lines)) + + return blocks + + +def test_cloud_run_startup_uses_asgi_entrypoint(): + start_script = (REPO / "gcp/cloud_run/start.sh").read_text(encoding="utf-8") + + assert "policyengine_api.asgi:app" in start_script + assert "policyengine_api.api" not in start_script + + +def test_cloud_run_startup_script_is_shell_syntax_valid(): + result = subprocess.run( + ["bash", "-n", "gcp/cloud_run/start.sh"], + cwd=REPO, + text=True, + capture_output=True, + check=False, + ) + + assert result.returncode == 0, result.stderr + + +def 
test_cloud_run_dockerfile_runs_startup_with_bash(): + dockerfile = (REPO / "gcp/cloud_run/Dockerfile").read_text(encoding="utf-8") + + assert 'CMD ["/bin/bash", "/app/start.sh"]' in dockerfile + assert 'CMD ["/bin/sh", "/app/start.sh"]' not in dockerfile + + +def test_cloud_run_startup_supervises_redis_and_uvicorn_children(): + start_script = (REPO / "gcp/cloud_run/start.sh").read_text(encoding="utf-8") + + assert "#!/usr/bin/env bash" in start_script + assert 'redis_pid="$!"' in start_script + assert 'uvicorn_pid="$!"' in start_script + assert "REDIS_READY_MAX_ATTEMPTS" in start_script + assert "Redis exited before becoming ready" in start_script + assert "Redis did not become ready" in start_script + assert "Redis exited; stopping Cloud Run container" in start_script + assert "Uvicorn exited; stopping Cloud Run container" in start_script + assert 'wait -n "$redis_pid" "$uvicorn_pid"' in start_script + assert 'kill -0 "$redis_pid"' in start_script + assert 'kill -0 "$uvicorn_pid"' in start_script + assert "trap 'shutdown; exit 143' INT TERM" in start_script + assert "pkill" not in start_script + assert re.search(r"(?m)^ *wait 2>/dev/null", start_script) is None + + +def test_validate_cloud_run_deploy_env_reports_missing_runtime_config(): + result = _run_script( + ".github/scripts/validate_cloud_run_deploy_env.sh", + _script_env(), + ) + + assert result.returncode == 1 + assert "Missing required Cloud Run deployment configuration" in result.stderr + assert "SIMULATION_API_URL" in result.stderr + assert "GATEWAY_AUTH_CLIENT_SECRET_RESOURCE" in result.stderr + assert "POLICYENGINE_DB_PASSWORD" not in result.stderr + + +def test_build_cloud_run_image_dry_run_uses_cloud_run_dockerfile(): + dockerignore = REPO / "gcp/cloud_run/Dockerfile.dockerignore" + + assert dockerignore.exists() + assert "policyengine_api/data/*.db" in dockerignore.read_text(encoding="utf-8") + + result = _run_script( + ".github/scripts/build_cloud_run_image.sh", + _script_env( + 
GITHUB_SHA="1234567890abcdef", + GITHUB_RUN_NUMBER="42", + ), + ) + + assert result.returncode == 0, result.stderr + assert "gcp/cloud_run/Dockerfile" in result.stdout + assert "docker push" in result.stdout + assert ( + "us-central1-docker.pkg.dev/policyengine-api/policyengine-api/" + "policyengine-api:1234567890abcdef" + ) in result.stdout + + +def test_deploy_cloud_run_candidate_dry_run_never_shifts_traffic(): + result = _run_script( + ".github/scripts/deploy_cloud_run_candidate.sh", + _script_env( + **_required_runtime_env(), + CLOUD_RUN_IMAGE_URI="us-central1-docker.pkg.dev/project/repo/api:sha", + CLOUD_RUN_TAG="stage3-test", + ), + ) + + assert result.returncode == 0, result.stderr + assert "gcloud run deploy" in result.stdout + assert "--no-traffic" in result.stdout + assert "stage3-test" in result.stdout + assert ( + f"--service-account {DEDICATED_CLOUD_RUN_RUNTIME_SERVICE_ACCOUNT}" + in result.stdout + ) + assert f"--add-cloudsql-instances {PRODUCTION_CLOUD_SQL_INSTANCE}" in result.stdout + assert ( + f"POLICYENGINE_DB_INSTANCE_CONNECTION_NAME={PRODUCTION_CLOUD_SQL_INSTANCE}" + in result.stdout + ) + assert "--set-secrets" in result.stdout + for env_name, secret_ref in CLOUD_RUN_SECRET_MAPPINGS.items(): + assert f"{env_name}={secret_ref}" in result.stdout + for raw_secret_value in RAW_CLOUD_RUN_SECRET_VALUES: + assert raw_secret_value not in result.stdout + assert "CLOUD_RUN_INTERNAL_PROBES" not in result.stdout + assert "--to-latest" not in result.stdout + assert "update-traffic" not in result.stdout + + +def test_get_cloud_run_tag_url_dry_run_uses_candidate_tag(): + result = _run_script( + ".github/scripts/get_cloud_run_tag_url.sh", + _script_env(CLOUD_RUN_TAG="stage3-test", CLOUD_RUN_SERVICE="policyengine-api"), + ) + + assert result.returncode == 0, result.stderr + assert result.stdout.strip() == ( + "https://stage3-test---policyengine-api-dry-run.a.run.app" + ) + + +def test_get_cloud_run_service_url_dry_run_uses_service_url(): + result = 
_run_script( + ".github/scripts/get_cloud_run_service_url.sh", + _script_env(CLOUD_RUN_SERVICE="policyengine-api"), + ) + + assert result.returncode == 0, result.stderr + assert result.stdout.strip() == "https://policyengine-api-dry-run.a.run.app" + + +def test_promote_cloud_run_tag_dry_run_shifts_service_traffic_to_tag(): + result = _run_script( + ".github/scripts/promote_cloud_run_tag.sh", + _script_env(CLOUD_RUN_TAG="stage3-test", CLOUD_RUN_SERVICE="policyengine-api"), + ) + + assert result.returncode == 0, result.stderr + assert "gcloud run services update-traffic policyengine-api" in result.stdout + assert "--to-tags stage3-test=100" in result.stdout + assert "--to-latest" not in result.stdout + + +def test_push_workflow_tests_app_engine_and_cloud_run_staging_tracks(): + workflow = _push_workflow() + app_engine_tests = _workflow_job_block(workflow, "integration-tests-staging") + cloud_run_tests = _workflow_job_block( + workflow, + "integration-tests-staging-cloud-run", + ) + cloud_run_promotion = _workflow_job_block(workflow, "promote-cloud-run-staging") + production_gate = _workflow_job_block( + workflow, + "ensure-production-model-version-aligns-with-sim-api", + ) + live_test_command = ( + "python -m pytest tests/integration/test_live_calculate.py " + "tests/integration/test_live_economy.py " + "tests/integration/test_live_budget_window_cache.py -v" + ) + + assert live_test_command in app_engine_tests + assert live_test_command in cloud_run_tests + assert "API_BASE_URL: ${{ needs.deploy-staging.outputs.url }}" in app_engine_tests + assert ( + "API_BASE_URL: ${{ needs.deploy-cloud-run-staging.outputs.url }}" + in cloud_run_tests + ) + assert "- integration-tests-staging" in production_gate + assert "- promote-cloud-run-staging" in production_gate + assert "- integration-tests-staging-cloud-run" not in production_gate + assert "- integration-tests-staging" in cloud_run_promotion + assert "- integration-tests-staging-cloud-run" in cloud_run_promotion + assert 
"bash .github/scripts/promote_cloud_run_tag.sh" in cloud_run_promotion + assert "bash .github/scripts/get_cloud_run_service_url.sh" in cloud_run_promotion + + +def test_push_workflow_deploys_app_engine_production_candidate_before_staging_gate(): + workflow = _push_workflow() + app_engine_candidate = _workflow_job_block(workflow, "deploy-production-candidate") + app_engine_promotion = _workflow_job_block(workflow, "promote-production") + docker_publish = _workflow_job_block(workflow, "docker") + cloud_run_production = _workflow_job_block(workflow, "deploy-cloud-run-candidate") + + assert "needs: deploy-staging" in app_engine_candidate + assert 'APP_ENGINE_PROMOTE: "0"' in app_engine_candidate + assert ( + "bash .github/scripts/promote_app_engine_version.sh" not in app_engine_candidate + ) + assert "- deploy-production-candidate" in app_engine_promotion + assert ( + "- ensure-production-model-version-aligns-with-sim-api" in app_engine_promotion + ) + assert "bash .github/scripts/promote_app_engine_version.sh" in app_engine_promotion + assert ( + "APP_ENGINE_VERSION: " + "${{ needs.deploy-production-candidate.outputs.version }}" + in app_engine_promotion + ) + assert ( + "needs: ensure-production-model-version-aligns-with-sim-api" + in cloud_run_production + ) + assert "needs: promote-production" in docker_publish + assert "stage3-prod-" in cloud_run_production + assert "Build and push Cloud Run image" not in cloud_run_production + + +def test_push_workflow_uses_dedicated_cloud_run_runtime_service_account(): + workflow = _push_workflow() + cloud_run_staging = _workflow_job_block(workflow, "deploy-cloud-run-staging") + cloud_run_production = _workflow_job_block(workflow, "deploy-cloud-run-candidate") + + runtime_account_secret = ( + "CLOUD_RUN_RUNTIME_SERVICE_ACCOUNT: " + "${{ secrets.GCP_CLOUD_RUN_RUNTIME_SERVICE_ACCOUNT }}" + ) + deploy_account_secret = ( + "CLOUD_RUN_RUNTIME_SERVICE_ACCOUNT: ${{ secrets.GCP_DEPLOY_SERVICE_ACCOUNT }}" + ) + + assert 
runtime_account_secret in cloud_run_staging + assert runtime_account_secret in cloud_run_production + assert deploy_account_secret not in cloud_run_staging + assert deploy_account_secret not in cloud_run_production + + +def test_push_workflow_does_not_pass_raw_secrets_to_cloud_run_deploy_jobs(): + workflow = _push_workflow() + cloud_run_staging = _workflow_job_block(workflow, "deploy-cloud-run-staging") + cloud_run_production = _workflow_job_block(workflow, "deploy-cloud-run-candidate") + raw_secret_envs = ( + "POLICYENGINE_DB_PASSWORD: ${{ secrets.POLICYENGINE_DB_PASSWORD }}", + ( + "POLICYENGINE_GITHUB_MICRODATA_AUTH_TOKEN: " + "${{ secrets.POLICYENGINE_GITHUB_MICRODATA_AUTH_TOKEN }}" + ), + "ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}", + "OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}", + "HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}", + ) + + for raw_secret_env in raw_secret_envs: + assert raw_secret_env not in cloud_run_staging + assert raw_secret_env not in cloud_run_production + + +def test_sync_cloud_run_secrets_workflow_is_manual_and_environment_gated(): + workflow = _sync_secrets_workflow() + + assert "workflow_dispatch:" in workflow + assert "pull_request:" not in workflow + assert "push:" not in workflow + assert "environment: production" in workflow + assert "id-token: write" in workflow + assert "github.ref != 'refs/heads/master'" in workflow + assert "google-github-actions/auth@v2" in workflow + assert ( + 'workload_identity_provider: "${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }}"' + in workflow + ) + assert 'service_account: "${{ secrets.GCP_DEPLOY_SERVICE_ACCOUNT }}"' in workflow + + +def test_sync_cloud_run_secrets_workflow_writes_expected_secret_versions(): + workflow = _sync_secrets_workflow() + script = _sync_secrets_script() + + assert "run: bash .github/scripts/sync_cloud_run_secrets.sh" in workflow + assert "set +x" in script + assert "--data-file=-" in script + assert "gcloud secrets add-iam-policy-binding" in script + 
assert "roles/secretmanager.secretAccessor" in script + for env_name, secret_ref in CLOUD_RUN_SECRET_MAPPINGS.items(): + secret_name = secret_ref.removesuffix(":latest") + assert f"{env_name}: ${{{{ secrets.{env_name} }}}}" in workflow + assert f"sync_secret {env_name} {secret_name}" in script + + +def test_sync_cloud_run_secrets_script_is_shell_syntax_valid(): + result = subprocess.run( + ["bash", "-n", ".github/scripts/sync_cloud_run_secrets.sh"], + cwd=REPO, + text=True, + capture_output=True, + check=False, + ) + + assert result.returncode == 0, result.stderr + + +def test_changelog_fragment_script_is_shell_syntax_valid(): + result = subprocess.run( + ["bash", "-n", ".github/scripts/check_changelog_fragment.sh"], + cwd=REPO, + text=True, + capture_output=True, + check=False, + ) + + assert result.returncode == 0, result.stderr + + +def test_workflows_do_not_inline_long_run_blocks(): + oversized_blocks = [] + for workflow_path in (REPO / ".github/workflows").glob("*.y*ml"): + for line_number, body_lines in _multiline_run_block_lengths(workflow_path): + if body_lines > 4: + oversized_blocks.append( + f"{workflow_path.relative_to(REPO)}:{line_number} has " + f"{body_lines} inline run lines" + ) + + assert oversized_blocks == [] + + +def test_push_workflow_promotes_production_cloud_run_after_candidate_smoke(): + workflow = _push_workflow() + cloud_run_production = _workflow_job_block(workflow, "deploy-cloud-run-candidate") + smoke_index = cloud_run_production.index( + "python -m pytest tests/integration/test_cloud_run_candidate.py -v" + ) + promote_index = cloud_run_production.index( + "bash .github/scripts/promote_cloud_run_tag.sh" + ) + + assert smoke_index < promote_index + assert "bash .github/scripts/get_cloud_run_service_url.sh" in cloud_run_production diff --git a/tests/unit/test_migration_flags.py b/tests/unit/test_migration_flags.py index b44a4edf0..5367a4494 100644 --- a/tests/unit/test_migration_flags.py +++ b/tests/unit/test_migration_flags.py @@ -57,6 
+57,8 @@ def test_invalid_migration_flag_raises(monkeypatch): ("path", "expected_group"), [ ("/", "home"), + ("/health", "health"), + ("/simulation-gateway-check", "health"), ("/readiness-check", "health"), ("/us/metadata", "metadata"), ("/us/policy/1", "policy"),