Skip to content
Closed
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion .github/workflows/run-eval.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ on:
- swtbench
- commit0
- swebenchmultimodal
- harbor
- terminalbench
- programbench
sdk_ref:
Expand Down Expand Up @@ -77,6 +78,13 @@ on:
required: false
default: main
type: string
harbor_config_json:
description: >-
JSON config for Harbor mode (target, target_type, adapter_repo,
adapter_ref, adapter_path, agent, agent_env, agent_kwargs, extra_args)
required: false
default: ''
type: string
instance_ids:
description: >-
Comma-separated instance IDs to evaluate.
Expand Down Expand Up @@ -177,6 +185,7 @@ jobs:
TOOL_PRESET: ${{ github.event.inputs.tool_preset || 'default' }}
AGENT_TYPE: ${{ github.event.inputs.agent_type || 'default' }}
PARTIAL_ARCHIVE_URL: ${{ github.event.inputs.partial_archive_url || 'N/A' }}
HARBOR_CONFIG_JSON: ${{ github.event.inputs.harbor_config_json || 'N/A' }}
LABEL_NAME: ${{ github.event.label.name || 'N/A' }}
run: |
echo "=== Workflow Parameters ==="
Expand All @@ -202,6 +211,7 @@ jobs:
echo "tool_preset: $TOOL_PRESET"
echo "agent_type: $AGENT_TYPE"
echo "partial_archive_url: $PARTIAL_ARCHIVE_URL"
echo "harbor_config_json: $HARBOR_CONFIG_JSON"
echo ""
echo "=== Environment Variables ==="
echo "EVAL_REPO: $EVAL_REPO"
Expand Down Expand Up @@ -413,6 +423,7 @@ jobs:
TOOL_PRESET: ${{ github.event.inputs.tool_preset || 'default' }}
AGENT_TYPE: ${{ github.event.inputs.agent_type || 'default' }}
PARTIAL_ARCHIVE_URL: ${{ github.event.inputs.partial_archive_url || '' }}
HARBOR_CONFIG_JSON: ${{ github.event.inputs.harbor_config_json || '' }}
TRIGGERED_BY: ${{ github.actor }}
run: |
# Normalize instance_ids: strip all spaces
Expand All @@ -438,8 +449,9 @@ jobs:
--arg tool_preset "$TOOL_PRESET" \
--arg agent_type "$AGENT_TYPE" \
--arg partial_archive_url "$PARTIAL_ARCHIVE_URL" \
--arg harbor_config_json "$HARBOR_CONFIG_JSON" \
--arg triggered_by "$TRIGGERED_BY" \
'{ref: $ref, inputs: {sdk_commit: $sdk, sdk_workflow_run_id: $sdk_run_id, eval_limit: $eval_limit, models_json: ($models | tostring), trigger_reason: $reason, pr_number: $pr, benchmarks_branch: $benchmarks, extensions_branch: $extensions, benchmark: $benchmark, instance_ids: $instance_ids, num_infer_workers: $num_infer_workers, num_eval_workers: $num_eval_workers, enable_conversation_event_logging: $enable_conversation_event_logging, max_retries: $max_retries, tool_preset: $tool_preset, agent_type: $agent_type, partial_archive_url: $partial_archive_url, triggered_by: $triggered_by}}')
'{ref: $ref, inputs: {sdk_commit: $sdk, sdk_workflow_run_id: $sdk_run_id, eval_limit: $eval_limit, models_json: ($models | tostring), trigger_reason: $reason, pr_number: $pr, benchmarks_branch: $benchmarks, extensions_branch: $extensions, benchmark: $benchmark, harbor_config_json: $harbor_config_json, instance_ids: $instance_ids, num_infer_workers: $num_infer_workers, num_eval_workers: $num_eval_workers, enable_conversation_event_logging: $enable_conversation_event_logging, max_retries: $max_retries, tool_preset: $tool_preset, agent_type: $agent_type, partial_archive_url: $partial_archive_url, triggered_by: $triggered_by}}')
RESPONSE=$(curl -sS -o /tmp/dispatch.out -w "%{http_code}" -X POST \
-H "Authorization: token $DISPATCH_TOKEN" \
-H "Accept: application/vnd.github+json" \
Expand Down
Loading