From fe09a4e42c720b1ddb8bfce5dbb5280021df2c79 Mon Sep 17 00:00:00 2001 From: Xiangyi Li Date: Wed, 17 Jun 2026 17:40:36 -0400 Subject: [PATCH 1/2] chore: release v0.6.4 --- CHANGELOG.md | 13 ++++++++++++- pyproject.toml | 2 +- uv.lock | 2 +- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 25cf10449..f303e23b9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,13 +2,24 @@ ## [Unreleased] +## 0.6.4 — 2026-06-17 + +### Added +- **Environment and config as run-time axes on `bench eval run`.** `--state` + binds the environment (S-axis) per run — inline JSON, a registry + `name@version` resolved through the environment registry, or a manifest path + (takes precedence over `--environment-manifest`). `--config-override` overlays + the task config (C-axis) — inline JSON/YAML/TOML or `@file`, deep-merged into + each task's resolved config. `--config` also gains a `--run-config` alias. + (#790) + ### Changed - **`bench continue` is now `bench eval continue`.** The command (and its `continue-batch` companion) moved under the `eval` group, where it is now discoverable in `bench eval --help` alongside `run`/`adopt`. The original top-level `bench continue` / `bench continue-batch` remain as hidden, deprecated aliases (they print a deprecation notice) so existing scripts keep - working. + working. (#800) ## 0.6.3 — 2026-06-16 diff --git a/pyproject.toml b/pyproject.toml index 8c52dc683..c885a43ef 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "benchflow" -version = "0.6.3" +version = "0.6.4" description = "Multi-turn agent benchmarking with ACP — run any agent, any model, any provider." readme = "README.md" requires-python = ">=3.12" diff --git a/uv.lock b/uv.lock index ce2ae6183..229686336 100644 --- a/uv.lock +++ b/uv.lock @@ -288,7 +288,7 @@ wheels = [ [[package]] name = "benchflow" -version = "0.6.3" +version = "0.6.4" source = { editable = "." } dependencies = [ { name = "agent-client-protocol" }, From 1861cf5df20945cb1b653a0e45c98ce02bf4cf42 Mon Sep 17 00:00:00 2001 From: Xiangyi Li Date: Wed, 17 Jun 2026 17:47:07 -0400 Subject: [PATCH 2/2] docs(changelog): note content-addressed environment binding (#790) --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f303e23b9..24a74255b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,12 @@ the task config (C-axis) — inline JSON/YAML/TOML or `@file`, deep-merged into each task's resolved config. `--config` also gains a `--run-config` alias. (#790) +- **Content-addressed environment binding.** Registry environment resolution is + content-addressed — `env_hash = sha256(manifest)` — so a `name@version` + resolves to an exact, pinned environment that is recorded for replay; the + C-axis `--config-override` is likewise persisted with its content hash and the + applied patch. Every rollout is attributable to the precise world and config + it ran against. (#790) ### Changed - **`bench continue` is now `bench eval continue`.** The command (and its