From 012c81966e266bff2f3f210b3ff8bda4df169ec2 Mon Sep 17 00:00:00 2001 From: "Hanna Paasivirta (OpenFn)" Date: Thu, 25 Jun 2026 14:39:57 +0100 Subject: [PATCH 1/4] switch to opus --- services/global_chat/config.yaml | 2 +- services/job_chat/rag.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/global_chat/config.yaml b/services/global_chat/config.yaml index a8714f58..328d5ef3 100644 --- a/services/global_chat/config.yaml +++ b/services/global_chat/config.yaml @@ -8,7 +8,7 @@ router: # Planner configuration (complex orchestration) planner: - model: "claude-sonnet" + model: "claude-opus" max_tokens: 8192 temperature: 1.0 max_tool_calls: 10 diff --git a/services/job_chat/rag.yaml b/services/job_chat/rag.yaml index 9a6b6c39..13caa0de 100644 --- a/services/job_chat/rag.yaml +++ b/services/job_chat/rag.yaml @@ -1,5 +1,5 @@ config_version: 1.0 -model: "claude-sonnet" +model: "claude-opus" llm_search_decision: "claude-sonnet" llm_retrieval: "claude-sonnet" threshold: 0.8 From 8619dce8602642269755ecd945c7ae33cbf9cdcb Mon Sep 17 00:00:00 2001 From: "Hanna Paasivirta (OpenFn)" Date: Thu, 25 Jun 2026 18:19:33 +0100 Subject: [PATCH 2/4] adjust tokens effort --- services/global_chat/config.yaml | 3 +-- services/global_chat/planner.py | 13 +++++++++---- services/job_chat/job_chat.py | 7 ++++++- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/services/global_chat/config.yaml b/services/global_chat/config.yaml index 328d5ef3..d2f26a51 100644 --- a/services/global_chat/config.yaml +++ b/services/global_chat/config.yaml @@ -9,6 +9,5 @@ router: # Planner configuration (complex orchestration) planner: model: "claude-opus" - max_tokens: 8192 - temperature: 1.0 + max_tokens: 24576 max_tool_calls: 10 diff --git a/services/global_chat/planner.py b/services/global_chat/planner.py index 7eb3bd9b..ee4fe80f 100644 --- a/services/global_chat/planner.py +++ b/services/global_chat/planner.py @@ -6,6 +6,7 @@ from typing import List, Dict, Optional from concurrent.futures import ThreadPoolExecutor, as_completed from dataclasses import dataclass +import httpx from anthropic import Anthropic import sentry_sdk @@ -59,9 +60,8 @@ def __init__(self, config_loader: ConfigLoader, api_key: Optional[str] = None): self.tools = TOOL_DEFINITIONS planner_config = config_loader.config.get("planner", {}) - self.model = resolve_model(planner_config.get("model", "claude-sonnet")) - self.max_tokens = planner_config.get("max_tokens", 8192) - self.temperature = planner_config.get("temperature", 1.0) + self.model = resolve_model(planner_config.get("model", "claude-opus")) + self.max_tokens = planner_config.get("max_tokens", 24576) self.max_tool_calls = planner_config.get("max_tool_calls", 20) self.current_yaml: Optional[str] = None @@ -285,6 +285,7 @@ def _call_api(self, system_prompt, messages, stream): messages=messages, tools=self.tools, thinking={"type": "adaptive"}, + output_config={"effort": "medium"}, ) as stream_obj: for event in stream_obj: if event.type == "content_block_delta": @@ -299,7 +300,11 @@ def _call_api(self, system_prompt, messages, stream): messages=messages, tools=self.tools, thinking={"type": "adaptive"}, - output_config={"effort": "high"}, + output_config={"effort": "medium"}, + # Per-request timeout (same values as the SDK default): + # required for non-streaming calls with max_tokens > ~21k, + # which the SDK otherwise rejects. + timeout=httpx.Timeout(600.0, connect=5.0), betas=["context-management-2025-06-27"], context_management={ "edits": [ diff --git a/services/job_chat/job_chat.py b/services/job_chat/job_chat.py index 31c11440..e58d7377 100644 --- a/services/job_chat/job_chat.py +++ b/services/job_chat/job_chat.py @@ -4,6 +4,7 @@ import yaml from typing import List, Optional, Dict, Any from dataclasses import dataclass +import httpx from anthropic import ( Anthropic, APIConnectionError, @@ -138,7 +139,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "Payload": @dataclass class ChatConfig: model: str = _MODEL - max_tokens: int = 16384 + max_tokens: int = 32768 api_key: Optional[str] = None @@ -288,6 +289,10 @@ def generate( max_tokens=self.config.max_tokens, messages=prompt, model=self.config.model, system=system_message, thinking={"type": "adaptive"}, output_config=output_config, + # Per-request timeout (same values as the SDK default): + # required for non-streaming calls with max_tokens > ~21k, + # which the SDK otherwise rejects. + timeout=httpx.Timeout(600.0, connect=5.0), **tool_kwargs ) message = self.client.messages.create(**create_kwargs) From 64fe20d2b68a004ae56525886fecf39240d0561d Mon Sep 17 00:00:00 2001 From: "Hanna Paasivirta (OpenFn)" Date: Thu, 25 Jun 2026 18:35:37 +0100 Subject: [PATCH 3/4] add changeset --- .changeset/twelve-lemons-tan.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/twelve-lemons-tan.md diff --git a/.changeset/twelve-lemons-tan.md b/.changeset/twelve-lemons-tan.md new file mode 100644 index 00000000..a0e3bd98 --- /dev/null +++ b/.changeset/twelve-lemons-tan.md @@ -0,0 +1,5 @@ +--- +"apollo": minor +--- + +upgrade to opus in planner and job chat From 788f2c3f390ef55bff4a61659a925040384c9058 Mon Sep 17 00:00:00 2001 From: "Hanna Paasivirta (OpenFn)" Date: Thu, 25 Jun 2026 18:40:26 +0100 Subject: [PATCH 4/4] adjust tokens --- services/job_chat/job_chat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/job_chat/job_chat.py b/services/job_chat/job_chat.py index e58d7377..112a49c8 100644 --- a/services/job_chat/job_chat.py +++ b/services/job_chat/job_chat.py @@ -139,7 +139,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "Payload": @dataclass class ChatConfig: model: str = _MODEL - max_tokens: int = 32768 + max_tokens: int = 24576 api_key: Optional[str] = None