From 10728681bada14522c6697cba44896e0446e9084 Mon Sep 17 00:00:00 2001 From: Sebastian Husch Lee Date: Fri, 29 May 2026 13:06:39 +0200 Subject: [PATCH 1/8] Add skilltoolset and new system_prompt_instructions to Tool and Toolset --- agent_skills_plan.md | 187 ++++++++++++++++++ haystack/components/agents/agent.py | 62 ++++++ haystack/tools/__init__.py | 3 + haystack/tools/tool.py | 18 ++ haystack/tools/toolset.py | 16 ++ .../add-skill-toolset-7f3c1a9e2b4d6c8a.yaml | 27 +++ test/components/agents/test_agent.py | 93 ++++++++- test/components/agents/test_agent_hitl.py | 1 + test/components/generators/chat/test_azure.py | 1 + .../generators/chat/test_azure_responses.py | 1 + .../generators/chat/test_hugging_face_api.py | 3 + .../chat/test_hugging_face_local.py | 2 + .../components/generators/chat/test_openai.py | 1 + .../generators/chat/test_openai_responses.py | 1 + test/tools/test_tool.py | 18 ++ test/tools/test_toolset.py | 5 + 16 files changed, 435 insertions(+), 4 deletions(-) create mode 100644 agent_skills_plan.md create mode 100644 releasenotes/notes/add-skill-toolset-7f3c1a9e2b4d6c8a.yaml diff --git a/agent_skills_plan.md b/agent_skills_plan.md new file mode 100644 index 0000000000..27af34ce7f --- /dev/null +++ b/agent_skills_plan.md @@ -0,0 +1,187 @@ +# Plan: Skills support for the Haystack Agent + +Goal: let the Haystack `Agent` discover and read filesystem **Skills** the way Claude +Code / Codex do — progressive disclosure of expert instructions — using a small set of +pre-built tools plus a generic mechanism for tools to contribute system-prompt text. + +## Background — what we're replicating + +Claude Code / Codex Skills use **progressive disclosure**: + +| Level | What's loaded | When | +|---|---|---| +| 1 — Metadata | each skill's `name` + `description` | always, up front | +| 2 — Instructions | the full `SKILL.md` body | when a skill is triggered | +| 3 — Bundled files | `reference/*.md`, examples, etc. | on demand, as the body references them | + +A skill is a directory: + +``` +skills/ + pdf-forms/ + SKILL.md # YAML frontmatter (name, description) + markdown body + reference/forms.md +``` + +Level 1 lives in the system context (not a tool). Levels 2/3 are pulled in on demand. + +## Decisions (locked) + +- **Two tools**, both pure/stateless: `load_skill`, `read_skill_file`. +- **No script execution** in v1 (no Level-3 execution tool). +- **No search-based discovery** in v1 — Level-1 metadata is injected into the system prompt. +- **No `state_schema` additions.** Progressive disclosure works through message history: + a `load_skill` result is a tool-result message that persists in `state["messages"]`, so + the agent already "remembers" what it loaded. No `inputs_from_state` / `outputs_to_state`. + (`tool_call_counts`, already tracked internally, covers observability of how often + `load_skill` fired.) +- **Filesystem source only.** +- **Location:** `haystack/tools/skills/`. +- **System-prompt injection: Option C** — a generic contribution hook on `Tool`/`Toolset`, + consumed automatically by the `Agent`. Chosen over a manual concatenation helper + (Option A) because A complicates Agent serialization and can't compose multiple augments. + +## How MCP does the equivalent (reference) + +MCP servers return an optional top-level `instructions` string in their `initialize` +response. The spec frames it as a hint clients MAY add to the system prompt to explain +the server's tools as a whole. It is **server-level, not per-tool** — individual tools +only carry a `description`. So the container supplies the system-prompt instructions; the +tools just describe themselves. This validates the "top-level wins, skip member tools" rule. + +## Component 1 — `system_prompt_contribution()` hook (generic, reusable) + +Add an optional method to both base classes, default `None`: + +```python +# haystack/tools/tool.py (on Tool) +# haystack/tools/toolset.py (on Toolset) +def system_prompt_contribution(self) -> str | None: + """Text this tool/toolset wants appended to the Agent's system prompt. None by default.""" + return None +``` + +- `Tool` also gains an optional `system_prompt: str | None = None` dataclass field so a + plain `Tool` can carry instructions without subclassing; `system_prompt_contribution()` + returns it by default (subclasses may override for dynamism). The field round-trips through + `to_dict`/`from_dict` and is always present in the serialized dict (existing tool + serialization snapshots were updated to include `system_prompt: null`). +- `Toolset` subclasses (like `SkillToolset`) override the method. + +### Agent consumption — `_initialize_fresh_execution` (`agent.py:~654`) + +Right after `selected_tools = self._select_tools(tools)`, collect contributions and merge +them into the system message: + +```python +selected_tools = self._select_tools(tools) +contributions = _collect_system_prompt_contributions(selected_tools) +if contributions: + messages = _merge_system_prompt_contributions(messages, contributions) +``` + +Collection rules (a free function, unit-testable): + +- Recurse through `list`, `_ToolsetWrapper` (descend into `.toolsets` — this is the + `toolset_a + toolset_b` compose path), `Toolset`, `Tool`. +- **Top-level wins:** if a `Toolset` returns a contribution, use it and DO NOT descend into + its member tools. Only if it returns `None` do we gather member tools' contributions. +- Bare `Tool` passed directly contributes its own. + +Merge rules: + +- Join contributions with `\n\n`. +- If `messages[0]` is a system message (the rendered `system_prompt`, or a user-supplied + system message), append the contribution text to it. +- Otherwise prepend a new system message built from the contributions. +- Collect from `selected_tools` (respects per-run `run(tools=...)` filtering) and inject + **after** Jinja rendering — never into the template string — so skill text containing + `{{`/`{%` can't break `ChatPromptBuilder`. + +### Why this keeps Agent serialization clean + +Nothing about the augmentation is stored on the `Agent`. The system-prompt text is +regenerated from the toolset on every run, so `Agent.to_dict()` only serializes the +toolset (which serializes its `skills_dir`). Multiple augmenting toolsets compose via +`_ToolsetWrapper` and each contributes independently. + +## Component 2 — `SkillToolset(Toolset)` + +`haystack/tools/skills/skill_toolset.py` + +```python +@dataclass +class SkillMeta: + name: str # from frontmatter; falls back to directory name + description: str # from frontmatter + path: Path # the skill directory + +class SkillToolset(Toolset): + def __init__(self, skills_dir: str | Path) -> None: + self.skills_dir = Path(skills_dir) + self._skills: dict[str, SkillMeta] = self._scan() # frontmatter-only, cheap, in __init__ + super().__init__(tools=[self._load_skill_tool(), self._read_skill_file_tool()]) +``` + +- `_scan()` walks `skills_dir/*/SKILL.md`, parses YAML frontmatter (pyyaml — already a + dep), validates `name`/`description`, checks name uniqueness. Bodies are NOT read here. +- `system_prompt_contribution()` renders the Level-1 catalog + behavioral rules (below). +- `warm_up()` revalidates (idempotent). +- `to_dict()`/`from_dict()` serialize `skills_dir` only and rescan on load (mirrors + `SearchableToolset`). `add`/`__add__` of new ad-hoc tools left as default Toolset behavior. + +### Tool: `load_skill` (Level 2) + +```python +def load_skill(name: Annotated[str, "Exact skill name from the Available Skills list."]) -> str: + """Load a skill's full instructions. Call this before doing a task the skill covers.""" +``` + +Returns the `SKILL.md` body plus a manifest of bundled files (so the model knows what +`read_skill_file` can fetch). Unknown name → friendly error listing available skills. + +### Tool: `read_skill_file` (Level 3) + +```python +def read_skill_file( + name: Annotated[str, "Skill that owns the file."], + path: Annotated[str, "Path relative to the skill directory, e.g. 'reference/forms.md'."], +) -> str: + """Read a file bundled with a skill (reference docs, examples, templates).""" +``` + +Path-traversal guard: `(skill_dir / path).resolve()` must stay within `skill_dir.resolve()`, +else error. Missing file → friendly error. + +### System-prompt contribution text + +``` +## Available Skills +Specialized instruction sets for specific task types. Load one before doing matching work. + +- **pdf-forms**: Use when filling PDF forms or extracting fields from PDFs. +- **excel-report**: Use when creating or editing .xlsx spreadsheets. + +When a task matches a skill, call `load_skill` with its name BEFORE starting, then follow +the loaded instructions exactly (they override your general approach). Load skills only +when relevant; if a skill references a file, fetch it with `read_skill_file`. If no skill +matches, proceed normally. +``` + +## Files touched + +- `haystack/tools/tool.py` — add `system_prompt` field + `system_prompt_contribution()`; serde. +- `haystack/tools/toolset.py` — add `system_prompt_contribution()` (default None). +- `haystack/components/agents/agent.py` — collect + merge contributions in + `_initialize_fresh_execution` (+ two free helpers). +- `haystack/tools/skills/__init__.py`, `haystack/tools/skills/skill_toolset.py` — new. +- `haystack/tools/__init__.py` — export `SkillToolset`. +- Tests: `test/tools/skills/`, plus `system_prompt_contribution` cases in tool/toolset/agent tests. +- Reno release note. + +## Out of scope (future) + +- Script execution (`run_skill_script`) with confirmation-strategy gating. +- Search-based discovery (`discovery="search"`) for very large skill libraries. +- In-memory / non-filesystem skill sources. +- Optional `active_skills` observability via `outputs_to_state`. diff --git a/haystack/components/agents/agent.py b/haystack/components/agents/agent.py index f792e8251b..974580164d 100644 --- a/haystack/components/agents/agent.py +++ b/haystack/components/agents/agent.py @@ -39,6 +39,7 @@ serialize_tools_or_toolset, warm_up_tools, ) +from haystack.tools.toolset import _ToolsetWrapper from haystack.utils.callable_serialization import deserialize_callable, serialize_callable from haystack.utils.deserialization import deserialize_component_inplace @@ -193,6 +194,63 @@ def _render_prompt_messages( return prompt_messages +def _gather_system_prompt_contributions(item: Any, out: list[str]) -> None: + """ + Recursively collect system prompt contributions from a tool, toolset, or collection thereof. + + A `Toolset` that provides its own contribution takes precedence over its member tools, which are then + not contributed separately (mirroring how an MCP server's `instructions` describe its tools as a whole). + Composed toolsets (created via `toolset_a + toolset_b`) contribute each child toolset independently. + + :param item: A `Tool`, `Toolset`, or list/tuple of them. + :param out: List that collected, non-empty contributions are appended to, in order. + """ + if item is None: + return + if isinstance(item, _ToolsetWrapper): + for toolset in item.toolsets: + _gather_system_prompt_contributions(toolset, out) + elif isinstance(item, Toolset): + contribution = item.system_prompt_contribution() + if contribution: + out.append(contribution) + else: + for member_tool in item: + _gather_system_prompt_contributions(member_tool, out) + elif isinstance(item, Tool): + contribution = item.system_prompt_contribution() + if contribution: + out.append(contribution) + elif isinstance(item, (list, tuple)): + for sub_item in item: + _gather_system_prompt_contributions(sub_item, out) + + +def _apply_system_prompt_contributions(messages: list[ChatMessage], tools: ToolsType | None) -> list[ChatMessage]: + """ + Append tool/toolset system prompt contributions to the agent's system message. + + If the first message is a system message (the rendered `system_prompt`, or a user-supplied system message), + the contributions are appended to it. Otherwise a new system message is prepended. Contributions are merged + into the already-rendered message text (never the Jinja2 template) so arbitrary content is safe. + + :param messages: The messages the agent is about to run with. + :param tools: The tools selected for this run. + :returns: The messages with contributions applied (a new list; the input is not mutated). + """ + contributions: list[str] = [] + _gather_system_prompt_contributions(tools, contributions) + if not contributions: + return messages + + extra = "\n\n".join(contributions) + if messages and messages[0].is_from(ChatRole.SYSTEM): + existing = messages[0].text or "" + combined = f"{existing}\n\n{extra}" if existing else extra + return [ChatMessage.from_system(combined), *messages[1:]] + return [ChatMessage.from_system(extra), *messages] + + @dataclass(kw_only=True) class _ExecutionContext: """ @@ -653,6 +711,10 @@ def _initialize_fresh_execution( selected_tools = self._select_tools(tools) + # Append any system prompt instructions contributed by the selected tools/toolsets (e.g. a SkillToolset + # catalog) to the system message. Based on selected_tools so it respects per-run tool filtering. + messages = _apply_system_prompt_contributions(messages, selected_tools) + state_kwargs: dict[str, Any] = {key: kwargs[key] for key in self.state_schema.keys() if key in kwargs} state = State(schema=self.state_schema, data=state_kwargs) state.set("messages", messages) diff --git a/haystack/tools/__init__.py b/haystack/tools/__init__.py index 2fa7c35cf2..827c484a99 100644 --- a/haystack/tools/__init__.py +++ b/haystack/tools/__init__.py @@ -11,6 +11,7 @@ from haystack.tools.tool import Tool, _check_duplicate_tool_names from haystack.tools.toolset import Toolset from haystack.tools.searchable_toolset import SearchableToolset +from haystack.tools.skills import SkillMeta, SkillToolset from haystack.tools.component_tool import ComponentTool from haystack.tools.pipeline_tool import PipelineTool from haystack.tools.serde_utils import deserialize_tools_or_toolset_inplace, serialize_tools_or_toolset @@ -32,6 +33,8 @@ "serialize_tools_or_toolset", "Tool", "SearchableToolset", + "SkillMeta", + "SkillToolset", "ToolsType", "Toolset", "tool", diff --git a/haystack/tools/tool.py b/haystack/tools/tool.py index bb02b57c40..db9df347b1 100644 --- a/haystack/tools/tool.py +++ b/haystack/tools/tool.py @@ -86,6 +86,11 @@ class Tool: "documents": {"handler": custom_handler} } ``` + :param system_prompt: + Optional system prompt instructions associated with this Tool. When the Tool is used with an `Agent`, + this text is appended to the Agent's system prompt (see `system_prompt_contribution`). Use it to tell + the model how and when to use the Tool. Note that an enclosing `Toolset` that provides its own + `system_prompt_contribution` takes precedence and suppresses the contributions of its member tools. :raises ValueError: If `function` is async, if `parameters` is not a valid JSON schema, or if the `outputs_to_state`, `outputs_to_string`, or `inputs_from_state` configurations are invalid. :raises TypeError: If any configuration value in `outputs_to_state`, `outputs_to_string`, or @@ -99,6 +104,7 @@ class Tool: outputs_to_string: dict[str, Any] | None = None inputs_from_state: dict[str, str] | None = None outputs_to_state: dict[str, dict[str, Any]] | None = None + system_prompt: str | None = None def __post_init__(self) -> None: # noqa: C901, PLR0912 # Check that the function is not a coroutine (async function) @@ -258,6 +264,18 @@ def warm_up(self) -> None: """ pass + def system_prompt_contribution(self) -> str | None: + """ + Return optional system prompt instructions for this Tool. + + When the Tool is used with an `Agent`, the returned text is appended to the Agent's system prompt. + By default this returns the `system_prompt` attribute (which may be `None`). Subclasses can override + this to generate instructions dynamically. + + :returns: The system prompt contribution, or `None` if the Tool has nothing to contribute. + """ + return getattr(self, "system_prompt", None) + def invoke(self, **kwargs: Any) -> Any: """ Invoke the Tool with the provided keyword arguments. diff --git a/haystack/tools/toolset.py b/haystack/tools/toolset.py index 0941714f7f..1d7beed1a3 100644 --- a/haystack/tools/toolset.py +++ b/haystack/tools/toolset.py @@ -188,6 +188,22 @@ def __contains__(self, item: str | Tool) -> bool: return item in self.tools return False + def system_prompt_contribution(self) -> str | None: + """ + Return optional system prompt instructions for this Toolset. + + When the Toolset is used with an `Agent`, the returned text is appended to the Agent's system prompt. + This is the Toolset-level analogue of an MCP server's `instructions`: it describes how to use the + Toolset's tools as a whole. The default implementation returns `None`; subclasses (for example + `SkillToolset`) override it to generate instructions dynamically. + + When this returns a non-`None` value, it takes precedence over the `system_prompt` of the Toolset's + individual member tools, which are then not contributed separately. + + :returns: The system prompt contribution, or `None` if the Toolset has nothing to contribute. + """ + return None + def warm_up(self) -> None: """ Prepare the Toolset for use. diff --git a/releasenotes/notes/add-skill-toolset-7f3c1a9e2b4d6c8a.yaml b/releasenotes/notes/add-skill-toolset-7f3c1a9e2b4d6c8a.yaml new file mode 100644 index 0000000000..7b1e8a57a8 --- /dev/null +++ b/releasenotes/notes/add-skill-toolset-7f3c1a9e2b4d6c8a.yaml @@ -0,0 +1,27 @@ +--- +features: + - | + Added ``SkillToolset``, a ``Toolset`` that lets an ``Agent`` discover and read filesystem "skills" + through progressive disclosure, similar to how Claude Code and Codex expose skills. A skill is a + directory containing a ``SKILL.md`` file with YAML frontmatter (``name`` and ``description``) and a + markdown body of instructions, optionally bundling additional reference files. Point the toolset at a + skills directory and add it to an ``Agent``: + + .. code-block:: python + + from haystack.components.agents import Agent + from haystack.components.generators.chat import OpenAIChatGenerator + from haystack.tools import SkillToolset + + agent = Agent(chat_generator=OpenAIChatGenerator(), tools=SkillToolset("skills/")) + + The names and descriptions of all discovered skills are appended to the Agent's system prompt + automatically. The toolset exposes two tools: ``load_skill`` returns a skill's full instructions on + demand, and ``read_skill_file`` reads a file bundled with a skill (with path-traversal protection). + - | + ``Tool`` and ``Toolset`` now support a ``system_prompt_contribution()`` method. When a tool or toolset + is used with an ``Agent``, the text it returns is appended to the Agent's system prompt. ``Tool`` gains + an optional ``system_prompt`` field for this; ``Toolset`` subclasses can override the method to generate + instructions dynamically. When a ``Toolset`` provides a contribution, it takes precedence over the + ``system_prompt`` of its individual member tools, mirroring how an MCP server's ``instructions`` describe + its tools as a whole. diff --git a/test/components/agents/test_agent.py b/test/components/agents/test_agent.py index 496f16e533..5e44f6e0c6 100644 --- a/test/components/agents/test_agent.py +++ b/test/components/agents/test_agent.py @@ -263,6 +263,7 @@ def test_to_dict(self, weather_tool, component_tool, monkeypatch): "outputs_to_string": None, "inputs_from_state": None, "outputs_to_state": None, + "system_prompt": None, }, }, { @@ -346,6 +347,7 @@ def test_to_dict_with_toolset(self, monkeypatch, weather_tool): "outputs_to_string": None, "inputs_from_state": None, "outputs_to_state": None, + "system_prompt": None, }, } ] @@ -416,6 +418,7 @@ def test_from_dict(self, monkeypatch): "outputs_to_string": None, "inputs_from_state": None, "outputs_to_state": None, + "system_prompt": None, }, }, { @@ -506,6 +509,7 @@ def test_from_dict_with_toolset(self, monkeypatch): "outputs_to_string": None, "inputs_from_state": None, "outputs_to_state": None, + "system_prompt": None, }, } ] @@ -1268,7 +1272,7 @@ def test_agent_tracing_span_run(self, caplog, monkeypatch, weather_tool): assert set(llm_tags) == {"haystack.agent.step.llm.input", "haystack.agent.step.llm.output"} assert ( llm_tags["haystack.agent.step.llm.input"] - == '{"messages": [{"role": "user", "meta": {}, "name": null, "content": [{"text": "What\'s the weather in Paris?"}]}], "tools": [{"type": "haystack.tools.tool.Tool", "data": {"name": "weather_tool", "description": "Provides weather information for a given location.", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}, "function": "test_agent.weather_function", "outputs_to_string": null, "inputs_from_state": null, "outputs_to_state": null}}]}' # noqa: E501 + == '{"messages": [{"role": "user", "meta": {}, "name": null, "content": [{"text": "What\'s the weather in Paris?"}]}], "tools": [{"type": "haystack.tools.tool.Tool", "data": {"name": "weather_tool", "description": "Provides weather information for a given location.", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}, "function": "test_agent.weather_function", "outputs_to_string": null, "inputs_from_state": null, "outputs_to_state": null, "system_prompt": null}}]}' # noqa: E501 ) assert ( llm_tags["haystack.agent.step.llm.output"] @@ -1283,7 +1287,7 @@ def test_agent_tracing_span_run(self, caplog, monkeypatch, weather_tool): _, run_tags = agent_spans[2] assert run_tags == { "haystack.agent.max_steps": 100, - "haystack.agent.tools": '[{"type": "haystack.tools.tool.Tool", "data": {"name": "weather_tool", "description": "Provides weather information for a given location.", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}, "function": "test_agent.weather_function", "outputs_to_string": null, "inputs_from_state": null, "outputs_to_state": null}}]', # noqa: E501 + "haystack.agent.tools": '[{"type": "haystack.tools.tool.Tool", "data": {"name": "weather_tool", "description": "Provides weather information for a given location.", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}, "function": "test_agent.weather_function", "outputs_to_string": null, "inputs_from_state": null, "outputs_to_state": null, "system_prompt": null}}]', # noqa: E501 "haystack.agent.exit_conditions": '["text"]', "haystack.agent.state_schema": '{"messages": {"type": "list[haystack.dataclasses.chat_message.ChatMessage]", "handler": "haystack.components.agents.state.state_utils.merge_lists"}, "step_count": {"type": "int", "handler": "haystack.components.agents.state.state_utils.replace_values"}, "token_usage": {"type": "dict[str, typing.Any]", "handler": "haystack.components.agents.state.state_utils.replace_values"}, "tool_call_counts": {"type": "dict[str, int]", "handler": "haystack.components.agents.state.state_utils.replace_values"}}', # noqa: E501 "haystack.agent.input": '{"messages": [{"role": "user", "meta": {}, "name": null, "content": [{"text": "What\'s the weather in Paris?"}]}], "streaming_callback": null}', # noqa: E501 @@ -1376,7 +1380,7 @@ async def test_agent_tracing_span_async_run(self, caplog, monkeypatch, weather_t assert set(llm_tags) == {"haystack.agent.step.llm.input", "haystack.agent.step.llm.output"} assert ( llm_tags["haystack.agent.step.llm.input"] - == '{"messages": [{"role": "user", "meta": {}, "name": null, "content": [{"text": "What\'s the weather in Paris?"}]}], "tools": [{"type": "haystack.tools.tool.Tool", "data": {"name": "weather_tool", "description": "Provides weather information for a given location.", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}, "function": "test_agent.weather_function", "outputs_to_string": null, "inputs_from_state": null, "outputs_to_state": null}}]}' # noqa: E501 + == '{"messages": [{"role": "user", "meta": {}, "name": null, "content": [{"text": "What\'s the weather in Paris?"}]}], "tools": [{"type": "haystack.tools.tool.Tool", "data": {"name": "weather_tool", "description": "Provides weather information for a given location.", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}, "function": "test_agent.weather_function", "outputs_to_string": null, "inputs_from_state": null, "outputs_to_state": null, "system_prompt": null}}]}' # noqa: E501 ) assert ( llm_tags["haystack.agent.step.llm.output"] @@ -1389,7 +1393,7 @@ async def test_agent_tracing_span_async_run(self, caplog, monkeypatch, weather_t _, run_tags = agent_spans[2] assert run_tags == { "haystack.agent.max_steps": 100, - "haystack.agent.tools": '[{"type": "haystack.tools.tool.Tool", "data": {"name": "weather_tool", "description": "Provides weather information for a given location.", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}, "function": "test_agent.weather_function", "outputs_to_string": null, "inputs_from_state": null, "outputs_to_state": null}}]', # noqa: E501 + "haystack.agent.tools": '[{"type": "haystack.tools.tool.Tool", "data": {"name": "weather_tool", "description": "Provides weather information for a given location.", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}, "function": "test_agent.weather_function", "outputs_to_string": null, "inputs_from_state": null, "outputs_to_state": null, "system_prompt": null}}]', # noqa: E501 "haystack.agent.exit_conditions": '["text"]', "haystack.agent.state_schema": '{"messages": {"type": "list[haystack.dataclasses.chat_message.ChatMessage]", "handler": "haystack.components.agents.state.state_utils.merge_lists"}, "step_count": {"type": "int", "handler": "haystack.components.agents.state.state_utils.replace_values"}, "token_usage": {"type": "dict[str, typing.Any]", "handler": "haystack.components.agents.state.state_utils.replace_values"}, "tool_call_counts": {"type": "dict[str, int]", "handler": "haystack.components.agents.state.state_utils.replace_values"}}', # noqa: E501 "haystack.agent.input": '{"messages": [{"role": "user", "meta": {}, "name": null, "content": [{"text": "What\'s the weather in Paris?"}]}], "streaming_callback": null}', # noqa: E501 @@ -2220,3 +2224,84 @@ def run(self) -> dict: assert "agent" not in result chat_generator.run.assert_not_called() + + +@component +class CapturingChatGenerator: + """Records the messages it last received so tests can inspect the system prompt sent to the model.""" + + def __init__(self): + self.last_messages = None + + @component.output_types(replies=list[ChatMessage]) + def run(self, messages: list[ChatMessage], tools=None, **kwargs) -> dict[str, Any]: + self.last_messages = messages + return {"replies": [ChatMessage.from_assistant("done")]} + + +class _ContributingToolset(Toolset): + def __init__(self, tools, contribution): + super().__init__(tools) + self._contribution = contribution + + def system_prompt_contribution(self): + return self._contribution + + +class TestSystemPromptContributions: + def _tool(self, name, system_prompt=None): + return Tool( + name=name, + description="d", + parameters={"type": "object", "properties": {}}, + function=lambda: None, + system_prompt=system_prompt, + ) + + def test_toolset_contribution_appended_to_system_prompt(self): + generator = CapturingChatGenerator() + toolset = _ContributingToolset([self._tool("a")], "TOOLSET INSTRUCTIONS") + agent = Agent(chat_generator=generator, tools=toolset, system_prompt="You are helpful.") + agent.warm_up() + agent.run(messages=[ChatMessage.from_user("hi")]) + + system_message = generator.last_messages[0] + assert system_message.is_from(ChatRole.SYSTEM) + assert system_message.text == "You are helpful.\n\nTOOLSET INSTRUCTIONS" + + def test_contribution_prepended_when_no_system_prompt(self): + generator = CapturingChatGenerator() + toolset = _ContributingToolset([self._tool("a")], "TOOLSET INSTRUCTIONS") + agent = Agent(chat_generator=generator, tools=toolset) + agent.warm_up() + agent.run(messages=[ChatMessage.from_user("hi")]) + + assert generator.last_messages[0].is_from(ChatRole.SYSTEM) + assert generator.last_messages[0].text == "TOOLSET INSTRUCTIONS" + assert generator.last_messages[1].is_from(ChatRole.USER) + + def test_toolset_contribution_suppresses_member_tool_contributions(self): + generator = CapturingChatGenerator() + toolset = _ContributingToolset([self._tool("a", system_prompt="MEMBER")], "TOOLSET") + agent = Agent(chat_generator=generator, tools=toolset) + agent.warm_up() + agent.run(messages=[ChatMessage.from_user("hi")]) + + assert generator.last_messages[0].text == "TOOLSET" + + def test_bare_tool_system_prompt_is_contributed(self): + generator = CapturingChatGenerator() + agent = Agent(chat_generator=generator, tools=[self._tool("a", system_prompt="USE TOOL A")]) + agent.warm_up() + agent.run(messages=[ChatMessage.from_user("hi")]) + + assert generator.last_messages[0].text == "USE TOOL A" + + def test_no_contribution_leaves_messages_unchanged(self): + generator = CapturingChatGenerator() + agent = Agent(chat_generator=generator, tools=[self._tool("a")]) + agent.warm_up() + agent.run(messages=[ChatMessage.from_user("hi")]) + + assert len(generator.last_messages) == 1 + assert generator.last_messages[0].is_from(ChatRole.USER) diff --git a/test/components/agents/test_agent_hitl.py b/test/components/agents/test_agent_hitl.py index 1944aa0b33..b68e560176 100644 --- a/test/components/agents/test_agent_hitl.py +++ b/test/components/agents/test_agent_hitl.py @@ -93,6 +93,7 @@ def test_to_dict(self, tools, confirmation_strategies, monkeypatch): "outputs_to_string": None, "inputs_from_state": None, "outputs_to_state": None, + "system_prompt": None, }, } ], diff --git a/test/components/generators/chat/test_azure.py b/test/components/generators/chat/test_azure.py index 93f5daf11c..31e15bf81a 100644 --- a/test/components/generators/chat/test_azure.py +++ b/test/components/generators/chat/test_azure.py @@ -429,6 +429,7 @@ def test_to_dict_with_toolset(self, tools, monkeypatch): "outputs_to_string": None, "inputs_from_state": None, "outputs_to_state": None, + "system_prompt": None, }, } ] diff --git a/test/components/generators/chat/test_azure_responses.py b/test/components/generators/chat/test_azure_responses.py index 96e5016913..55f12c050c 100644 --- a/test/components/generators/chat/test_azure_responses.py +++ b/test/components/generators/chat/test_azure_responses.py @@ -249,6 +249,7 @@ def test_to_dict_with_toolset(self, tools, monkeypatch): "outputs_to_string": None, "inputs_from_state": None, "outputs_to_state": None, + "system_prompt": None, }, } ] diff --git a/test/components/generators/chat/test_hugging_face_api.py b/test/components/generators/chat/test_hugging_face_api.py index 1bb91f8e3e..2dd9233a7b 100644 --- a/test/components/generators/chat/test_hugging_face_api.py +++ b/test/components/generators/chat/test_hugging_face_api.py @@ -278,6 +278,7 @@ def test_to_dict(self, mock_check_valid_model): "inputs_from_state": None, "name": "name", "outputs_to_state": None, + "system_prompt": None, "outputs_to_string": None, "parameters": {"x": {"type": "string"}}, }, @@ -342,6 +343,7 @@ def test_serde_in_pipeline(self, mock_check_valid_model): "inputs_from_state": None, "name": "name", "outputs_to_state": None, + "system_prompt": None, "outputs_to_string": None, "description": "description", "parameters": {"x": {"type": "string"}}, @@ -1215,6 +1217,7 @@ def test_to_dict_with_toolset(self, mock_check_valid_model, tools): "outputs_to_string": None, "inputs_from_state": None, "outputs_to_state": None, + "system_prompt": None, }, } ] diff --git a/test/components/generators/chat/test_hugging_face_local.py b/test/components/generators/chat/test_hugging_face_local.py index 06069999b5..dbf1e32d21 100644 --- a/test/components/generators/chat/test_hugging_face_local.py +++ b/test/components/generators/chat/test_hugging_face_local.py @@ -206,6 +206,7 @@ def test_to_dict(self, model_info_mock, tools): "inputs_from_state": None, "name": "weather", "outputs_to_state": None, + "system_prompt": None, "outputs_to_string": None, "description": "useful to determine the weather in a given location", "parameters": {"type": "object", "properties": {"city": {"type": "string"}}, "required": ["city"]}, @@ -786,6 +787,7 @@ def test_to_dict_with_toolset(self, model_info_mock, mock_pipeline_with_tokenize "outputs_to_string": None, "inputs_from_state": None, "outputs_to_state": None, + "system_prompt": None, }, } ] diff --git a/test/components/generators/chat/test_openai.py b/test/components/generators/chat/test_openai.py index 9a34f2ca46..b27f4e0da8 100644 --- a/test/components/generators/chat/test_openai.py +++ b/test/components/generators/chat/test_openai.py @@ -346,6 +346,7 @@ def test_to_dict_with_parameters(self, monkeypatch, calendar_event_model): "inputs_from_state": None, "name": "name", "outputs_to_state": None, + "system_prompt": None, "outputs_to_string": None, "parameters": {"x": {"type": "string"}}, }, diff --git a/test/components/generators/chat/test_openai_responses.py b/test/components/generators/chat/test_openai_responses.py index e9a2ec2b9a..79857c5794 100644 --- a/test/components/generators/chat/test_openai_responses.py +++ b/test/components/generators/chat/test_openai_responses.py @@ -260,6 +260,7 @@ def test_to_dict_with_parameters(self, monkeypatch, calendar_event_model): "inputs_from_state": None, "name": "name", "outputs_to_state": None, + "system_prompt": None, "outputs_to_string": None, "parameters": {"x": {"type": "string"}}, }, diff --git a/test/tools/test_tool.py b/test/tools/test_tool.py index 3a8502aff1..a0554759d8 100644 --- a/test/tools/test_tool.py +++ b/test/tools/test_tool.py @@ -161,6 +161,7 @@ def test_to_dict(self): "outputs_to_string": {"handler": "test_tool.format_string"}, "inputs_from_state": {"location": "city"}, "outputs_to_state": {"documents": {"source": "docs", "handler": "test_tool.get_weather_report"}}, + "system_prompt": None, }, } @@ -188,6 +189,23 @@ def test_from_dict(self): assert tool.inputs_from_state == {"location": "city"} assert tool.outputs_to_state == {"documents": {"source": "docs", "handler": get_weather_report}} + def test_system_prompt_contribution(self): + tool = Tool( + name="weather", + description="Get weather report", + parameters=parameters, + function=get_weather_report, + system_prompt="Always call weather before answering about the weather.", + ) + assert tool.system_prompt_contribution() == "Always call weather before answering about the weather." + + def test_system_prompt_contribution_defaults_to_none(self): + tool = Tool( + name="weather", description="Get weather report", parameters=parameters, function=get_weather_report + ) + assert tool.system_prompt is None + assert tool.system_prompt_contribution() is None + def test_serialize_outputs_to_string(self): config = {"handler": format_string, "source": "result", "raw_result": False} serialized = _serialize_outputs_to_string(config) diff --git a/test/tools/test_toolset.py b/test/tools/test_toolset.py index 214bceceb3..336d0a2fc0 100644 --- a/test/tools/test_toolset.py +++ b/test/tools/test_toolset.py @@ -157,6 +157,11 @@ def faulty_tool_func(location): class TestToolset: + def test_system_prompt_contribution_defaults_to_none(self): + """A plain Toolset contributes nothing to the system prompt by default.""" + toolset = Toolset([]) + assert toolset.system_prompt_contribution() is None + def test_toolset_with_multiple_tools(self): """Test that a Toolset with multiple tools works properly.""" add_tool = Tool( From b181681dbee75aa89e54406b2042350fdfcdd1ae Mon Sep 17 00:00:00 2001 From: Sebastian Husch Lee Date: Fri, 29 May 2026 13:16:32 +0200 Subject: [PATCH 2/8] clean up and make the plan file a bit more concise --- agent_skills_plan.md | 76 ++++++++++++++------------------------------ 1 file changed, 23 insertions(+), 53 deletions(-) diff --git a/agent_skills_plan.md b/agent_skills_plan.md index 27af34ce7f..7d7abaf852 100644 --- a/agent_skills_plan.md +++ b/agent_skills_plan.md @@ -29,30 +29,25 @@ Level 1 lives in the system context (not a tool). Levels 2/3 are pulled in on de - **Two tools**, both pure/stateless: `load_skill`, `read_skill_file`. - **No script execution** in v1 (no Level-3 execution tool). -- **No search-based discovery** in v1 — Level-1 metadata is injected into the system prompt. -- **No `state_schema` additions.** Progressive disclosure works through message history: - a `load_skill` result is a tool-result message that persists in `state["messages"]`, so - the agent already "remembers" what it loaded. No `inputs_from_state` / `outputs_to_state`. - (`tool_call_counts`, already tracked internally, covers observability of how often - `load_skill` fired.) - **Filesystem source only.** -- **Location:** `haystack/tools/skills/`. -- **System-prompt injection: Option C** — a generic contribution hook on `Tool`/`Toolset`, - consumed automatically by the `Agent`. Chosen over a manual concatenation helper - (Option A) because A complicates Agent serialization and can't compose multiple augments. +- **System-prompt injection:** a generic contribution hook on `Tool`/`Toolset`, + consumed automatically by the `Agent`. + - An alternative to this approach is to add another tool that lists available skills and their descriptions and + force it to be called using the soon to be added condition param. ## How MCP does the equivalent (reference) -MCP servers return an optional top-level `instructions` string in their `initialize` -response. The spec frames it as a hint clients MAY add to the system prompt to explain -the server's tools as a whole. It is **server-level, not per-tool** — individual tools -only carry a `description`. So the container supplies the system-prompt instructions; the -tools just describe themselves. This validates the "top-level wins, skip member tools" rule. +MCP servers return an optional top-level `instructions` string in their `initialize` response. +The spec frames it as a hint clients MAY add to the system prompt to explain the server's tools as a whole. +Advantage of adding the **System-prompt injection** feature is that we could extend MCPToolset to inject these +top-level instructions. -## Component 1 — `system_prompt_contribution()` hook (generic, reusable) +## Part 1 — `system_prompt_contribution()` hook (generic, reusable) Add an optional method to both base classes, default `None`: +TODO: Add support for ComponentTool + PipelineTool + ```python # haystack/tools/tool.py (on Tool) # haystack/tools/toolset.py (on Toolset) @@ -61,17 +56,14 @@ def system_prompt_contribution(self) -> str | None: return None ``` -- `Tool` also gains an optional `system_prompt: str | None = None` dataclass field so a - plain `Tool` can carry instructions without subclassing; `system_prompt_contribution()` - returns it by default (subclasses may override for dynamism). The field round-trips through - `to_dict`/`from_dict` and is always present in the serialized dict (existing tool - serialization snapshots were updated to include `system_prompt: null`). +- `Tool` also gains an optional `system_prompt: str | None = None` dataclass field so a plain `Tool` can carry + instructions without subclassing. `system_prompt_contribution()` returns it by default (subclasses may override for + dynamism). - `Toolset` subclasses (like `SkillToolset`) override the method. -### Agent consumption — `_initialize_fresh_execution` (`agent.py:~654`) +### Agent consumption — `_initialize_fresh_execution` -Right after `selected_tools = self._select_tools(tools)`, collect contributions and merge -them into the system message: +Right after `selected_tools = self._select_tools(tools)`, collect contributions and merge them into the system message: ```python selected_tools = self._select_tools(tools) @@ -80,10 +72,9 @@ if contributions: messages = _merge_system_prompt_contributions(messages, contributions) ``` -Collection rules (a free function, unit-testable): +Collection rules: -- Recurse through `list`, `_ToolsetWrapper` (descend into `.toolsets` — this is the - `toolset_a + toolset_b` compose path), `Toolset`, `Tool`. +- Go through `list`, `_ToolsetWrapper` (descend into `.toolsets`), `Toolset`, `Tool`. - **Top-level wins:** if a `Toolset` returns a contribution, use it and DO NOT descend into its member tools. Only if it returns `None` do we gather member tools' contributions. - Bare `Tool` passed directly contributes its own. @@ -91,21 +82,13 @@ Collection rules (a free function, unit-testable): Merge rules: - Join contributions with `\n\n`. -- If `messages[0]` is a system message (the rendered `system_prompt`, or a user-supplied - system message), append the contribution text to it. +- If `messages[0]` is a system message (the rendered `system_prompt`, or a user-supplied system message), append + the contribution text to it. - Otherwise prepend a new system message built from the contributions. -- Collect from `selected_tools` (respects per-run `run(tools=...)` filtering) and inject - **after** Jinja rendering — never into the template string — so skill text containing - `{{`/`{%` can't break `ChatPromptBuilder`. - -### Why this keeps Agent serialization clean - -Nothing about the augmentation is stored on the `Agent`. The system-prompt text is -regenerated from the toolset on every run, so `Agent.to_dict()` only serializes the -toolset (which serializes its `skills_dir`). Multiple augmenting toolsets compose via -`_ToolsetWrapper` and each contributes independently. +- Collect from `selected_tools` and inject **after** Jinja rendering — never into the template string — so skill text + containing `{{`/`{%` can't break `ChatPromptBuilder`. -## Component 2 — `SkillToolset(Toolset)` +## Part 2 — `SkillToolset(Toolset)` `haystack/tools/skills/skill_toolset.py` @@ -168,20 +151,7 @@ when relevant; if a skill references a file, fetch it with `read_skill_file`. If matches, proceed normally. ``` -## Files touched - -- `haystack/tools/tool.py` — add `system_prompt` field + `system_prompt_contribution()`; serde. -- `haystack/tools/toolset.py` — add `system_prompt_contribution()` (default None). -- `haystack/components/agents/agent.py` — collect + merge contributions in - `_initialize_fresh_execution` (+ two free helpers). -- `haystack/tools/skills/__init__.py`, `haystack/tools/skills/skill_toolset.py` — new. -- `haystack/tools/__init__.py` — export `SkillToolset`. -- Tests: `test/tools/skills/`, plus `system_prompt_contribution` cases in tool/toolset/agent tests. -- Reno release note. - ## Out of scope (future) - Script execution (`run_skill_script`) with confirmation-strategy gating. - Search-based discovery (`discovery="search"`) for very large skill libraries. -- In-memory / non-filesystem skill sources. -- Optional `active_skills` observability via `outputs_to_state`. From 1e4a5e0fcb8d2b0a5b93dcd3bf6e42cea343ddf6 Mon Sep 17 00:00:00 2001 From: Sebastian Husch Lee Date: Fri, 29 May 2026 13:20:38 +0200 Subject: [PATCH 3/8] more cleanup --- agent_skills_plan.md | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/agent_skills_plan.md b/agent_skills_plan.md index 7d7abaf852..e87fafa847 100644 --- a/agent_skills_plan.md +++ b/agent_skills_plan.md @@ -1,10 +1,10 @@ # Plan: Skills support for the Haystack Agent -Goal: let the Haystack `Agent` discover and read filesystem **Skills** the way Claude -Code / Codex do — progressive disclosure of expert instructions — using a small set of -pre-built tools plus a generic mechanism for tools to contribute system-prompt text. +Goal: let the Haystack `Agent` discover and read filesystem **Skills** the way Claude Code / Codex do — progressive +disclosure of expert instructions — using a small set of pre-built tools plus a generic mechanism for tools to +contribute system-prompt text. -## Background — what we're replicating +## Background Claude Code / Codex Skills use **progressive disclosure**: @@ -39,14 +39,15 @@ Level 1 lives in the system context (not a tool). Levels 2/3 are pulled in on de MCP servers return an optional top-level `instructions` string in their `initialize` response. The spec frames it as a hint clients MAY add to the system prompt to explain the server's tools as a whole. -Advantage of adding the **System-prompt injection** feature is that we could extend MCPToolset to inject these + +**NOTE:** Advantage of adding the **System-prompt injection** feature is that we could extend MCPToolset to inject these top-level instructions. ## Part 1 — `system_prompt_contribution()` hook (generic, reusable) Add an optional method to both base classes, default `None`: -TODO: Add support for ComponentTool + PipelineTool +**TODO:** Add support for ComponentTool + PipelineTool ```python # haystack/tools/tool.py (on Tool) @@ -96,8 +97,8 @@ Merge rules: @dataclass class SkillMeta: name: str # from frontmatter; falls back to directory name - description: str # from frontmatter - path: Path # the skill directory + description: str # from frontmatter + path: Path # the skill directory class SkillToolset(Toolset): def __init__(self, skills_dir: str | Path) -> None: @@ -106,12 +107,11 @@ class SkillToolset(Toolset): super().__init__(tools=[self._load_skill_tool(), self._read_skill_file_tool()]) ``` -- `_scan()` walks `skills_dir/*/SKILL.md`, parses YAML frontmatter (pyyaml — already a - dep), validates `name`/`description`, checks name uniqueness. Bodies are NOT read here. +- `_scan()` walks `skills_dir/*/SKILL.md`, parses YAML frontmatter, validates `name`/`description`, checks name + uniqueness. Bodies are NOT read here. - `system_prompt_contribution()` renders the Level-1 catalog + behavioral rules (below). -- `warm_up()` revalidates (idempotent). -- `to_dict()`/`from_dict()` serialize `skills_dir` only and rescan on load (mirrors - `SearchableToolset`). `add`/`__add__` of new ad-hoc tools left as default Toolset behavior. +- `warm_up()` revalidates. +- `to_dict()`/`from_dict()` serialize `skills_dir` only and rescan on load. ### Tool: `load_skill` (Level 2) @@ -120,8 +120,8 @@ def load_skill(name: Annotated[str, "Exact skill name from the Available Skills """Load a skill's full instructions. Call this before doing a task the skill covers.""" ``` -Returns the `SKILL.md` body plus a manifest of bundled files (so the model knows what -`read_skill_file` can fetch). Unknown name → friendly error listing available skills. +Returns the `SKILL.md` body plus a manifest of bundled files (so the model knows what `read_skill_file` can fetch). +Unknown name → friendly error listing available skills. ### Tool: `read_skill_file` (Level 3) @@ -133,8 +133,8 @@ def read_skill_file( """Read a file bundled with a skill (reference docs, examples, templates).""" ``` -Path-traversal guard: `(skill_dir / path).resolve()` must stay within `skill_dir.resolve()`, -else error. Missing file → friendly error. +Path-traversal guard: `(skill_dir / path).resolve()` must stay within `skill_dir.resolve()`, else error. +Missing file → friendly error. ### System-prompt contribution text From b1bdbb9fb7c03184583b061f9e8cda397918ffd8 Mon Sep 17 00:00:00 2001 From: Sebastian Husch Lee Date: Fri, 29 May 2026 13:23:32 +0200 Subject: [PATCH 4/8] add some references --- agent_skills_plan.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/agent_skills_plan.md b/agent_skills_plan.md index e87fafa847..8ead19d594 100644 --- a/agent_skills_plan.md +++ b/agent_skills_plan.md @@ -25,6 +25,13 @@ skills/ Level 1 lives in the system context (not a tool). Levels 2/3 are pulled in on demand. +References: +- Anthropic, "Equipping agents for the real world with Agent Skills" — the three-stage progressive + disclosure model (discovery → activation → execution): + https://www.anthropic.com/engineering/equipping-agents-for-the-real-world-with-agent-skills +- Anthropic Agent Skills docs (overview + `SKILL.md` structure): + https://platform.claude.com/docs/en/agents-and-tools/agent-skills/overview + ## Decisions (locked) - **Two tools**, both pure/stateless: `load_skill`, `read_skill_file`. @@ -40,6 +47,13 @@ Level 1 lives in the system context (not a tool). Levels 2/3 are pulled in on de MCP servers return an optional top-level `instructions` string in their `initialize` response. The spec frames it as a hint clients MAY add to the system prompt to explain the server's tools as a whole. +References: +- MCP lifecycle / `initialize` (the `InitializeResult.instructions` field): + https://modelcontextprotocol.io/specification/2025-06-18/basic/lifecycle +- MCP schema (`instructions` docstring: "can be used by clients to improve the LLM's understanding of + available tools ... MAY be added to the system prompt"): + https://github.com/modelcontextprotocol/modelcontextprotocol/blob/main/schema/2025-06-18/schema.ts + **NOTE:** Advantage of adding the **System-prompt injection** feature is that we could extend MCPToolset to inject these top-level instructions. From 593b184ca73ec2b25fc12b77cca1c778e86203e4 Mon Sep 17 00:00:00 2001 From: Sebastian Husch Lee Date: Mon, 1 Jun 2026 13:28:27 +0200 Subject: [PATCH 5/8] Add mising skill tool files --- haystack/tools/skills/__init__.py | 7 + haystack/tools/skills/skill_toolset.py | 230 ++++++++++++++++++++++++ test/tools/skills/test_skill_toolset.py | 119 ++++++++++++ 3 files changed, 356 insertions(+) create mode 100644 haystack/tools/skills/__init__.py create mode 100644 haystack/tools/skills/skill_toolset.py create mode 100644 test/tools/skills/test_skill_toolset.py diff --git a/haystack/tools/skills/__init__.py b/haystack/tools/skills/__init__.py new file mode 100644 index 0000000000..a63816d545 --- /dev/null +++ b/haystack/tools/skills/__init__.py @@ -0,0 +1,7 @@ +# SPDX-FileCopyrightText: 2022-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + +from haystack.tools.skills.skill_toolset import SkillMeta, SkillToolset + +__all__ = ["SkillMeta", "SkillToolset"] diff --git a/haystack/tools/skills/skill_toolset.py b/haystack/tools/skills/skill_toolset.py new file mode 100644 index 0000000000..cdf3699cde --- /dev/null +++ b/haystack/tools/skills/skill_toolset.py @@ -0,0 +1,230 @@ +# SPDX-FileCopyrightText: 2022-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + +from dataclasses import dataclass +from pathlib import Path +from typing import Annotated, Any + +import yaml + +from haystack.core.serialization import generate_qualified_class_name +from haystack.tools.from_function import create_tool_from_function +from haystack.tools.tool import Tool +from haystack.tools.toolset import Toolset + +SKILL_FILE_NAME = "SKILL.md" + + +@dataclass +class SkillMeta: + """ + Metadata describing a single skill discovered on disk. + + :param name: The skill's name, used by the agent to load it. + :param description: A short description of when to use the skill. Shown to the agent up front. + :param path: The skill's directory. + """ + + name: str + description: str + path: Path + + +def _parse_frontmatter(text: str) -> tuple[dict[str, Any], str]: + """ + Split a `SKILL.md` file into its YAML frontmatter and markdown body. + + The frontmatter is the YAML block delimited by leading and trailing `---` lines. If no frontmatter is + present, an empty mapping and the original text are returned. + + :param text: The full contents of a `SKILL.md` file. + :returns: A tuple of (frontmatter mapping, body). + :raises ValueError: If the frontmatter is present but is not a valid YAML mapping. + """ + stripped = text.lstrip() + if not stripped.startswith("---"): + return {}, text + + # Drop the leading '---' line, then split on the closing '---'. + after_open = stripped[len("---") :].lstrip("\n") + parts = after_open.split("\n---", 1) + if len(parts) != 2: + return {}, text + + frontmatter_block, body = parts + loaded = yaml.safe_load(frontmatter_block) or {} + if not isinstance(loaded, dict): + raise ValueError("Skill frontmatter must be a YAML mapping.") # noqa: TRY004 + return loaded, body.lstrip("\n") + + +class SkillToolset(Toolset): + """ + A Toolset that lets an Agent discover and read filesystem "skills" via progressive disclosure. + + A skill is a directory containing a `SKILL.md` file with YAML frontmatter (`name` and `description`) and a + markdown body of instructions. Skills may bundle additional files (reference docs, examples, templates). + This mirrors how Claude Code and Codex expose skills: + + - The name and description of every skill are injected into the Agent's system prompt + (via `system_prompt_contribution`) so the model knows which skills exist. + - `load_skill` returns a skill's full instructions on demand, plus a manifest of its bundled files. + - `read_skill_file` reads a bundled file on demand. + + Expected layout: + + ``` + skills/ + pdf-forms/ + SKILL.md # frontmatter (name, description) + markdown instructions + reference/forms.md + ``` + + ### Usage example + + ```python + from haystack.components.agents import Agent + from haystack.components.generators.chat import OpenAIChatGenerator + from haystack.dataclasses import ChatMessage + from haystack.tools import SkillToolset + + skills = SkillToolset("skills/") + agent = Agent(chat_generator=OpenAIChatGenerator(), tools=skills) + # The skills catalog is appended to the system prompt automatically. + result = agent.run(messages=[ChatMessage.from_user("Fill in this PDF form for me.")]) + ``` + """ + + def __init__(self, skills_dir: str | Path) -> None: + """ + Initialize the SkillToolset by scanning a directory for skills. + + Only the frontmatter of each `SKILL.md` is read at construction time (cheap); bodies and bundled files + are read lazily when the agent calls `load_skill` / `read_skill_file`. + + :param skills_dir: Directory containing one subdirectory per skill, each with a `SKILL.md`. + :raises ValueError: If `skills_dir` does not exist, is not a directory, a skill is missing a required + frontmatter field, or two skills share the same name. + """ + self.skills_dir = Path(skills_dir) + self._skills: dict[str, SkillMeta] = self._scan() + super().__init__(tools=[self._create_load_skill_tool(), self._create_read_skill_file_tool()]) + + @property + def skills(self) -> dict[str, SkillMeta]: + """Mapping of skill name to its metadata.""" + return self._skills + + def _scan(self) -> dict[str, SkillMeta]: + """ + Scan `skills_dir` for skills, reading only the frontmatter of each `SKILL.md`. + + :returns: Mapping of skill name to metadata. + :raises ValueError: On a missing directory, missing required frontmatter, or duplicate skill names. + """ + if not self.skills_dir.is_dir(): + raise ValueError(f"Skills directory '{self.skills_dir}' does not exist or is not a directory.") + + skills: dict[str, SkillMeta] = {} + for skill_file in sorted(self.skills_dir.glob(f"*/{SKILL_FILE_NAME}")): + skill_dir = skill_file.parent + frontmatter, _ = _parse_frontmatter(skill_file.read_text(encoding="utf-8")) + + name = frontmatter.get("name", skill_dir.name) + description = frontmatter.get("description") + if not description: + raise ValueError(f"Skill '{name}' ({skill_file}) is missing a 'description' in its frontmatter.") + if name in skills: + raise ValueError(f"Duplicate skill name '{name}' found in '{self.skills_dir}'.") + + skills[name] = SkillMeta(name=name, description=description, path=skill_dir) + return skills + + def system_prompt_contribution(self) -> str | None: + """ + Render the skills catalog and usage instructions for injection into the Agent's system prompt. + + :returns: The catalog text, or `None` if no skills were found. + """ + if not self._skills: + return None + + lines = [ + "## Available Skills", + "Specialized instruction sets for specific task types. Load one before doing matching work.", + "", + ] + lines += [f"- **{meta.name}**: {meta.description}" for meta in self._skills.values()] + lines += [ + "", + "When a task matches a skill, call `load_skill` with its name BEFORE starting, then follow the loaded " + "instructions exactly (they override your general approach). Load skills only when relevant; if a skill " + "references a file, fetch it with `read_skill_file`. If no skill matches, proceed normally.", + ] + return "\n".join(lines) + + def _create_load_skill_tool(self) -> Tool: + """Create the `load_skill` tool, closed over this toolset's skill registry.""" + + def load_skill(name: Annotated[str, "Exact name of the skill to load, from the Available Skills list."]) -> str: + """Load a skill's full instructions. Call this before doing a task the skill covers.""" + meta = self._skills.get(name) + if meta is None: + available = ", ".join(self._skills) or "none" + return f"Unknown skill '{name}'. Available skills: {available}." + + _, body = _parse_frontmatter((meta.path / SKILL_FILE_NAME).read_text(encoding="utf-8")) + + bundled = sorted( + str(p.relative_to(meta.path)) for p in meta.path.rglob("*") if p.is_file() and p.name != SKILL_FILE_NAME + ) + if bundled: + manifest = "\n".join(f"- {path}" for path in bundled) + body = f"{body}\n\n---\nBundled files (read with `read_skill_file`):\n{manifest}" + return body + + return create_tool_from_function(function=load_skill, name="load_skill") + + def _create_read_skill_file_tool(self) -> Tool: + """Create the `read_skill_file` tool, closed over this toolset's skill registry.""" + + def read_skill_file( + name: Annotated[str, "Name of the skill that owns the file."], + path: Annotated[str, "Path of the file relative to the skill directory, e.g. 'reference/forms.md'."], + ) -> str: + """Read a file bundled with a skill (reference docs, examples, templates).""" + meta = self._skills.get(name) + if meta is None: + available = ", ".join(self._skills) or "none" + return f"Unknown skill '{name}'. Available skills: {available}." + + skill_dir = meta.path.resolve() + target = (skill_dir / path).resolve() + if skill_dir != target and skill_dir not in target.parents: + return f"Refusing to read '{path}': path escapes the '{name}' skill directory." + if not target.is_file(): + return f"File '{path}' not found in skill '{name}'." + return target.read_text(encoding="utf-8") + + return create_tool_from_function(function=read_skill_file, name="read_skill_file") + + def to_dict(self) -> dict[str, Any]: + """ + Serialize the toolset to a dictionary. + + Only the skills directory is serialized; tools are rebuilt by rescanning on deserialization. + + :returns: Dictionary representation of the toolset. + """ + return {"type": generate_qualified_class_name(type(self)), "data": {"skills_dir": str(self.skills_dir)}} + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "SkillToolset": + """ + Deserialize a toolset from a dictionary. + + :param data: Dictionary representation of the toolset. + :returns: A new SkillToolset instance. + """ + return cls(skills_dir=data["data"]["skills_dir"]) diff --git a/test/tools/skills/test_skill_toolset.py b/test/tools/skills/test_skill_toolset.py new file mode 100644 index 0000000000..093e86ec04 --- /dev/null +++ b/test/tools/skills/test_skill_toolset.py @@ -0,0 +1,119 @@ +# SPDX-FileCopyrightText: 2022-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from haystack.tools import SkillToolset +from haystack.tools.skills.skill_toolset import _parse_frontmatter + + +def _write_skill(skills_dir, name, description=None, body="Instructions.", files=None): + skill_dir = skills_dir / name + skill_dir.mkdir(parents=True) + frontmatter = f"---\nname: {name}\n" + if description is not None: + frontmatter += f"description: {description}\n" + frontmatter += "---\n" + (skill_dir / "SKILL.md").write_text(frontmatter + body, encoding="utf-8") + for rel_path, content in (files or {}).items(): + target = skill_dir / rel_path + target.parent.mkdir(parents=True, exist_ok=True) + target.write_text(content, encoding="utf-8") + return skill_dir + + +class TestParseFrontmatter: + def test_parses_frontmatter_and_body(self): + frontmatter, body = _parse_frontmatter("---\nname: a\ndescription: d\n---\nThe body.") + assert frontmatter == {"name": "a", "description": "d"} + assert body == "The body." + + def test_no_frontmatter_returns_empty_mapping(self): + frontmatter, body = _parse_frontmatter("Just a body, no frontmatter.") + assert frontmatter == {} + assert body == "Just a body, no frontmatter." + + def test_non_mapping_frontmatter_raises(self): + with pytest.raises(ValueError): + _parse_frontmatter("---\n- just\n- a\n- list\n---\nbody") + + +class TestSkillToolset: + def test_scans_skills(self, tmp_path): + _write_skill(tmp_path, "pdf-forms", description="Use to fill PDF forms.") + _write_skill(tmp_path, "excel", description="Use to edit spreadsheets.") + + toolset = SkillToolset(tmp_path) + + assert set(toolset.skills) == {"pdf-forms", "excel"} + assert toolset.skills["pdf-forms"].description == "Use to fill PDF forms." + assert {t.name for t in toolset} == {"load_skill", "read_skill_file"} + + def test_missing_directory_raises(self, tmp_path): + with pytest.raises(ValueError, match="does not exist"): + SkillToolset(tmp_path / "nope") + + def test_missing_description_raises(self, tmp_path): + _write_skill(tmp_path, "broken", description=None) + with pytest.raises(ValueError, match="missing a 'description'"): + SkillToolset(tmp_path) + + def test_system_prompt_contribution_lists_skills(self, tmp_path): + _write_skill(tmp_path, "pdf-forms", description="Use to fill PDF forms.") + contribution = SkillToolset(tmp_path).system_prompt_contribution() + assert "## Available Skills" in contribution + assert "**pdf-forms**: Use to fill PDF forms." in contribution + assert "load_skill" in contribution and "read_skill_file" in contribution + + def test_system_prompt_contribution_none_when_empty(self, tmp_path): + assert SkillToolset(tmp_path).system_prompt_contribution() is None + + def test_load_skill_returns_body_and_manifest(self, tmp_path): + _write_skill( + tmp_path, + "pdf-forms", + description="Use to fill PDF forms.", + body="Step 1. Do the thing.", + files={"reference/forms.md": "details"}, + ) + load_skill = next(t for t in SkillToolset(tmp_path) if t.name == "load_skill") + result = load_skill.invoke(name="pdf-forms") + assert "Step 1. Do the thing." in result + assert "reference/forms.md" in result + + def test_load_skill_unknown(self, tmp_path): + _write_skill(tmp_path, "pdf-forms", description="Use to fill PDF forms.") + load_skill = next(t for t in SkillToolset(tmp_path) if t.name == "load_skill") + assert "Unknown skill 'nope'" in load_skill.invoke(name="nope") + + def test_read_skill_file(self, tmp_path): + _write_skill(tmp_path, "pdf-forms", description="d", files={"reference/forms.md": "form details"}) + read = next(t for t in SkillToolset(tmp_path) if t.name == "read_skill_file") + assert read.invoke(name="pdf-forms", path="reference/forms.md") == "form details" + + def test_read_skill_file_blocks_traversal(self, tmp_path): + _write_skill(tmp_path, "pdf-forms", description="d") + (tmp_path / "secret.txt").write_text("top secret") + read = next(t for t in SkillToolset(tmp_path) if t.name == "read_skill_file") + result = read.invoke(name="pdf-forms", path="../secret.txt") + assert "escapes" in result + assert "top secret" not in result + + def test_read_skill_file_missing(self, tmp_path): + _write_skill(tmp_path, "pdf-forms", description="d") + read = next(t for t in SkillToolset(tmp_path) if t.name == "read_skill_file") + assert "not found" in read.invoke(name="pdf-forms", path="nope.md") + + def test_to_dict_and_from_dict(self, tmp_path): + _write_skill(tmp_path, "pdf-forms", description="Use to fill PDF forms.") + toolset = SkillToolset(tmp_path) + + data = toolset.to_dict() + assert data == { + "type": "haystack.tools.skills.skill_toolset.SkillToolset", + "data": {"skills_dir": str(tmp_path)}, + } + + restored = SkillToolset.from_dict(data) + assert set(restored.skills) == {"pdf-forms"} From 1b8f989365afa56a7b9050310ae9a9dbc4883483 Mon Sep 17 00:00:00 2001 From: Sebastian Husch Lee Date: Mon, 1 Jun 2026 14:08:30 +0200 Subject: [PATCH 6/8] change name of new param, fix windows issue, other small changes --- haystack/tools/skills/skill_toolset.py | 4 +++- haystack/tools/tool.py | 10 +++++----- haystack/tools/toolset.py | 4 ++-- .../add-skill-toolset-7f3c1a9e2b4d6c8a.yaml | 9 +++++---- test/components/agents/test_agent.py | 18 +++++++++--------- test/components/agents/test_agent_hitl.py | 2 +- test/components/generators/chat/test_azure.py | 2 +- .../generators/chat/test_azure_responses.py | 2 +- .../generators/chat/test_hugging_face_api.py | 6 +++--- .../generators/chat/test_hugging_face_local.py | 4 ++-- test/components/generators/chat/test_openai.py | 2 +- .../generators/chat/test_openai_responses.py | 2 +- test/tools/test_tool.py | 6 +++--- 13 files changed, 37 insertions(+), 34 deletions(-) diff --git a/haystack/tools/skills/skill_toolset.py b/haystack/tools/skills/skill_toolset.py index cdf3699cde..ee17a2d8bb 100644 --- a/haystack/tools/skills/skill_toolset.py +++ b/haystack/tools/skills/skill_toolset.py @@ -177,7 +177,9 @@ def load_skill(name: Annotated[str, "Exact name of the skill to load, from the A _, body = _parse_frontmatter((meta.path / SKILL_FILE_NAME).read_text(encoding="utf-8")) bundled = sorted( - str(p.relative_to(meta.path)) for p in meta.path.rglob("*") if p.is_file() and p.name != SKILL_FILE_NAME + p.relative_to(meta.path).as_posix() + for p in meta.path.rglob("*") + if p.is_file() and p.name != SKILL_FILE_NAME ) if bundled: manifest = "\n".join(f"- {path}" for path in bundled) diff --git a/haystack/tools/tool.py b/haystack/tools/tool.py index db9df347b1..be77435b40 100644 --- a/haystack/tools/tool.py +++ b/haystack/tools/tool.py @@ -86,7 +86,7 @@ class Tool: "documents": {"handler": custom_handler} } ``` - :param system_prompt: + :param system_prompt_instructions: Optional system prompt instructions associated with this Tool. When the Tool is used with an `Agent`, this text is appended to the Agent's system prompt (see `system_prompt_contribution`). Use it to tell the model how and when to use the Tool. Note that an enclosing `Toolset` that provides its own @@ -104,7 +104,7 @@ class Tool: outputs_to_string: dict[str, Any] | None = None inputs_from_state: dict[str, str] | None = None outputs_to_state: dict[str, dict[str, Any]] | None = None - system_prompt: str | None = None + system_prompt_instructions: str | None = None def __post_init__(self) -> None: # noqa: C901, PLR0912 # Check that the function is not a coroutine (async function) @@ -269,12 +269,12 @@ def system_prompt_contribution(self) -> str | None: Return optional system prompt instructions for this Tool. When the Tool is used with an `Agent`, the returned text is appended to the Agent's system prompt. - By default this returns the `system_prompt` attribute (which may be `None`). Subclasses can override - this to generate instructions dynamically. + By default this returns the `system_prompt_instructions` attribute (which may be `None`). Subclasses + can override this to generate instructions dynamically. :returns: The system prompt contribution, or `None` if the Tool has nothing to contribute. """ - return getattr(self, "system_prompt", None) + return self.system_prompt_instructions def invoke(self, **kwargs: Any) -> Any: """ diff --git a/haystack/tools/toolset.py b/haystack/tools/toolset.py index 1d7beed1a3..f194e2d429 100644 --- a/haystack/tools/toolset.py +++ b/haystack/tools/toolset.py @@ -197,8 +197,8 @@ def system_prompt_contribution(self) -> str | None: Toolset's tools as a whole. The default implementation returns `None`; subclasses (for example `SkillToolset`) override it to generate instructions dynamically. - When this returns a non-`None` value, it takes precedence over the `system_prompt` of the Toolset's - individual member tools, which are then not contributed separately. + When this returns a non-`None` value, it takes precedence over the `system_prompt_instructions` of the + Toolset's individual member tools, which are then not contributed separately. :returns: The system prompt contribution, or `None` if the Toolset has nothing to contribute. """ diff --git a/releasenotes/notes/add-skill-toolset-7f3c1a9e2b4d6c8a.yaml b/releasenotes/notes/add-skill-toolset-7f3c1a9e2b4d6c8a.yaml index 7b1e8a57a8..818f2ba631 100644 --- a/releasenotes/notes/add-skill-toolset-7f3c1a9e2b4d6c8a.yaml +++ b/releasenotes/notes/add-skill-toolset-7f3c1a9e2b4d6c8a.yaml @@ -21,7 +21,8 @@ features: - | ``Tool`` and ``Toolset`` now support a ``system_prompt_contribution()`` method. When a tool or toolset is used with an ``Agent``, the text it returns is appended to the Agent's system prompt. ``Tool`` gains - an optional ``system_prompt`` field for this; ``Toolset`` subclasses can override the method to generate - instructions dynamically. When a ``Toolset`` provides a contribution, it takes precedence over the - ``system_prompt`` of its individual member tools, mirroring how an MCP server's ``instructions`` describe - its tools as a whole. + an optional ``system_prompt_instructions`` field for this; ``Toolset`` subclasses can override the method + to generate instructions dynamically. When a ``Toolset`` provides a contribution, it takes precedence over + the ``system_prompt_instructions`` of its individual member tools, mirroring how an MCP server's + ``instructions`` describe its tools as a whole. Note that a member tool's own ``system_prompt_instructions`` + are therefore suppressed whenever its enclosing ``Toolset`` contributes. diff --git a/test/components/agents/test_agent.py b/test/components/agents/test_agent.py index 5e44f6e0c6..c419c02f56 100644 --- a/test/components/agents/test_agent.py +++ b/test/components/agents/test_agent.py @@ -263,7 +263,7 @@ def test_to_dict(self, weather_tool, component_tool, monkeypatch): "outputs_to_string": None, "inputs_from_state": None, "outputs_to_state": None, - "system_prompt": None, + "system_prompt_instructions": None, }, }, { @@ -347,7 +347,7 @@ def test_to_dict_with_toolset(self, monkeypatch, weather_tool): "outputs_to_string": None, "inputs_from_state": None, "outputs_to_state": None, - "system_prompt": None, + "system_prompt_instructions": None, }, } ] @@ -418,7 +418,7 @@ def test_from_dict(self, monkeypatch): "outputs_to_string": None, "inputs_from_state": None, "outputs_to_state": None, - "system_prompt": None, + "system_prompt_instructions": None, }, }, { @@ -509,7 +509,7 @@ def test_from_dict_with_toolset(self, monkeypatch): "outputs_to_string": None, "inputs_from_state": None, "outputs_to_state": None, - "system_prompt": None, + "system_prompt_instructions": None, }, } ] @@ -1272,7 +1272,7 @@ def test_agent_tracing_span_run(self, caplog, monkeypatch, weather_tool): assert set(llm_tags) == {"haystack.agent.step.llm.input", "haystack.agent.step.llm.output"} assert ( llm_tags["haystack.agent.step.llm.input"] - == '{"messages": [{"role": "user", "meta": {}, "name": null, "content": [{"text": "What\'s the weather in Paris?"}]}], "tools": [{"type": "haystack.tools.tool.Tool", "data": {"name": "weather_tool", "description": "Provides weather information for a given location.", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}, "function": "test_agent.weather_function", "outputs_to_string": null, "inputs_from_state": null, "outputs_to_state": null, "system_prompt": null}}]}' # noqa: E501 + == '{"messages": [{"role": "user", "meta": {}, "name": null, "content": [{"text": "What\'s the weather in Paris?"}]}], "tools": [{"type": "haystack.tools.tool.Tool", "data": {"name": "weather_tool", "description": "Provides weather information for a given location.", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}, "function": "test_agent.weather_function", "outputs_to_string": null, "inputs_from_state": null, "outputs_to_state": null, "system_prompt_instructions": null}}]}' # noqa: E501 ) assert ( llm_tags["haystack.agent.step.llm.output"] @@ -1287,7 +1287,7 @@ def test_agent_tracing_span_run(self, caplog, monkeypatch, weather_tool): _, run_tags = agent_spans[2] assert run_tags == { "haystack.agent.max_steps": 100, - "haystack.agent.tools": '[{"type": "haystack.tools.tool.Tool", "data": {"name": "weather_tool", "description": "Provides weather information for a given location.", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}, "function": "test_agent.weather_function", "outputs_to_string": null, "inputs_from_state": null, "outputs_to_state": null, "system_prompt": null}}]', # noqa: E501 + "haystack.agent.tools": '[{"type": "haystack.tools.tool.Tool", "data": {"name": "weather_tool", "description": "Provides weather information for a given location.", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}, "function": "test_agent.weather_function", "outputs_to_string": null, "inputs_from_state": null, "outputs_to_state": null, "system_prompt_instructions": null}}]', # noqa: E501 "haystack.agent.exit_conditions": '["text"]', "haystack.agent.state_schema": '{"messages": {"type": "list[haystack.dataclasses.chat_message.ChatMessage]", "handler": "haystack.components.agents.state.state_utils.merge_lists"}, "step_count": {"type": "int", "handler": "haystack.components.agents.state.state_utils.replace_values"}, "token_usage": {"type": "dict[str, typing.Any]", "handler": "haystack.components.agents.state.state_utils.replace_values"}, "tool_call_counts": {"type": "dict[str, int]", "handler": "haystack.components.agents.state.state_utils.replace_values"}}', # noqa: E501 "haystack.agent.input": '{"messages": [{"role": "user", "meta": {}, "name": null, "content": [{"text": "What\'s the weather in Paris?"}]}], "streaming_callback": null}', # noqa: E501 @@ -1380,7 +1380,7 @@ async def test_agent_tracing_span_async_run(self, caplog, monkeypatch, weather_t assert set(llm_tags) == {"haystack.agent.step.llm.input", "haystack.agent.step.llm.output"} assert ( llm_tags["haystack.agent.step.llm.input"] - == '{"messages": [{"role": "user", "meta": {}, "name": null, "content": [{"text": "What\'s the weather in Paris?"}]}], "tools": [{"type": "haystack.tools.tool.Tool", "data": {"name": "weather_tool", "description": "Provides weather information for a given location.", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}, "function": "test_agent.weather_function", "outputs_to_string": null, "inputs_from_state": null, "outputs_to_state": null, "system_prompt": null}}]}' # noqa: E501 + == '{"messages": [{"role": "user", "meta": {}, "name": null, "content": [{"text": "What\'s the weather in Paris?"}]}], "tools": [{"type": "haystack.tools.tool.Tool", "data": {"name": "weather_tool", "description": "Provides weather information for a given location.", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}, "function": "test_agent.weather_function", "outputs_to_string": null, "inputs_from_state": null, "outputs_to_state": null, "system_prompt_instructions": null}}]}' # noqa: E501 ) assert ( llm_tags["haystack.agent.step.llm.output"] @@ -1393,7 +1393,7 @@ async def test_agent_tracing_span_async_run(self, caplog, monkeypatch, weather_t _, run_tags = agent_spans[2] assert run_tags == { "haystack.agent.max_steps": 100, - "haystack.agent.tools": '[{"type": "haystack.tools.tool.Tool", "data": {"name": "weather_tool", "description": "Provides weather information for a given location.", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}, "function": "test_agent.weather_function", "outputs_to_string": null, "inputs_from_state": null, "outputs_to_state": null, "system_prompt": null}}]', # noqa: E501 + "haystack.agent.tools": '[{"type": "haystack.tools.tool.Tool", "data": {"name": "weather_tool", "description": "Provides weather information for a given location.", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}, "function": "test_agent.weather_function", "outputs_to_string": null, "inputs_from_state": null, "outputs_to_state": null, "system_prompt_instructions": null}}]', # noqa: E501 "haystack.agent.exit_conditions": '["text"]', "haystack.agent.state_schema": '{"messages": {"type": "list[haystack.dataclasses.chat_message.ChatMessage]", "handler": "haystack.components.agents.state.state_utils.merge_lists"}, "step_count": {"type": "int", "handler": "haystack.components.agents.state.state_utils.replace_values"}, "token_usage": {"type": "dict[str, typing.Any]", "handler": "haystack.components.agents.state.state_utils.replace_values"}, "tool_call_counts": {"type": "dict[str, int]", "handler": "haystack.components.agents.state.state_utils.replace_values"}}', # noqa: E501 "haystack.agent.input": '{"messages": [{"role": "user", "meta": {}, "name": null, "content": [{"text": "What\'s the weather in Paris?"}]}], "streaming_callback": null}', # noqa: E501 @@ -2255,7 +2255,7 @@ def _tool(self, name, system_prompt=None): description="d", parameters={"type": "object", "properties": {}}, function=lambda: None, - system_prompt=system_prompt, + system_prompt_instructions=system_prompt, ) def test_toolset_contribution_appended_to_system_prompt(self): diff --git a/test/components/agents/test_agent_hitl.py b/test/components/agents/test_agent_hitl.py index b68e560176..536c1e1ba2 100644 --- a/test/components/agents/test_agent_hitl.py +++ b/test/components/agents/test_agent_hitl.py @@ -93,7 +93,7 @@ def test_to_dict(self, tools, confirmation_strategies, monkeypatch): "outputs_to_string": None, "inputs_from_state": None, "outputs_to_state": None, - "system_prompt": None, + "system_prompt_instructions": None, }, } ], diff --git a/test/components/generators/chat/test_azure.py b/test/components/generators/chat/test_azure.py index 31e15bf81a..b60ff06d57 100644 --- a/test/components/generators/chat/test_azure.py +++ b/test/components/generators/chat/test_azure.py @@ -429,7 +429,7 @@ def test_to_dict_with_toolset(self, tools, monkeypatch): "outputs_to_string": None, "inputs_from_state": None, "outputs_to_state": None, - "system_prompt": None, + "system_prompt_instructions": None, }, } ] diff --git a/test/components/generators/chat/test_azure_responses.py b/test/components/generators/chat/test_azure_responses.py index 55f12c050c..35316569e1 100644 --- a/test/components/generators/chat/test_azure_responses.py +++ b/test/components/generators/chat/test_azure_responses.py @@ -249,7 +249,7 @@ def test_to_dict_with_toolset(self, tools, monkeypatch): "outputs_to_string": None, "inputs_from_state": None, "outputs_to_state": None, - "system_prompt": None, + "system_prompt_instructions": None, }, } ] diff --git a/test/components/generators/chat/test_hugging_face_api.py b/test/components/generators/chat/test_hugging_face_api.py index 2dd9233a7b..b96852944d 100644 --- a/test/components/generators/chat/test_hugging_face_api.py +++ b/test/components/generators/chat/test_hugging_face_api.py @@ -278,7 +278,7 @@ def test_to_dict(self, mock_check_valid_model): "inputs_from_state": None, "name": "name", "outputs_to_state": None, - "system_prompt": None, + "system_prompt_instructions": None, "outputs_to_string": None, "parameters": {"x": {"type": "string"}}, }, @@ -343,7 +343,7 @@ def test_serde_in_pipeline(self, mock_check_valid_model): "inputs_from_state": None, "name": "name", "outputs_to_state": None, - "system_prompt": None, + "system_prompt_instructions": None, "outputs_to_string": None, "description": "description", "parameters": {"x": {"type": "string"}}, @@ -1217,7 +1217,7 @@ def test_to_dict_with_toolset(self, mock_check_valid_model, tools): "outputs_to_string": None, "inputs_from_state": None, "outputs_to_state": None, - "system_prompt": None, + "system_prompt_instructions": None, }, } ] diff --git a/test/components/generators/chat/test_hugging_face_local.py b/test/components/generators/chat/test_hugging_face_local.py index dbf1e32d21..4c17c1ae6b 100644 --- a/test/components/generators/chat/test_hugging_face_local.py +++ b/test/components/generators/chat/test_hugging_face_local.py @@ -206,7 +206,7 @@ def test_to_dict(self, model_info_mock, tools): "inputs_from_state": None, "name": "weather", "outputs_to_state": None, - "system_prompt": None, + "system_prompt_instructions": None, "outputs_to_string": None, "description": "useful to determine the weather in a given location", "parameters": {"type": "object", "properties": {"city": {"type": "string"}}, "required": ["city"]}, @@ -787,7 +787,7 @@ def test_to_dict_with_toolset(self, model_info_mock, mock_pipeline_with_tokenize "outputs_to_string": None, "inputs_from_state": None, "outputs_to_state": None, - "system_prompt": None, + "system_prompt_instructions": None, }, } ] diff --git a/test/components/generators/chat/test_openai.py b/test/components/generators/chat/test_openai.py index b27f4e0da8..2bc518fd17 100644 --- a/test/components/generators/chat/test_openai.py +++ b/test/components/generators/chat/test_openai.py @@ -346,7 +346,7 @@ def test_to_dict_with_parameters(self, monkeypatch, calendar_event_model): "inputs_from_state": None, "name": "name", "outputs_to_state": None, - "system_prompt": None, + "system_prompt_instructions": None, "outputs_to_string": None, "parameters": {"x": {"type": "string"}}, }, diff --git a/test/components/generators/chat/test_openai_responses.py b/test/components/generators/chat/test_openai_responses.py index 79857c5794..eb488fa326 100644 --- a/test/components/generators/chat/test_openai_responses.py +++ b/test/components/generators/chat/test_openai_responses.py @@ -260,7 +260,7 @@ def test_to_dict_with_parameters(self, monkeypatch, calendar_event_model): "inputs_from_state": None, "name": "name", "outputs_to_state": None, - "system_prompt": None, + "system_prompt_instructions": None, "outputs_to_string": None, "parameters": {"x": {"type": "string"}}, }, diff --git a/test/tools/test_tool.py b/test/tools/test_tool.py index a0554759d8..4e4dc43d77 100644 --- a/test/tools/test_tool.py +++ b/test/tools/test_tool.py @@ -161,7 +161,7 @@ def test_to_dict(self): "outputs_to_string": {"handler": "test_tool.format_string"}, "inputs_from_state": {"location": "city"}, "outputs_to_state": {"documents": {"source": "docs", "handler": "test_tool.get_weather_report"}}, - "system_prompt": None, + "system_prompt_instructions": None, }, } @@ -195,7 +195,7 @@ def test_system_prompt_contribution(self): description="Get weather report", parameters=parameters, function=get_weather_report, - system_prompt="Always call weather before answering about the weather.", + system_prompt_instructions="Always call weather before answering about the weather.", ) assert tool.system_prompt_contribution() == "Always call weather before answering about the weather." @@ -203,7 +203,7 @@ def test_system_prompt_contribution_defaults_to_none(self): tool = Tool( name="weather", description="Get weather report", parameters=parameters, function=get_weather_report ) - assert tool.system_prompt is None + assert tool.system_prompt_instructions is None assert tool.system_prompt_contribution() is None def test_serialize_outputs_to_string(self): From 5d291221b8e6ff08b898039ff42bd7ec18db6458 Mon Sep 17 00:00:00 2001 From: Sebastian Husch Lee Date: Mon, 1 Jun 2026 14:09:13 +0200 Subject: [PATCH 7/8] Remove md file --- agent_skills_plan.md | 171 ------------------------------------------- 1 file changed, 171 deletions(-) delete mode 100644 agent_skills_plan.md diff --git a/agent_skills_plan.md b/agent_skills_plan.md deleted file mode 100644 index 8ead19d594..0000000000 --- a/agent_skills_plan.md +++ /dev/null @@ -1,171 +0,0 @@ -# Plan: Skills support for the Haystack Agent - -Goal: let the Haystack `Agent` discover and read filesystem **Skills** the way Claude Code / Codex do — progressive -disclosure of expert instructions — using a small set of pre-built tools plus a generic mechanism for tools to -contribute system-prompt text. - -## Background - -Claude Code / Codex Skills use **progressive disclosure**: - -| Level | What's loaded | When | -|---|---|---| -| 1 — Metadata | each skill's `name` + `description` | always, up front | -| 2 — Instructions | the full `SKILL.md` body | when a skill is triggered | -| 3 — Bundled files | `reference/*.md`, examples, etc. | on demand, as the body references them | - -A skill is a directory: - -``` -skills/ - pdf-forms/ - SKILL.md # YAML frontmatter (name, description) + markdown body - reference/forms.md -``` - -Level 1 lives in the system context (not a tool). Levels 2/3 are pulled in on demand. - -References: -- Anthropic, "Equipping agents for the real world with Agent Skills" — the three-stage progressive - disclosure model (discovery → activation → execution): - https://www.anthropic.com/engineering/equipping-agents-for-the-real-world-with-agent-skills -- Anthropic Agent Skills docs (overview + `SKILL.md` structure): - https://platform.claude.com/docs/en/agents-and-tools/agent-skills/overview - -## Decisions (locked) - -- **Two tools**, both pure/stateless: `load_skill`, `read_skill_file`. -- **No script execution** in v1 (no Level-3 execution tool). -- **Filesystem source only.** -- **System-prompt injection:** a generic contribution hook on `Tool`/`Toolset`, - consumed automatically by the `Agent`. - - An alternative to this approach is to add another tool that lists available skills and their descriptions and - force it to be called using the soon to be added condition param. - -## How MCP does the equivalent (reference) - -MCP servers return an optional top-level `instructions` string in their `initialize` response. -The spec frames it as a hint clients MAY add to the system prompt to explain the server's tools as a whole. - -References: -- MCP lifecycle / `initialize` (the `InitializeResult.instructions` field): - https://modelcontextprotocol.io/specification/2025-06-18/basic/lifecycle -- MCP schema (`instructions` docstring: "can be used by clients to improve the LLM's understanding of - available tools ... MAY be added to the system prompt"): - https://github.com/modelcontextprotocol/modelcontextprotocol/blob/main/schema/2025-06-18/schema.ts - -**NOTE:** Advantage of adding the **System-prompt injection** feature is that we could extend MCPToolset to inject these -top-level instructions. - -## Part 1 — `system_prompt_contribution()` hook (generic, reusable) - -Add an optional method to both base classes, default `None`: - -**TODO:** Add support for ComponentTool + PipelineTool - -```python -# haystack/tools/tool.py (on Tool) -# haystack/tools/toolset.py (on Toolset) -def system_prompt_contribution(self) -> str | None: - """Text this tool/toolset wants appended to the Agent's system prompt. None by default.""" - return None -``` - -- `Tool` also gains an optional `system_prompt: str | None = None` dataclass field so a plain `Tool` can carry - instructions without subclassing. `system_prompt_contribution()` returns it by default (subclasses may override for - dynamism). -- `Toolset` subclasses (like `SkillToolset`) override the method. - -### Agent consumption — `_initialize_fresh_execution` - -Right after `selected_tools = self._select_tools(tools)`, collect contributions and merge them into the system message: - -```python -selected_tools = self._select_tools(tools) -contributions = _collect_system_prompt_contributions(selected_tools) -if contributions: - messages = _merge_system_prompt_contributions(messages, contributions) -``` - -Collection rules: - -- Go through `list`, `_ToolsetWrapper` (descend into `.toolsets`), `Toolset`, `Tool`. -- **Top-level wins:** if a `Toolset` returns a contribution, use it and DO NOT descend into - its member tools. Only if it returns `None` do we gather member tools' contributions. -- Bare `Tool` passed directly contributes its own. - -Merge rules: - -- Join contributions with `\n\n`. -- If `messages[0]` is a system message (the rendered `system_prompt`, or a user-supplied system message), append - the contribution text to it. -- Otherwise prepend a new system message built from the contributions. -- Collect from `selected_tools` and inject **after** Jinja rendering — never into the template string — so skill text - containing `{{`/`{%` can't break `ChatPromptBuilder`. - -## Part 2 — `SkillToolset(Toolset)` - -`haystack/tools/skills/skill_toolset.py` - -```python -@dataclass -class SkillMeta: - name: str # from frontmatter; falls back to directory name - description: str # from frontmatter - path: Path # the skill directory - -class SkillToolset(Toolset): - def __init__(self, skills_dir: str | Path) -> None: - self.skills_dir = Path(skills_dir) - self._skills: dict[str, SkillMeta] = self._scan() # frontmatter-only, cheap, in __init__ - super().__init__(tools=[self._load_skill_tool(), self._read_skill_file_tool()]) -``` - -- `_scan()` walks `skills_dir/*/SKILL.md`, parses YAML frontmatter, validates `name`/`description`, checks name - uniqueness. Bodies are NOT read here. -- `system_prompt_contribution()` renders the Level-1 catalog + behavioral rules (below). -- `warm_up()` revalidates. -- `to_dict()`/`from_dict()` serialize `skills_dir` only and rescan on load. - -### Tool: `load_skill` (Level 2) - -```python -def load_skill(name: Annotated[str, "Exact skill name from the Available Skills list."]) -> str: - """Load a skill's full instructions. Call this before doing a task the skill covers.""" -``` - -Returns the `SKILL.md` body plus a manifest of bundled files (so the model knows what `read_skill_file` can fetch). -Unknown name → friendly error listing available skills. - -### Tool: `read_skill_file` (Level 3) - -```python -def read_skill_file( - name: Annotated[str, "Skill that owns the file."], - path: Annotated[str, "Path relative to the skill directory, e.g. 'reference/forms.md'."], -) -> str: - """Read a file bundled with a skill (reference docs, examples, templates).""" -``` - -Path-traversal guard: `(skill_dir / path).resolve()` must stay within `skill_dir.resolve()`, else error. -Missing file → friendly error. - -### System-prompt contribution text - -``` -## Available Skills -Specialized instruction sets for specific task types. Load one before doing matching work. - -- **pdf-forms**: Use when filling PDF forms or extracting fields from PDFs. -- **excel-report**: Use when creating or editing .xlsx spreadsheets. - -When a task matches a skill, call `load_skill` with its name BEFORE starting, then follow -the loaded instructions exactly (they override your general approach). Load skills only -when relevant; if a skill references a file, fetch it with `read_skill_file`. If no skill -matches, proceed normally. -``` - -## Out of scope (future) - -- Script execution (`run_skill_script`) with confirmation-strategy gating. -- Search-based discovery (`discovery="search"`) for very large skill libraries. From a55a877817279e39ac1975ce5d5a92d3cab615ec Mon Sep 17 00:00:00 2001 From: Sebastian Husch Lee Date: Mon, 1 Jun 2026 14:17:45 +0200 Subject: [PATCH 8/8] fix unit tests --- test/components/agents/test_agent.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/components/agents/test_agent.py b/test/components/agents/test_agent.py index 108022c6e9..94afb0bca4 100644 --- a/test/components/agents/test_agent.py +++ b/test/components/agents/test_agent.py @@ -1276,7 +1276,7 @@ def test_agent_tracing_span_run(self, caplog, monkeypatch, weather_tool): assert set(llm_tags) == {"haystack.agent.step.llm.input", "haystack.agent.step.llm.output"} assert ( llm_tags["haystack.agent.step.llm.input"] - == '{"messages": [{"role": "user", "meta": {}, "name": null, "content": [{"text": "What\'s the weather in Paris?"}]}], "tools": [{"type": "haystack.tools.tool.Tool", "data": {"name": "weather_tool", "description": "Provides weather information for a given location.", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}, "function": "test_agent.weather_function", "outputs_to_string": null, "inputs_from_state": null, "outputs_to_state": null, "async_function": null}}]}' # noqa: E501 + == '{"messages": [{"role": "user", "meta": {}, "name": null, "content": [{"text": "What\'s the weather in Paris?"}]}], "tools": [{"type": "haystack.tools.tool.Tool", "data": {"name": "weather_tool", "description": "Provides weather information for a given location.", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}, "function": "test_agent.weather_function", "outputs_to_string": null, "inputs_from_state": null, "outputs_to_state": null, "async_function": null, "system_prompt_instructions": null}}]}' # noqa: E501 ) assert ( llm_tags["haystack.agent.step.llm.output"] @@ -1291,7 +1291,7 @@ def test_agent_tracing_span_run(self, caplog, monkeypatch, weather_tool): _, run_tags = agent_spans[2] assert run_tags == { "haystack.agent.max_steps": 100, - "haystack.agent.tools": '[{"type": "haystack.tools.tool.Tool", "data": {"name": "weather_tool", "description": "Provides weather information for a given location.", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}, "function": "test_agent.weather_function", "outputs_to_string": null, "inputs_from_state": null, "outputs_to_state": null, "async_function": null}}]', # noqa: E501 + "haystack.agent.tools": '[{"type": "haystack.tools.tool.Tool", "data": {"name": "weather_tool", "description": "Provides weather information for a given location.", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}, "function": "test_agent.weather_function", "outputs_to_string": null, "inputs_from_state": null, "outputs_to_state": null, "async_function": null, "system_prompt_instructions": null}}]', # noqa: E501 "haystack.agent.exit_conditions": '["text"]', "haystack.agent.state_schema": '{"messages": {"type": "list[haystack.dataclasses.chat_message.ChatMessage]", "handler": "haystack.components.agents.state.state_utils.merge_lists"}, "step_count": {"type": "int", "handler": "haystack.components.agents.state.state_utils.replace_values"}, "token_usage": {"type": "dict[str, typing.Any]", "handler": "haystack.components.agents.state.state_utils.replace_values"}, "tool_call_counts": {"type": "dict[str, int]", "handler": "haystack.components.agents.state.state_utils.replace_values"}}', # noqa: E501 "haystack.agent.input": '{"messages": [{"role": "user", "meta": {}, "name": null, "content": [{"text": "What\'s the weather in Paris?"}]}], "streaming_callback": null}', # noqa: E501 @@ -1384,7 +1384,7 @@ async def test_agent_tracing_span_async_run(self, caplog, monkeypatch, weather_t assert set(llm_tags) == {"haystack.agent.step.llm.input", "haystack.agent.step.llm.output"} assert ( llm_tags["haystack.agent.step.llm.input"] - == '{"messages": [{"role": "user", "meta": {}, "name": null, "content": [{"text": "What\'s the weather in Paris?"}]}], "tools": [{"type": "haystack.tools.tool.Tool", "data": {"name": "weather_tool", "description": "Provides weather information for a given location.", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}, "function": "test_agent.weather_function", "outputs_to_string": null, "inputs_from_state": null, "outputs_to_state": null, "async_function": null}}]}' # noqa: E501 + == '{"messages": [{"role": "user", "meta": {}, "name": null, "content": [{"text": "What\'s the weather in Paris?"}]}], "tools": [{"type": "haystack.tools.tool.Tool", "data": {"name": "weather_tool", "description": "Provides weather information for a given location.", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}, "function": "test_agent.weather_function", "outputs_to_string": null, "inputs_from_state": null, "outputs_to_state": null, "async_function": null, "system_prompt_instructions": null}}]}' # noqa: E501 ) assert ( llm_tags["haystack.agent.step.llm.output"] @@ -1397,7 +1397,7 @@ async def test_agent_tracing_span_async_run(self, caplog, monkeypatch, weather_t _, run_tags = agent_spans[2] assert run_tags == { "haystack.agent.max_steps": 100, - "haystack.agent.tools": '[{"type": "haystack.tools.tool.Tool", "data": {"name": "weather_tool", "description": "Provides weather information for a given location.", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}, "function": "test_agent.weather_function", "outputs_to_string": null, "inputs_from_state": null, "outputs_to_state": null, "async_function": null}}]', # noqa: E501 + "haystack.agent.tools": '[{"type": "haystack.tools.tool.Tool", "data": {"name": "weather_tool", "description": "Provides weather information for a given location.", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}, "function": "test_agent.weather_function", "outputs_to_string": null, "inputs_from_state": null, "outputs_to_state": null, "async_function": null, "system_prompt_instructions": null}}]', # noqa: E501 "haystack.agent.exit_conditions": '["text"]', "haystack.agent.state_schema": '{"messages": {"type": "list[haystack.dataclasses.chat_message.ChatMessage]", "handler": "haystack.components.agents.state.state_utils.merge_lists"}, "step_count": {"type": "int", "handler": "haystack.components.agents.state.state_utils.replace_values"}, "token_usage": {"type": "dict[str, typing.Any]", "handler": "haystack.components.agents.state.state_utils.replace_values"}, "tool_call_counts": {"type": "dict[str, int]", "handler": "haystack.components.agents.state.state_utils.replace_values"}}', # noqa: E501 "haystack.agent.input": '{"messages": [{"role": "user", "meta": {}, "name": null, "content": [{"text": "What\'s the weather in Paris?"}]}], "streaming_callback": null}', # noqa: E501