Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 62 additions & 0 deletions haystack/components/agents/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
serialize_tools_or_toolset,
warm_up_tools,
)
from haystack.tools.toolset import _ToolsetWrapper
from haystack.utils.callable_serialization import deserialize_callable, serialize_callable
from haystack.utils.deserialization import deserialize_component_inplace

Expand Down Expand Up @@ -193,6 +194,63 @@ def _render_prompt_messages(
return prompt_messages


def _gather_system_prompt_contributions(item: Any, out: list[str]) -> None:
"""
Recursively collect system prompt contributions from a tool, toolset, or collection thereof.

A `Toolset` that provides its own contribution takes precedence over its member tools, which are then
not contributed separately (mirroring how an MCP server's `instructions` describe its tools as a whole).
Composed toolsets (created via `toolset_a + toolset_b`) contribute each child toolset independently.

:param item: A `Tool`, `Toolset`, or list/tuple of them.
:param out: List that collected, non-empty contributions are appended to, in order.
"""
if item is None:
return
if isinstance(item, _ToolsetWrapper):
for toolset in item.toolsets:
_gather_system_prompt_contributions(toolset, out)
elif isinstance(item, Toolset):
contribution = item.system_prompt_contribution()
if contribution:
out.append(contribution)
else:
for member_tool in item:
_gather_system_prompt_contributions(member_tool, out)
elif isinstance(item, Tool):
contribution = item.system_prompt_contribution()
if contribution:
out.append(contribution)
elif isinstance(item, (list, tuple)):
for sub_item in item:
_gather_system_prompt_contributions(sub_item, out)


def _apply_system_prompt_contributions(messages: list[ChatMessage], tools: ToolsType | None) -> list[ChatMessage]:
    """
    Append tool/toolset system prompt contributions to the agent's system message.

    If the first message is a system message (the rendered `system_prompt`, or a user-supplied system message),
    the contributions are appended to its text and its `meta` and `name` are carried over. Otherwise a new
    system message is prepended. Contributions are merged into the already-rendered message text (never the
    Jinja2 template) so arbitrary content is safe.

    :param messages: The messages the agent is about to run with.
    :param tools: The tools selected for this run.
    :returns: The messages with contributions applied. When there is nothing to contribute, the input list
        itself is returned unchanged; otherwise a new list is returned and the input is not mutated.
    """
    contributions: list[str] = []
    _gather_system_prompt_contributions(tools, contributions)
    if not contributions:
        return messages

    extra = "\n\n".join(contributions)
    if messages and messages[0].is_from(ChatRole.SYSTEM):
        system_message = messages[0]
        existing = system_message.text or ""
        combined = f"{existing}\n\n{extra}" if existing else extra
        # Rebuild rather than mutate the caller's message, but keep its metadata and name: building the
        # replacement from the text alone would silently discard both.
        merged = ChatMessage.from_system(combined, meta=dict(system_message.meta), name=system_message.name)
        return [merged, *messages[1:]]
    return [ChatMessage.from_system(extra), *messages]


@dataclass(kw_only=True)
class _ExecutionContext:
"""
Expand Down Expand Up @@ -653,6 +711,10 @@ def _initialize_fresh_execution(

selected_tools = self._select_tools(tools)

# Append any system prompt instructions contributed by the selected tools/toolsets (e.g. a SkillToolset
# catalog) to the system message. Based on selected_tools so it respects per-run tool filtering.
messages = _apply_system_prompt_contributions(messages, selected_tools)

state_kwargs: dict[str, Any] = {key: kwargs[key] for key in self.state_schema.keys() if key in kwargs}
state = State(schema=self.state_schema, data=state_kwargs)
state.set("messages", messages)
Expand Down
3 changes: 3 additions & 0 deletions haystack/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from haystack.tools.tool import Tool, _check_duplicate_tool_names
from haystack.tools.toolset import Toolset
from haystack.tools.searchable_toolset import SearchableToolset
from haystack.tools.skills import SkillMeta, SkillToolset
from haystack.tools.component_tool import ComponentTool
from haystack.tools.pipeline_tool import PipelineTool
from haystack.tools.serde_utils import deserialize_tools_or_toolset_inplace, serialize_tools_or_toolset
Expand All @@ -32,6 +33,8 @@
"serialize_tools_or_toolset",
"Tool",
"SearchableToolset",
"SkillMeta",
"SkillToolset",
"ToolsType",
"Toolset",
"tool",
Expand Down
7 changes: 7 additions & 0 deletions haystack/tools/skills/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0

from haystack.tools.skills.skill_toolset import SkillMeta, SkillToolset

__all__ = ["SkillMeta", "SkillToolset"]
232 changes: 232 additions & 0 deletions haystack/tools/skills/skill_toolset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,232 @@
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0

from dataclasses import dataclass
from pathlib import Path
from typing import Annotated, Any

import yaml

from haystack.core.serialization import generate_qualified_class_name
from haystack.tools.from_function import create_tool_from_function
from haystack.tools.tool import Tool
from haystack.tools.toolset import Toolset

SKILL_FILE_NAME = "SKILL.md"


@dataclass
class SkillMeta:
    """
    Describes one skill found on disk.

    :param name: Identifier of the skill; the agent passes it to load the skill.
    :param description: Brief summary of when the skill applies. Presented to the agent up front.
    :param path: Directory that holds the skill.
    """

    name: str
    description: str
    path: Path


def _parse_frontmatter(text: str) -> tuple[dict[str, Any], str]:
"""
Split a `SKILL.md` file into its YAML frontmatter and markdown body.

The frontmatter is the YAML block delimited by leading and trailing `---` lines. If no frontmatter is
present, an empty mapping and the original text are returned.

:param text: The full contents of a `SKILL.md` file.
:returns: A tuple of (frontmatter mapping, body).
:raises ValueError: If the frontmatter is present but is not a valid YAML mapping.
"""
stripped = text.lstrip()
if not stripped.startswith("---"):
return {}, text

# Drop the leading '---' line, then split on the closing '---'.
after_open = stripped[len("---") :].lstrip("\n")
parts = after_open.split("\n---", 1)
if len(parts) != 2:
return {}, text

frontmatter_block, body = parts
loaded = yaml.safe_load(frontmatter_block) or {}
if not isinstance(loaded, dict):
raise ValueError("Skill frontmatter must be a YAML mapping.") # noqa: TRY004
return loaded, body.lstrip("\n")


class SkillToolset(Toolset):
    """
    A Toolset that lets an Agent discover and read filesystem "skills" via progressive disclosure.

    A skill is a directory containing a `SKILL.md` file with YAML frontmatter (`name` and `description`) and a
    markdown body of instructions. Skills may bundle additional files (reference docs, examples, templates).
    This mirrors how Claude Code and Codex expose skills:

    - The name and description of every skill are injected into the Agent's system prompt
      (via `system_prompt_contribution`) so the model knows which skills exist.
    - `load_skill` returns a skill's full instructions on demand, plus a manifest of its bundled files.
    - `read_skill_file` reads a bundled file on demand.

    Expected layout:

    ```
    skills/
        pdf-forms/
            SKILL.md            # frontmatter (name, description) + markdown instructions
            reference/forms.md
    ```

    ### Usage example

    ```python
    from haystack.components.agents import Agent
    from haystack.components.generators.chat import OpenAIChatGenerator
    from haystack.dataclasses import ChatMessage
    from haystack.tools import SkillToolset

    skills = SkillToolset("skills/")
    agent = Agent(chat_generator=OpenAIChatGenerator(), tools=skills)
    # The skills catalog is appended to the system prompt automatically.
    result = agent.run(messages=[ChatMessage.from_user("Fill in this PDF form for me.")])
    ```
    """

    def __init__(self, skills_dir: str | Path) -> None:
        """
        Initialize the SkillToolset by scanning a directory for skills.

        Each `SKILL.md` is read at construction time to extract its frontmatter; the bodies are discarded and
        re-read, together with bundled files, when the agent calls `load_skill` / `read_skill_file`.

        :param skills_dir: Directory containing one subdirectory per skill, each with a `SKILL.md`.
        :raises ValueError: If `skills_dir` does not exist, is not a directory, a skill is missing a required
            frontmatter field, or two skills share the same name.
        """
        self.skills_dir = Path(skills_dir)
        self._skills: dict[str, SkillMeta] = self._scan()
        super().__init__(tools=[self._create_load_skill_tool(), self._create_read_skill_file_tool()])

    @property
    def skills(self) -> dict[str, SkillMeta]:
        """Mapping of skill name to its metadata."""
        return self._skills

    def _scan(self) -> dict[str, SkillMeta]:
        """
        Scan `skills_dir` for skills, keeping only the frontmatter of each `SKILL.md`.

        The skill name falls back to the directory name when the frontmatter has no usable `name` (key absent,
        empty, or blank). Names and descriptions are coerced to stripped strings so that the registry keys
        always match the string names the model passes to the tools.

        :returns: Mapping of skill name to metadata, in sorted `SKILL.md` path order.
        :raises ValueError: On a missing directory, missing required frontmatter, or duplicate skill names.
        """
        if not self.skills_dir.is_dir():
            raise ValueError(f"Skills directory '{self.skills_dir}' does not exist or is not a directory.")

        skills: dict[str, SkillMeta] = {}
        for skill_file in sorted(self.skills_dir.glob(f"*/{SKILL_FILE_NAME}")):
            skill_dir = skill_file.parent
            frontmatter, _ = _parse_frontmatter(skill_file.read_text(encoding="utf-8"))

            # A bare `name:` parses to None and `name: 123` to an int; neither is a usable registry key.
            raw_name = frontmatter.get("name")
            name = str(raw_name).strip() if raw_name else ""
            if not name:
                name = skill_dir.name

            raw_description = frontmatter.get("description")
            description = str(raw_description).strip() if raw_description else ""
            if not description:
                raise ValueError(f"Skill '{name}' ({skill_file}) is missing a 'description' in its frontmatter.")
            if name in skills:
                raise ValueError(f"Duplicate skill name '{name}' found in '{self.skills_dir}'.")

            skills[name] = SkillMeta(name=name, description=description, path=skill_dir)
        return skills

    def system_prompt_contribution(self) -> str | None:
        """
        Render the skills catalog and usage instructions for injection into the Agent's system prompt.

        Whitespace inside each description is collapsed to single spaces so that multi-line descriptions
        (for example YAML block scalars) still render as one catalog entry per line.

        :returns: The catalog text, or `None` if no skills were found.
        """
        if not self._skills:
            return None

        lines = [
            "## Available Skills",
            "Specialized instruction sets for specific task types. Load one before doing matching work.",
            "",
        ]
        lines += [f"- **{meta.name}**: {' '.join(meta.description.split())}" for meta in self._skills.values()]
        lines += [
            "",
            "When a task matches a skill, call `load_skill` with its name BEFORE starting, then follow the loaded "
            "instructions exactly (they override your general approach). Load skills only when relevant; if a skill "
            "references a file, fetch it with `read_skill_file`. If no skill matches, proceed normally.",
        ]
        return "\n".join(lines)

    def _create_load_skill_tool(self) -> Tool:
        """Create the `load_skill` tool, closed over this toolset's skill registry."""

        def load_skill(name: Annotated[str, "Exact name of the skill to load, from the Available Skills list."]) -> str:
            """Load a skill's full instructions. Call this before doing a task the skill covers."""
            meta = self._skills.get(name)
            if meta is None:
                available = ", ".join(self._skills) or "none"
                return f"Unknown skill '{name}'. Available skills: {available}."

            _, body = _parse_frontmatter((meta.path / SKILL_FILE_NAME).read_text(encoding="utf-8"))

            # Every file below the skill directory, except any file named SKILL.md, is listed as bundled.
            bundled = sorted(
                p.relative_to(meta.path).as_posix()
                for p in meta.path.rglob("*")
                if p.is_file() and p.name != SKILL_FILE_NAME
            )
            if bundled:
                manifest = "\n".join(f"- {path}" for path in bundled)
                body = f"{body}\n\n---\nBundled files (read with `read_skill_file`):\n{manifest}"
            return body

        return create_tool_from_function(function=load_skill, name="load_skill")

    def _create_read_skill_file_tool(self) -> Tool:
        """Create the `read_skill_file` tool, closed over this toolset's skill registry."""

        def read_skill_file(
            name: Annotated[str, "Name of the skill that owns the file."],
            path: Annotated[str, "Path of the file relative to the skill directory, e.g. 'reference/forms.md'."],
        ) -> str:
            """Read a file bundled with a skill (reference docs, examples, templates)."""
            meta = self._skills.get(name)
            if meta is None:
                available = ", ".join(self._skills) or "none"
                return f"Unknown skill '{name}'. Available skills: {available}."

            # Resolve both sides so `..` segments, absolute paths and symlinks cannot escape the skill directory.
            skill_dir = meta.path.resolve()
            target = (skill_dir / path).resolve()
            if not target.is_relative_to(skill_dir):
                return f"Refusing to read '{path}': path escapes the '{name}' skill directory."
            if not target.is_file():
                return f"File '{path}' not found in skill '{name}'."
            try:
                return target.read_text(encoding="utf-8")
            except UnicodeDecodeError:
                # Binary assets can be bundled too; report them like the other failures instead of raising.
                return f"File '{path}' in skill '{name}' is not UTF-8 text and cannot be read."

        return create_tool_from_function(function=read_skill_file, name="read_skill_file")

    def to_dict(self) -> dict[str, Any]:
        """
        Serialize the toolset to a dictionary.

        Only the skills directory is serialized; tools are rebuilt by rescanning on deserialization.

        :returns: Dictionary representation of the toolset.
        """
        return {"type": generate_qualified_class_name(type(self)), "data": {"skills_dir": str(self.skills_dir)}}

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "SkillToolset":
        """
        Deserialize a toolset from a dictionary.

        :param data: Dictionary representation of the toolset.
        :returns: A new SkillToolset instance.
        """
        return cls(skills_dir=data["data"]["skills_dir"])
18 changes: 18 additions & 0 deletions haystack/tools/tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,11 @@ class Tool:
"documents": {"handler": custom_handler}
}
```
:param system_prompt_instructions:
Optional system prompt instructions associated with this Tool. When the Tool is used with an `Agent`,
this text is appended to the Agent's system prompt (see `system_prompt_contribution`). Use it to tell
the model how and when to use the Tool. Note that an enclosing `Toolset` that provides its own
`system_prompt_contribution` takes precedence and suppresses the contributions of its member tools.
:raises ValueError: If neither `function` nor `async_function` is provided, if `function` is a
coroutine function, if `async_function` is not a coroutine function, if `parameters` is not a
valid JSON schema, or if the `outputs_to_state`, `outputs_to_string`, or `inputs_from_state`
Expand All @@ -107,6 +112,7 @@ class Tool:
inputs_from_state: dict[str, str] | None = None
outputs_to_state: dict[str, dict[str, Any]] | None = None
async_function: Callable | None = None
system_prompt_instructions: str | None = None

def __post_init__(self) -> None: # noqa: C901, PLR0912
# At least one of function / async_function must be set.
Expand Down Expand Up @@ -280,6 +286,18 @@ def warm_up(self) -> None:
"""
pass

def system_prompt_contribution(self) -> str | None:
"""
Return optional system prompt instructions for this Tool.

When the Tool is used with an `Agent`, the returned text is appended to the Agent's system prompt.
By default this returns the `system_prompt_instructions` attribute (which may be `None`). Subclasses
can override this to generate instructions dynamically.

:returns: The system prompt contribution, or `None` if the Tool has nothing to contribute.
"""
return self.system_prompt_instructions

def invoke(self, **kwargs: Any) -> Any:
"""
Invoke the Tool synchronously with the provided keyword arguments.
Expand Down
Loading
Loading