diff --git a/cookbook/README.md b/cookbook/README.md index 469088d09..30a885809 100644 --- a/cookbook/README.md +++ b/cookbook/README.md @@ -51,11 +51,15 @@ View the [full list of providers here](https://portkey.ai/docs/welcome/integrati View the [full list of integrations here](https://portkey.ai/docs/welcome/integration-guides). ## monitoring-agents + * [Autogen](./monitoring-agents/Autogen_with_Telemetry.ipynb) * [CrewAI](./monitoring-agents/CrewAI_with_Telemetry.ipynb) * [Llama Agents](./monitoring-agents/Llama_Agents_with_Telemetry.ipynb) * [ControlFlow](./monitoring-agents/ControlFlow_with_Telemetry.ipynb) +## security + +* [Block unsafe agent tool payloads with a local firewall](./security/tool-payload-firewall.md) ## contributing diff --git a/cookbook/security/tool-payload-firewall.md b/cookbook/security/tool-payload-firewall.md new file mode 100644 index 000000000..c4dd14afc --- /dev/null +++ b/cookbook/security/tool-payload-firewall.md @@ -0,0 +1,69 @@ +# Block unsafe agent tool payloads with a local firewall + +Autonomous agents often leave the model gateway through tool calls: Stripe refunds, +CRM updates, email sends, database writes, or internal admin APIs. Content +guardrails help at the prompt and response layer, but tool-call arguments need a +structural check before the application executes them. + +The `tool-payload-firewall` plugin adds a local guardrail for OpenAI-compatible +`tool_calls`. It flattens tool-call arguments into JSON paths, applies allowlist, +blocked path, string length, and array size policies, then returns audit-ready +findings in the hook result. 
+ +## Example + +Enable the plugin in `conf.json`: + +```json +{ + "plugins_enabled": ["default", "tool-payload-firewall"], + "credentials": {} +} +``` + +Build plugins: + +```bash +npm run build-plugins +``` + +Add the guardrail as an `after_request_hooks` policy so the gateway inspects the +model's proposed tool calls before your agent executes them: + +```json +{ + "after_request_hooks": [ + { + "id": "agent-tool-payload-policy", + "type": "guardrail", + "deny": true, + "checks": [ + { + "id": "tool-payload-firewall.scan", + "parameters": { + "allowedToolNames": ["lookup_customer", "create_refund"], + "blockedPaths": ["customer.ssn", "*.api_key", "recipients.*.email"], + "maxArrayItems": 25, + "maxStringLength": 2048 + } + } + ] + } + ] +} +``` + +## Why this helps agent reliability + +- `blockedPaths` catches sensitive or high-risk arguments such as `customer.ssn` + or `*.api_key` even when they appear inside otherwise valid JSON. Patterns are compared segment-by-segment against the full dotted path of each leaf value, and `*` stands for exactly one segment (array indices count as segments), so `*.api_key` matches `billing.api_key` but not a top-level `api_key` or a deeper `a.b.api_key`. +- `maxArrayItems` limits accidental mass updates and runaway API calls from + manipulated tool arguments. +- `maxStringLength` catches prompt-injection payloads hidden in fields such as + `notes`, `description`, or `instructions`. +- `allowedToolNames` prevents a model from escalating from retrieval tools into + write tools unless the route explicitly allows them. + +For teams running evals, the same policy can be used as a regression oracle: +replay historical tool traces through the gateway and assert that risky payload +shapes produce deterministic `hook_results` findings before rollout. 
diff --git a/plugins/index.ts b/plugins/index.ts index 16fbab793..2ab31007d 100644 --- a/plugins/index.ts +++ b/plugins/index.ts @@ -67,6 +67,7 @@ import { handler as f5GuardrailsScan } from './f5-guardrails/scan'; import { handler as azureShieldPrompt } from './azure/shieldPrompt'; import { handler as azureProtectedMaterial } from './azure/protectedMaterial'; import { handler as crowdstrikeAidrGuardChatCompletions } from './crowdstrike-aidr/guardChatCompletion'; +import { handler as toolPayloadFirewallScan } from './tool-payload-firewall/scan'; export const plugins = { default: { @@ -180,4 +181,7 @@ export const plugins = { 'crowdstrike-aidr': { guardChatCompletions: crowdstrikeAidrGuardChatCompletions, }, + 'tool-payload-firewall': { + scan: toolPayloadFirewallScan, + }, }; diff --git a/plugins/tool-payload-firewall/manifest.json b/plugins/tool-payload-firewall/manifest.json new file mode 100644 index 000000000..07d7f4eb2 --- /dev/null +++ b/plugins/tool-payload-firewall/manifest.json @@ -0,0 +1,76 @@ +{ + "id": "tool-payload-firewall", + "name": "Tool Payload Firewall", + "description": "Local guardrail for agent tool-call arguments. Flattens tool-call JSON, applies path/size policies, and returns audit-ready findings before external tool execution.", + "credentials": [], + "functions": [ + { + "name": "Scan Tool Payloads", + "id": "scan", + "supportedHooks": ["beforeRequestHook", "afterRequestHook"], + "type": "guardrail", + "description": [ + { + "type": "subHeading", + "text": "Inspect OpenAI-compatible tool_call arguments and block risky outbound payload shapes before an agent executes external tools." + } + ], + "parameters": { + "type": "object", + "properties": { + "blockedPaths": { + "type": "array", + "label": "Blocked JSON paths", + "description": [ + { + "type": "subHeading", + "text": "Exact or wildcard JSON paths to deny, for example customer.ssn, *.api_key, or recipients.*.email." 
+ } + ], + "items": { + "type": "string" + }, + "default": [] + }, + "maxStringLength": { + "type": "number", + "label": "Maximum string length", + "description": [ + { + "type": "subHeading", + "text": "Deny tool arguments with string leaves longer than this value. Use this to catch prompt-injection payloads hidden in free-text fields." + } + ], + "default": 4096 + }, + "maxArrayItems": { + "type": "number", + "label": "Maximum array items", + "description": [ + { + "type": "subHeading", + "text": "Deny large arrays that can trigger mass updates, runaway API calls, or unexpected cost spikes." + } + ], + "default": 100 + }, + "allowedToolNames": { + "type": "array", + "label": "Allowed tool names", + "description": [ + { + "type": "subHeading", + "text": "Optional allowlist for tool names. When set, tool calls outside the allowlist fail the guardrail." + } + ], + "items": { + "type": "string" + }, + "default": [] + } + }, + "required": [] + } + } + ] +} diff --git a/plugins/tool-payload-firewall/scan.test.ts b/plugins/tool-payload-firewall/scan.test.ts new file mode 100644 index 000000000..361ad1fff --- /dev/null +++ b/plugins/tool-payload-firewall/scan.test.ts @@ -0,0 +1,125 @@ +import { PluginContext } from '../types'; +import { handler } from './scan'; + +const buildContext = (toolCalls: unknown[]): PluginContext => ({ + requestType: 'chatComplete', + request: { + json: { + messages: [{ role: 'user', content: 'issue a refund' }], + }, + }, + response: { + json: { + choices: [ + { + message: { + tool_calls: toolCalls, + }, + }, + ], + }, + }, +}); + +describe('tool payload firewall plugin', () => { + it('blocks configured tool argument paths on afterRequestHook', async () => { + const result = await handler( + buildContext([ + { + id: 'call_1', + function: { + name: 'update_customer', + arguments: JSON.stringify({ + customer: { + email: 'a@example.com', + ssn: '111-22-3333', + }, + }), + }, + }, + ]), + { blockedPaths: ['customer.ssn'] }, + 'afterRequestHook' + 
); + + expect(result.verdict).toBe(false); + expect(result.data.findings).toEqual([ + expect.objectContaining({ + toolName: 'update_customer', + path: 'customer.ssn', + reason: 'blocked_path', + }), + ]); + }); + + it('supports wildcard blocked paths for nested payload policies', async () => { + const result = await handler( + buildContext([ + { + function: { + name: 'send_messages', + arguments: JSON.stringify({ + recipients: [{ email: 'user@example.com' }], + }), + }, + }, + ]), + { blockedPaths: ['recipients.*.email'] }, + 'afterRequestHook' + ); + + expect(result.verdict).toBe(false); + expect(result.data.findings[0]).toEqual( + expect.objectContaining({ + path: 'recipients.0.email', + reason: 'blocked_path', + }) + ); + }); + + it('flags large arrays that could trigger mass updates', async () => { + const result = await handler( + buildContext([ + { + function: { + name: 'bulk_update', + arguments: JSON.stringify({ + account_ids: ['a', 'b', 'c'], + }), + }, + }, + ]), + { maxArrayItems: 2 }, + 'afterRequestHook' + ); + + expect(result.verdict).toBe(false); + expect(result.data.findings[0]).toEqual( + expect.objectContaining({ + path: 'account_ids', + reason: 'array_too_large', + }) + ); + }); + + it('passes when no tool call violates the policy', async () => { + const result = await handler( + buildContext([ + { + function: { + name: 'lookup_customer', + arguments: JSON.stringify({ customer_id: 'cus_123' }), + }, + }, + ]), + { + allowedToolNames: ['lookup_customer'], + blockedPaths: ['customer.ssn'], + }, + 'afterRequestHook' + ); + + expect(result.verdict).toBe(true); + expect(result.data.findings).toEqual([]); + }); +}); diff --git a/plugins/tool-payload-firewall/scan.ts b/plugins/tool-payload-firewall/scan.ts new file mode 100644 index 000000000..856b851db --- /dev/null +++ b/plugins/tool-payload-firewall/scan.ts @@ -0,0 +1,243 @@ +import { + HookEventType, + PluginContext, + PluginHandler, + PluginParameters, +} from '../types'; + +type ToolCall = { + 
id?: string; + type?: string; + function?: { + name?: string; + arguments?: string | Record<string, unknown>; + }; +}; + +type Finding = { + toolName: string; + toolCallId?: string; + path?: string; + reason: string; + valuePreview?: string; +}; + +type FlatLeaf = { + path: string; + value: unknown; +}; + +const DEFAULT_MAX_STRING_LENGTH = 4096; +const DEFAULT_MAX_ARRAY_ITEMS = 100; +const PREVIEW_LIMIT = 160; + +const normalizeStringArray = (value: unknown): string[] => { + return Array.isArray(value) + ? value.filter((item): item is string => typeof item === 'string') + : []; +}; + +const preview = (value: unknown): string => { + const text = typeof value === 'string' ? value : JSON.stringify(value); + return text.length > PREVIEW_LIMIT + ? `${text.slice(0, PREVIEW_LIMIT)}...` + : text; +}; + +const parseArguments = ( + args: string | Record<string, unknown> | undefined +): unknown => { + if (!args) { + return {}; + } + + if (typeof args === 'string') { + return JSON.parse(args); + } + + return args; +}; + +const flatten = (value: unknown, basePath = ''): FlatLeaf[] => { + if (Array.isArray(value)) { + if (value.length === 0) { + return [{ path: basePath, value }]; + } + + return value.flatMap((item, index) => + flatten(item, basePath ? `${basePath}.${index}` : String(index)) + ); + } + + if (value && typeof value === 'object') { + const entries = Object.entries(value as Record<string, unknown>); + if (entries.length === 0) { + return [{ path: basePath, value }]; + } + + return entries.flatMap(([key, nestedValue]) => + flatten(nestedValue, basePath ? 
`${basePath}.${key}` : key) + ); + } + + return [{ path: basePath, value }]; +}; + +const pathMatches = (path: string, pattern: string): boolean => { + const pathParts = path.split('.'); + const patternParts = pattern.split('.'); + + if (pathParts.length !== patternParts.length) { + return false; + } + + return patternParts.every( + (part, index) => part === '*' || part === pathParts[index] + ); +}; + +const getArrayViolations = ( + value: unknown, + maxArrayItems: number, + basePath = '' +): FlatLeaf[] => { + if (Array.isArray(value)) { + const current = + value.length > maxArrayItems ? [{ path: basePath, value }] : []; + return [ + ...current, + ...value.flatMap((item, index) => + getArrayViolations( + item, + maxArrayItems, + basePath ? `${basePath}.${index}` : String(index) + ) + ), + ]; + } + + if (value && typeof value === 'object') { + return Object.entries(value as Record<string, unknown>).flatMap( + ([key, item]) => + getArrayViolations( + item, + maxArrayItems, + basePath ? `${basePath}.${key}` : key + ) + ); + } + + return []; +}; + +const extractToolCalls = ( + context: PluginContext, + eventType: HookEventType +): ToolCall[] => { + const target = + eventType === 'beforeRequestHook' ? context.request : context.response; + const json = target?.json; + + if (!json) { + return []; + } + + if (eventType === 'afterRequestHook') { + return (json.choices || []).flatMap( + (choice: any) => choice?.message?.tool_calls || [] + ); + } + + return (json.messages || []).flatMap( + (message: any) => message?.tool_calls || [] + ); +}; + +export const handler: PluginHandler = async ( + context: PluginContext, + parameters: PluginParameters, + eventType: HookEventType +) => { + const blockedPaths = normalizeStringArray(parameters.blockedPaths); + const allowedToolNames = normalizeStringArray(parameters.allowedToolNames); + const maxStringLength = + typeof parameters.maxStringLength === 'number' + ? 
parameters.maxStringLength + : DEFAULT_MAX_STRING_LENGTH; + const maxArrayItems = + typeof parameters.maxArrayItems === 'number' + ? parameters.maxArrayItems + : DEFAULT_MAX_ARRAY_ITEMS; + + const findings: Finding[] = []; + const toolCalls = extractToolCalls(context, eventType); + + for (const toolCall of toolCalls) { + const toolName = toolCall.function?.name || 'unknown'; + + if (allowedToolNames.length && !allowedToolNames.includes(toolName)) { + findings.push({ + toolName, + toolCallId: toolCall.id, + reason: 'tool_name_not_allowed', + }); + } + + let args: unknown; + try { + args = parseArguments(toolCall.function?.arguments); + } catch (error) { + findings.push({ + toolName, + toolCallId: toolCall.id, + reason: 'invalid_json_arguments', + valuePreview: preview(toolCall.function?.arguments || ''), + }); + continue; + } + + for (const leaf of flatten(args)) { + if (blockedPaths.some((pattern) => pathMatches(leaf.path, pattern))) { + findings.push({ + toolName, + toolCallId: toolCall.id, + path: leaf.path, + reason: 'blocked_path', + valuePreview: preview(leaf.value), + }); + } + + if ( + typeof leaf.value === 'string' && + leaf.value.length > maxStringLength + ) { + findings.push({ + toolName, + toolCallId: toolCall.id, + path: leaf.path, + reason: 'string_too_long', + valuePreview: preview(leaf.value), + }); + } + } + + for (const violation of getArrayViolations(args, maxArrayItems)) { + findings.push({ + toolName, + toolCallId: toolCall.id, + path: violation.path, + reason: 'array_too_large', + valuePreview: preview(violation.value), + }); + } + } + + return { + error: null, + verdict: findings.length === 0, + data: { + toolCallCount: toolCalls.length, + findings, + }, + }; +};