diff --git a/.changeset/error-narrowing.md b/.changeset/error-narrowing.md new file mode 100644 index 000000000..20fb9802f --- /dev/null +++ b/.changeset/error-narrowing.md @@ -0,0 +1,10 @@ +--- +'@tanstack/ai': patch +'@tanstack/ai-openai': patch +--- + +refactor(ai, ai-openai): narrow error handling before logging + +`catch (error: any)` sites in `stream-to-response.ts`, `activities/stream-generation-result.ts`, and `activities/generateVideo/index.ts` are now narrowed to `unknown` and funnel through a shared `toRunErrorPayload(error, fallback)` helper that extracts `message` / `code` without leaking the original error object (which can carry request state from an SDK). + +Replaced four `console.error` calls in the OpenAI text adapter's `chatStream` catch block that dumped the full error object to stderr. SDK errors can carry the original request including auth headers, so the library now logs only the narrowed `{ message, code }` payload via the internal logger — any user-supplied logger receives the sanitized shape, not the raw SDK error. diff --git a/.changeset/isolate-cloudflare-production.md b/.changeset/isolate-cloudflare-production.md new file mode 100644 index 000000000..b6500df23 --- /dev/null +++ b/.changeset/isolate-cloudflare-production.md @@ -0,0 +1,7 @@ +--- +'@tanstack/ai-isolate-cloudflare': patch +--- + +feat(ai-isolate-cloudflare): support production deployments and close tool-name injection vector + +The Worker now documents production-capable `unsafe_eval` usage (previously the code, wrangler.toml, and README all described it as dev-only). Tool names are validated against a strict identifier regex before being interpolated into the generated wrapper code, so a malicious tool name like `foo'); process.exit(1); (function bar() {` is rejected at generation time rather than breaking out of the wrapping function. 
diff --git a/.changeset/ollama-tool-converter.md b/.changeset/ollama-tool-converter.md new file mode 100644 index 000000000..99d9678c0 --- /dev/null +++ b/.changeset/ollama-tool-converter.md @@ -0,0 +1,7 @@ +--- +'@tanstack/ai-ollama': patch +--- + +refactor(ai-ollama): extract tool conversion into `src/tools/` matching peer adapters + +Tool handling lived inline inside the text adapter with raw type casts. It is now split into a dedicated `tool-converter.ts` / `function-tool.ts` pair (mirroring the structure used by `ai-openai`, `ai-anthropic`, `ai-grok`, and `ai-groq`) and re-exported from the package index as `convertFunctionToolToAdapterFormat` and `convertToolsToProviderFormat`. Runtime behavior is unchanged. diff --git a/.changeset/useChat-callback-propagation.md b/.changeset/useChat-callback-propagation.md new file mode 100644 index 000000000..ec6661b69 --- /dev/null +++ b/.changeset/useChat-callback-propagation.md @@ -0,0 +1,10 @@ +--- +'@tanstack/ai-react': patch +'@tanstack/ai-preact': patch +'@tanstack/ai-vue': patch +'@tanstack/ai-solid': patch +--- + +fix(ai-react, ai-preact, ai-vue, ai-solid): propagate `useChat` callback changes + +`onResponse`, `onChunk`, and `onCustomEvent` were captured by reference at client creation time. When a parent component re-rendered with fresh closures, the `ChatClient` kept calling the originals. Every framework now wraps these callbacks so the latest `options.xxx` is read at call time (via `optionsRef.current` in React/Preact, and direct option access in Vue/Solid, matching the pattern already used for `onFinish` / `onError`). Clearing a callback (setting it to `undefined`) now correctly no-ops instead of continuing to invoke the stale handler. 
diff --git a/packages/typescript/ai-code-mode-skills/tests/create-skill-management-tools.test.ts b/packages/typescript/ai-code-mode-skills/tests/create-skill-management-tools.test.ts new file mode 100644 index 000000000..f29aa4425 --- /dev/null +++ b/packages/typescript/ai-code-mode-skills/tests/create-skill-management-tools.test.ts @@ -0,0 +1,302 @@ +import { describe, expect, it, vi } from 'vitest' +import { createSkillManagementTools } from '../src/create-skill-management-tools' +import { createMemorySkillStorage } from '../src/storage/memory-storage' +import { + createAlwaysTrustedStrategy, + createDefaultTrustStrategy, +} from '../src/trust-strategies' + +const mockContext = () => ({ emitCustomEvent: vi.fn() }) + +function getTool( + tools: ReturnType, + name: string, +) { + const tool = tools.find((t) => t.name === name) + if (!tool) throw new Error(`Tool ${name} not found`) + return tool +} + +function validRegisterInput( + overrides: Partial<{ + name: string + description: string + code: string + inputSchema: string + outputSchema: string + usageHints: Array + dependsOn: Array + }> = {}, +) { + return { + name: 'fetch_data', + description: 'A skill', + code: 'return input;', + inputSchema: '{"type":"object","properties":{}}', + outputSchema: '{"type":"object","properties":{}}', + usageHints: ['Use for fetching'], + dependsOn: [], + ...overrides, + } +} + +describe('createSkillManagementTools', () => { + it('exposes search_skills, get_skill, and register_skill', () => { + const storage = createMemorySkillStorage([]) + const tools = createSkillManagementTools({ storage }) + expect(tools.map((t) => t.name).sort()).toEqual([ + 'get_skill', + 'register_skill', + 'search_skills', + ]) + }) + + describe('search_skills', () => { + it('returns lightweight matching entries', async () => { + const storage = createMemorySkillStorage([ + { + id: '1', + name: 'github_stats', + description: 'GitHub stats', + code: 'secret', + inputSchema: {}, + outputSchema: {}, + 
usageHints: ['for github'], + dependsOn: [], + trustLevel: 'untrusted', + stats: { executions: 0, successRate: 0 }, + createdAt: '', + updatedAt: '', + }, + ]) + const tools = createSkillManagementTools({ storage }) + const tool = getTool(tools, 'search_skills') + const results = (await tool.execute!( + { query: 'github', limit: 5 }, + mockContext() as any, + )) as Array> + expect(results).toHaveLength(1) + expect(results[0]).not.toHaveProperty('code') + expect(results[0]!.name).toBe('github_stats') + }) + + it('respects the limit parameter', async () => { + const storage = createMemorySkillStorage([ + { + id: 'a', + name: 'data_one', + description: '', + code: '', + inputSchema: {}, + outputSchema: {}, + usageHints: [], + dependsOn: [], + trustLevel: 'untrusted', + stats: { executions: 0, successRate: 0 }, + createdAt: '', + updatedAt: '', + }, + { + id: 'b', + name: 'data_two', + description: '', + code: '', + inputSchema: {}, + outputSchema: {}, + usageHints: [], + dependsOn: [], + trustLevel: 'untrusted', + stats: { executions: 0, successRate: 0 }, + createdAt: '', + updatedAt: '', + }, + ]) + const tools = createSkillManagementTools({ storage }) + const tool = getTool(tools, 'search_skills') + const results = (await tool.execute!( + { query: 'data', limit: 1 }, + mockContext() as any, + )) as Array + expect(results).toHaveLength(1) + }) + }) + + describe('get_skill', () => { + it('returns an error object for a missing skill', async () => { + const storage = createMemorySkillStorage([]) + const tools = createSkillManagementTools({ storage }) + const tool = getTool(tools, 'get_skill') + const result = (await tool.execute!( + { name: 'missing' }, + mockContext() as any, + )) as { error?: string } + expect(result.error).toContain('not found') + }) + + it('returns the full skill including code when found', async () => { + const storage = createMemorySkillStorage([ + { + id: '1', + name: 'alpha', + description: 'Alpha', + code: 'return 1;', + inputSchema: { type: 
'object' }, + outputSchema: { type: 'number' }, + usageHints: ['hint'], + dependsOn: [], + trustLevel: 'untrusted', + stats: { executions: 0, successRate: 0 }, + createdAt: '', + updatedAt: '', + }, + ]) + const tools = createSkillManagementTools({ storage }) + const tool = getTool(tools, 'get_skill') + const result = (await tool.execute!( + { name: 'alpha' }, + mockContext() as any, + )) as { + name?: string + code?: string + inputSchema?: string + } + expect(result.name).toBe('alpha') + expect(result.code).toBe('return 1;') + expect(result.inputSchema).toBe('{"type":"object"}') + }) + }) + + describe('register_skill', () => { + it('rejects names starting with external_', async () => { + const storage = createMemorySkillStorage([]) + const tools = createSkillManagementTools({ storage }) + const tool = getTool(tools, 'register_skill') + const result = (await tool.execute!( + validRegisterInput({ name: 'external_evil' }), + mockContext() as any, + )) as { error?: string } + expect(result.error).toContain("cannot start with 'external_'") + }) + + it('rejects names starting with skill_ (redundant prefix)', async () => { + const storage = createMemorySkillStorage([]) + const tools = createSkillManagementTools({ storage }) + const tool = getTool(tools, 'register_skill') + const result = (await tool.execute!( + validRegisterInput({ name: 'skill_duplicate' }), + mockContext() as any, + )) as { error?: string } + expect(result.error).toContain("should not include the 'skill_' prefix") + }) + + it('rejects malformed JSON inputSchema', async () => { + const storage = createMemorySkillStorage([]) + const tools = createSkillManagementTools({ storage }) + const tool = getTool(tools, 'register_skill') + const result = (await tool.execute!( + validRegisterInput({ inputSchema: 'not valid json' }), + mockContext() as any, + )) as { error?: string } + expect(result.error).toContain('inputSchema must be a valid JSON string') + }) + + it('rejects malformed JSON outputSchema', async () 
=> { + const storage = createMemorySkillStorage([]) + const tools = createSkillManagementTools({ storage }) + const tool = getTool(tools, 'register_skill') + const result = (await tool.execute!( + validRegisterInput({ outputSchema: '{' }), + mockContext() as any, + )) as { error?: string } + expect(result.error).toContain('outputSchema must be a valid JSON string') + }) + + it('rejects a duplicate name', async () => { + const storage = createMemorySkillStorage([ + { + id: '1', + name: 'existing', + description: '', + code: '', + inputSchema: {}, + outputSchema: {}, + usageHints: [], + dependsOn: [], + trustLevel: 'untrusted', + stats: { executions: 0, successRate: 0 }, + createdAt: '', + updatedAt: '', + }, + ]) + const tools = createSkillManagementTools({ storage }) + const tool = getTool(tools, 'register_skill') + const result = (await tool.execute!( + validRegisterInput({ name: 'existing' }), + mockContext() as any, + )) as { error?: string } + expect(result.error).toContain('already exists') + }) + + it('persists a valid skill with defaults', async () => { + const storage = createMemorySkillStorage([]) + const tools = createSkillManagementTools({ storage }) + const tool = getTool(tools, 'register_skill') + const result = (await tool.execute!( + validRegisterInput({ name: 'valid_skill' }), + mockContext() as any, + )) as { success?: boolean; skillId?: string } + expect(result.success).toBe(true) + expect(result.skillId).toMatch(/^[0-9a-f-]{36}$/) + + const saved = await storage.get('valid_skill') + expect(saved).not.toBeNull() + expect(saved!.stats).toEqual({ executions: 0, successRate: 0 }) + }) + + it('applies the trust strategy to set initial trust level', async () => { + const storage = createMemorySkillStorage([]) + const tools = createSkillManagementTools({ + storage, + trustStrategy: createAlwaysTrustedStrategy(), + }) + const tool = getTool(tools, 'register_skill') + await tool.execute!( + validRegisterInput({ name: 's1' }), + mockContext() as any, + ) + 
const saved = await storage.get('s1') + expect(saved!.trustLevel).toBe('trusted') + }) + + it('prefers explicit trustStrategy over storage.trustStrategy', async () => { + const storage = createMemorySkillStorage({ + trustStrategy: createAlwaysTrustedStrategy(), + }) + const tools = createSkillManagementTools({ + storage, + trustStrategy: createDefaultTrustStrategy(), + }) + const tool = getTool(tools, 'register_skill') + await tool.execute!( + validRegisterInput({ name: 's1' }), + mockContext() as any, + ) + const saved = await storage.get('s1') + expect(saved!.trustLevel).toBe('untrusted') + }) + + it('falls back to storage.trustStrategy when none provided', async () => { + const storage = createMemorySkillStorage({ + trustStrategy: createAlwaysTrustedStrategy(), + }) + const tools = createSkillManagementTools({ storage }) + const tool = getTool(tools, 'register_skill') + await tool.execute!( + validRegisterInput({ name: 's1' }), + mockContext() as any, + ) + const saved = await storage.get('s1') + expect(saved!.trustLevel).toBe('trusted') + }) + }) +}) diff --git a/packages/typescript/ai-code-mode-skills/tests/create-skills-system-prompt.test.ts b/packages/typescript/ai-code-mode-skills/tests/create-skills-system-prompt.test.ts new file mode 100644 index 000000000..b9fd1cbbe --- /dev/null +++ b/packages/typescript/ai-code-mode-skills/tests/create-skills-system-prompt.test.ts @@ -0,0 +1,120 @@ +import { describe, expect, it } from 'vitest' +import { createSkillsSystemPrompt } from '../src/create-skills-system-prompt' +import type { Skill } from '../src/types' + +function makeSkill(overrides: Partial = {}): Skill { + return { + id: 'id', + name: 'fetch_data', + description: 'Fetches data', + code: '', + inputSchema: { + type: 'object', + properties: { query: { type: 'string' } }, + required: ['query'], + }, + outputSchema: { type: 'object', properties: {} }, + usageHints: [], + dependsOn: [], + trustLevel: 'untrusted', + stats: { executions: 0, successRate: 0 }, + 
createdAt: '', + updatedAt: '', + ...overrides, + } +} + +describe('createSkillsSystemPrompt', () => { + it('returns the empty-library prompt when totalSkillCount is 0', () => { + const prompt = createSkillsSystemPrompt({ + selectedSkills: [], + totalSkillCount: 0, + }) + expect(prompt).toContain('library is currently empty') + expect(prompt).toContain('register_skill') + }) + + it('returns the no-selected-skills prompt when skills exist but none selected', () => { + const prompt = createSkillsSystemPrompt({ + selectedSkills: [], + totalSkillCount: 12, + }) + expect(prompt).toContain('persistent skill library with 12 skills') + expect(prompt).toContain('No skills were pre-loaded') + }) + + it('uses singular wording for a single skill in library', () => { + const prompt = createSkillsSystemPrompt({ + selectedSkills: [], + totalSkillCount: 1, + }) + expect(prompt).toContain('library with 1 skill.') + expect(prompt).not.toContain('with 1 skills') + }) + + it('documents selected skills as direct tools when skillsAsTools=true', () => { + const skill = makeSkill({ + name: 'fetch_github', + description: 'Fetches GitHub data', + }) + const prompt = createSkillsSystemPrompt({ + selectedSkills: [skill], + totalSkillCount: 1, + skillsAsTools: true, + }) + expect(prompt).toContain('### fetch_github') + expect(prompt).toContain('[SKILL]') + expect(prompt).toContain('Fetches GitHub data') + expect(prompt).not.toContain('skill_fetch_github(') + }) + + it('documents selected skills as sandbox bindings when skillsAsTools=false', () => { + const skill = makeSkill({ name: 'fetch_github' }) + const prompt = createSkillsSystemPrompt({ + selectedSkills: [skill], + totalSkillCount: 1, + skillsAsTools: false, + }) + expect(prompt).toContain('skill_fetch_github') + expect(prompt).toContain('### Type Definitions') + expect(prompt).toContain('declare function skill_fetch_github') + }) + + it('renders a trust badge reflecting the skill trust level', () => { + const trusted = makeSkill({ name: 
'a', trustLevel: 'trusted' }) + const provisional = makeSkill({ name: 'b', trustLevel: 'provisional' }) + const untrusted = makeSkill({ name: 'c', trustLevel: 'untrusted' }) + + const prompt = createSkillsSystemPrompt({ + selectedSkills: [trusted, provisional, untrusted], + totalSkillCount: 3, + skillsAsTools: true, + }) + + expect(prompt).toContain('✓ trusted') + expect(prompt).toContain('◐ provisional') + expect(prompt).toContain('○ untrusted') + }) + + it('defaults to skillsAsTools=true when not specified', () => { + const skill = makeSkill({ name: 'default_mode' }) + const prompt = createSkillsSystemPrompt({ + selectedSkills: [skill], + totalSkillCount: 1, + }) + expect(prompt).toContain('### default_mode') + expect(prompt).not.toContain('### Type Definitions') + }) + + it('embeds usageHints as bullet points', () => { + const skill = makeSkill({ + usageHints: ['When comparing X', 'When reducing Y'], + }) + const prompt = createSkillsSystemPrompt({ + selectedSkills: [skill], + totalSkillCount: 1, + }) + expect(prompt).toContain('- When comparing X') + expect(prompt).toContain('- When reducing Y') + }) +}) diff --git a/packages/typescript/ai-code-mode-skills/tests/file-storage.test.ts b/packages/typescript/ai-code-mode-skills/tests/file-storage.test.ts new file mode 100644 index 000000000..0f7d17632 --- /dev/null +++ b/packages/typescript/ai-code-mode-skills/tests/file-storage.test.ts @@ -0,0 +1,147 @@ +import { mkdtemp, rm } from 'node:fs/promises' +import { tmpdir } from 'node:os' +import { join } from 'node:path' +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' +import { createFileSkillStorage } from '../src/storage/file-storage' +import { createAlwaysTrustedStrategy } from '../src/trust-strategies' +import type { SkillStorage } from '../src/types' + +function makeSkillInput(overrides: Partial> = {}) { + return { + id: 'id-1', + name: 'fetch_data', + description: 'Fetches data', + code: 'return input;', + inputSchema: { type: 'object', 
properties: {} }, + outputSchema: { type: 'object', properties: {} }, + usageHints: ['Use for fetching'], + dependsOn: [], + trustLevel: 'untrusted' as const, + stats: { executions: 0, successRate: 0 }, + ...overrides, + } as Parameters[0] +} + +describe('createFileSkillStorage', () => { + let dir: string + let storage: SkillStorage + + beforeEach(async () => { + dir = await mkdtemp(join(tmpdir(), 'skills-test-')) + storage = createFileSkillStorage(dir) + }) + + afterEach(async () => { + await rm(dir, { recursive: true, force: true }) + }) + + it('returns empty index for a fresh directory', async () => { + expect(await storage.loadIndex()).toEqual([]) + }) + + it('creates the directory if it does not exist yet', async () => { + const nested = join(dir, 'nested', 'deep') + const deepStorage = createFileSkillStorage(nested) + await expect(deepStorage.loadIndex()).resolves.toEqual([]) + }) + + it('saves a skill and round-trips it via get', async () => { + const saved = await storage.save(makeSkillInput({ name: 'alpha' })) + expect(saved.createdAt).toBeTruthy() + expect(saved.updatedAt).toBeTruthy() + + const fetched = await storage.get('alpha') + expect(fetched).not.toBeNull() + expect(fetched!.code).toBe('return input;') + }) + + it('persists skills across independent storage instances pointing at the same dir', async () => { + await storage.save(makeSkillInput({ name: 'persistent' })) + + const second = createFileSkillStorage(dir) + const reloaded = await second.get('persistent') + expect(reloaded).not.toBeNull() + expect(reloaded!.name).toBe('persistent') + }) + + it('separates code from metadata on disk', async () => { + await storage.save(makeSkillInput({ name: 'x', code: 'return 42;' })) + const { readFile } = await import('node:fs/promises') + const meta = JSON.parse( + await readFile(join(dir, 'x', 'meta.json'), 'utf-8'), + ) + const code = await readFile(join(dir, 'x', 'code.ts'), 'utf-8') + expect(meta).not.toHaveProperty('code') + expect(code).toBe('return 
42;') + }) + + it('preserves createdAt when updating an existing skill', async () => { + // Deterministic clock: real timer sleeps are flaky because + // Date.prototype.toISOString() has millisecond resolution and on fast + // machines two saves can land in the same millisecond. + vi.useFakeTimers() + try { + vi.setSystemTime(new Date('2026-01-01T00:00:00.000Z')) + const first = await storage.save(makeSkillInput({ name: 'x' })) + vi.setSystemTime(new Date('2026-01-01T00:00:01.000Z')) + const second = await storage.save( + makeSkillInput({ name: 'x', description: 'updated' }), + ) + expect(second.createdAt).toBe(first.createdAt) + expect(second.updatedAt).not.toBe(first.updatedAt) + expect(second.description).toBe('updated') + } finally { + vi.useRealTimers() + } + }) + + it('deletes a skill including its directory and index entry', async () => { + await storage.save(makeSkillInput({ name: 'doomed' })) + expect(await storage.delete('doomed')).toBe(true) + expect(await storage.get('doomed')).toBeNull() + expect(await storage.loadIndex()).toEqual([]) + }) + + it('returns false when deleting a missing skill', async () => { + expect(await storage.delete('missing')).toBe(false) + }) + + it('searches via matching and respects limit', async () => { + await storage.save(makeSkillInput({ id: '1', name: 'github_stats' })) + await storage.save(makeSkillInput({ id: '2', name: 'npm_search' })) + await storage.save(makeSkillInput({ id: '3', name: 'other_github_tool' })) + + const results = await storage.search('github', { limit: 2 }) + expect(results).toHaveLength(2) + for (const r of results) { + expect(r.name).toContain('github') + } + }) + + it('updateStats increments and applies the trust strategy', async () => { + const alwaysTrusted = createFileSkillStorage({ + directory: dir, + trustStrategy: createAlwaysTrustedStrategy(), + }) + await alwaysTrusted.save( + makeSkillInput({ name: 'x', trustLevel: 'untrusted' }), + ) + await alwaysTrusted.updateStats('x', true) + const 
after = await alwaysTrusted.get('x') + expect(after!.stats.executions).toBe(1) + expect(after!.trustLevel).toBe('trusted') + }) + + it('updateStats is a no-op when skill does not exist', async () => { + await expect(storage.updateStats('missing', true)).resolves.toBeUndefined() + }) + + it('exposes the configured trust strategy', () => { + const strategy = createAlwaysTrustedStrategy() + const s = createFileSkillStorage({ + directory: dir, + trustStrategy: strategy, + }) + expect(s.trustStrategy).toBe(strategy) + }) +}) diff --git a/packages/typescript/ai-code-mode-skills/tests/generate-skill-types.test.ts b/packages/typescript/ai-code-mode-skills/tests/generate-skill-types.test.ts new file mode 100644 index 000000000..101f6921b --- /dev/null +++ b/packages/typescript/ai-code-mode-skills/tests/generate-skill-types.test.ts @@ -0,0 +1,181 @@ +import { describe, expect, it } from 'vitest' +import { generateSkillTypes } from '../src/generate-skill-types' +import type { Skill } from '../src/types' + +function makeSkill(overrides: Partial = {}): Skill { + return { + id: 'id', + name: 'skill_name', + description: 'A skill', + code: '', + inputSchema: { type: 'object', properties: {} }, + outputSchema: { type: 'object', properties: {} }, + usageHints: [], + dependsOn: [], + trustLevel: 'untrusted', + stats: { executions: 0, successRate: 0 }, + createdAt: '', + updatedAt: '', + ...overrides, + } +} + +describe('generateSkillTypes', () => { + it('returns empty string for empty skills array', () => { + expect(generateSkillTypes([])).toBe('') + }) + + it('generates declare function with snake_case name preserved', () => { + const skill = makeSkill({ + name: 'fetch_stats', + inputSchema: { type: 'string' }, + outputSchema: { type: 'number' }, + }) + const result = generateSkillTypes([skill]) + expect(result).toContain('declare function skill_fetch_stats') + expect(result).toContain('Promise') + }) + + it('inlines primitive input/output types', () => { + const skill = 
makeSkill({ + inputSchema: { type: 'string' }, + outputSchema: { type: 'boolean' }, + }) + const result = generateSkillTypes([skill]) + expect(result).toContain('input: string') + expect(result).toContain('Promise') + }) + + it('creates interface for object input with properties', () => { + const skill = makeSkill({ + name: 'fetch_data', + inputSchema: { + type: 'object', + properties: { + owner: { type: 'string' }, + repo: { type: 'string' }, + }, + required: ['owner', 'repo'], + }, + outputSchema: { type: 'string' }, + }) + const result = generateSkillTypes([skill]) + expect(result).toContain('interface SkillFetchDataInput') + expect(result).toContain('owner: string') + expect(result).toContain('repo: string') + expect(result).toContain('input: SkillFetchDataInput') + }) + + it('marks non-required properties as optional', () => { + const skill = makeSkill({ + inputSchema: { + type: 'object', + properties: { + required_field: { type: 'string' }, + optional_field: { type: 'number' }, + }, + required: ['required_field'], + }, + outputSchema: { type: 'string' }, + }) + const result = generateSkillTypes([skill]) + expect(result).toContain('required_field: string') + expect(result).toContain('optional_field?: number') + }) + + it('quotes property names that are not valid identifiers', () => { + const skill = makeSkill({ + inputSchema: { + type: 'object', + properties: { + 'with-dash': { type: 'string' }, + '123numeric': { type: 'string' }, + }, + required: [], + }, + outputSchema: { type: 'string' }, + }) + const result = generateSkillTypes([skill]) + expect(result).toContain('"with-dash"') + expect(result).toContain('"123numeric"') + }) + + it('converts array schemas to Array', () => { + const skill = makeSkill({ + inputSchema: { type: 'array', items: { type: 'string' } }, + outputSchema: { type: 'array' }, + }) + const result = generateSkillTypes([skill]) + expect(result).toContain('input: Array') + expect(result).toContain('Promise>') + }) + + it('converts enum 
schemas to a union of string literals', () => { + const skill = makeSkill({ + inputSchema: { enum: ['red', 'green', 'blue'] }, + outputSchema: { type: 'string' }, + }) + const result = generateSkillTypes([skill]) + expect(result).toContain('"red" | "green" | "blue"') + }) + + it('converts anyOf / oneOf to a union type', () => { + const skill = makeSkill({ + inputSchema: { + anyOf: [{ type: 'string' }, { type: 'number' }], + }, + outputSchema: { type: 'string' }, + }) + const result = generateSkillTypes([skill]) + expect(result).toContain('string | number') + }) + + it('handles type arrays like ["string", "null"]', () => { + const skill = makeSkill({ + inputSchema: { type: ['string', 'null'] }, + outputSchema: { type: 'string' }, + }) + const result = generateSkillTypes([skill]) + expect(result).toContain('string | null') + }) + + it('embeds usageHints as @hint JSDoc tags', () => { + const skill = makeSkill({ + usageHints: ['Use when searching', 'Also good for filtering'], + inputSchema: { type: 'string' }, + outputSchema: { type: 'string' }, + }) + const result = generateSkillTypes([skill]) + expect(result).toContain('@hint Use when searching') + expect(result).toContain('@hint Also good for filtering') + }) + + it('falls back to unknown for schemas it cannot represent', () => { + const skill = makeSkill({ + inputSchema: { mystery: true } as Record, + outputSchema: { type: 'string' }, + }) + const result = generateSkillTypes([skill]) + expect(result).toContain('input: unknown') + }) + + it('handles multiple skills in order', () => { + const skills = [ + makeSkill({ + name: 'first', + inputSchema: { type: 'string' }, + outputSchema: { type: 'string' }, + }), + makeSkill({ + name: 'second', + inputSchema: { type: 'number' }, + outputSchema: { type: 'number' }, + }), + ] + const result = generateSkillTypes(skills) + const firstIdx = result.indexOf('skill_first') + const secondIdx = result.indexOf('skill_second') + expect(firstIdx).toBeGreaterThan(-1) + 
expect(secondIdx).toBeGreaterThan(firstIdx) + }) +}) diff --git a/packages/typescript/ai-code-mode-skills/tests/memory-storage.test.ts b/packages/typescript/ai-code-mode-skills/tests/memory-storage.test.ts new file mode 100644 index 000000000..da227a134 --- /dev/null +++ b/packages/typescript/ai-code-mode-skills/tests/memory-storage.test.ts @@ -0,0 +1,250 @@ +import { describe, expect, it } from 'vitest' +import { createMemorySkillStorage } from '../src/storage/memory-storage' +import { + createAlwaysTrustedStrategy, + createRelaxedTrustStrategy, +} from '../src/trust-strategies' +import type { Skill } from '../src/types' + +function makeSkill(overrides: Partial = {}): Skill { + return { + id: overrides.id ?? 'skill-1', + name: overrides.name ?? 'fetch_data', + description: overrides.description ?? 'Fetches data from an API', + code: overrides.code ?? 'return { ok: true };', + inputSchema: overrides.inputSchema ?? { type: 'object', properties: {} }, + outputSchema: overrides.outputSchema ?? { type: 'object', properties: {} }, + usageHints: overrides.usageHints ?? ['Use when fetching'], + dependsOn: overrides.dependsOn ?? [], + trustLevel: overrides.trustLevel ?? 'untrusted', + stats: overrides.stats ?? { executions: 0, successRate: 0 }, + createdAt: overrides.createdAt ?? '2026-01-01T00:00:00.000Z', + updatedAt: overrides.updatedAt ?? 
'2026-01-01T00:00:00.000Z', + } +} + +describe('createMemorySkillStorage', () => { + describe('initialization', () => { + it('accepts an empty array', async () => { + const storage = createMemorySkillStorage([]) + expect(await storage.loadAll()).toEqual([]) + }) + + it('accepts an array of initial skills', async () => { + const skill = makeSkill() + const storage = createMemorySkillStorage([skill]) + expect(await storage.loadAll()).toHaveLength(1) + }) + + it('accepts an options object with initialSkills', async () => { + const skill = makeSkill() + const storage = createMemorySkillStorage({ initialSkills: [skill] }) + expect(await storage.loadAll()).toHaveLength(1) + }) + + it('exposes configured trust strategy', () => { + const strategy = createAlwaysTrustedStrategy() + const storage = createMemorySkillStorage({ trustStrategy: strategy }) + expect(storage.trustStrategy).toBe(strategy) + }) + }) + + describe('loadIndex', () => { + it('returns lightweight entries without code', async () => { + const skill = makeSkill({ code: 'return secret_value;' }) + const storage = createMemorySkillStorage([skill]) + const index = await storage.loadIndex() + expect(index).toHaveLength(1) + expect(index[0]).not.toHaveProperty('code') + expect(index[0]).toHaveProperty('name', skill.name) + expect(index[0]).toHaveProperty('trustLevel', skill.trustLevel) + }) + }) + + describe('get', () => { + it('returns null for a missing skill', async () => { + const storage = createMemorySkillStorage([]) + expect(await storage.get('nonexistent')).toBeNull() + }) + + it('returns the skill when it exists', async () => { + const skill = makeSkill({ name: 'alpha' }) + const storage = createMemorySkillStorage([skill]) + expect(await storage.get('alpha')).toEqual(skill) + }) + }) + + describe('save', () => { + it('creates a new skill with timestamps', async () => { + const storage = createMemorySkillStorage([]) + const saved = await storage.save({ + id: 'x', + name: 'new_skill', + description: 'd', + 
code: 'c', + inputSchema: {}, + outputSchema: {}, + usageHints: [], + dependsOn: [], + trustLevel: 'untrusted', + stats: { executions: 0, successRate: 0 }, + }) + expect(saved.createdAt).toBeTruthy() + expect(saved.updatedAt).toBeTruthy() + }) + + it('preserves createdAt when updating an existing skill', async () => { + const existing = makeSkill({ + name: 'x', + createdAt: '2020-01-01T00:00:00.000Z', + }) + const storage = createMemorySkillStorage([existing]) + + const updated = await storage.save({ + id: existing.id, + name: existing.name, + description: 'new description', + code: existing.code, + inputSchema: existing.inputSchema, + outputSchema: existing.outputSchema, + usageHints: existing.usageHints, + dependsOn: existing.dependsOn, + trustLevel: existing.trustLevel, + stats: existing.stats, + }) + + expect(updated.createdAt).toBe('2020-01-01T00:00:00.000Z') + expect(updated.updatedAt).not.toBe('2020-01-01T00:00:00.000Z') + expect(updated.description).toBe('new description') + }) + }) + + describe('delete', () => { + it('returns false when skill does not exist', async () => { + const storage = createMemorySkillStorage([]) + expect(await storage.delete('nothing')).toBe(false) + }) + + it('returns true and removes the skill when it exists', async () => { + const storage = createMemorySkillStorage([makeSkill({ name: 'x' })]) + expect(await storage.delete('x')).toBe(true) + expect(await storage.get('x')).toBeNull() + }) + }) + + describe('search', () => { + it('returns empty array when no skills match', async () => { + const storage = createMemorySkillStorage([makeSkill()]) + const results = await storage.search('completely unrelated query') + expect(results).toEqual([]) + }) + + it('matches on name, description, and usageHints', async () => { + const storage = createMemorySkillStorage([ + makeSkill({ + name: 'github_stats', + description: 'Fetches GitHub repository statistics', + usageHints: ['Use for repo analysis'], + }), + makeSkill({ + id: 'skill-2', + name: 
'npm_search', + description: 'Search the npm registry', + usageHints: ['Use for packages'], + }), + ]) + + const results = await storage.search('github') + expect(results).toHaveLength(1) + expect(results[0]!.name).toBe('github_stats') + }) + + it('boosts exact name matches over description-only matches', async () => { + const storage = createMemorySkillStorage([ + makeSkill({ + id: 'a', + name: 'widget', + description: 'Just a description', + }), + makeSkill({ + id: 'b', + name: 'processor', + description: 'Processes widget data', + }), + ]) + + const results = await storage.search('widget') + expect(results[0]!.name).toBe('widget') + }) + + it('respects the limit option', async () => { + const storage = createMemorySkillStorage([ + makeSkill({ id: '1', name: 'data_one' }), + makeSkill({ id: '2', name: 'data_two' }), + makeSkill({ id: '3', name: 'data_three' }), + ]) + const results = await storage.search('data', { limit: 2 }) + expect(results).toHaveLength(2) + }) + }) + + describe('updateStats', () => { + it('is a no-op when the skill does not exist', async () => { + const storage = createMemorySkillStorage([]) + await expect( + storage.updateStats('nothing', true), + ).resolves.toBeUndefined() + }) + + it('increments execution count and recalculates success rate', async () => { + const storage = createMemorySkillStorage([ + makeSkill({ name: 'x', stats: { executions: 0, successRate: 0 } }), + ]) + await storage.updateStats('x', true) + const after = await storage.get('x') + expect(after!.stats.executions).toBe(1) + expect(after!.stats.successRate).toBe(1) + }) + + it('computes a running success rate across failures and successes', async () => { + const storage = createMemorySkillStorage([ + makeSkill({ name: 'x', stats: { executions: 0, successRate: 0 } }), + ]) + await storage.updateStats('x', true) + await storage.updateStats('x', false) + const after = await storage.get('x') + expect(after!.stats.executions).toBe(2) + 
expect(after!.stats.successRate).toBe(0.5) + }) + + it('promotes trust level when stats cross the strategy threshold', async () => { + const storage = createMemorySkillStorage({ + initialSkills: [ + makeSkill({ + name: 'x', + trustLevel: 'untrusted', + stats: { executions: 0, successRate: 0 }, + }), + ], + trustStrategy: createRelaxedTrustStrategy(), + }) + await storage.updateStats('x', true) + await storage.updateStats('x', true) + await storage.updateStats('x', true) + const after = await storage.get('x') + expect(after!.trustLevel).toBe('provisional') + }) + + it('updates the updatedAt timestamp', async () => { + const storage = createMemorySkillStorage([ + makeSkill({ + name: 'x', + updatedAt: '2020-01-01T00:00:00.000Z', + }), + ]) + await storage.updateStats('x', true) + const after = await storage.get('x') + expect(after!.updatedAt).not.toBe('2020-01-01T00:00:00.000Z') + }) + }) +}) diff --git a/packages/typescript/ai-code-mode-skills/tests/select-relevant-skills.test.ts b/packages/typescript/ai-code-mode-skills/tests/select-relevant-skills.test.ts new file mode 100644 index 000000000..a8bb3492c --- /dev/null +++ b/packages/typescript/ai-code-mode-skills/tests/select-relevant-skills.test.ts @@ -0,0 +1,181 @@ +import { describe, expect, it, vi } from 'vitest' +import { selectRelevantSkills } from '../src/select-relevant-skills' +import { createMemorySkillStorage } from '../src/storage/memory-storage' +import type { AnyTextAdapter, ModelMessage } from '@tanstack/ai' +import type { Skill } from '../src/types' + +const chatMock = vi.hoisted(() => vi.fn()) +vi.mock('@tanstack/ai', async (importOriginal) => { + const actual = (await importOriginal()) as Record + return { ...actual, chat: chatMock } +}) + +function makeSkill(overrides: Partial = {}): Skill { + return { + id: 'id', + name: 'fetch_data', + description: 'Fetches data', + code: 'return 1;', + inputSchema: {}, + outputSchema: {}, + usageHints: [], + dependsOn: [], + trustLevel: 'untrusted', + stats: { 
executions: 0, successRate: 0 }, + createdAt: '', + updatedAt: '', + ...overrides, + } +} + +function streamChunks(text: string) { + return (async function* () { + yield { type: 'TEXT_MESSAGE_CONTENT' as const, delta: text } + })() +} + +const dummyAdapter = {} as AnyTextAdapter +const userMessage: ModelMessage = { + role: 'user', + content: 'please use github tool', +} + +describe('selectRelevantSkills', () => { + it('returns empty array when the skill index is empty', async () => { + const storage = createMemorySkillStorage([]) + const result = await selectRelevantSkills({ + adapter: dummyAdapter, + messages: [userMessage], + skillIndex: [], + maxSkills: 5, + storage, + }) + expect(result).toEqual([]) + expect(chatMock).not.toHaveBeenCalled() + }) + + it('returns empty array when there are no messages', async () => { + const storage = createMemorySkillStorage([makeSkill({ name: 'x' })]) + const result = await selectRelevantSkills({ + adapter: dummyAdapter, + messages: [], + skillIndex: await storage.loadIndex(), + maxSkills: 5, + storage, + }) + expect(result).toEqual([]) + expect(chatMock).not.toHaveBeenCalled() + }) + + it('returns skills whose names were selected by the model', async () => { + const skill = makeSkill({ name: 'github_stats' }) + const storage = createMemorySkillStorage([skill]) + chatMock.mockReturnValueOnce(streamChunks('["github_stats"]')) + + const result = await selectRelevantSkills({ + adapter: dummyAdapter, + messages: [userMessage], + skillIndex: await storage.loadIndex(), + maxSkills: 5, + storage, + }) + expect(result).toHaveLength(1) + expect(result[0]!.name).toBe('github_stats') + }) + + it('strips markdown code fences around the JSON response', async () => { + const skill = makeSkill({ name: 'github_stats' }) + const storage = createMemorySkillStorage([skill]) + chatMock.mockReturnValueOnce(streamChunks('```json\n["github_stats"]\n```')) + + const result = await selectRelevantSkills({ + adapter: dummyAdapter, + messages: 
[userMessage], + skillIndex: await storage.loadIndex(), + maxSkills: 5, + storage, + }) + expect(result).toHaveLength(1) + }) + + it('returns an empty array when the model response is not an array', async () => { + const skill = makeSkill({ name: 'github_stats' }) + const storage = createMemorySkillStorage([skill]) + chatMock.mockReturnValueOnce(streamChunks('{"not": "an array"}')) + + const result = await selectRelevantSkills({ + adapter: dummyAdapter, + messages: [userMessage], + skillIndex: await storage.loadIndex(), + maxSkills: 5, + storage, + }) + expect(result).toEqual([]) + }) + + it('returns empty array when JSON parsing fails (safe fallback)', async () => { + const skill = makeSkill({ name: 'github_stats' }) + const storage = createMemorySkillStorage([skill]) + chatMock.mockReturnValueOnce(streamChunks('not json at all')) + + const result = await selectRelevantSkills({ + adapter: dummyAdapter, + messages: [userMessage], + skillIndex: await storage.loadIndex(), + maxSkills: 5, + storage, + }) + expect(result).toEqual([]) + }) + + it('truncates model selections to maxSkills', async () => { + const storage = createMemorySkillStorage([ + makeSkill({ id: '1', name: 'a' }), + makeSkill({ id: '2', name: 'b' }), + makeSkill({ id: '3', name: 'c' }), + ]) + chatMock.mockReturnValueOnce(streamChunks('["a","b","c"]')) + + const result = await selectRelevantSkills({ + adapter: dummyAdapter, + messages: [userMessage], + skillIndex: await storage.loadIndex(), + maxSkills: 2, + storage, + }) + expect(result).toHaveLength(2) + }) + + it('filters out skill names that no longer resolve in storage', async () => { + const skill = makeSkill({ name: 'still_exists' }) + const storage = createMemorySkillStorage([skill]) + chatMock.mockReturnValueOnce( + streamChunks('["still_exists","deleted_skill"]'), + ) + + const result = await selectRelevantSkills({ + adapter: dummyAdapter, + messages: [userMessage], + skillIndex: await storage.loadIndex(), + maxSkills: 5, + storage, + }) + 
expect(result).toHaveLength(1) + expect(result[0]!.name).toBe('still_exists') + }) + + it('returns empty array when the chat stream throws', async () => { + const storage = createMemorySkillStorage([makeSkill({ name: 'x' })]) + chatMock.mockImplementationOnce(() => { + throw new Error('network down') + }) + const result = await selectRelevantSkills({ + adapter: dummyAdapter, + messages: [userMessage], + skillIndex: await storage.loadIndex(), + maxSkills: 5, + storage, + }) + expect(result).toEqual([]) + }) +}) diff --git a/packages/typescript/ai-code-mode-skills/tests/skills-to-bindings.test.ts b/packages/typescript/ai-code-mode-skills/tests/skills-to-bindings.test.ts new file mode 100644 index 000000000..6d832aab6 --- /dev/null +++ b/packages/typescript/ai-code-mode-skills/tests/skills-to-bindings.test.ts @@ -0,0 +1,201 @@ +import { describe, expect, it, vi } from 'vitest' +import { + skillsToBindings, + skillsToSimpleBindings, +} from '../src/skills-to-bindings' +import { createMemorySkillStorage } from '../src/storage/memory-storage' +import type { Skill } from '../src/types' + +function makeSkill(overrides: Partial = {}): Skill { + return { + id: 'id', + name: 'sample', + description: 'Sample skill', + code: 'return input.value * 2;', + inputSchema: { type: 'object', properties: {} }, + outputSchema: { type: 'object', properties: {} }, + usageHints: [], + dependsOn: [], + trustLevel: 'untrusted', + stats: { executions: 0, successRate: 0 }, + createdAt: '', + updatedAt: '', + ...overrides, + } +} + +describe('skillsToBindings', () => { + it('prefixes binding names with skill_', () => { + const storage = createMemorySkillStorage([]) + const bindings = skillsToBindings({ + skills: [makeSkill({ name: 'alpha' })], + executeInSandbox: async () => undefined, + storage, + }) + expect(Object.keys(bindings)).toEqual(['skill_alpha']) + }) + + it('serializes input via JSON.stringify into the wrapped code', async () => { + const storage = createMemorySkillStorage([]) + 
const executeInSandbox = vi.fn(async () => 'ok') + const bindings = skillsToBindings({ + skills: [makeSkill({ name: 'x', code: 'return input;' })], + executeInSandbox, + storage, + }) + + await bindings.skill_x!.execute({ value: 42 }) + const call = executeInSandbox.mock.calls[0] as unknown as [string, unknown] + expect(call[0]).toContain('const input = {"value":42}') + expect(call[0]).toContain('return input;') + expect(call[1]).toEqual({ value: 42 }) + }) + + it('emits skill_call then skill_result events on success', async () => { + const storage = createMemorySkillStorage([]) + const emitCustomEvent = vi.fn() + const bindings = skillsToBindings({ + skills: [makeSkill({ name: 'x' })], + executeInSandbox: async () => 42, + storage, + context: { emitCustomEvent } as any, + }) + + await bindings.skill_x!.execute({}) + + const eventNames = emitCustomEvent.mock.calls.map(([name]) => name) + expect(eventNames).toEqual([ + 'code_mode:skill_call', + 'code_mode:skill_result', + ]) + }) + + it('emits skill_error when sandbox execution throws, and re-throws', async () => { + const storage = createMemorySkillStorage([]) + const emitCustomEvent = vi.fn() + const bindings = skillsToBindings({ + skills: [makeSkill({ name: 'x' })], + executeInSandbox: async () => { + throw new Error('boom') + }, + storage, + context: { emitCustomEvent } as any, + }) + + await expect(bindings.skill_x!.execute({})).rejects.toThrow('boom') + const eventNames = emitCustomEvent.mock.calls.map(([name]) => name) + expect(eventNames).toContain('code_mode:skill_error') + }) + + it('updates storage stats with success=true on success', async () => { + const storage = createMemorySkillStorage([ + makeSkill({ name: 'x', stats: { executions: 0, successRate: 0 } }), + ]) + const updateStats = vi.spyOn(storage, 'updateStats') + const bindings = skillsToBindings({ + skills: [makeSkill({ name: 'x' })], + executeInSandbox: async () => 1, + storage, + }) + + await bindings.skill_x!.execute({}) + 
expect(updateStats).toHaveBeenCalledWith('x', true) + }) + + it('updates storage stats with success=false on failure', async () => { + const storage = createMemorySkillStorage([makeSkill({ name: 'x' })]) + const updateStats = vi.spyOn(storage, 'updateStats') + const bindings = skillsToBindings({ + skills: [makeSkill({ name: 'x' })], + executeInSandbox: async () => { + throw new Error('fail') + }, + storage, + }) + + await expect(bindings.skill_x!.execute({})).rejects.toThrow() + expect(updateStats).toHaveBeenCalledWith('x', false) + }) + + it('does not reject if storage.updateStats fails', async () => { + const storage = createMemorySkillStorage([makeSkill({ name: 'x' })]) + storage.updateStats = async () => { + throw new Error('stats broke') + } + const bindings = skillsToBindings({ + skills: [makeSkill({ name: 'x' })], + executeInSandbox: async () => 'ok', + storage, + }) + + await expect(bindings.skill_x!.execute({})).resolves.toBe('ok') + }) + + it('serializes string inputs as JSON strings (prevents code injection via input)', async () => { + const storage = createMemorySkillStorage([]) + const executeInSandbox = vi.fn(async () => null) + const bindings = skillsToBindings({ + skills: [makeSkill({ name: 'x', code: 'return input;' })], + executeInSandbox, + storage, + }) + + // Adversarial payload: attempts to escape the wrapping const-declaration + const malicious = `"); throw new Error("escaped"); ("` + await bindings.skill_x!.execute(malicious) + + const wrappedCode = ( + executeInSandbox.mock.calls[0] as unknown as [string, unknown] + )[0] + // JSON.stringify quotes & escapes the whole thing — it becomes a string literal + expect(wrappedCode).toContain(`const input = ${JSON.stringify(malicious)}`) + // Ensure the raw payload is not present unquoted + expect(wrappedCode).not.toContain( + `const input = "); throw new Error("escaped"); ("`, + ) + }) + + it('forwards the configured input through to executeInSandbox unchanged', async () => { + const storage = 
createMemorySkillStorage([]) + const executeInSandbox = vi.fn(async () => 'ok') + const bindings = skillsToBindings({ + skills: [makeSkill({ name: 'x' })], + executeInSandbox, + storage, + }) + + const input = { complex: { nested: [1, 2] } } + await bindings.skill_x!.execute(input) + expect( + (executeInSandbox.mock.calls[0] as unknown as [string, unknown])[1], + ).toBe(input) + }) +}) + +describe('skillsToSimpleBindings', () => { + it('prefixes names with skill_', () => { + const bindings = skillsToSimpleBindings([makeSkill({ name: 'alpha' })]) + expect(Object.keys(bindings)).toEqual(['skill_alpha']) + }) + + it('exposes metadata without executing anything', () => { + const skill = makeSkill({ + name: 'meta', + description: 'desc', + inputSchema: { type: 'string' }, + outputSchema: { type: 'number' }, + }) + const bindings = skillsToSimpleBindings([skill]) + expect(bindings.skill_meta!.name).toBe('skill_meta') + expect(bindings.skill_meta!.description).toBe('desc') + expect(bindings.skill_meta!.inputSchema).toEqual({ type: 'string' }) + expect(bindings.skill_meta!.outputSchema).toEqual({ type: 'number' }) + }) + + it('execute() throws because execution is not available in this mode', async () => { + const bindings = skillsToSimpleBindings([makeSkill({ name: 'x' })]) + await expect(bindings.skill_x!.execute({})).rejects.toThrow( + /not available for execution/, + ) + }) +}) diff --git a/packages/typescript/ai-code-mode-skills/tests/skills-to-tools.test.ts b/packages/typescript/ai-code-mode-skills/tests/skills-to-tools.test.ts new file mode 100644 index 000000000..c5444a190 --- /dev/null +++ b/packages/typescript/ai-code-mode-skills/tests/skills-to-tools.test.ts @@ -0,0 +1,239 @@ +import { describe, expect, it, vi } from 'vitest' +import { z } from 'zod' +import { toolDefinition } from '@tanstack/ai' +import { skillToTool, skillsToTools } from '../src/skills-to-tools' +import { createMemorySkillStorage } from '../src/storage/memory-storage' +import type { 
IsolateContext, IsolateDriver } from '@tanstack/ai-code-mode' +import type { Skill } from '../src/types' + +function makeSkill(overrides: Partial = {}): Skill { + return { + id: 'id', + name: 'do_thing', + description: 'Does a thing', + code: 'return input.value * 2;', + inputSchema: { + type: 'object', + properties: { value: { type: 'number' } }, + required: ['value'], + }, + outputSchema: { type: 'number' }, + usageHints: [], + dependsOn: [], + trustLevel: 'untrusted', + stats: { executions: 0, successRate: 0 }, + createdAt: '', + updatedAt: '', + ...overrides, + } +} + +function createMockDriver( + result: { success: boolean; value?: unknown; error?: { message: string } } = { + success: true, + value: 42, + }, +): { + driver: IsolateDriver + executeSpy: ReturnType + disposeSpy: ReturnType +} { + const executeSpy = vi.fn().mockResolvedValue({ + ...result, + logs: [], + }) + const disposeSpy = vi.fn().mockResolvedValue(undefined) + const context: IsolateContext = { + execute: executeSpy, + dispose: disposeSpy, + } + const driver: IsolateDriver = { + createContext: vi.fn().mockResolvedValue(context), + } + return { driver, executeSpy, disposeSpy } +} + +const mockContext = () => ({ emitCustomEvent: vi.fn() }) + +describe('skillToTool', () => { + it('prefixes the tool description with [SKILL]', () => { + const { driver } = createMockDriver() + const storage = createMemorySkillStorage([]) + const tool = skillToTool({ + skill: makeSkill({ description: 'Fetches data' }), + driver, + bindings: {}, + storage, + }) + expect(tool.description).toContain('[SKILL]') + expect(tool.description).toContain('Fetches data') + }) + + it('exposes the skill name as the tool name', () => { + const { driver } = createMockDriver() + const storage = createMemorySkillStorage([]) + const tool = skillToTool({ + skill: makeSkill({ name: 'custom_name' }), + driver, + bindings: {}, + storage, + }) + expect(tool.name).toBe('custom_name') + }) + + it('creates an isolate context, executes, returns 
the value, and disposes', async () => { + const { driver, executeSpy, disposeSpy } = createMockDriver({ + success: true, + value: 84, + }) + const storage = createMemorySkillStorage([]) + const tool = skillToTool({ + skill: makeSkill(), + driver, + bindings: {}, + storage, + }) + + const result = await tool.execute!({ value: 42 }, mockContext() as any) + expect(result).toBe(84) + expect(executeSpy).toHaveBeenCalledOnce() + expect(disposeSpy).toHaveBeenCalledOnce() + }) + + it('disposes the isolate context even if execution throws', async () => { + const { driver, disposeSpy } = createMockDriver({ + success: false, + error: { message: 'sandbox error' }, + }) + const storage = createMemorySkillStorage([]) + const tool = skillToTool({ + skill: makeSkill(), + driver, + bindings: {}, + storage, + }) + + await expect( + tool.execute!({ value: 1 }, mockContext() as any), + ).rejects.toThrow('sandbox error') + expect(disposeSpy).toHaveBeenCalledOnce() + }) + + it('emits skill_call then skill_result events on success', async () => { + const { driver } = createMockDriver({ success: true, value: 'ok' }) + const storage = createMemorySkillStorage([]) + const tool = skillToTool({ + skill: makeSkill({ name: 'x' }), + driver, + bindings: {}, + storage, + }) + const ctx = mockContext() + await tool.execute!({ value: 1 }, ctx as any) + const eventNames = (ctx.emitCustomEvent as any).mock.calls.map( + ([name]: [string]) => name, + ) + expect(eventNames).toEqual([ + 'code_mode:skill_call', + 'code_mode:skill_result', + ]) + }) + + it('emits skill_error when execution fails', async () => { + const { driver } = createMockDriver({ + success: false, + error: { message: 'boom' }, + }) + const storage = createMemorySkillStorage([]) + const tool = skillToTool({ + skill: makeSkill({ name: 'x' }), + driver, + bindings: {}, + storage, + }) + const ctx = mockContext() + await expect(tool.execute!({ value: 1 }, ctx as any)).rejects.toThrow( + 'boom', + ) + const eventNames = (ctx.emitCustomEvent 
as any).mock.calls.map( + ([name]: [string]) => name, + ) + expect(eventNames).toContain('code_mode:skill_error') + }) + + it('records stats (success=true) on success', async () => { + const { driver } = createMockDriver() + const storage = createMemorySkillStorage([makeSkill({ name: 'x' })]) + const spy = vi.spyOn(storage, 'updateStats') + const tool = skillToTool({ + skill: makeSkill({ name: 'x' }), + driver, + bindings: {}, + storage, + }) + await tool.execute!({ value: 1 }, mockContext() as any) + expect(spy).toHaveBeenCalledWith('x', true) + }) + + it('records stats (success=false) on failure', async () => { + const { driver } = createMockDriver({ + success: false, + error: { message: 'no' }, + }) + const storage = createMemorySkillStorage([makeSkill({ name: 'x' })]) + const spy = vi.spyOn(storage, 'updateStats') + const tool = skillToTool({ + skill: makeSkill({ name: 'x' }), + driver, + bindings: {}, + storage, + }) + await expect( + tool.execute!({ value: 1 }, mockContext() as any), + ).rejects.toThrow() + expect(spy).toHaveBeenCalledWith('x', false) + }) + + it('serializes input as a JSON literal in the sandbox code, preventing injection', async () => { + const { driver, executeSpy } = createMockDriver() + const storage = createMemorySkillStorage([]) + const tool = skillToTool({ + skill: makeSkill(), + driver, + bindings: {}, + storage, + }) + + // Zod requires a number; test injection via a nested field instead + await tool.execute!({ value: 1 }, mockContext() as any) + + const code = executeSpy.mock.calls[0]![0] + // esbuild reformats output, so compare as normalized JSON literal + expect(code.replace(/\s+/g, '')).toContain('constinput={"value":1}') + }) +}) + +describe('skillsToTools', () => { + it('returns one ServerTool per skill', () => { + const { driver } = createMockDriver() + const storage = createMemorySkillStorage([]) + const tools = skillsToTools({ + skills: [ + makeSkill({ id: '1', name: 'a' }), + makeSkill({ id: '2', name: 'b' }), + ], + 
driver, + tools: [ + toolDefinition({ + name: 'helper', + description: 'h', + inputSchema: z.object({ q: z.string() }), + outputSchema: z.object({ r: z.string() }), + }).server(async (i: any) => ({ r: i.q })), + ], + storage, + }) + expect(tools).toHaveLength(2) + expect(tools.map((t) => t.name)).toEqual(['a', 'b']) + }) +}) diff --git a/packages/typescript/ai-code-mode-skills/tests/trust-strategies.test.ts b/packages/typescript/ai-code-mode-skills/tests/trust-strategies.test.ts new file mode 100644 index 000000000..a70c67155 --- /dev/null +++ b/packages/typescript/ai-code-mode-skills/tests/trust-strategies.test.ts @@ -0,0 +1,131 @@ +import { describe, expect, it } from 'vitest' +import { + createAlwaysTrustedStrategy, + createCustomTrustStrategy, + createDefaultTrustStrategy, + createRelaxedTrustStrategy, +} from '../src/trust-strategies' +import type { SkillStats, TrustLevel } from '../src/types' + +describe('createDefaultTrustStrategy', () => { + it('starts new skills as untrusted', () => { + const strategy = createDefaultTrustStrategy() + expect(strategy.getInitialTrustLevel()).toBe('untrusted') + }) + + it('promotes untrusted → provisional at 10 executions with 90% success', () => { + const strategy = createDefaultTrustStrategy() + const stats: SkillStats = { executions: 10, successRate: 0.9 } + expect(strategy.calculateTrustLevel('untrusted', stats)).toBe('provisional') + }) + + it('does not promote with 9 executions (below threshold)', () => { + const strategy = createDefaultTrustStrategy() + const stats: SkillStats = { executions: 9, successRate: 1.0 } + expect(strategy.calculateTrustLevel('untrusted', stats)).toBe('untrusted') + }) + + it('does not promote at 89% success rate', () => { + const strategy = createDefaultTrustStrategy() + const stats: SkillStats = { executions: 50, successRate: 0.89 } + expect(strategy.calculateTrustLevel('untrusted', stats)).toBe('untrusted') + }) + + it('promotes provisional → trusted at 100 executions with 95% success', () 
=> { + const strategy = createDefaultTrustStrategy() + const stats: SkillStats = { executions: 100, successRate: 0.95 } + expect(strategy.calculateTrustLevel('provisional', stats)).toBe('trusted') + }) + + it('does not promote provisional → trusted at 99 executions', () => { + const strategy = createDefaultTrustStrategy() + const stats: SkillStats = { executions: 99, successRate: 1.0 } + expect(strategy.calculateTrustLevel('provisional', stats)).toBe( + 'provisional', + ) + }) + + it('never downgrades a trusted skill', () => { + const strategy = createDefaultTrustStrategy() + const stats: SkillStats = { executions: 1000, successRate: 0.1 } + expect(strategy.calculateTrustLevel('trusted', stats)).toBe('trusted') + }) + + it('never skips provisional (untrusted cannot jump to trusted)', () => { + const strategy = createDefaultTrustStrategy() + const stats: SkillStats = { executions: 500, successRate: 1.0 } + expect(strategy.calculateTrustLevel('untrusted', stats)).toBe('provisional') + }) +}) + +describe('createAlwaysTrustedStrategy', () => { + it('makes new skills trusted immediately', () => { + const strategy = createAlwaysTrustedStrategy() + expect(strategy.getInitialTrustLevel()).toBe('trusted') + }) + + it('keeps skills trusted regardless of stats', () => { + const strategy = createAlwaysTrustedStrategy() + const levels: Array = ['untrusted', 'provisional', 'trusted'] + for (const level of levels) { + expect( + strategy.calculateTrustLevel(level, { executions: 0, successRate: 0 }), + ).toBe('trusted') + } + }) +}) + +describe('createRelaxedTrustStrategy', () => { + it('starts new skills as untrusted', () => { + const strategy = createRelaxedTrustStrategy() + expect(strategy.getInitialTrustLevel()).toBe('untrusted') + }) + + it('promotes untrusted → provisional at 3 executions with 80% success', () => { + const strategy = createRelaxedTrustStrategy() + const stats: SkillStats = { executions: 3, successRate: 0.8 } + expect(strategy.calculateTrustLevel('untrusted', 
stats)).toBe('provisional') + }) + + it('promotes provisional → trusted at 10 executions with 90% success', () => { + const strategy = createRelaxedTrustStrategy() + const stats: SkillStats = { executions: 10, successRate: 0.9 } + expect(strategy.calculateTrustLevel('provisional', stats)).toBe('trusted') + }) +}) + +describe('createCustomTrustStrategy', () => { + it('respects custom initial level', () => { + const strategy = createCustomTrustStrategy({ initialLevel: 'provisional' }) + expect(strategy.getInitialTrustLevel()).toBe('provisional') + }) + + it('defaults to untrusted initial level when none provided', () => { + const strategy = createCustomTrustStrategy({}) + expect(strategy.getInitialTrustLevel()).toBe('untrusted') + }) + + it('respects custom provisional threshold', () => { + const strategy = createCustomTrustStrategy({ + provisionalThreshold: { executions: 5, successRate: 0.5 }, + }) + const stats: SkillStats = { executions: 5, successRate: 0.5 } + expect(strategy.calculateTrustLevel('untrusted', stats)).toBe('provisional') + }) + + it('respects custom trusted threshold', () => { + const strategy = createCustomTrustStrategy({ + trustedThreshold: { executions: 20, successRate: 0.85 }, + }) + const stats: SkillStats = { executions: 20, successRate: 0.85 } + expect(strategy.calculateTrustLevel('provisional', stats)).toBe('trusted') + }) + + it('does not promote below custom thresholds', () => { + const strategy = createCustomTrustStrategy({ + provisionalThreshold: { executions: 5, successRate: 0.9 }, + }) + const stats: SkillStats = { executions: 5, successRate: 0.8 } + expect(strategy.calculateTrustLevel('untrusted', stats)).toBe('untrusted') + }) +}) diff --git a/packages/typescript/ai-code-mode-skills/tsconfig.json b/packages/typescript/ai-code-mode-skills/tsconfig.json index e5e872741..31b14bdfe 100644 --- a/packages/typescript/ai-code-mode-skills/tsconfig.json +++ b/packages/typescript/ai-code-mode-skills/tsconfig.json @@ -3,6 +3,6 @@ 
"compilerOptions": { "outDir": "dist" }, - "include": ["vite.config.ts", "./src"], + "include": ["vite.config.ts", "./src", "./tests"], "exclude": ["node_modules", "dist", "**/*.config.ts"] } diff --git a/packages/typescript/ai-isolate-cloudflare/README.md b/packages/typescript/ai-isolate-cloudflare/README.md index 30241dad7..aeb922a7a 100644 --- a/packages/typescript/ai-isolate-cloudflare/README.md +++ b/packages/typescript/ai-isolate-cloudflare/README.md @@ -10,13 +10,13 @@ This package runs generated JavaScript in a Worker and keeps `external_*` tool e pnpm add @tanstack/ai-isolate-cloudflare ``` -## Environment Guidance (Conservative) +## Environment Guidance - **Local development:** supported with the package's Miniflare dev server (`pnpm dev:worker`) - **Remote dev:** supported with `wrangler dev --remote` -- **Production:** evaluate carefully before rollout; dynamic code execution with `unsafe_eval` has platform/security constraints and is often treated as an advanced or enterprise setup +- **Production:** supported on Cloudflare accounts with the `unsafe_eval` binding enabled. Before rollout, put the Worker behind authentication (e.g. Cloudflare Access or the `authorization` driver option), rate limiting, and CORS restrictions — running LLM-authored code is a high-trust operation. -If you need a fully local setup without Cloudflare constraints, prefer `@tanstack/ai-isolate-node` or `@tanstack/ai-isolate-quickjs`. +If you want a self-contained host without Cloudflare infrastructure, prefer `@tanstack/ai-isolate-node` or `@tanstack/ai-isolate-quickjs`. ## Quick Start @@ -68,7 +68,15 @@ From this package directory: pnpm dev:worker ``` -This starts a local Worker endpoint (default `http://localhost:8787`) with `UNSAFE_EVAL` configured for local testing. +This starts a local Worker endpoint (default `http://localhost:8787`) with the `UNSAFE_EVAL` binding configured in `wrangler.toml`. 
+ +### Option 3: Production deployment + +```bash +wrangler deploy +``` + +The same `wrangler.toml` `[[unsafe.bindings]]` configuration applies in production. Deploying requires that your Cloudflare account has `unsafe_eval` enabled; without it, the Worker returns an `UnsafeEvalNotAvailable` error. Because this Worker executes LLM-generated code, only deploy it behind authentication, rate limiting, and an allow-listed origin. ### Option 2: Wrangler remote dev diff --git a/packages/typescript/ai-isolate-cloudflare/src/worker/index.ts b/packages/typescript/ai-isolate-cloudflare/src/worker/index.ts index 38d883c33..2e6d4ff79 100644 --- a/packages/typescript/ai-isolate-cloudflare/src/worker/index.ts +++ b/packages/typescript/ai-isolate-cloudflare/src/worker/index.ts @@ -17,8 +17,11 @@ import { wrapCode } from './wrap-code' import type { ExecuteRequest, ExecuteResponse, ToolCallRequest } from '../types' /** - * UnsafeEval binding type - * This is only available in local development with wrangler dev + * UnsafeEval binding type. + * + * Provides dynamic-code execution against the Worker's V8 isolate. Available + * locally (via wrangler dev) and in production deployments where the + * `unsafe_eval` binding has been enabled on the Cloudflare account. */ interface UnsafeEval { eval: (code: string) => unknown @@ -26,8 +29,7 @@ interface UnsafeEval { interface Env { /** - * UnsafeEval binding - provides eval() for local development - * Configured in wrangler.toml as an unsafe binding + * UnsafeEval binding. Configured in wrangler.toml as an unsafe binding. */ UNSAFE_EVAL?: UnsafeEval } @@ -49,8 +51,10 @@ async function executeCode( name: 'UnsafeEvalNotAvailable', message: 'UNSAFE_EVAL binding is not available. ' + - 'This Worker requires the unsafe_eval binding for local development. ' + - 'For production, consider using Workers for Platforms.', + 'This Worker requires the unsafe_eval binding. 
' + + 'Declare it in wrangler.toml under [[unsafe.bindings]] ' + + '(works for local development and production where the ' + + 'account has unsafe_eval enabled).', }, } } @@ -63,8 +67,7 @@ async function executeCode( const timeoutId = setTimeout(() => controller.abort(), timeout) try { - // Use UNSAFE_EVAL binding to execute the code - // This is only available in local development with wrangler dev + // Execute the wrapped code through the UNSAFE_EVAL binding. const result = (await env.UNSAFE_EVAL.eval(wrappedCode)) as { status: string success?: boolean diff --git a/packages/typescript/ai-isolate-cloudflare/src/worker/wrap-code.ts b/packages/typescript/ai-isolate-cloudflare/src/worker/wrap-code.ts index f91e72c57..72aafb6c8 100644 --- a/packages/typescript/ai-isolate-cloudflare/src/worker/wrap-code.ts +++ b/packages/typescript/ai-isolate-cloudflare/src/worker/wrap-code.ts @@ -5,6 +5,77 @@ import type { ToolResultPayload, ToolSchema } from '../types' +// Tool names are interpolated into generated JS as (1) function identifiers +// and (2) string literals. Rejecting anything outside this pattern closes +// the injection vector that would otherwise let a malicious tool name +// break out of the wrapper. +const VALID_TOOL_NAME = /^[a-zA-Z_$][a-zA-Z0-9_$]*$/ + +// Reserved words and contextual keywords that look like identifiers but can't +// be used as JS function names. Catching them here gives callers a clear +// "Invalid tool name" error at generation time instead of a cryptic +// SyntaxError when the wrapped code is eval'd. 
+const RESERVED_TOOL_NAMES = new Set([ + 'break', + 'case', + 'catch', + 'class', + 'const', + 'continue', + 'debugger', + 'default', + 'delete', + 'do', + 'else', + 'enum', + 'export', + 'extends', + 'false', + 'finally', + 'for', + 'function', + 'if', + 'import', + 'in', + 'instanceof', + 'new', + 'null', + 'return', + 'super', + 'switch', + 'this', + 'throw', + 'true', + 'try', + 'typeof', + 'var', + 'void', + 'while', + 'with', + 'yield', + 'let', + 'static', + 'implements', + 'interface', + 'package', + 'private', + 'protected', + 'public', + 'await', + 'async', +]) + +function assertSafeToolName(name: string): void { + if (!VALID_TOOL_NAME.test(name)) { + throw new Error( + `Invalid tool name '${name}': must match ${VALID_TOOL_NAME} (letters, digits, _, $; cannot start with a digit)`, + ) + } + if (RESERVED_TOOL_NAMES.has(name)) { + throw new Error(`Invalid tool name '${name}': reserved JavaScript keyword`) + } +} + /** * Generate tool wrapper code that collects calls or returns cached results. * @@ -19,6 +90,7 @@ export function generateToolWrappers( const wrappers: Array = [] for (const tool of tools) { + assertSafeToolName(tool.name) if (toolResults) { wrappers.push(` async function ${tool.name}(input) { diff --git a/packages/typescript/ai-isolate-cloudflare/tests/escape-attempts.test.ts b/packages/typescript/ai-isolate-cloudflare/tests/escape-attempts.test.ts new file mode 100644 index 000000000..f68ab8109 --- /dev/null +++ b/packages/typescript/ai-isolate-cloudflare/tests/escape-attempts.test.ts @@ -0,0 +1,94 @@ +import { describe, expect, it } from 'vitest' +import { generateToolWrappers, wrapCode } from '../src/worker/wrap-code' +import type { ToolResultPayload, ToolSchema } from '../src/types' + +/** + * The CF Worker delegates actual sandboxing to Workers' V8 isolate via the + * UNSAFE_EVAL binding, so we can't perform a real escape attempt in Node. 
What + * we verify here instead is structural — the wrapper must not let user inputs + * break out of their intended quoting/scoping. + */ + +const benignTool: ToolSchema = { + name: 'search', + description: 'd', + inputSchema: {}, +} + +describe('Cloudflare wrapCode — injection resilience', () => { + it('escapes tool-result values via JSON.stringify so quotes are backslash-escaped', () => { + const payload = '"); process.exit(1); (function leak(){ return `' + const toolResults: Record = { + search_0: { success: true, value: payload }, + } + const wrapped = wrapCode('return 1', [benignTool], toolResults) + // JSON.stringify must have escaped the leading quote; verify the escaped + // form is what appears, and that no unescaped quote lets the payload end + // the string literal (an unescaped `");` would close it). + expect(wrapped).toContain(JSON.stringify(payload)) + expect(wrapped).not.toMatch(/[^\\]"\);\s*process\.exit/) + }) + + it('lands tool-result errors inside a JSON object literal, not a template literal', () => { + const error = 'with `backtick` and ${alert(1)} template-looking stuff' + const toolResults: Record = { + search_0: { success: false, error }, + } + const wrapped = wrapCode('return 1', [benignTool], toolResults) + // JSON strings are always double-quoted, and `${…}` has no meaning inside + // double-quoted JS strings. The JSON-escaped payload should appear, and + // the assignment context should be a plain `const __toolResults = {…}` + // — not inside any template literal. 
+ expect(wrapped).toContain(JSON.stringify(error)) + const assignment = wrapped.match(/const __toolResults =\s*([^;]+);/) + expect(assignment).not.toBeNull() + // The RHS should start with `{` (object literal), not a backtick + expect(assignment![1]!.trimStart().startsWith('{')).toBe(true) + }) + + it('rejects adversarial tool names that would break the wrapper function', () => { + const malicious: Array = [ + "evil'); throw Error(); //", + 'has space', + '1startsWithDigit', + "with'quote", + 'with"quote', + 'with`backtick', + 'with\nnewline', + 'with;semi', + '', + ] + for (const name of malicious) { + const tool: ToolSchema = { name, description: '', inputSchema: {} } + expect( + () => generateToolWrappers([tool]), + `should reject: ${JSON.stringify(name)}`, + ).toThrow(/Invalid tool name/) + } + }) + + it('accepts benign identifier-shaped tool names', () => { + const valid = ['search', 'fetchData', 'my_tool_42', '$special', '_internal'] + for (const name of valid) { + const tool: ToolSchema = { name, description: '', inputSchema: {} } + expect(() => generateToolWrappers([tool])).not.toThrow() + } + }) + + it('keeps tool names as bare identifiers only (never drops user input into strings unquoted)', () => { + const wrapped = generateToolWrappers([benignTool]) + // Tool name appears once as a function identifier and once as a quoted + // string literal. It must not appear elsewhere unquoted or interpolated. + expect(wrapped).toContain('async function search(input)') + expect(wrapped).toContain("name: 'search'") + }) + + it('tool-call IDs use an incrementing counter (not content-derived)', () => { + // This matters because re-executions of the same code should produce + // stable IDs across iterations — content-derived IDs would mismatch when + // user inputs contain non-deterministic values. 
+ const wrapped = wrapCode('return 1', [benignTool]) + expect(wrapped).toContain('__toolCallIdx') + expect(wrapped).toContain("'tc_' + (__toolCallIdx++)") + }) +}) diff --git a/packages/typescript/ai-isolate-cloudflare/tests/worker.test.ts b/packages/typescript/ai-isolate-cloudflare/tests/worker.test.ts index 33a3ddc86..27534e1b9 100644 --- a/packages/typescript/ai-isolate-cloudflare/tests/worker.test.ts +++ b/packages/typescript/ai-isolate-cloudflare/tests/worker.test.ts @@ -47,6 +47,59 @@ describe('generateToolWrappers', () => { expect(code).toContain('result.error') expect(code).toContain('return result.value') }) + + it('rejects tool names that would break out of the function identifier', () => { + const malicious: ToolSchema = { + name: "foo'); process.exit(1); (function bar() {", + description: '', + inputSchema: {}, + } + expect(() => generateToolWrappers([malicious])).toThrow(/Invalid tool name/) + }) + + it('rejects tool names containing whitespace, quotes, or backticks', () => { + const cases = [ + 'has space', + 'with`backtick', + "with'quote", + 'with"quote', + 'with;semi', + 'with\nnewline', + ] + for (const name of cases) { + expect(() => + generateToolWrappers([{ name, description: '', inputSchema: {} }]), + ).toThrow(/Invalid tool name/) + } + }) + + it('rejects tool names that start with a digit', () => { + expect(() => + generateToolWrappers([ + { name: '123tool', description: '', inputSchema: {} }, + ]), + ).toThrow(/Invalid tool name/) + }) + + it('rejects reserved JS keywords that would pass the regex but break eval', () => { + const reserved = ['return', 'class', 'function', 'if', 'await', 'import'] + for (const name of reserved) { + expect( + () => + generateToolWrappers([{ name, description: '', inputSchema: {} }]), + `should reject reserved: ${name}`, + ).toThrow(/reserved JavaScript keyword/) + } + }) + + it('accepts conventional identifiers (camelCase, snake_case, $_)', () => { + const valid = ['camelCase', 'snake_case', 
'_leading_underscore', '$dollar'] + for (const name of valid) { + expect(() => + generateToolWrappers([{ name, description: '', inputSchema: {} }]), + ).not.toThrow() + } + }) }) describe('wrapCode', () => { @@ -137,6 +190,9 @@ describe('Worker fetch handler', () => { expect(json.status).toBe('error') expect(json.error.name).toBe('UnsafeEvalNotAvailable') expect(json.error.message).toContain('UNSAFE_EVAL') + expect(json.error.message).toContain('wrangler.toml') + // No longer steers users to Workers for Platforms + expect(json.error.message).not.toContain('Workers for Platforms') }) it('returns 500 with RequestError when body is invalid JSON', async () => { diff --git a/packages/typescript/ai-isolate-cloudflare/wrangler.toml b/packages/typescript/ai-isolate-cloudflare/wrangler.toml index 636b7a76b..39dab3696 100644 --- a/packages/typescript/ai-isolate-cloudflare/wrangler.toml +++ b/packages/typescript/ai-isolate-cloudflare/wrangler.toml @@ -9,10 +9,11 @@ main = "src/worker/index.ts" compatibility_date = "2024-12-01" compatibility_flags = ["nodejs_compat"] -# UnsafeEval binding - provides eval() for local development -# NOTE: This only works locally with wrangler dev. -# For production deployment, you need Workers for Platforms (enterprise) -# or a different execution strategy. +# UnsafeEval binding - enables dynamic code execution inside the Worker's V8 isolate. +# Works in both local dev (wrangler dev) and production deployments where the +# Cloudflare account has the unsafe_eval binding enabled. Because this lets the +# Worker evaluate arbitrary JavaScript, protect the Worker's public endpoint +# with authentication and rate limiting before deploying. 
[[unsafe.bindings]] name = "UNSAFE_EVAL" type = "unsafe_eval" diff --git a/packages/typescript/ai-isolate-node/tests/escape-attempts.test.ts b/packages/typescript/ai-isolate-node/tests/escape-attempts.test.ts new file mode 100644 index 000000000..140e6b9df --- /dev/null +++ b/packages/typescript/ai-isolate-node/tests/escape-attempts.test.ts @@ -0,0 +1,130 @@ +import { describe, expect, it } from 'vitest' +import { createNodeIsolateDriver, probeIsolatedVm } from '../src/isolate-driver' + +const addonAvailable = probeIsolatedVm().compatible + +/** + * Run a user snippet inside a fresh Node isolate and return the result. + * Each test gets its own context so side effects cannot bleed between them. + */ +async function runInIsolate( + code: string, + opts?: { timeout?: number }, +): Promise<{ + success: boolean + value: unknown + error?: { name: string; message: string } +}> { + const driver = createNodeIsolateDriver() + const context = await driver.createContext({ + bindings: {}, + timeout: opts?.timeout, + }) + try { + const res = await context.execute(code) + return { + success: res.success, + value: res.value, + error: res.error, + } + } finally { + await context.dispose() + } +} + +describe.skipIf(!addonAvailable)( + 'Node isolate — sandbox escape attempts', + () => { + it('does not expose `process`', async () => { + const res = await runInIsolate('return typeof process') + expect(res.success).toBe(true) + expect(res.value).toBe('undefined') + }) + + it('does not expose `require`', async () => { + const res = await runInIsolate('return typeof require') + expect(res.success).toBe(true) + expect(res.value).toBe('undefined') + }) + + it('does not expose `fetch`', async () => { + const res = await runInIsolate('return typeof fetch') + expect(res.success).toBe(true) + expect(res.value).toBe('undefined') + }) + + it('does not expose Node built-ins via dynamic import', async () => { + const res = await runInIsolate( + `try { return await import('fs') } catch (e) { return 
'blocked: ' + e.message }`, + ) + // Either: isolate throws because import is unavailable, or the result + // is an error string. The sandbox must NOT return the actual fs module. + if (res.success) { + expect(String(res.value)).toMatch(/blocked/i) + } else { + expect(res.error).toBeDefined() + } + }) + + it('cannot pollute Object.prototype to reach host state', async () => { + const res = await runInIsolate(` + Object.prototype.__sandboxLeak = 'leaked' + return 'done' + `) + // Inside the sandbox, the pollution is possible but scoped to the + // isolate's own Object constructor. It MUST NOT leak to the host process. + expect( + (Object.prototype as { __sandboxLeak?: unknown }).__sandboxLeak, + ).toBeUndefined() + // Cleanup just in case + delete (Object.prototype as { __sandboxLeak?: unknown }).__sandboxLeak + expect(res.success).toBe(true) + }) + + it('does not leak prototype pollution between separate contexts', async () => { + const driver = createNodeIsolateDriver() + const ctxA = await driver.createContext({ bindings: {} }) + const ctxB = await driver.createContext({ bindings: {} }) + try { + await ctxA.execute(`Object.prototype.__ctxAProbe = 'a'; return 1;`) + const res = await ctxB.execute(`return ({}).__ctxAProbe`) + expect(res.success).toBe(true) + expect(res.value).toBeUndefined() + } finally { + await ctxA.dispose() + await ctxB.dispose() + } + }) + + it('terminates a synchronous CPU-spin loop via timeout (does not hang)', async () => { + const start = Date.now() + const res = await runInIsolate('while (true) {}', { timeout: 200 }) + const elapsed = Date.now() - start + expect(res.success).toBe(false) + // Must actually stop, not hang for multiple seconds + expect(elapsed).toBeLessThan(5000) + }) + + it('rejects attempts to redefine Function.prototype.constructor to escape', async () => { + // Even if user code tries to grab Function constructors, they operate + // inside the isolate — they cannot reach the host. 
+ const res = await runInIsolate(` + try { + const C = (function(){}).constructor + return typeof C('return process')() + } catch (e) { + return 'blocked: ' + e.message + } + `) + // The inner Function() executes inside the isolate; 'process' is + // undefined there, so either "undefined" or "blocked" is acceptable — + // what's NOT acceptable is getting a real process object. + expect(res.success).toBe(true) + expect(['undefined', 'blocked:']).toContainEqual( + typeof res.value === 'string' && res.value.startsWith('blocked:') + ? 'blocked:' + : res.value, + ) + }) + }, +) diff --git a/packages/typescript/ai-isolate-quickjs/tests/escape-attempts.test.ts b/packages/typescript/ai-isolate-quickjs/tests/escape-attempts.test.ts new file mode 100644 index 000000000..a7a4b815e --- /dev/null +++ b/packages/typescript/ai-isolate-quickjs/tests/escape-attempts.test.ts @@ -0,0 +1,109 @@ +import { describe, expect, it } from 'vitest' +import { createQuickJSIsolateDriver } from '../src/isolate-driver' + +async function runInIsolate( + code: string, + opts?: { timeout?: number }, +): Promise<{ + success: boolean + value: unknown + error?: { name: string; message: string } +}> { + const driver = createQuickJSIsolateDriver() + const context = await driver.createContext({ + bindings: {}, + timeout: opts?.timeout, + }) + try { + const res = await context.execute(code) + return { success: res.success, value: res.value, error: res.error } + } finally { + await context.dispose() + } +} + +describe('QuickJS isolate — sandbox escape attempts', () => { + it('does not expose `process`', async () => { + const res = await runInIsolate('return typeof process') + expect(res.success).toBe(true) + expect(res.value).toBe('undefined') + }) + + it('does not expose `require`', async () => { + const res = await runInIsolate('return typeof require') + expect(res.success).toBe(true) + expect(res.value).toBe('undefined') + }) + + it('does not expose `fetch`', async () => { + const res = await 
runInIsolate('return typeof fetch') + expect(res.success).toBe(true) + expect(res.value).toBe('undefined') + }) + + it('does not leak Object.prototype pollution to the host', async () => { + await runInIsolate(` + Object.prototype.__qjsLeak = 'leaked' + return 1 + `) + expect( + (Object.prototype as { __qjsLeak?: unknown }).__qjsLeak, + ).toBeUndefined() + }) + + it('does not leak Object.prototype pollution between separate contexts', async () => { + const driver = createQuickJSIsolateDriver() + const ctxA = await driver.createContext({ bindings: {} }) + const ctxB = await driver.createContext({ bindings: {} }) + try { + await ctxA.execute(`Object.prototype.__qjsCtxProbe = 'a'; return 1;`) + const res = await ctxB.execute(`return ({}).__qjsCtxProbe`) + expect(res.success).toBe(true) + expect(res.value).toBeUndefined() + } finally { + await ctxA.dispose() + await ctxB.dispose() + } + }) + + it('terminates a synchronous CPU-spin loop via timeout (does not hang)', async () => { + const start = Date.now() + const res = await runInIsolate('while (true) {}', { timeout: 200 }) + const elapsed = Date.now() - start + expect(res.success).toBe(false) + expect(elapsed).toBeLessThan(5000) + }) + + it('rejects Function-constructor escape attempts', async () => { + const res = await runInIsolate(` + try { + const C = (function(){}).constructor + return typeof C('return process')() + } catch (e) { + return 'blocked: ' + e.message + } + `) + expect(res.success).toBe(true) + // Either undefined (Function runs in isolate where process doesn't exist) + // or a blocked message. The sandbox must not return a real process object. 
+ expect( + res.value === 'undefined' || + (typeof res.value === 'string' && res.value.startsWith('blocked:')), + ).toBe(true) + }) + + it('treats global mutations within a context as scoped to that context only', async () => { + const driver = createQuickJSIsolateDriver() + const ctxA = await driver.createContext({ bindings: {} }) + const ctxB = await driver.createContext({ bindings: {} }) + try { + await ctxA.execute(`globalThis.__ctxMarker = 'A'; return 1;`) + const res = await ctxB.execute(`return typeof globalThis.__ctxMarker`) + expect(res.success).toBe(true) + expect(res.value).toBe('undefined') + } finally { + await ctxA.dispose() + await ctxB.dispose() + } + }) +}) diff --git a/packages/typescript/ai-ollama/src/adapters/text.ts b/packages/typescript/ai-ollama/src/adapters/text.ts index 07da8acab..209951569 100644 --- a/packages/typescript/ai-ollama/src/adapters/text.ts +++ b/packages/typescript/ai-ollama/src/adapters/text.ts @@ -1,6 +1,7 @@ import { BaseTextAdapter } from '@tanstack/ai/adapters' import { createOllamaClient, generateId, getOllamaHostFromEnv } from '../utils' +import { convertToolsToProviderFormat } from '../tools/tool-converter' import type { OllamaClientConfig } from '../utils/client' import type { @@ -486,25 +487,7 @@ export class OllamaTextAdapter extends BaseTextAdapter< private convertToolsToOllamaFormat( tools?: Array, ): Array | undefined { - if (!tools || tools.length === 0) { - return undefined - } - - // Tool schemas are already converted to JSON Schema in the ai layer. - // We use a type assertion because our JSONSchema type is more flexible - // than ollama's expected schema type (e.g., type can be string | string[]). - return tools.map((tool) => ({ - type: 'function', - function: { - name: tool.name, - description: tool.description, - parameters: (tool.inputSchema ?? 
{ - type: 'object', - properties: {}, - required: [], - }) as OllamaTool['function']['parameters'], - }, - })) + return convertToolsToProviderFormat(tools) } private formatMessages(messages: TextOptions['messages']): Array { diff --git a/packages/typescript/ai-ollama/src/index.ts b/packages/typescript/ai-ollama/src/index.ts index 584314823..781aa7eab 100644 --- a/packages/typescript/ai-ollama/src/index.ts +++ b/packages/typescript/ai-ollama/src/index.ts @@ -24,6 +24,12 @@ export { } from './adapters/summarize' export { OLLAMA_TEXT_MODELS as OllamaSummarizeModels } from './model-meta' +// Tool converters +export { + convertFunctionToolToAdapterFormat, + convertToolsToProviderFormat, +} from './tools' + // =========================== // Type Exports // =========================== diff --git a/packages/typescript/ai-ollama/src/tools/function-tool.ts b/packages/typescript/ai-ollama/src/tools/function-tool.ts new file mode 100644 index 000000000..15a7910cc --- /dev/null +++ b/packages/typescript/ai-ollama/src/tools/function-tool.ts @@ -0,0 +1,27 @@ +import type { JSONSchema, Tool } from '@tanstack/ai' +import type { Tool as OllamaTool } from 'ollama' + +/** + * Converts a standard Tool to Ollama's function-tool format. + * + * Tool schemas are already converted to JSON Schema in the ai layer. We + * accept any JSONSchema and hand it to Ollama via a local type cast because + * our JSONSchema type is broader than Ollama's (e.g. `type` can be a union + * or array of strings). + */ +export function convertFunctionToolToAdapterFormat(tool: Tool): OllamaTool { + const inputSchema = (tool.inputSchema ?? 
{ + type: 'object', + properties: {}, + required: [], + }) as JSONSchema + + return { + type: 'function', + function: { + name: tool.name, + description: tool.description, + parameters: inputSchema as OllamaTool['function']['parameters'], + }, + } +} diff --git a/packages/typescript/ai-ollama/src/tools/index.ts b/packages/typescript/ai-ollama/src/tools/index.ts new file mode 100644 index 000000000..436c0c4ef --- /dev/null +++ b/packages/typescript/ai-ollama/src/tools/index.ts @@ -0,0 +1,2 @@ +export { convertFunctionToolToAdapterFormat } from './function-tool' +export { convertToolsToProviderFormat } from './tool-converter' diff --git a/packages/typescript/ai-ollama/src/tools/tool-converter.ts b/packages/typescript/ai-ollama/src/tools/tool-converter.ts new file mode 100644 index 000000000..58c2525d3 --- /dev/null +++ b/packages/typescript/ai-ollama/src/tools/tool-converter.ts @@ -0,0 +1,20 @@ +import { convertFunctionToolToAdapterFormat } from './function-tool' +import type { Tool } from '@tanstack/ai' +import type { Tool as OllamaTool } from 'ollama' + +/** + * Converts standard Tools to Ollama-specific format. + * + * Ollama only supports function-style tools today, so every entry flows + * through {@link convertFunctionToolToAdapterFormat}. Keeping this layered + * structure matches peer adapters (openai/anthropic/grok/groq) so special + * tool types can be added later without rewriting the adapter. 
+ */ +export function convertToolsToProviderFormat( + tools?: Array, +): Array | undefined { + if (!tools || tools.length === 0) { + return undefined + } + return tools.map((tool) => convertFunctionToolToAdapterFormat(tool)) +} diff --git a/packages/typescript/ai-ollama/tests/text-adapter.test.ts b/packages/typescript/ai-ollama/tests/text-adapter.test.ts new file mode 100644 index 000000000..f2ece1065 --- /dev/null +++ b/packages/typescript/ai-ollama/tests/text-adapter.test.ts @@ -0,0 +1,329 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' +import { resolveDebugOption } from '@tanstack/ai/adapter-internals' +import { + OllamaTextAdapter, + createOllamaChat, + ollamaText, +} from '../src/adapters/text' +import type { Mock } from 'vitest' +import type { StreamChunk, Tool } from '@tanstack/ai' + +const testLogger = resolveDebugOption(false) + +// eslint-disable-next-line @typescript-eslint/no-explicit-any +let chatMock: Mock<(...args: Array) => any> +let ollamaConstructorCalls: Array<{ host?: string } | undefined> + +vi.mock('ollama', () => { + class Ollama { + chat: (...args: Array) => unknown + constructor(config?: { host?: string }) { + ollamaConstructorCalls.push(config) + this.chat = (...args) => chatMock(...args) + } + } + return { Ollama } +}) + +async function* asyncIterable(chunks: Array): AsyncIterable { + for (const c of chunks) yield c +} + +async function collectStream( + iter: AsyncIterable, +): Promise> { + const out: Array = [] + for await (const c of iter) out.push(c) + return out +} + +const searchTool: Tool = { + name: 'search', + description: 'search the web', + inputSchema: { + type: 'object', + properties: { q: { type: 'string' } }, + required: ['q'], + }, +} + +beforeEach(() => { + chatMock = vi.fn() + ollamaConstructorCalls = [] +}) + +afterEach(() => { + vi.unstubAllEnvs() +}) + +describe('OllamaTextAdapter construction', () => { + it('createOllamaChat wires kind=text, name=ollama, and the given model', () => { + 
const adapter = createOllamaChat('llama3.2') + expect(adapter.kind).toBe('text') + expect(adapter.name).toBe('ollama') + expect(adapter.model).toBe('llama3.2') + }) + + it('createOllamaChat accepts a string host', () => { + const adapter = createOllamaChat('llama3.2', 'http://remote:11434') + expect(adapter).toBeInstanceOf(OllamaTextAdapter) + }) + + it('createOllamaChat accepts a config object', () => { + const adapter = createOllamaChat('llama3.2', { + host: 'http://remote:11434', + headers: { Authorization: 'Bearer x' }, + }) + expect(adapter).toBeInstanceOf(OllamaTextAdapter) + }) + + it('ollamaText reads OLLAMA_HOST from env and forwards it to the Ollama client', () => { + vi.stubEnv('OLLAMA_HOST', 'http://from-env:11434') + const adapter = ollamaText('llama3.2') + expect(adapter.model).toBe('llama3.2') + // The adapter must instantiate the Ollama client with the env-derived host — + // asserting only on adapter.model would pass even if OLLAMA_HOST were ignored. + expect(ollamaConstructorCalls).toContainEqual( + expect.objectContaining({ host: 'http://from-env:11434' }), + ) + }) +}) + +describe('OllamaTextAdapter.chatStream (content streaming)', () => { + it('emits RUN_STARTED, a TEXT_MESSAGE lifecycle, and RUN_FINISHED for a plain text reply', async () => { + chatMock.mockResolvedValueOnce( + asyncIterable([ + { + message: { role: 'assistant', content: 'Hello ' }, + done: false, + }, + { + message: { role: 'assistant', content: 'world' }, + done: false, + }, + { + message: { role: 'assistant', content: '' }, + done: true, + done_reason: 'stop', + }, + ]), + ) + + const adapter = createOllamaChat('llama3.2') + const chunks = await collectStream( + adapter.chatStream({ + logger: testLogger, + model: 'llama3.2', + messages: [{ role: 'user', content: 'hi' }], + }), + ) + + const types = chunks.map((c) => c.type) + expect(types).toContain('RUN_STARTED') + expect(types).toContain('TEXT_MESSAGE_START') + expect(types).toContain('TEXT_MESSAGE_CONTENT') + 
expect(types).toContain('TEXT_MESSAGE_END') + expect(types).toContain('RUN_FINISHED') + + const contents = chunks + .filter((c) => c.type === 'TEXT_MESSAGE_CONTENT') + .map((c) => (c as { delta: string }).delta) + .join('') + expect(contents).toContain('Hello') + expect(contents).toContain('world') + }) +}) + +describe('OllamaTextAdapter.chatStream (tool calls)', () => { + it('emits TOOL_CALL lifecycle events when Ollama returns a tool call', async () => { + chatMock.mockResolvedValueOnce( + asyncIterable([ + { + message: { + role: 'assistant', + content: '', + tool_calls: [ + { + id: 'tc-123', + function: { + name: 'search', + arguments: { q: 'cats' }, + }, + }, + ], + }, + done: false, + }, + { + message: { role: 'assistant', content: '' }, + done: true, + done_reason: 'stop', + }, + ]), + ) + + const adapter = createOllamaChat('llama3.2') + const chunks = await collectStream( + adapter.chatStream({ + logger: testLogger, + model: 'llama3.2', + messages: [{ role: 'user', content: 'find cats' }], + tools: [searchTool], + }), + ) + + const types = chunks.map((c) => c.type) + expect(types).toContain('TOOL_CALL_START') + expect(types).toContain('TOOL_CALL_ARGS') + expect(types).toContain('TOOL_CALL_END') + + const startChunk = chunks.find((c) => c.type === 'TOOL_CALL_START') as + | { toolName: string; toolCallId: string } + | undefined + expect(startChunk!.toolName).toBe('search') + expect(startChunk!.toolCallId).toBe('tc-123') + }) + + it('synthesises a tool-call id when Ollama omits one', async () => { + chatMock.mockResolvedValueOnce( + asyncIterable([ + { + message: { + role: 'assistant', + content: '', + tool_calls: [ + { + function: { name: 'search', arguments: { q: 'x' } }, + }, + ], + }, + done: false, + }, + { + message: { role: 'assistant', content: '' }, + done: true, + done_reason: 'stop', + }, + ]), + ) + + const adapter = createOllamaChat('llama3.2') + const chunks = await collectStream( + adapter.chatStream({ + logger: testLogger, + model: 'llama3.2', + 
messages: [{ role: 'user', content: 'find' }], + tools: [searchTool], + }), + ) + const startChunk = chunks.find((c) => c.type === 'TOOL_CALL_START') as + | { toolCallId: string; toolName: string } + | undefined + expect(startChunk!.toolCallId).toMatch(/^search_\d+/) + }) + + it('forwards tools to the ollama client in provider format', async () => { + chatMock.mockResolvedValueOnce( + asyncIterable([ + { + message: { role: 'assistant', content: '' }, + done: true, + done_reason: 'stop', + }, + ]), + ) + + const adapter = createOllamaChat('llama3.2') + await collectStream( + adapter.chatStream({ + logger: testLogger, + model: 'llama3.2', + messages: [{ role: 'user', content: 'hi' }], + tools: [searchTool], + }), + ) + + expect(chatMock).toHaveBeenCalledOnce() + const call = chatMock.mock.calls[0]![0] as { + tools?: Array<{ type: string; function: { name: string } }> + stream?: boolean + } + expect(call.stream).toBe(true) + expect(call.tools).toHaveLength(1) + expect(call.tools![0]!.type).toBe('function') + expect(call.tools![0]!.function.name).toBe('search') + }) + + it('omits the tools field when no tools are provided', async () => { + chatMock.mockResolvedValueOnce( + asyncIterable([ + { + message: { role: 'assistant', content: 'ok' }, + done: true, + done_reason: 'stop', + }, + ]), + ) + const adapter = createOllamaChat('llama3.2') + await collectStream( + adapter.chatStream({ + logger: testLogger, + model: 'llama3.2', + messages: [{ role: 'user', content: 'hi' }], + }), + ) + const call = chatMock.mock.calls[0]![0] as { tools?: unknown } + expect(call.tools).toBeUndefined() + }) +}) + +describe('OllamaTextAdapter.structuredOutput', () => { + it('returns parsed JSON and the raw text', async () => { + chatMock.mockResolvedValueOnce({ + message: { role: 'assistant', content: '{"result":42}' }, + }) + const adapter = createOllamaChat('llama3.2') + const result = await adapter.structuredOutput({ + chatOptions: { + logger: testLogger, + messages: [{ role: 'user', 
content: 'q' }], + }, + outputSchema: { + type: 'object', + properties: { result: { type: 'number' } }, + }, + } as any) + expect(result.data).toEqual({ result: 42 }) + expect(result.rawText).toBe('{"result":42}') + }) + + it('wraps a JSON parse failure in an informative error', async () => { + chatMock.mockResolvedValueOnce({ + message: { role: 'assistant', content: 'not json' }, + }) + const adapter = createOllamaChat('llama3.2') + await expect( + adapter.structuredOutput({ + chatOptions: { + logger: testLogger, + messages: [{ role: 'user', content: 'q' }], + }, + outputSchema: { type: 'object', properties: {} }, + } as any), + ).rejects.toThrow(/Failed to parse structured output/) + }) + + it('surfaces upstream errors as structured-output errors', async () => { + chatMock.mockRejectedValueOnce(new Error('network down')) + const adapter = createOllamaChat('llama3.2') + await expect( + adapter.structuredOutput({ + chatOptions: { + logger: testLogger, + messages: [{ role: 'user', content: 'q' }], + }, + outputSchema: { type: 'object', properties: {} }, + } as any), + ).rejects.toThrow(/Structured output generation failed.*network down/) + }) +}) diff --git a/packages/typescript/ai-ollama/tests/tool-converter.test.ts b/packages/typescript/ai-ollama/tests/tool-converter.test.ts new file mode 100644 index 000000000..a58371fa1 --- /dev/null +++ b/packages/typescript/ai-ollama/tests/tool-converter.test.ts @@ -0,0 +1,96 @@ +import { describe, expect, it } from 'vitest' +import { convertFunctionToolToAdapterFormat } from '../src/tools/function-tool' +import { convertToolsToProviderFormat } from '../src/tools/tool-converter' +import type { Tool } from '@tanstack/ai' + +const baseTool: Tool = { + name: 'getGuitars', + description: 'Get guitar recommendations', + inputSchema: { + type: 'object', + properties: { brand: { type: 'string' } }, + required: ['brand'], + }, +} + +describe('convertFunctionToolToAdapterFormat', () => { + it('maps a standard Tool into Ollamas 
function-tool envelope', () => { + const converted = convertFunctionToolToAdapterFormat(baseTool) + expect(converted.type).toBe('function') + expect(converted.function.name).toBe('getGuitars') + expect(converted.function.description).toBe('Get guitar recommendations') + expect(converted.function.parameters).toEqual(baseTool.inputSchema) + }) + + it('supplies an empty object schema when tool.inputSchema is missing', () => { + const converted = convertFunctionToolToAdapterFormat({ + name: 'noop', + description: 'does nothing', + } as Tool) + expect(converted.function.parameters).toEqual({ + type: 'object', + properties: {}, + required: [], + }) + }) + + it('passes through complex nested schemas without modification', () => { + const complex: Tool = { + name: 'complex', + description: '', + inputSchema: { + type: 'object', + properties: { + nested: { + type: 'object', + properties: { + arr: { type: 'array', items: { type: 'string' } }, + }, + }, + }, + }, + } + expect( + convertFunctionToolToAdapterFormat(complex).function.parameters, + ).toEqual(complex.inputSchema) + }) +}) + +describe('convertToolsToProviderFormat', () => { + it('returns undefined for nullish input', () => { + expect(convertToolsToProviderFormat(undefined)).toBeUndefined() + }) + + it('returns undefined for an empty array (Ollama expects undefined, not [])', () => { + expect(convertToolsToProviderFormat([])).toBeUndefined() + }) + + it('converts each tool independently and preserves order', () => { + const tools: Array = [ + { name: 'first', description: 'one' }, + { name: 'second', description: 'two' }, + { name: 'third', description: 'three' }, + ] + const converted = convertToolsToProviderFormat(tools) + expect(converted).toHaveLength(3) + expect(converted!.map((t) => t.function.name)).toEqual([ + 'first', + 'second', + 'third', + ]) + }) + + it('delegates to convertFunctionToolToAdapterFormat (no special-casing today)', () => { + const tools: Array = [ + { + name: 'web_search', + description: 
'Anthropic-style special tool name', + inputSchema: { type: 'object', properties: {}, required: [] }, + }, + ] + const converted = convertToolsToProviderFormat(tools) + // Ollama has no special tool types — everything is a function tool + expect(converted![0]!.type).toBe('function') + expect(converted![0]!.function.name).toBe('web_search') + }) +}) diff --git a/packages/typescript/ai-ollama/tests/utils.test.ts b/packages/typescript/ai-ollama/tests/utils.test.ts new file mode 100644 index 000000000..6a47edebc --- /dev/null +++ b/packages/typescript/ai-ollama/tests/utils.test.ts @@ -0,0 +1,81 @@ +import { afterEach, describe, expect, it, vi } from 'vitest' +import { + createOllamaClient, + generateId, + getOllamaHostFromEnv, +} from '../src/utils' +import { estimateTokens } from '../src/utils/client' + +vi.mock('ollama', () => { + return { + Ollama: class { + constructor(public readonly config: unknown) {} + }, + } +}) + +afterEach(() => { + vi.unstubAllEnvs() +}) + +describe('createOllamaClient', () => { + it('defaults to http://localhost:11434 when no host is given', () => { + const client = createOllamaClient() as unknown as { + config: { host: string } + } + expect(client.config.host).toBe('http://localhost:11434') + }) + + it('respects an explicit host override', () => { + const client = createOllamaClient({ + host: 'https://ollama.example.com', + }) as unknown as { config: { host: string } } + expect(client.config.host).toBe('https://ollama.example.com') + }) + + it('forwards custom headers', () => { + const client = createOllamaClient({ + headers: { Authorization: 'Bearer xyz' }, + }) as unknown as { config: { headers: Record } } + expect(client.config.headers).toEqual({ Authorization: 'Bearer xyz' }) + }) +}) + +describe('getOllamaHostFromEnv', () => { + it('reads OLLAMA_HOST from process.env when set', () => { + vi.stubEnv('OLLAMA_HOST', 'http://custom-host:9999') + expect(getOllamaHostFromEnv()).toBe('http://custom-host:9999') + }) + + it('falls back to 
localhost:11434 when OLLAMA_HOST is empty', () => { + vi.stubEnv('OLLAMA_HOST', '') + expect(getOllamaHostFromEnv()).toBe('http://localhost:11434') + }) +}) + +describe('generateId', () => { + it('uses the provided prefix', () => { + expect(generateId('tool_call')).toMatch(/^tool_call-\d+-[a-z0-9]+$/) + }) + + it('defaults to msg prefix', () => { + expect(generateId()).toMatch(/^msg-\d+-/) + }) + + it('generates distinct ids on repeated calls', () => { + const ids = new Set() + for (let i = 0; i < 25; i++) ids.add(generateId('x')) + expect(ids.size).toBe(25) + }) +}) + +describe('estimateTokens', () => { + it('returns 0 for empty string', () => { + expect(estimateTokens('')).toBe(0) + }) + + it('rounds up to whole tokens (~4 chars each)', () => { + expect(estimateTokens('1234')).toBe(1) + expect(estimateTokens('12345')).toBe(2) + }) +}) diff --git a/packages/typescript/ai-ollama/tsconfig.json b/packages/typescript/ai-ollama/tsconfig.json index ea11c1096..9028fa3bd 100644 --- a/packages/typescript/ai-ollama/tsconfig.json +++ b/packages/typescript/ai-ollama/tsconfig.json @@ -1,9 +1,8 @@ { "extends": "../../../tsconfig.json", "compilerOptions": { - "outDir": "dist", - "rootDir": "src" + "outDir": "dist" }, - "include": ["src/**/*.ts", "src/**/*.tsx"], + "include": ["src/**/*.ts", "src/**/*.tsx", "tests/**/*.ts"], "exclude": ["node_modules", "dist", "**/*.config.ts"] } diff --git a/packages/typescript/ai-openai/src/adapters/text.ts b/packages/typescript/ai-openai/src/adapters/text.ts index 97752d737..139629869 100644 --- a/packages/typescript/ai-openai/src/adapters/text.ts +++ b/packages/typescript/ai-openai/src/adapters/text.ts @@ -1,4 +1,5 @@ import { BaseTextAdapter } from '@tanstack/ai/adapters' +import { toRunErrorPayload } from '@tanstack/ai/adapter-internals' import { validateTextProviderOptions } from '../text/text-provider-options' import { convertToolsToProviderFormat } from '../tools' import { @@ -160,8 +161,10 @@ export class OpenAITextAdapter< logger, ) } 
catch (error: unknown) { + // Narrow before logging: raw SDK errors can carry request metadata + // (including auth headers) which we must never surface to user loggers. logger.errors('openai.chatStream fatal', { - error, + error: toRunErrorPayload(error, 'openai.chatStream failed'), source: 'openai.chatStream', }) throw error diff --git a/packages/typescript/ai-preact/src/use-chat.ts b/packages/typescript/ai-preact/src/use-chat.ts index dbfb5b32c..dacd2be88 100644 --- a/packages/typescript/ai-preact/src/use-chat.ts +++ b/packages/typescript/ai-preact/src/use-chat.ts @@ -60,14 +60,19 @@ export function useChat = any>( id: clientId, initialMessages: messagesToUse, body: optionsRef.current.body, - onResponse: optionsRef.current.onResponse, - onChunk: optionsRef.current.onChunk, + // Wrap every callback so the latest options are read at call time. + // Capturing the function reference directly would freeze it to whatever + // the parent passed on the first render. + onResponse: (response) => optionsRef.current.onResponse?.(response), + onChunk: (chunk) => optionsRef.current.onChunk?.(chunk), onFinish: (message) => { optionsRef.current.onFinish?.(message) }, onError: (err) => { optionsRef.current.onError?.(err) }, + onCustomEvent: (eventType, data, context) => + optionsRef.current.onCustomEvent?.(eventType, data, context), tools: optionsRef.current.tools, streamProcessor: options.streamProcessor, onMessagesChange: (newMessages: Array>) => { @@ -135,9 +140,8 @@ export function useChat = any>( } }, [client, options.live]) - // Note: Callback options (onResponse, onChunk, onFinish, onError, onToolCall) - // are captured at client creation time. Changes to these callbacks require - // remounting the component or changing the connection to recreate the client. + // All callback options are read through optionsRef at call time, so fresh + // closures from each render are picked up without recreating the client. 
const sendMessage = useCallback( async (content: string | MultimodalContent) => { await client.sendMessage(content) diff --git a/packages/typescript/ai-react/src/use-chat.ts b/packages/typescript/ai-react/src/use-chat.ts index 635b4b1bd..c95589874 100644 --- a/packages/typescript/ai-react/src/use-chat.ts +++ b/packages/typescript/ai-react/src/use-chat.ts @@ -58,8 +58,11 @@ export function useChat = any>( id: clientId, initialMessages: messagesToUse, body: optionsRef.current.body, - onResponse: optionsRef.current.onResponse, - onChunk: optionsRef.current.onChunk, + // Wrap every callback so the latest options are read at call time. + // Capturing the function reference directly would freeze it to whatever + // the parent passed on the first render. + onResponse: (response) => optionsRef.current.onResponse?.(response), + onChunk: (chunk) => optionsRef.current.onChunk?.(chunk), onFinish: (message: UIMessage) => { optionsRef.current.onFinish?.(message) }, @@ -67,7 +70,8 @@ export function useChat = any>( optionsRef.current.onError?.(error) }, tools: optionsRef.current.tools, - onCustomEvent: optionsRef.current.onCustomEvent, + onCustomEvent: (eventType, data, context) => + optionsRef.current.onCustomEvent?.(eventType, data, context), streamProcessor: options.streamProcessor, onMessagesChange: (newMessages: Array>) => { setMessages(newMessages) @@ -135,9 +139,8 @@ export function useChat = any>( } }, [client, options.live]) - // Note: Callback options (onResponse, onChunk, onFinish, onError, onToolCall) - // are captured at client creation time. Changes to these callbacks require - // remounting the component or changing the connection to recreate the client. + // All callback options are read through optionsRef at call time, so fresh + // closures from each render are picked up without recreating the client. 
const sendMessage = useCallback( async (content: string | MultimodalContent) => { diff --git a/packages/typescript/ai-react/tests/use-chat.test.ts b/packages/typescript/ai-react/tests/use-chat.test.ts index cc2be31b9..79cfdb638 100644 --- a/packages/typescript/ai-react/tests/use-chat.test.ts +++ b/packages/typescript/ai-react/tests/use-chat.test.ts @@ -799,6 +799,32 @@ describe('useChat', () => { expect(result.current.messages.length).toBeGreaterThan(0) }) }) + + it('should use the latest onChunk after the parent rerenders with a new callback', async () => { + const first = vi.fn() + const second = vi.fn() + const adapter = createMockConnectionAdapter({ + chunks: createTextChunks('Hello'), + }) + + const { result, rerender } = renderHook( + (opts: UseChatOptions) => useChat(opts), + { + initialProps: { connection: adapter, onChunk: first }, + }, + ) + + // Swap in a new callback before the next sendMessage + rerender({ connection: adapter, onChunk: second }) + + await result.current.sendMessage('Test') + + // Only the newer callback should have seen this stream + await waitFor(() => { + expect(second).toHaveBeenCalled() + }) + expect(first).not.toHaveBeenCalled() + }) }) describe('edge cases and error handling', () => { diff --git a/packages/typescript/ai-solid/src/use-chat.ts b/packages/typescript/ai-solid/src/use-chat.ts index 03f69f24d..0aff8603a 100644 --- a/packages/typescript/ai-solid/src/use-chat.ts +++ b/packages/typescript/ai-solid/src/use-chat.ts @@ -33,18 +33,20 @@ export function useChat = any>( createSignal('disconnected') const [sessionGenerating, setSessionGenerating] = createSignal(false) - // Create ChatClient instance with callbacks to sync state - // Note: Options are captured at client creation time. - // The connection adapter can use functions for dynamic values (url, headers, etc.) - // which are evaluated lazily on each request. + // Create ChatClient instance with callbacks to sync state. 
+ // Every user-provided callback is wrapped so the LATEST `options.xxx` value + // is read at call time. Direct assignment would freeze the callback to the + // reference we saw at creation; the wrapper lets reactive `options` or + // in-place mutations propagate. When the user clears a callback (sets it to + // undefined), `?.` no-ops. const client = createMemo(() => { return new ChatClient({ connection: options.connection, id: clientId, initialMessages: options.initialMessages, body: options.body, - onResponse: options.onResponse, - onChunk: options.onChunk, + onResponse: (response) => options.onResponse?.(response), + onChunk: (chunk) => options.onChunk?.(chunk), onFinish: (message) => { options.onFinish?.(message) }, @@ -52,7 +54,8 @@ export function useChat = any>( options.onError?.(err) }, tools: options.tools, - onCustomEvent: options.onCustomEvent, + onCustomEvent: (eventType, data, context) => + options.onCustomEvent?.(eventType, data, context), streamProcessor: options.streamProcessor, onMessagesChange: (newMessages: Array>) => { setMessages(newMessages) @@ -123,9 +126,8 @@ export function useChat = any>( } }) - // Note: Callback options (onResponse, onChunk, onFinish, onError, onToolCall) - // are captured at client creation time. Changes to these callbacks require - // remounting the component or changing the connection to recreate the client. + // Callback options are read through `options.xxx` at call time, so reactive + // or mutated options propagate without recreating the client. 
const sendMessage = async (content: string | MultimodalContent) => { await client().sendMessage(content) diff --git a/packages/typescript/ai-svelte/src/create-chat.svelte.ts b/packages/typescript/ai-svelte/src/create-chat.svelte.ts index 6d5115fbc..3a9eeb232 100644 --- a/packages/typescript/ai-svelte/src/create-chat.svelte.ts +++ b/packages/typescript/ai-svelte/src/create-chat.svelte.ts @@ -55,7 +55,11 @@ export function createChat = any>( let connectionStatus = $state('disconnected') let sessionGenerating = $state(false) - // Create ChatClient instance + // Create ChatClient instance. + // Note: Svelte's createChat runs once per instance and `options` is captured + // by reference. Callbacks are therefore frozen to whatever the caller passed + // at creation — to swap them dynamically, mutate the options object + // in-place or call `client.updateOptions(...)` imperatively. const client = new ChatClient({ connection: options.connection, id: clientId, diff --git a/packages/typescript/ai-vue/src/use-chat.ts b/packages/typescript/ai-vue/src/use-chat.ts index 764e3efdb..3f10b4dcb 100644 --- a/packages/typescript/ai-vue/src/use-chat.ts +++ b/packages/typescript/ai-vue/src/use-chat.ts @@ -25,14 +25,20 @@ export function useChat = any>( const connectionStatus = shallowRef('disconnected') const sessionGenerating = shallowRef(false) - // Create ChatClient instance with callbacks to sync state + // Create ChatClient instance with callbacks to sync state. + // Every user-provided callback is wrapped so the LATEST `options.xxx` value + // is read at call time. Direct assignment would freeze the callback to the + // reference we saw at setup time; the wrapper lets reactive `options` or + // in-place mutations propagate. When the user clears a callback (sets it to + // undefined), `?.` no-ops — unlike `client.updateOptions`, which silently + // skips undefined and leaves the old callback installed. 
const client = new ChatClient({ connection: options.connection, id: clientId, initialMessages: options.initialMessages, body: options.body, - onResponse: options.onResponse, - onChunk: options.onChunk, + onResponse: (response) => options.onResponse?.(response), + onChunk: (chunk) => options.onChunk?.(chunk), onFinish: (message) => { options.onFinish?.(message) }, @@ -40,7 +46,8 @@ export function useChat = any>( options.onError?.(err) }, tools: options.tools, - onCustomEvent: options.onCustomEvent, + onCustomEvent: (eventType, data, context) => + options.onCustomEvent?.(eventType, data, context), streamProcessor: options.streamProcessor, onMessagesChange: (newMessages: Array>) => { messages.value = newMessages @@ -96,9 +103,8 @@ export function useChat = any>( } }) - // Note: Callback options (onResponse, onChunk, onFinish, onError, onToolCall) - // are captured at client creation time. Changes to these callbacks require - // remounting the component or changing the connection to recreate the client. + // Callback options are read through `options.xxx` at call time, so reactive + // or mutated options propagate without recreating the client. const sendMessage = async (content: string | MultimodalContent) => { await client.sendMessage(content) diff --git a/packages/typescript/ai/src/activities/error-payload.ts b/packages/typescript/ai/src/activities/error-payload.ts new file mode 100644 index 000000000..396c5573d --- /dev/null +++ b/packages/typescript/ai/src/activities/error-payload.ts @@ -0,0 +1,35 @@ +/** + * Shared error-narrowing helper for activities that convert thrown values + * into structured `RUN_ERROR` events. + * + * Accepts Error instances, objects with string-ish `message`/`code`, or bare + * strings; always returns a shape safe to serialize. Never leaks the full + * error object (which may carry request/response state from an SDK). 
+ */ +export function toRunErrorPayload( + error: unknown, + fallbackMessage = 'Unknown error occurred', +): { message: string; code: string | undefined } { + if (error instanceof Error) { + const codeField = (error as Error & { code?: unknown }).code + return { + message: error.message || fallbackMessage, + code: typeof codeField === 'string' ? codeField : undefined, + } + } + if (typeof error === 'object' && error !== null) { + const messageField = (error as { message?: unknown }).message + const codeField = (error as { code?: unknown }).code + return { + message: + typeof messageField === 'string' && messageField.length > 0 + ? messageField + : fallbackMessage, + code: typeof codeField === 'string' ? codeField : undefined, + } + } + if (typeof error === 'string' && error.length > 0) { + return { message: error, code: undefined } + } + return { message: fallbackMessage, code: undefined } +} diff --git a/packages/typescript/ai/src/activities/generateVideo/index.ts b/packages/typescript/ai/src/activities/generateVideo/index.ts index 61b27ea54..cee2339f7 100644 --- a/packages/typescript/ai/src/activities/generateVideo/index.ts +++ b/packages/typescript/ai/src/activities/generateVideo/index.ts @@ -8,6 +8,7 @@ */ import { aiEventClient } from '@tanstack/ai-event-client' +import { toRunErrorPayload } from '../error-payload' import { resolveDebugOption } from '../../logger/resolve' import type { InternalLogger } from '../../logger/internal-logger' import type { DebugOption } from '../../logger/types' @@ -399,21 +400,20 @@ async function* runStreamingVideoGeneration< } throw new Error('Video generation timed out') - } catch (error: any) { + } catch (error: unknown) { + const payload = toRunErrorPayload(error, 'Video generation failed') logger.errors('generateVideo activity failed', { - error, + message: payload.message, + code: payload.code, source: 'generateVideo', }) yield { type: 'RUN_ERROR', runId, threadId, - message: error.message || 'Video generation failed', - 
code: error.code, - error: { - message: error.message || 'Video generation failed', - code: error.code, - }, + message: payload.message, + code: payload.code, + error: payload, timestamp: Date.now(), } as StreamChunk } diff --git a/packages/typescript/ai/src/activities/stream-generation-result.ts b/packages/typescript/ai/src/activities/stream-generation-result.ts index 7994a79bf..2a2274cbd 100644 --- a/packages/typescript/ai/src/activities/stream-generation-result.ts +++ b/packages/typescript/ai/src/activities/stream-generation-result.ts @@ -5,6 +5,7 @@ */ import { EventType } from '@ag-ui/core' +import { toRunErrorPayload } from './error-payload' import type { StreamChunk } from '../types' function createId(prefix: string): string { @@ -52,18 +53,16 @@ export async function* streamGenerationResult( finishReason: 'stop', timestamp: Date.now(), } as StreamChunk - } catch (error: any) { + } catch (error: unknown) { + const payload = toRunErrorPayload(error, 'Generation failed') yield { type: EventType.RUN_ERROR, runId, threadId, - message: error.message || 'Generation failed', - code: error.code, + message: payload.message, + code: payload.code, // Deprecated nested form for backward compatibility - error: { - message: error.message || 'Generation failed', - code: error.code, - }, + error: payload, timestamp: Date.now(), } as StreamChunk } diff --git a/packages/typescript/ai/src/adapter-internals.ts b/packages/typescript/ai/src/adapter-internals.ts index 467b0027e..875ae9b4e 100644 --- a/packages/typescript/ai/src/adapter-internals.ts +++ b/packages/typescript/ai/src/adapter-internals.ts @@ -5,3 +5,4 @@ export type { ResolvedCategories } from './logger/internal-logger' export { InternalLogger } from './logger/internal-logger' export { resolveDebugOption } from './logger/resolve' +export { toRunErrorPayload } from './activities/error-payload' diff --git a/packages/typescript/ai/src/stream-to-response.ts b/packages/typescript/ai/src/stream-to-response.ts index 
2f83bc017..9850f4d60 100644 --- a/packages/typescript/ai/src/stream-to-response.ts +++ b/packages/typescript/ai/src/stream-to-response.ts @@ -1,3 +1,4 @@ +import { toRunErrorPayload } from './activities/error-payload' import type { StreamChunk } from './types' /** @@ -68,7 +69,7 @@ export function toServerSentEventsStream( } controller.close() - } catch (error: any) { + } catch (error: unknown) { // Don't send error if aborted if (abortController?.signal.aborted) { controller.close() @@ -81,10 +82,7 @@ export function toServerSentEventsStream( `data: ${JSON.stringify({ type: 'RUN_ERROR', timestamp: Date.now(), - error: { - message: error.message || 'Unknown error occurred', - code: error.code, - }, + error: toRunErrorPayload(error), })}\n\n`, ), ) @@ -190,7 +188,7 @@ export function toHttpStream( } controller.close() - } catch (error: any) { + } catch (error: unknown) { // Don't send error if aborted if (abortController?.signal.aborted) { controller.close() @@ -203,10 +201,7 @@ export function toHttpStream( `${JSON.stringify({ type: 'RUN_ERROR', timestamp: Date.now(), - error: { - message: error.message || 'Unknown error occurred', - code: error.code, - }, + error: toRunErrorPayload(error), })}\n`, ), ) diff --git a/packages/typescript/ai/tests/error-payload.test.ts b/packages/typescript/ai/tests/error-payload.test.ts new file mode 100644 index 000000000..1add82fc7 --- /dev/null +++ b/packages/typescript/ai/tests/error-payload.test.ts @@ -0,0 +1,76 @@ +import { describe, expect, it } from 'vitest' +import { toRunErrorPayload } from '../src/activities/error-payload' + +describe('toRunErrorPayload', () => { + it('narrows an Error instance, extracting message and code', () => { + const err = Object.assign(new Error('boom'), { code: 'E_BOOM' }) + expect(toRunErrorPayload(err)).toEqual({ + message: 'boom', + code: 'E_BOOM', + }) + }) + + it('falls back when an Error has no message', () => { + const err = new Error('') + expect(toRunErrorPayload(err)).toEqual({ + 
message: 'Unknown error occurred', + code: undefined, + }) + }) + + it('uses the supplied fallback when provided', () => { + expect(toRunErrorPayload(new Error(''), 'Generation failed')).toEqual({ + message: 'Generation failed', + code: undefined, + }) + }) + + it('narrows plain objects with string message + code fields', () => { + expect(toRunErrorPayload({ message: 'rate-limited', code: '429' })).toEqual( + { + message: 'rate-limited', + code: '429', + }, + ) + }) + + it('ignores non-string code fields (returns undefined)', () => { + expect(toRunErrorPayload({ message: 'x', code: 500 })).toEqual({ + message: 'x', + code: undefined, + }) + }) + + it('ignores non-string code fields on Error instances too', () => { + const err = Object.assign(new Error('numeric code'), { code: 500 }) + expect(toRunErrorPayload(err)).toEqual({ + message: 'numeric code', + code: undefined, + }) + }) + + it('accepts a bare string as a thrown value', () => { + expect(toRunErrorPayload('plain string error')).toEqual({ + message: 'plain string error', + code: undefined, + }) + }) + + it('returns the fallback for null / undefined / numbers / empty strings', () => { + for (const value of [null, undefined, 42, false, '']) { + expect(toRunErrorPayload(value, 'default')).toEqual({ + message: 'default', + code: undefined, + }) + } + }) + + it('does not leak extra properties from the original error', () => { + const err = Object.assign(new Error('leaky'), { + request: { headers: { authorization: 'Bearer secret' } }, + }) + const payload = toRunErrorPayload(err) + expect(payload).toEqual({ message: 'leaky', code: undefined }) + expect(payload).not.toHaveProperty('request') + }) +})