diff --git a/src/commands/deployments/deploy.ts b/src/commands/deployments/deploy.ts index d9ab838..a0f5992 100644 --- a/src/commands/deployments/deploy.ts +++ b/src/commands/deployments/deploy.ts @@ -53,6 +53,9 @@ const EXCLUDE_PATTERNS = [ '.cache', 'skills', 'coverage', + 'test-results', + 'playwright-report', + '.playwright-mcp', IGNORE_FILE_NAME, ]; diff --git a/src/commands/verify/finding.ts b/src/commands/verify/finding.ts new file mode 100644 index 0000000..7b6caa4 --- /dev/null +++ b/src/commands/verify/finding.ts @@ -0,0 +1,48 @@ +import type { Command } from 'commander'; +import { CLIError, getRootOpts, handleError } from '../../lib/errors.js'; +import { outputJson, outputInfo } from '../../lib/output.js'; +import { shutdownAnalytics, trackVerifyFinding } from '../../lib/analytics.js'; +import { getProjectConfig } from '../../lib/config.js'; + +// Record a "loud" error the browser surfaced during the drive — a 4xx/5xx, a +// `column does not exist`, a console exception — that the agent saw via +// `browser_console_messages` / `browser_network_requests`. The rls/truth probes +// only cover the *silent* findings; this is how the loud ones reach PostHog too. +export function registerVerifyFindingCommand(verify: Command): void { + verify + .command('finding') + .description('Record a loud error surfaced during the drive (4xx/5xx, column-not-found, console) as a finding (experimental)') + .requiredOption('--kind ', 'short error kind, e.g. 
pgrst_column_not_found, http_500, console_error') + .option('--type ', 'finding type', 'error') + .option('--status ', 'HTTP status, if any', (v) => parseInt(v, 10)) + .option('--endpoint ', 'the endpoint/URL that errored') + .option('--message ', 'the error message the page showed') + .option('--table ', 'related table, if known') + .action(async (opts, cmd) => { + const { json } = getRootOpts(cmd); + try { + const config = getProjectConfig(); + if (!config) throw new CLIError('No linked project found — run `insforge link` first.'); + const finding = { + type: opts.type as string, + kind: opts.kind as string, + status: Number.isNaN(opts.status) ? undefined : (opts.status as number | undefined), + endpoint: opts.endpoint as string | undefined, + message: opts.message as string | undefined, + table: opts.table as string | undefined, + }; + trackVerifyFinding(finding, config); + await shutdownAnalytics(); // flush the PostHog event before exit + + if (json) { + outputJson({ recorded: true, finding }); + } else { + outputInfo( + `📝 recorded ${finding.type} finding: ${finding.kind}${finding.status ? ` (${finding.status})` : ''}${finding.message ? 
` — ${finding.message}` : ''}`, + ); + } + } catch (e) { + handleError(e, json); + } + }); +} diff --git a/src/commands/verify/index.ts b/src/commands/verify/index.ts new file mode 100644 index 0000000..5287425 --- /dev/null +++ b/src/commands/verify/index.ts @@ -0,0 +1,14 @@ +// src/commands/verify/index.ts +import type { Command } from 'commander'; +import { registerVerifyRlsCommand } from './rls.js'; +import { registerVerifyTruthCommand } from './truth.js'; +import { registerVerifyFindingCommand } from './finding.js'; + +export function registerVerifyCommands(program: Command): void { + const verify = program + .command('verify', { hidden: true }) + .description('[experimental] Backend-truth & RLS probes + loud-error recording for insforge-verify'); + registerVerifyRlsCommand(verify); + registerVerifyTruthCommand(verify); + registerVerifyFindingCommand(verify); +} diff --git a/src/commands/verify/rls.ts b/src/commands/verify/rls.ts new file mode 100644 index 0000000..322c97e --- /dev/null +++ b/src/commands/verify/rls.ts @@ -0,0 +1,66 @@ +import type { Command } from 'commander'; +import { CLIError, getRootOpts, handleError } from '../../lib/errors.js'; +import { getProjectConfig } from '../../lib/config.js'; +import { outputJson, outputInfo } from '../../lib/output.js'; +import { shutdownAnalytics, trackVerifyFinding } from '../../lib/analytics.js'; +import { classifyRls, getAnonKey, login, rawsqlRows, recordsCount } from '../../lib/verify-probe.js'; + +export function registerVerifyRlsCommand(verify: Command): void { + verify + .command('rls') + .description('Cross-user RLS isolation probe — checks B cannot read A, A can read own (experimental)') + .requiredOption('--table ', 'user-scoped table to probe') + .requiredOption('--owner ', 'owner column on the table (e.g. 
user_id)') + .option('--user-a ', 'seeded user A email', 'verify-a@example.com') + .option('--user-b ', 'seeded user B email', 'verify-b@example.com') + .option('--password ', 'seeded users password', 'Test1234!pass') + .action(async (opts, cmd) => { + const { json } = getRootOpts(cmd); + try { + const config = getProjectConfig(); + if (!config) throw new CLIError('No linked project found — run `insforge link` first.'); + const baseUrl = config.oss_host; + const adminKey = config.api_key; + + const aToken = await login(baseUrl, opts.userA, opts.password); + const bToken = await login(baseUrl, opts.userB, opts.password); + const anon = await getAnonKey(baseUrl, adminKey); + if (!aToken || !bToken || !anon) { + throw new CLIError( + 'Login or anon-key fetch returned empty — seed BOTH users first. An empty token turns every probe into an anonymous request that silently "passes" isolation.', + ); + } + + const rows = await rawsqlRows( + baseUrl, + adminKey, + `select id from auth.users where email='${String(opts.userA).replace(/'/g, "''")}'`, + ); + const aId = (rows[0] as { id?: string })?.id; + if (!aId) throw new CLIError(`Could not find user A (${opts.userA}) — seed it first.`); + + const filter = `${opts.owner}=eq.${aId}`; + const bReadRowsOfA = await recordsCount(baseUrl, opts.table, filter, bToken, anon); + const aReadOwnRows = await recordsCount(baseUrl, opts.table, filter, aToken, anon); + const anonReadRows = await recordsCount(baseUrl, opts.table, undefined, undefined, anon); + + const { type, evidence } = classifyRls({ bReadRowsOfA, aReadOwnRows, anonReadRows }); + const finding = { type, table: opts.table as string, evidence }; + trackVerifyFinding(finding, config); + await shutdownAnalytics(); // flush the PostHog event before exit + + if (json) { + outputJson({ passed: type === 'none', finding }); + } else if (type === 'rls_leak') { + outputInfo(`❌ rls_leak on ${opts.table}: B read ${bReadRowsOfA} of A's rows (anon read ${anonReadRows}).`); + } else if 
(type === 'rls_overrestrict') { + outputInfo(`❌ rls_overrestrict on ${opts.table}: A could not read its own rows (positive control empty).`); + } else { + outputInfo(`✅ isolation holds on ${opts.table}: B=0, anon=0, A=${aReadOwnRows}.`); + } + process.exitCode = type === 'none' ? 0 : 1; + } catch (e) { + handleError(e, json); + } + }); +} diff --git a/src/commands/verify/truth.ts b/src/commands/verify/truth.ts new file mode 100644 index 0000000..e4f0443 --- /dev/null +++ b/src/commands/verify/truth.ts @@ -0,0 +1,59 @@ +import type { Command } from 'commander'; +import { CLIError, getRootOpts, handleError } from '../../lib/errors.js'; +import { getProjectConfig } from '../../lib/config.js'; +import { outputJson, outputInfo } from '../../lib/output.js'; +import { shutdownAnalytics, trackVerifyFinding } from '../../lib/analytics.js'; +import { classifyTruth, isReadOnlyQuery, rawsqlRows } from '../../lib/verify-probe.js'; + +export function registerVerifyTruthCommand(verify: Command): void { + verify + .command('truth') + .description('Backend-truth cross-check — compare a DB read to what the UI claimed (experimental)') + .requiredOption('--query ', 'a read proving what the UI showed; compares the first column of the first row') + .option('--expect ', 'the value the UI displayed (compared as a scalar)') + .option('--expect-count ', 'expect this many rows instead of a scalar value') + .option('--table ', 'table name, for the finding label') + .action(async (opts, cmd) => { + const { json } = getRootOpts(cmd); + try { + const config = getProjectConfig(); + if (!config) throw new CLIError('No linked project found — run `insforge link` first.'); + if (!isReadOnlyQuery(opts.query)) { + throw new CLIError( + 'verify truth runs a single read-only query — it must start with SELECT or WITH and not chain statements.', + ); + } + + const rows = await rawsqlRows(config.oss_host, config.api_key, opts.query); + + let result: { type: 'false_pass' | 'none'; evidence: Record }; + if 
(opts.expectCount !== undefined) { + result = classifyTruth(rows.length, String(opts.expectCount)); + } else if (opts.expect !== undefined) { + const first = rows[0]; + const dbValue = + first && typeof first === 'object' ? Object.values(first as Record)[0] : first; + result = classifyTruth(dbValue, String(opts.expect)); + } else { + throw new CLIError('Provide --expect (scalar) or --expect-count (row count).'); + } + + const finding = { type: result.type, table: opts.table as string | undefined, evidence: result.evidence }; + trackVerifyFinding(finding, config); + await shutdownAnalytics(); // flush the PostHog event before exit + + if (json) { + outputJson({ passed: result.type === 'none', finding }); + } else if (result.type === 'false_pass') { + outputInfo( + `❌ false_pass${opts.table ? ` on ${opts.table}` : ''}: UI claimed ${JSON.stringify(result.evidence.ui_claimed)} but DB has ${JSON.stringify(result.evidence.db_actual)}.`, + ); + } else { + outputInfo(`✅ backend truth matches: ${JSON.stringify(result.evidence.db_actual)}.`); + } + process.exitCode = result.type === 'none' ? 
0 : 1; + } catch (e) { + handleError(e, json); + } + }); +} diff --git a/src/index.ts b/src/index.ts index 0210186..32aab51 100644 --- a/src/index.ts +++ b/src/index.ts @@ -11,6 +11,7 @@ import { registerWhoamiCommand } from './commands/whoami.js'; import { registerOrgsCommands } from './commands/orgs/list.js'; import { registerProjectsCommands } from './commands/projects/list.js'; import { registerBranchCommands } from './commands/branch/index.js'; +import { registerVerifyCommands } from './commands/verify/index.js'; import { registerProjectLinkCommand } from './commands/projects/link.js'; import { registerDbCommands } from './commands/db/query.js'; import { registerDbTablesCommand } from './commands/db/tables.js'; @@ -135,6 +136,9 @@ registerProjectsCommands(projectsCmd); // Branch commands registerBranchCommands(program); +// Verify probe commands (experimental, hidden from --help) +registerVerifyCommands(program); + // Database commands const dbCmd = program.command('db').description('Database operations'); registerDbCommands(dbCmd); diff --git a/src/lib/analytics.ts b/src/lib/analytics.ts index 824a89a..28607d5 100644 --- a/src/lib/analytics.ts +++ b/src/lib/analytics.ts @@ -128,3 +128,33 @@ export async function shutdownAnalytics(): Promise { // ignore } } + +export interface VerifyFinding { + type: string; + table?: string; + kind?: string; + status?: number; + endpoint?: string; + message?: string; + evidence?: Record; +} + +/** + * Emit a verify finding to PostHog — the central, cross-user rail (finding rate + what + * broke), same as the other track* helpers here. NOT the per-project `oss_host/api/usage/mcp` + * table, which only stores `(tool_name, success)` and drops the finding. The recording lives + * in the tool — a finding is recorded because the probe ran, not because the agent remembered + * to. Best-effort; the caller flushes via `shutdownAnalytics()` before exit. 
+ */ +export function trackVerifyFinding(finding: VerifyFinding, config: ProjectConfig): void { + captureEvent(config.project_id, 'verify_finding', { + ...finding.evidence, + finding_type: finding.type, + passed: finding.type === 'none', + table: finding.table, + kind: finding.kind, + status: finding.status, + endpoint: finding.endpoint, + message: finding.message, + }); +} diff --git a/src/lib/browser-mcp.test.ts b/src/lib/browser-mcp.test.ts new file mode 100644 index 0000000..e827ef8 --- /dev/null +++ b/src/lib/browser-mcp.test.ts @@ -0,0 +1,99 @@ +import { mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from 'node:fs'; +import { dirname, join } from 'node:path'; +import { tmpdir } from 'node:os'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { ensureCodexToml, mergeJsonMcp } from './browser-mcp.js'; + +const HEADLESS_SERVER = { + command: 'npx', + args: ['@playwright/mcp@latest', '--headless'], +}; + +describe('mergeJsonMcp', () => { + let dir: string; + let file: string; + const read = () => JSON.parse(readFileSync(file, 'utf-8')); + + beforeEach(() => { + dir = mkdtempSync(join(tmpdir(), 'insforge-mcp-')); + file = join(dir, '.cursor', 'mcp.json'); + }); + afterEach(() => { + rmSync(dir, { recursive: true, force: true }); + }); + + it('creates the file (and parent dirs) with the server under mcpServers', () => { + expect(mergeJsonMcp(file, 'mcpServers', HEADLESS_SERVER)).toBe(true); + expect(read().mcpServers['playwright']).toEqual(HEADLESS_SERVER); + }); + + it('merges without clobbering other servers', () => { + writeFileSync(join(dir, 'cfg.json'), JSON.stringify({ mcpServers: { other: { command: 'x' } } })); + expect(mergeJsonMcp(join(dir, 'cfg.json'), 'mcpServers', HEADLESS_SERVER)).toBe(true); + const cfg = JSON.parse(readFileSync(join(dir, 'cfg.json'), 'utf-8')); + expect(cfg.mcpServers.other).toEqual({ command: 'x' }); + expect(cfg.mcpServers['playwright']).toBeDefined(); + }); + + it('is idempotent — 
returns false when already present and identical', () => { + mergeJsonMcp(file, 'mcpServers', HEADLESS_SERVER); + expect(mergeJsonMcp(file, 'mcpServers', HEADLESS_SERVER)).toBe(false); + }); + + it('recovers from malformed JSON by starting fresh', () => { + const bad = join(dir, 'bad.json'); + writeFileSync(bad, '{ not valid json'); + expect(mergeJsonMcp(bad, 'mcpServers', HEADLESS_SERVER)).toBe(true); + expect(JSON.parse(readFileSync(bad, 'utf-8')).mcpServers['playwright']).toBeDefined(); + }); + + it('supports the VS Code `servers` key', () => { + expect(mergeJsonMcp(file, 'servers', HEADLESS_SERVER)).toBe(true); + expect(read().servers['playwright']).toEqual(HEADLESS_SERVER); + }); + + it('starts fresh on valid-but-non-object JSON (array / null / primitive)', () => { + for (const bad of ['[1,2,3]', 'null', '"a string"', '42']) { + const f = join(dir, `${bad.replace(/\W/g, '')}.json`); + writeFileSync(f, bad); + expect(mergeJsonMcp(f, 'mcpServers', HEADLESS_SERVER)).toBe(true); + // No crash, no silent loss — server is written under a fresh object. 
+ expect(JSON.parse(readFileSync(f, 'utf-8')).mcpServers['playwright']).toEqual(HEADLESS_SERVER); + } + }); +}); + +describe('ensureCodexToml', () => { + let dir: string; + let file: string; + + beforeEach(() => { + dir = mkdtempSync(join(tmpdir(), 'insforge-codex-')); + file = join(dir, '.codex', 'config.toml'); + }); + afterEach(() => { + rmSync(dir, { recursive: true, force: true }); + }); + + it('appends a [mcp_servers.playwright] block when absent', () => { + expect(ensureCodexToml(file)).toBe(true); + const toml = readFileSync(file, 'utf-8'); + expect(toml).toContain('[mcp_servers.playwright]'); + expect(toml).toContain('command = "npx"'); + expect(toml).toContain('"--headless"'); + }); + + it('is idempotent — returns false when the block already exists', () => { + ensureCodexToml(file); + expect(ensureCodexToml(file)).toBe(false); + }); + + it('preserves existing TOML content', () => { + mkdirSync(dirname(file), { recursive: true }); + writeFileSync(file, '[some_other_section]\nkey = "value"\n'); + expect(ensureCodexToml(file)).toBe(true); + const toml = readFileSync(file, 'utf-8'); + expect(toml).toContain('[some_other_section]'); + expect(toml).toContain('[mcp_servers.playwright]'); + }); +}); diff --git a/src/lib/browser-mcp.ts b/src/lib/browser-mcp.ts new file mode 100644 index 0000000..ec5634d --- /dev/null +++ b/src/lib/browser-mcp.ts @@ -0,0 +1,138 @@ +import { exec } from 'node:child_process'; +import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs'; +import { homedir } from 'node:os'; +import { dirname, join } from 'node:path'; +import { promisify } from 'node:util'; + +const execAsync = promisify(exec); + +const MCP_CONFIG_TIMEOUT_MS = 60_000; + +// `@playwright/mcp` is the browser-automation MCP (browser_navigate/click/snapshot + +// console/network tools) the light-mode `insforge-verify` skill drives directly — NOT +// `run-test-mcp-server`, which is the Test Agents (planner/generator) pipeline and has no +// browser_* tools. 
const MCP_SERVER_NAME = 'playwright';
const MCP_COMMAND = 'npx';
const MCP_ARGS = ['@playwright/mcp@latest', '--headless'];

/** True for a non-null, non-array object — the only JSON shape keys can be merged into. */
function isPlainObject(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/**
 * Merge the Playwright MCP server into a JSON MCP config (user/global scope),
 * returning true if it changed the file. `key` is the top-level object servers live
 * under — `mcpServers` for Cursor/Windsurf/Gemini, `servers` for VS Code.
 *
 * Malformed JSON, a non-object root, or a non-object value under `key` (array, string,
 * number, null) is replaced by a fresh object rather than crashing the link. Other
 * top-level keys and other servers under `key` are preserved.
 *
 * @param file - Path of the JSON config; parent directories are created as needed.
 * @param key - Top-level key that holds the server map.
 * @param server - Server definition stored under the `playwright` entry.
 * @returns `false` when an identical entry already exists (nothing is written), else `true`.
 */
export function mergeJsonMcp(
  file: string,
  key: 'mcpServers' | 'servers',
  server: Record<string, unknown>,
): boolean {
  let config: Record<string, unknown> = {};
  if (existsSync(file)) {
    try {
      const parsed: unknown = JSON.parse(readFileSync(file, 'utf-8'));
      if (isPlainObject(parsed)) config = parsed;
    } catch {
      // Unreadable or malformed JSON: start from an empty config.
      config = {};
    }
  }

  // The section itself must be a plain object. Assigning a property on a string throws in
  // strict mode, and a property set on an array is dropped by JSON.stringify.
  const section = config[key];
  const servers: Record<string, unknown> = isPlainObject(section) ? section : {};

  if (JSON.stringify(servers[MCP_SERVER_NAME]) === JSON.stringify(server)) return false;

  servers[MCP_SERVER_NAME] = server;
  config[key] = servers;
  mkdirSync(dirname(file), { recursive: true });
  writeFileSync(file, `${JSON.stringify(config, null, 2)}\n`);
  return true;
}

/** Escape regular-expression metacharacters so `text` matches literally. */
function escapeRegExp(text: string): string {
  return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Whether the TOML text already mentions a `[mcp_servers.playwright]` table. Besides the
 * exact header text, this accepts the equivalent TOML spellings with whitespace around the
 * key parts or a quoted final key (`[mcp_servers."playwright"]`); appending a second table
 * for the same key would make the file invalid TOML.
 */
function hasCodexServerTable(toml: string): boolean {
  if (toml.includes(`[mcp_servers.${MCP_SERVER_NAME}]`)) return true;
  const name = escapeRegExp(MCP_SERVER_NAME);
  const header = new RegExp(
    `^[ \\t]*\\[[ \\t]*mcp_servers[ \\t]*\\.[ \\t]*(?:${name}|"${name}"|'${name}')[ \\t]*\\]`,
    'm',
  );
  return header.test(toml);
}

/**
 * Append a `[mcp_servers.playwright]` block to Codex's global TOML config if absent.
 *
 * @param file - Path of the TOML config; parent directories are created as needed.
 * @returns `true` if the block was appended, `false` if a matching table header was found.
 */
export function ensureCodexToml(file: string): boolean {
  const existing = existsSync(file) ? readFileSync(file, 'utf-8') : '';
  if (hasCodexServerTable(existing)) return false;
  const args = MCP_ARGS.map((a) => `"${a}"`).join(', ');
  // The leading newline keeps the header on its own line even if the file lacks a final newline.
  const block = `\n[mcp_servers.${MCP_SERVER_NAME}]\ncommand = "${MCP_COMMAND}"\nargs = [${args}]\n`;
  mkdirSync(dirname(file), { recursive: true });
  writeFileSync(file, existing + block);
  return true;
}

/** Run a shell command with the shared timeout; resolves to whether it exited successfully. */
async function succeeds(command: string): Promise<boolean> {
  try {
    await execAsync(command, { timeout: MCP_CONFIG_TIMEOUT_MS });
    return true;
  } catch {
    return false;
  }
}

/**
 * Whether `cmd` resolves on PATH. `command -v` is a POSIX shell builtin that the Windows
 * shell does not provide, so `where` is used on win32.
 */
async function commandExists(cmd: string): Promise<boolean> {
  return succeeds(process.platform === 'win32' ? `where ${cmd}` : `command -v ${cmd}`);
}

/**
 * One agent's recipe for registering the browser MCP at user/global scope — mirroring
 * how `skills add -a -g` delegates skill placement per agent. `apply` returns a
 * label of what it configured, or null if the agent isn't present (skip it). Add an
 * agent by adding one entry — no other call site changes (cf. AGENT_FLAGS).
 */
interface BrowserMcpTarget {
  agent: string;
  apply: (home: string) => Promise<string | null>;
}

const JSON_MCP_SERVER = { command: MCP_COMMAND, args: MCP_ARGS };

const BROWSER_MCP_TARGETS: BrowserMcpTarget[] = [
  {
    // Claude Code: delegate to its own CLI at user scope (global across projects),
    // exactly like the skills install delegates placement. Idempotent + quiet on repeat
    // links: skip if already configured. Skipped if the `claude` CLI isn't on PATH.
    agent: 'Claude Code',
    apply: async () => {
      if (!(await commandExists('claude'))) return null;
      // `claude mcp get` exiting successfully is read as "already registered".
      if (await succeeds(`claude mcp get ${MCP_SERVER_NAME}`)) return null;
      await execAsync(
        `claude mcp add ${MCP_SERVER_NAME} -s user -- ${MCP_COMMAND} ${MCP_ARGS.join(' ')}`,
        { timeout: MCP_CONFIG_TIMEOUT_MS },
      );
      return 'user scope';
    },
  },
  {
    // Cursor: no CLI — write its global config file, only if Cursor is set up.
    agent: 'Cursor',
    apply: async (home) => {
      if (!existsSync(join(home, '.cursor'))) return null;
      return mergeJsonMcp(join(home, '.cursor', 'mcp.json'), 'mcpServers', JSON_MCP_SERVER)
        ? '~/.cursor/mcp.json'
        : null;
    },
  },
  {
    // Codex: global TOML, only if Codex is set up.
    agent: 'Codex',
    apply: async (home) => {
      if (!existsSync(join(home, '.codex'))) return null;
      return ensureCodexToml(join(home, '.codex', 'config.toml')) ? '~/.codex/config.toml' : null;
    },
  },
];

// configureBrowserMcp (next): configures the Playwright browser MCP at user/global scope for
// whichever agents are present, so light-mode `insforge-verify` can drive the browser. Global
// to match how the InsForge skills install (`skills add … -g`); the server command is
// identical across agents — only where/how it's registered differs.
No network beyond each agent's + * own CLI, no LLM, no subagents (the user's agent is the driving brain). Returns a label + * per agent configured. Best-effort: one agent failing never blocks the others. + */ +export async function configureBrowserMcp(home = homedir()): Promise { + const configured: string[] = []; + for (const target of BROWSER_MCP_TARGETS) { + try { + const label = await target.apply(home); + if (label) configured.push(`${target.agent} (${label})`); + } catch { + // best-effort per agent + } + } + return configured; +} diff --git a/src/lib/skills.ts b/src/lib/skills.ts index 913fcc8..abc1cb6 100644 --- a/src/lib/skills.ts +++ b/src/lib/skills.ts @@ -4,6 +4,7 @@ import { join } from 'node:path'; import { promisify } from 'node:util'; import * as clack from '@clack/prompts'; import { writeLocalAgentsMd } from './agents-md.js'; +import { configureBrowserMcp } from './browser-mcp.js'; import { getProjectConfig } from './config.js'; const execAsync = promisify(exec); @@ -84,7 +85,11 @@ const PROVIDER_SKILLS: Record = { 'better-auth': { repo: 'better-auth/skills', label: 'Better Auth skills' }, }; -export async function installSkills(json: boolean, authProvider?: string): Promise { +export async function installSkills( + json: boolean, + authProvider?: string, + withBrowserMcp = true, +): Promise { try { if (!json) clack.log.info('Installing InsForge agent skills (global)...'); await execAsync(`npx skills add insforge/agent-skills -g -y ${AGENT_FLAGS}`, { @@ -151,6 +156,36 @@ export async function installSkills(json: boolean, authProvider?: string): Promi } catch { // non-critical, silently ignore } + + // Opt-in: configure the Playwright browser MCP (`@playwright/mcp`) so the `insforge-verify` + // skill can drive the UI directly (light mode — no spec-generation subagents). + // This only declares the MCP server in `.mcp.json`; the driving "brain" is the + // user's own agent, so it stays agent-agnostic and needs no extra LLM key. 
The + // server loads at session start like any MCP config, so we do it at link time. + if (withBrowserMcp) { + try { + const configured = await configureBrowserMcp(); + if (!json) { + if (configured.length) { + clack.log.success(`Configured the Playwright browser MCP for: ${configured.join(', ')}.`); + clack.log.warn( + 'Restart your agent (or reload MCP servers) so the browser tools load before verifying.', + ); + } else { + clack.log.info( + 'No supported agent found to auto-configure the browser MCP. Add it manually — Claude: `claude mcp add playwright -s user -- npx @playwright/mcp@latest --headless`.', + ); + } + } + } catch (err) { + if (!json) { + clack.log.warn(`Could not configure the browser MCP: ${describeExecError(err)}`); + clack.log.info( + 'Add a `playwright` MCP server to your agent manually (command: `npx @playwright/mcp@latest --headless`).', + ); + } + } + } } export async function reportCliUsage( diff --git a/src/lib/verify-probe.test.ts b/src/lib/verify-probe.test.ts new file mode 100644 index 0000000..0753496 --- /dev/null +++ b/src/lib/verify-probe.test.ts @@ -0,0 +1,77 @@ +import { describe, expect, it } from 'vitest'; +import { classifyRls, classifyTruth, isReadOnlyQuery } from './verify-probe.js'; + +describe('classifyRls', () => { + it('flags rls_leak when B reads any of A\'s rows', () => { + const r = classifyRls({ bReadRowsOfA: 3, aReadOwnRows: 5, anonReadRows: 0 }); + expect(r.type).toBe('rls_leak'); + expect(r.evidence.user_b_read_rows_of_a).toBe(3); + }); + + it('flags rls_leak when anonymous reads any rows', () => { + expect(classifyRls({ bReadRowsOfA: 0, aReadOwnRows: 5, anonReadRows: 2 }).type).toBe('rls_leak'); + }); + + it('flags rls_overrestrict when A cannot read its own rows (positive control empty)', () => { + expect(classifyRls({ bReadRowsOfA: 0, aReadOwnRows: 0, anonReadRows: 0 }).type).toBe('rls_overrestrict'); + }); + + it('passes (none) when B=0, anon=0, and A sees its own rows', () => { + expect(classifyRls({ 
bReadRowsOfA: 0, aReadOwnRows: 5, anonReadRows: 0 }).type).toBe('none'); + }); + + it('prioritises a real leak over the positive-control check', () => { + // B leaks AND A sees nothing — the leak is the more severe finding to surface + expect(classifyRls({ bReadRowsOfA: 4, aReadOwnRows: 0, anonReadRows: 0 }).type).toBe('rls_leak'); + }); +}); + +describe('classifyTruth', () => { + it('flags false_pass when the DB value differs from what the UI claimed', () => { + const r = classifyTruth(1, '3'); + expect(r.type).toBe('false_pass'); + expect(r.evidence).toEqual({ ui_claimed: '3', db_actual: 1 }); + }); + + it('passes when the DB value matches (number vs string normalised)', () => { + expect(classifyTruth(3, '3').type).toBe('none'); + expect(classifyTruth('3', '3').type).toBe('none'); + expect(classifyTruth(' 3 ', '3').type).toBe('none'); + }); + + it('treats null/undefined as empty and mismatching a non-empty expectation', () => { + expect(classifyTruth(null, '3').type).toBe('false_pass'); + expect(classifyTruth(undefined, '0').type).toBe('false_pass'); + }); + + it('passes when both sides are empty', () => { + expect(classifyTruth(null, '').type).toBe('none'); + }); +}); + +describe('isReadOnlyQuery', () => { + it('allows SELECT / WITH (any case, leading whitespace, trailing semicolon)', () => { + expect(isReadOnlyQuery('select 1')).toBe(true); + expect(isReadOnlyQuery('SELECT * FROM t')).toBe(true); + expect(isReadOnlyQuery(' with c as (select 1) select * from c')).toBe(true); + expect(isReadOnlyQuery('select 1;')).toBe(true); + }); + + it('rejects writes / DDL', () => { + expect(isReadOnlyQuery('delete from users')).toBe(false); + expect(isReadOnlyQuery('UPDATE accounts SET balance = 0')).toBe(false); + expect(isReadOnlyQuery('insert into t values (1)')).toBe(false); + expect(isReadOnlyQuery('drop table t')).toBe(false); + }); + + it('rejects statement chaining', () => { + expect(isReadOnlyQuery('select 1; delete from users')).toBe(false); + 
expect(isReadOnlyQuery('select 1; update t set x = 1')).toBe(false); + }); + + it('rejects DML hidden inside a CTE (WITH … DELETE/UPDATE/INSERT … SELECT)', () => { + expect(isReadOnlyQuery('with x as (delete from users returning id) select id from x')).toBe(false); + expect(isReadOnlyQuery('WITH x AS (UPDATE t SET c = 1 RETURNING id) SELECT * FROM x')).toBe(false); + expect(isReadOnlyQuery('with x as (insert into t values (1) returning id) select id from x')).toBe(false); + }); +}); diff --git a/src/lib/verify-probe.ts b/src/lib/verify-probe.ts new file mode 100644 index 0000000..cc8c394 --- /dev/null +++ b/src/lib/verify-probe.ts @@ -0,0 +1,137 @@ +// Deterministic verify probes for `insforge verify rls/truth`. +// +// The verdict logic is pure + unit-tested; the fetch wiring is a thin layer on +// top. Findings are emitted via `trackVerifyFinding` (src/lib/analytics.ts) so the +// recording is in the tool, not in agent prose. + +export type RlsFindingType = 'rls_leak' | 'rls_overrestrict' | 'none'; +export type TruthFindingType = 'false_pass' | 'none'; + +/** + * Classify a cross-user RLS isolation probe from its row counts. 
/**
 * Classify a cross-user RLS isolation probe from its row counts. Deterministic:
 *   - B reading A's rows (or anon reading any) -> rls_leak
 *   - A failing to read its own rows (positive control empty) -> rls_overrestrict
 *     (catches a policy that silently empties a real user's data — the break no
 *     scanner catches, since it returns 200 + [])
 *   - otherwise -> none
 *
 * The leak checks run first, so a leak wins when the positive control is also empty.
 *
 * @param input - Rows of A that B could read, rows A could read of its own, and rows an
 *   anonymous caller could read.
 * @returns The verdict plus the three counts as snake_case evidence.
 * @throws TypeError if a count is not a non-negative integer. A `NaN`/`undefined` count
 *   would otherwise fall through every comparison and be reported as a pass.
 */
export function classifyRls(input: {
  bReadRowsOfA: number;
  aReadOwnRows: number;
  anonReadRows: number;
}): { type: RlsFindingType; evidence: Record<string, unknown> } {
  const { bReadRowsOfA, aReadOwnRows, anonReadRows } = input;
  for (const [name, count] of Object.entries({ bReadRowsOfA, aReadOwnRows, anonReadRows })) {
    if (!Number.isInteger(count) || count < 0) {
      throw new TypeError(`classifyRls: ${name} must be a non-negative integer, got ${String(count)}`);
    }
  }

  const evidence = {
    user_b_read_rows_of_a: bReadRowsOfA,
    user_a_read_own_rows: aReadOwnRows,
    anon_read_rows: anonReadRows,
  };
  if (bReadRowsOfA > 0 || anonReadRows > 0) return { type: 'rls_leak', evidence };
  if (aReadOwnRows === 0) return { type: 'rls_overrestrict', evidence };
  return { type: 'none', evidence };
}

/**
 * Render a value as a trimmed string for comparison. `null`/`undefined` become `''`;
 * objects and arrays (e.g. a JSON column) are serialized with `JSON.stringify`, because
 * `String(obj)` would collapse every object to `[object Object]`.
 */
function normalizeScalar(v: unknown): string {
  if (v === null || v === undefined) return '';
  if (typeof v === 'object') {
    try {
      return (JSON.stringify(v) ?? '').trim();
    } catch {
      // Not serializable (e.g. circular): fall back to the generic string form.
      return String(v).trim();
    }
  }
  return String(v).trim();
}

/**
 * Classify a backend-truth check. The UI claimed `expected`; the DB returned
 * `dbValue`. A mismatch is a false pass (the write returned 200 + optimistic UI
 * but never persisted, or persisted the wrong value). Compared as normalized
 * scalars so `3` and `"3"` agree; surrounding whitespace is ignored.
 *
 * @param dbValue - Value read from the database (any JSON-decoded value).
 * @param expected - Value the UI displayed, as text.
 * @returns `none` on a match, `false_pass` otherwise, with both raw values as evidence.
 */
export function classifyTruth(
  dbValue: unknown,
  expected: string,
): { type: TruthFindingType; evidence: Record<string, unknown> } {
  const matches = normalizeScalar(dbValue) === normalizeScalar(expected);
  return {
    type: matches ? 'none' : 'false_pass',
    evidence: { ui_claimed: expected, db_actual: dbValue },
  };
}

// Words that make a statement beginning with SELECT/WITH write anyway: data-modifying
// CTEs (INSERT/UPDATE/DELETE/MERGE), DDL and privilege changes, and `SELECT … INTO`,
// which creates a table. Matched anywhere in the text, including inside string literals:
// a false rejection is safe, a false acceptance is not.
const FORBIDDEN_SQL_KEYWORDS =
  /\b(insert|update|delete|merge|truncate|drop|alter|create|grant|revoke|into)\b/i;

/**
 * A query is safe for `verify truth` only if it's a single read — starts with SELECT or
 * WITH, chains no further statements (a trailing `;` is fine), and contains none of the
 * write keywords above. Guards against an agent-generated destructive query
 * (`DELETE FROM …`, `…; UPDATE …`, `WITH x AS (DELETE …) …`, `SELECT … INTO t`) running
 * with the admin key. Not a full SQL parser: it does not detect side-effecting function
 * calls inside a SELECT, and it rejects reads that merely mention a keyword in a literal.
 *
 * @returns `true` only when the query passes every check; non-string input is rejected.
 */
export function isReadOnlyQuery(query: string): boolean {
  if (typeof query !== 'string') return false;
  const q = query.trim();
  if (!/^(select|with)\b/i.test(q)) return false;
  // No statement chaining beyond a single trailing semicolon.
  if (q.replace(/;\s*$/, '').includes(';')) return false;
  return !FORBIDDEN_SQL_KEYWORDS.test(q);
}

// ---- fetch wiring (not unit-tested; the verdicts above are) ----

/**
 * Pull an access token out of a response body shaped `{ accessToken }` or
 * `{ data: { accessToken } }`. Returns `''` when neither holds a non-empty string.
 */
function extractToken(j: unknown): string {
  const obj = j as { accessToken?: unknown; data?: { accessToken?: unknown } | null } | null | undefined;
  for (const candidate of [obj?.accessToken, obj?.data?.accessToken]) {
    if (typeof candidate === 'string' && candidate !== '') return candidate;
  }
  return '';
}

/**
 * Pull the row list out of a response body: the body itself when it is an array, else the
 * first of `data` / `records` / `rows` that is an array. Anything else yields `[]`, so the
 * result is always a real array and `.length` is always a number.
 */
function extractRows(j: unknown): unknown[] {
  if (Array.isArray(j)) return j;
  if (j === null || typeof j !== 'object') return [];
  const obj = j as { data?: unknown; records?: unknown; rows?: unknown };
  for (const candidate of [obj.data, obj.records, obj.rows]) {
    if (Array.isArray(candidate)) return candidate;
  }
  return [];
}

/** Throw on a non-2xx response so a backend error (expired key, bad SQL, 500) isn't read
 *  as an empty/zero result — which would masquerade as a passing probe. The message carries
 *  the status and up to 200 characters of the response body. */
async function assertOk(res: Response, what: string): Promise<void> {
  if (res.ok) return;
  const body = await res.text().catch(() => '');
  throw new Error(`${what} failed (HTTP ${res.status})${body ? `: ${body.slice(0, 200)}` : ''}`);
}

/**
 * Sign in with email/password via `POST /api/auth/sessions`.
 *
 * @returns The access token, or `''` if the 2xx body had none (callers must check).
 * @throws Error on a non-2xx response.
 */
export async function login(baseUrl: string, email: string, password: string): Promise<string> {
  const res = await fetch(`${baseUrl}/api/auth/sessions`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ email, password }),
  });
  await assertOk(res, `login (${email})`);
  return extractToken(await res.json().catch(() => ({})));
}

/**
 * Fetch the anon key via `POST /api/auth/tokens/anon`, authorized with the admin key.
 *
 * @returns The anon token, or `''` if the 2xx body had none (callers must check).
 * @throws Error on a non-2xx response.
 */
export async function getAnonKey(baseUrl: string, adminKey: string): Promise<string> {
  const res = await fetch(`${baseUrl}/api/auth/tokens/anon`, {
    method: 'POST',
    headers: { Authorization: `Bearer ${adminKey}` },
  });
  await assertOk(res, 'anon-key fetch');
  return extractToken(await res.json().catch(() => ({})));
}

/**
 * Run a SQL query through `POST /api/database/advance/rawsql` with the admin key.
 *
 * @param params - Values sent in the request's `params` array. Defaults to `[]`, which is
 *   what this function always sent before the parameter existed.
 * @returns The rows from the response (empty when the body has no recognizable row list).
 * @throws Error on a non-2xx response.
 */
export async function rawsqlRows(
  baseUrl: string,
  adminKey: string,
  query: string,
  params: readonly unknown[] = [],
): Promise<unknown[]> {
  const res = await fetch(`${baseUrl}/api/database/advance/rawsql`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${adminKey}` },
    body: JSON.stringify({ query, params }),
  });
  await assertOk(res, 'rawsql query');
  return extractRows(await res.json().catch(() => ({})));
}

/** Count rows from the data API. A 401/403 (RLS/auth blocked) counts as 0 rows — the
 *  expected "can't see it" result; any other non-2xx throws so a transport/server error
 *  isn't read as 0 rows (which would be a false isolation pass). For the same reason a 2xx
 *  whose non-empty body is not JSON throws; an empty body counts as 0 rows.
 *
 *  @param query - Raw query string (without `?`) appended as-is; the caller encodes it.
 *  @param token - Bearer token of the acting user; omitted for an anonymous read.
 *  @param anon - Anon key sent as the `apikey` header on every request.
 */
export async function recordsCount(
  baseUrl: string,
  table: string,
  query: string | undefined,
  token: string | undefined,
  anon: string,
): Promise<number> {
  const url = `${baseUrl}/api/database/records/${encodeURIComponent(table)}${query ? `?${query}` : ''}`;
  const headers: Record<string, string> = { apikey: anon };
  if (token) headers.Authorization = `Bearer ${token}`;
  const res = await fetch(url, { headers });
  if (res.status === 401 || res.status === 403) return 0;
  await assertOk(res, `data API read (${table})`);

  const text = await res.text();
  if (text.trim() === '') return 0;
  let parsed: unknown;
  try {
    parsed = JSON.parse(text);
  } catch {
    throw new Error(`data API read (${table}) returned a non-JSON body: ${text.slice(0, 200)}`);
  }
  return extractRows(parsed).length;
}