/**
 * Register `verify finding`: record a "loud" error the browser surfaced during the
 * drive — a 4xx/5xx, a `column does not exist`, a console exception — that the agent saw
 * via `browser_console_messages` / `browser_network_requests`. The rls/truth probes only
 * cover the *silent* findings; this command is how the loud ones get recorded too.
 *
 * Every value-taking flag declares a `<placeholder>`: commander only treats an option as
 * taking a value when its flags string contains one, so without the placeholder `--kind`
 * and friends would not carry the string the action handler reads.
 *
 * @param verify - the parent `verify` command the `finding` subcommand is attached to.
 */
export function registerVerifyFindingCommand(verify: Command): void {
  verify
    .command('finding')
    .description('Record a loud error surfaced during the drive (4xx/5xx, column-not-found, console) as a finding (experimental)')
    .requiredOption('--kind <kind>', 'short error kind, e.g. pgrst_column_not_found, http_500, console_error')
    .option('--type <type>', 'finding type', 'error')
    .option('--status <code>', 'HTTP status, if any', (v) => parseInt(v, 10))
    .option('--endpoint <url>', 'the endpoint/URL that errored')
    .option('--message <message>', 'the error message the page showed')
    .option('--table <table>', 'related table, if known')
    .action(async (opts, cmd) => {
      const { json } = getRootOpts(cmd);
      try {
        const config = getProjectConfig();
        if (!config) throw new CLIError('No linked project found — run `insforge link` first.');

        const finding = {
          type: opts.type as string,
          kind: opts.kind as string,
          // An unparseable --status yields NaN from parseInt; report it as "no status".
          status: Number.isNaN(opts.status) ? undefined : (opts.status as number | undefined),
          endpoint: opts.endpoint as string | undefined,
          message: opts.message as string | undefined,
          table: opts.table as string | undefined,
        };
        trackVerifyFinding(finding, config);
        await shutdownAnalytics(); // flush the PostHog event before exit

        if (json) {
          outputJson({ recorded: true, finding });
        } else {
          outputInfo(
            `📝 recorded ${finding.type} finding: ${finding.kind}${finding.status ? ` (${finding.status})` : ''}${finding.message ? ` — ${finding.message}` : ''}`,
          );
        }
      } catch (e) {
        handleError(e, json);
      }
    });
}
+vi.mock('../../lib/verify-probe.js', async (importOriginal) => { + const actual = await importOriginal(); + return { ...actual, login: vi.fn(async () => 'token'), recordsCount: vi.fn(async () => 0) }; +}); + +function makeProgram() { + const program = new Command().exitOverride(); + program.option('--json'); + registerVerifyRlsCommand(program.command('verify')); + return program; +} + +describe('verify rls (command)', () => { + let exitSpy: ReturnType; + beforeEach(() => { + vi.clearAllMocks(); + process.exitCode = undefined; + exitSpy = vi.spyOn(process, 'exit').mockImplementation(((code?: number) => { + throw new Error(`exit:${code}`); + }) as never); + vi.spyOn(console, 'error').mockImplementation(() => {}); + vi.spyOn(console, 'log').mockImplementation(() => {}); + }); + afterEach(() => { + exitSpy.mockRestore(); + vi.restoreAllMocks(); + process.exitCode = undefined; + }); + + it('rejects an --owner that smuggles PostgREST params, before any login', async () => { + const { login } = await import('../../lib/verify-probe.js'); + await expect( + makeProgram().parseAsync(['verify', 'rls', '--table', 'orders', '--owner', 'user_id&select=secret', '--json'], { from: 'user' }), + ).rejects.toThrow(/exit:/); + expect(login).not.toHaveBeenCalled(); + }); + + it('rejects a non-email --user-a, before any login', async () => { + const { login } = await import('../../lib/verify-probe.js'); + await expect( + makeProgram().parseAsync(['verify', 'rls', '--table', 'orders', '--owner', 'user_id', '--user-a', 'not-an-email', '--json'], { from: 'user' }), + ).rejects.toThrow(/exit:/); + expect(login).not.toHaveBeenCalled(); + }); + + it('scopes the anonymous control to A\'s owner filter (not the whole table)', async () => { + const { recordsCount } = await import('../../lib/verify-probe.js'); + await makeProgram().parseAsync(['verify', 'rls', '--table', 'orders', '--owner', 'user_id', '--json'], { from: 'user' }); + // 3 probes: B-of-A, A-own, anon — all must use the same 
/**
 * Register `verify rls`: a cross-user row-level-security isolation probe.
 *
 * The action logs in as two seeded users (A and B), looks up A's id, then counts the rows
 * matching `<owner>=eq.<A's id>` three times — as B, as A, and anonymously — and hands
 * the three counts to `classifyRls`. The finding is tracked, analytics are flushed, and
 * `process.exitCode` is 0 only when the classification is `none`.
 *
 * All flag validation happens before any login or query, so a bad flag never triggers
 * network traffic.
 *
 * @param verify - the parent `verify` command the `rls` subcommand is attached to.
 */
export function registerVerifyRlsCommand(verify: Command): void {
  verify
    .command('rls')
    .description('Cross-user RLS isolation probe — checks B cannot read A, A can read own (experimental)')
    // Value-taking flags need a `<placeholder>` for commander to capture their argument.
    .requiredOption('--table <table>', 'user-scoped table to probe')
    .requiredOption('--owner <column>', 'owner column on the table (e.g. user_id)')
    .option('--user-a <email>', 'seeded user A email', 'verify-a@example.com')
    .option('--user-b <email>', 'seeded user B email', 'verify-b@example.com')
    .option('--password <password>', 'seeded users password', 'Test1234!pass')
    .action(async (opts, cmd) => {
      const { json } = getRootOpts(cmd);
      try {
        const config = getProjectConfig();
        if (!config) throw new CLIError('No linked project found — run `insforge link` first.');
        const baseUrl = config.oss_host;

        // --table/--owner are interpolated into a PostgREST resource path and filter; keep
        // them to bare identifiers so a value like `user_id&select=secret` can't inject extra
        // params. --user-a/-b go into a raw SQL lookup; require an email shape (the single-
        // quote escaping below already blocks string-literal injection — this removes the rest).
        if (!isSafeIdentifier(String(opts.table))) {
          throw new CLIError(`--table must be a bare table name (got ${JSON.stringify(opts.table)}).`);
        }
        if (!isSafeIdentifier(String(opts.owner))) {
          throw new CLIError(`--owner must be a bare column name (got ${JSON.stringify(opts.owner)}).`);
        }
        if (!isLikelyEmail(String(opts.userA)) || !isLikelyEmail(String(opts.userB))) {
          throw new CLIError('--user-a and --user-b must be valid email addresses.');
        }

        const aToken = await login(baseUrl, opts.userA, opts.password);
        const bToken = await login(baseUrl, opts.userB, opts.password);
        const anon = await getAnonKey();
        if (!aToken || !bToken || !anon) {
          throw new CLIError(
            'Login or anon-key fetch returned empty — seed BOTH users first. An empty token turns every probe into an anonymous request that silently "passes" isolation.',
          );
        }

        const { rows } = await runRawSql(
          `select id from auth.users where email='${String(opts.userA).replace(/'/g, "''")}'`,
        );
        // `rows` may be empty, so the first element is typed as possibly undefined.
        const aId = (rows[0] as { id?: string } | undefined)?.id;
        if (!aId) throw new CLIError(`Could not find user A (${opts.userA}) — seed it first.`);

        // All three probes use the SAME owner-scoped filter so we measure "can X read A's
        // rows", not "can X read any row". Checking the whole table for the anon control would
        // false-positive a leak on any table that intentionally exposes some public rows.
        const filter = `${opts.owner}=eq.${encodeURIComponent(aId)}`;
        const bReadRowsOfA = await recordsCount(baseUrl, opts.table, filter, bToken, anon);
        const aReadOwnRows = await recordsCount(baseUrl, opts.table, filter, aToken, anon);
        const anonReadRows = await recordsCount(baseUrl, opts.table, filter, undefined, anon);

        const { type, evidence } = classifyRls({ bReadRowsOfA, aReadOwnRows, anonReadRows });
        const finding = { type, table: opts.table as string, evidence };
        trackVerifyFinding(finding, config);
        await shutdownAnalytics(); // flush the PostHog event before exit

        if (json) {
          outputJson({ passed: type === 'none', finding });
        } else if (type === 'rls_leak') {
          outputInfo(`❌ rls_leak on ${opts.table}: B read ${bReadRowsOfA} of A's rows (anon read ${anonReadRows}).`);
        } else if (type === 'rls_overrestrict') {
          outputInfo(`❌ rls_overrestrict on ${opts.table}: A could not read its own rows (positive control empty).`);
        } else {
          outputInfo(`✅ isolation holds on ${opts.table}: B=0, anon=0, A=${aReadOwnRows}.`);
        }
        process.exitCode = type === 'none' ? 0 : 1;
      } catch (e) {
        handleError(e, json);
      }
    });
}
it('rejects when both --expect and --expect-count are given', async () => { + const { runRawSql } = await import('../../lib/api/oss.js'); + await expect( + makeProgram().parseAsync(['verify', 'truth', '--query', 'select 1', '--expect', '1', '--expect-count', '1', '--json'], { from: 'user' }), + ).rejects.toThrow(/exit:/); + expect(runRawSql).not.toHaveBeenCalled(); + }); + + it('rejects a non-integer --expect-count before touching the DB', async () => { + const { runRawSql } = await import('../../lib/api/oss.js'); + await expect( + makeProgram().parseAsync(['verify', 'truth', '--query', 'select count(*) from t', '--expect-count', 'abc', '--json'], { from: 'user' }), + ).rejects.toThrow(/exit:/); + expect(runRawSql).not.toHaveBeenCalled(); + }); + + it('passes (exit 0) + records & flushes a finding when DB matches the claim', async () => { + const oss = await import('../../lib/api/oss.js'); + (oss.runRawSql as ReturnType).mockResolvedValue({ rows: [{ n: 3 }] }); + await makeProgram().parseAsync(['verify', 'truth', '--query', 'select n', '--expect', '3', '--json'], { from: 'user' }); + expect(process.exitCode).toBe(0); + const { trackVerifyFinding, shutdownAnalytics } = await import('../../lib/analytics.js'); + expect(trackVerifyFinding).toHaveBeenCalledTimes(1); + expect(shutdownAnalytics).toHaveBeenCalled(); + }); + + it('flags false_pass (exit 1) when DB differs from the claim', async () => { + const oss = await import('../../lib/api/oss.js'); + (oss.runRawSql as ReturnType).mockResolvedValue({ rows: [{ n: 1 }] }); + await makeProgram().parseAsync(['verify', 'truth', '--query', 'select n', '--expect', '3', '--json'], { from: 'user' }); + expect(process.exitCode).toBe(1); + }); +}); diff --git a/src/commands/verify/truth.ts b/src/commands/verify/truth.ts new file mode 100644 index 0000000..c221043 --- /dev/null +++ b/src/commands/verify/truth.ts @@ -0,0 +1,73 @@ +import type { Command } from 'commander'; +import { CLIError, getRootOpts, handleError } from 
/**
 * Register `verify truth`: compare a database read against what the UI claimed.
 *
 * With `--expect`, the first column of the first returned row is compared to the claimed
 * value; with `--expect-count`, the number of returned rows is compared instead. Exactly
 * one of the two must be given. The result of `classifyTruth` is tracked as a finding,
 * analytics are flushed, and `process.exitCode` is 0 only when the classification is
 * `none`.
 *
 * @param verify - the parent `verify` command the `truth` subcommand is attached to.
 */
export function registerVerifyTruthCommand(verify: Command): void {
  verify
    .command('truth')
    .description('Backend-truth cross-check — compare a DB read to what the UI claimed (experimental)')
    // Value-taking flags need a `<placeholder>` for commander to capture their argument.
    .requiredOption('--query <sql>', 'a read proving what the UI showed; compares the first column of the first row')
    .option('--expect <value>', 'the value the UI displayed (compared as a scalar)')
    .option('--expect-count <n>', 'expect this many rows instead of a scalar value')
    .option('--table <name>', 'table name, for the finding label')
    .action(async (opts, cmd) => {
      const { json } = getRootOpts(cmd);
      try {
        const config = getProjectConfig();
        if (!config) throw new CLIError('No linked project found — run `insforge link` first.');
        if (!isReadOnlyQuery(opts.query)) {
          throw new CLIError(
            'verify truth expects a single read query — it must start with SELECT or WITH and not chain statements. (This guard blocks common destructive forms, not a hard read-only guarantee — pass a plain read.)',
          );
        }
        // All input validation runs before any I/O — a bad flag must never fire the admin-key
        // query (fast-fail). `--expect-count` is parsed once here and the number reused below.
        if (opts.expect !== undefined && opts.expectCount !== undefined) {
          throw new CLIError('Provide either --expect <value> or --expect-count <n>, not both.');
        }
        if (opts.expect === undefined && opts.expectCount === undefined) {
          throw new CLIError('Provide --expect <value> (scalar) or --expect-count <n> (row count).');
        }
        let expectedCount: number | undefined;
        if (opts.expectCount !== undefined) {
          const rawCount = String(opts.expectCount).trim();
          const n = Number(rawCount);
          // `Number('')` is 0, so an empty value has to be rejected explicitly.
          if (rawCount === '' || !Number.isInteger(n) || n < 0) {
            throw new CLIError(`--expect-count must be a non-negative integer (got ${JSON.stringify(opts.expectCount)}).`);
          }
          expectedCount = n;
        }

        const { rows } = await runRawSql(opts.query);

        let result: { type: 'false_pass' | 'none'; evidence: Record<string, unknown> };
        if (expectedCount !== undefined) {
          // Compare as a number so `--expect-count 03` matches 3 rows (string compare wouldn't).
          result = classifyTruth(rows.length, String(expectedCount));
        } else {
          const first = rows[0];
          // Object rows contribute their first column; anything else is compared as-is.
          const dbValue =
            first && typeof first === 'object' ? Object.values(first as Record<string, unknown>)[0] : first;
          result = classifyTruth(dbValue, String(opts.expect));
        }

        const finding = { type: result.type, table: opts.table as string | undefined, evidence: result.evidence };
        trackVerifyFinding(finding, config);
        await shutdownAnalytics(); // flush the PostHog event before exit

        if (json) {
          outputJson({ passed: result.type === 'none', finding });
        } else if (result.type === 'false_pass') {
          outputInfo(
            `❌ false_pass${opts.table ? ` on ${opts.table}` : ''}: UI claimed ${JSON.stringify(result.evidence.ui_claimed)} but DB has ${JSON.stringify(result.evidence.db_actual)}.`,
          );
        } else {
          outputInfo(`✅ backend truth matches: ${JSON.stringify(result.evidence.db_actual)}.`);
        }
        process.exitCode = result.type === 'none' ? 0 : 1;
      } catch (e) {
        handleError(e, json);
      }
    });
}
// Evidence keys that hold raw values — the DB value and the value the UI claimed — and are
// therefore stripped before anything is transmitted.
const SENSITIVE_EVIDENCE_KEYS = new Set(['db_actual', 'ui_claimed']);

/**
 * Emit a verify finding as a `cli_verify_finding` event via `captureEvent`.
 *
 * Only structured fields are sent: the finding's `type`, `table`, `kind` and `status`,
 * the project identifiers from `config`, and whatever evidence entries are not listed in
 * `SENSITIVE_EVIDENCE_KEYS`. The finding's `endpoint` and `message` are never included in
 * the event; they stay on the object the caller prints locally. Fixed fields are written
 * after the evidence entries, so an evidence key can never overwrite one of them.
 *
 * This function does not flush; the caller does that with `shutdownAnalytics()` before exit.
 *
 * @param finding - the finding to report.
 * @param config - the linked project's configuration, used for the event's identifiers.
 */
export function trackVerifyFinding(finding: VerifyFinding, config: ProjectConfig): void {
  const evidenceEntries = Object.entries(finding.evidence ?? {});
  const shareableEntries = evidenceEntries.filter(([name]) => !SENSITIVE_EVIDENCE_KEYS.has(name));

  const properties = {
    ...Object.fromEntries(shareableEntries),
    finding_type: finding.type,
    passed: finding.type === 'none',
    table: finding.table,
    kind: finding.kind,
    status: finding.status,
    project_id: config.project_id,
    project_name: config.project_name,
    org_id: config.org_id,
    region: config.region,
    oss_mode: config.project_id === FAKE_PROJECT_ID,
  };

  captureEvent(config.project_id, 'cli_verify_finding', properties);
}
mkdtempSync(join(tmpdir(), 'insforge-mcp-')); + file = join(dir, '.cursor', 'mcp.json'); + }); + afterEach(() => { + rmSync(dir, { recursive: true, force: true }); + }); + + it('creates the file (and parent dirs) with the server under mcpServers', () => { + expect(mergeJsonMcp(file, 'mcpServers', HEADLESS_SERVER)).toBe(true); + expect(read().mcpServers['playwright']).toEqual(HEADLESS_SERVER); + }); + + it('merges without clobbering other servers', () => { + writeFileSync(join(dir, 'cfg.json'), JSON.stringify({ mcpServers: { other: { command: 'x' } } })); + expect(mergeJsonMcp(join(dir, 'cfg.json'), 'mcpServers', HEADLESS_SERVER)).toBe(true); + const cfg = JSON.parse(readFileSync(join(dir, 'cfg.json'), 'utf-8')); + expect(cfg.mcpServers.other).toEqual({ command: 'x' }); + expect(cfg.mcpServers['playwright']).toBeDefined(); + }); + + it('is idempotent — returns false when already present and identical', () => { + mergeJsonMcp(file, 'mcpServers', HEADLESS_SERVER); + expect(mergeJsonMcp(file, 'mcpServers', HEADLESS_SERVER)).toBe(false); + }); + + it('recovers from malformed JSON by starting fresh', () => { + const bad = join(dir, 'bad.json'); + writeFileSync(bad, '{ not valid json'); + expect(mergeJsonMcp(bad, 'mcpServers', HEADLESS_SERVER)).toBe(true); + expect(JSON.parse(readFileSync(bad, 'utf-8')).mcpServers['playwright']).toBeDefined(); + }); + + it('supports the VS Code `servers` key', () => { + expect(mergeJsonMcp(file, 'servers', HEADLESS_SERVER)).toBe(true); + expect(read().servers['playwright']).toEqual(HEADLESS_SERVER); + }); + + it('starts fresh on valid-but-non-object JSON (array / null / primitive)', () => { + for (const bad of ['[1,2,3]', 'null', '"a string"', '42']) { + const f = join(dir, `${bad.replace(/\W/g, '')}.json`); + writeFileSync(f, bad); + expect(mergeJsonMcp(f, 'mcpServers', HEADLESS_SERVER)).toBe(true); + // No crash, no silent loss — server is written under a fresh object. 
+ expect(JSON.parse(readFileSync(f, 'utf-8')).mcpServers['playwright']).toEqual(HEADLESS_SERVER); + } + }); + + it('normalizes a non-object value under the section key (e.g. {"mcpServers": "x"})', () => { + for (const badSection of ['"x"', '[1,2]', '5']) { + const f = join(dir, `section${badSection.replace(/\W/g, '')}.json`); + writeFileSync(f, `{"mcpServers": ${badSection}}`); + // Would otherwise throw (assigning a prop on a string in strict ESM) or drop the server. + expect(mergeJsonMcp(f, 'mcpServers', HEADLESS_SERVER)).toBe(true); + expect(JSON.parse(readFileSync(f, 'utf-8')).mcpServers['playwright']).toEqual(HEADLESS_SERVER); + } + }); +}); + +describe('ensureCodexToml', () => { + let dir: string; + let file: string; + + beforeEach(() => { + dir = mkdtempSync(join(tmpdir(), 'insforge-codex-')); + file = join(dir, '.codex', 'config.toml'); + }); + afterEach(() => { + rmSync(dir, { recursive: true, force: true }); + }); + + it('appends a [mcp_servers.playwright] block when absent', () => { + expect(ensureCodexToml(file)).toBe(true); + const toml = readFileSync(file, 'utf-8'); + expect(toml).toContain('[mcp_servers.playwright]'); + expect(toml).toContain('command = "npx"'); + expect(toml).toContain('"--headless"'); + }); + + it('is idempotent — returns false when the block already exists', () => { + ensureCodexToml(file); + expect(ensureCodexToml(file)).toBe(false); + }); + + it('preserves existing TOML content', () => { + mkdirSync(dirname(file), { recursive: true }); + writeFileSync(file, '[some_other_section]\nkey = "value"\n'); + expect(ensureCodexToml(file)).toBe(true); + const toml = readFileSync(file, 'utf-8'); + expect(toml).toContain('[some_other_section]'); + expect(toml).toContain('[mcp_servers.playwright]'); + }); +}); diff --git a/src/lib/browser-mcp.ts b/src/lib/browser-mcp.ts new file mode 100644 index 0000000..6004c77 --- /dev/null +++ b/src/lib/browser-mcp.ts @@ -0,0 +1,140 @@ +import { exec } from 'node:child_process'; +import { existsSync, 
const execAsync = promisify(exec);

// Upper bound for every child process spawned here, so a hung CLI cannot hang the link.
const MCP_CONFIG_TIMEOUT_MS = 60_000;

// `@playwright/mcp` is the browser-automation MCP (browser_navigate/click/snapshot +
// console/network tools) the light-mode `insforge-verify` skill drives directly — NOT
// `run-test-mcp-server`, which is the Test Agents (planner/generator) pipeline and has no
// browser_* tools.
const MCP_SERVER_NAME = 'playwright';
const MCP_COMMAND = 'npx';
const MCP_ARGS = ['@playwright/mcp@latest', '--headless'];

// Matches a `[mcp_servers.<name>]` table header at the start of a line, including the
// quoted-key spellings (`[mcp_servers."playwright"]`) and optional whitespace that TOML
// permits inside the brackets.
const CODEX_TABLE_HEADER = new RegExp(
  String.raw`^[ \t]*\[[ \t]*mcp_servers[ \t]*\.[ \t]*(?:${MCP_SERVER_NAME}|"${MCP_SERVER_NAME}"|'${MCP_SERVER_NAME}')[ \t]*\]`,
  'm',
);

/** True for non-null, non-array objects — the only JSON shape a config section may have. */
function isPlainObject(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/**
 * Merge the Playwright MCP server into a JSON MCP config, returning true if it changed
 * the file.
 *
 * Content that is not a JSON object (malformed JSON, an array, `null`, a primitive) is
 * replaced rather than crashing the link; before it is overwritten, any non-empty original
 * content is saved next to the file as `<file>.bak` so nothing the user wrote is lost.
 * A non-object value under `key` is likewise replaced by a fresh section.
 *
 * @param file - path of the JSON config; parent directories are created when missing.
 * @param key - top-level property the servers live under (`mcpServers` or `servers`).
 * @param server - the server entry to store under the `playwright` name.
 * @returns false when an identical entry is already present (nothing is written).
 */
export function mergeJsonMcp(
  file: string,
  key: 'mcpServers' | 'servers',
  server: Record<string, unknown>,
): boolean {
  let config: Record<string, unknown> = {};
  if (existsSync(file)) {
    const raw = readFileSync(file, 'utf-8');
    let parsed: unknown;
    try {
      parsed = JSON.parse(raw);
    } catch {
      parsed = undefined;
    }
    if (isPlainObject(parsed)) {
      config = parsed;
    } else if (raw.trim() !== '') {
      // About to discard the user's content — keep a copy first.
      writeFileSync(`${file}.bak`, raw);
    }
  }

  const current = config[key];
  const section: Record<string, unknown> = isPlainObject(current) ? current : {};
  if (JSON.stringify(section[MCP_SERVER_NAME]) === JSON.stringify(server)) return false;

  section[MCP_SERVER_NAME] = server;
  config[key] = section;
  mkdirSync(dirname(file), { recursive: true });
  writeFileSync(file, `${JSON.stringify(config, null, 2)}\n`);
  return true;
}

/**
 * Append a `[mcp_servers.playwright]` block to a TOML config if it is absent.
 *
 * The table counts as present when the literal header text occurs anywhere in the file, or
 * when an equivalent header (quoted key, inner whitespace) starts a line — appending in
 * the latter case would define the same table twice.
 *
 * @param file - path of the TOML config; parent directories are created when missing.
 * @returns true when the block was appended, false when the table already exists.
 */
export function ensureCodexToml(file: string): boolean {
  const existing = existsSync(file) ? readFileSync(file, 'utf-8') : '';
  const alreadyPresent =
    existing.includes(`[mcp_servers.${MCP_SERVER_NAME}]`) || CODEX_TABLE_HEADER.test(existing);
  if (alreadyPresent) return false;

  const args = MCP_ARGS.map((a) => `"${a}"`).join(', ');
  const block = `\n[mcp_servers.${MCP_SERVER_NAME}]\ncommand = "${MCP_COMMAND}"\nargs = [${args}]\n`;
  mkdirSync(dirname(file), { recursive: true });
  writeFileSync(file, existing + block);
  return true;
}

/** Resolve to true when `cmd` is found by `where` (Windows) or `command -v` (elsewhere). */
async function commandExists(cmd: string): Promise<boolean> {
  const check = process.platform === 'win32' ? `where ${cmd}` : `command -v ${cmd}`;
  try {
    await execAsync(check, { timeout: MCP_CONFIG_TIMEOUT_MS });
    return true;
  } catch {
    return false;
  }
}

/**
 * One agent's recipe for registering the browser MCP at user/global scope. `apply`
 * returns a label of what it configured, or null when there was nothing to do (the agent
 * is not present, or the server is already registered). Add an agent by adding one entry
 * to `BROWSER_MCP_TARGETS` — no other call site changes.
 */
interface BrowserMcpTarget {
  agent: string;
  apply: (home: string) => Promise<string | null>;
}

const JSON_MCP_SERVER = { command: MCP_COMMAND, args: MCP_ARGS };

const BROWSER_MCP_TARGETS: BrowserMcpTarget[] = [
  {
    // Claude Code: delegate to its own CLI at user scope (global across projects).
    // Idempotent + quiet on repeat links: skip if already configured. Skipped if the
    // `claude` CLI isn't on PATH.
    agent: 'Claude Code',
    apply: async () => {
      if (!(await commandExists('claude'))) return null;
      let present: boolean;
      try {
        await execAsync(`claude mcp get ${MCP_SERVER_NAME}`, { timeout: MCP_CONFIG_TIMEOUT_MS });
        present = true;
      } catch {
        present = false;
      }
      if (present) return null;
      await execAsync(
        `claude mcp add ${MCP_SERVER_NAME} -s user -- ${MCP_COMMAND} ${MCP_ARGS.join(' ')}`,
        { timeout: MCP_CONFIG_TIMEOUT_MS },
      );
      return 'user scope';
    },
  },
  {
    // Cursor: no CLI — write its global config file, only if Cursor is set up.
    agent: 'Cursor',
    apply: async (home) => {
      if (!existsSync(join(home, '.cursor'))) return null;
      return mergeJsonMcp(join(home, '.cursor', 'mcp.json'), 'mcpServers', JSON_MCP_SERVER)
        ? '~/.cursor/mcp.json'
        : null;
    },
  },
  {
    // Codex: global TOML, only if Codex is set up.
    agent: 'Codex',
    apply: async (home) => {
      if (!existsSync(join(home, '.codex'))) return null;
      return ensureCodexToml(join(home, '.codex', 'config.toml')) ? '~/.codex/config.toml' : null;
    },
  },
];

/**
 * Configure the Playwright browser MCP at user/global scope for whichever agents are
 * present, so light-mode `insforge-verify` can drive the browser. The server command is
 * identical across agents — only where/how it's registered differs.
 *
 * Best-effort: an agent whose recipe throws is skipped and never blocks the others.
 *
 * @param home - the home directory to look for agent config folders in.
 * @returns one `"<agent> (<label>)"` entry per agent that was newly configured.
 */
export async function configureBrowserMcp(home = homedir()): Promise<string[]> {
  const configured: string[] = [];
  for (const target of BROWSER_MCP_TARGETS) {
    try {
      const label = await target.apply(home);
      if (label) configured.push(`${target.agent} (${label})`);
    } catch {
      // best-effort per agent
    }
  }
  return configured;
}
+ if (withBrowserMcp) { + try { + const configured = await configureBrowserMcp(); + if (!json) { + if (configured.length) { + clack.log.success(`Configured the Playwright browser MCP for: ${configured.join(', ')}.`); + clack.log.warn( + 'Restart your agent (or reload MCP servers) so the browser tools load before verifying.', + ); + } else { + clack.log.info( + 'No supported agent found to auto-configure the browser MCP. Add it manually — Claude: `claude mcp add playwright -s user -- npx @playwright/mcp@latest --headless`.', + ); + } + } + } catch (err) { + if (!json) { + clack.log.warn(`Could not configure the browser MCP: ${describeExecError(err)}`); + clack.log.info( + 'Add a `playwright` MCP server to your agent manually (command: `npx @playwright/mcp@latest --headless`).', + ); + } + } + } } export async function reportCliUsage( diff --git a/src/lib/verify-probe.test.ts b/src/lib/verify-probe.test.ts new file mode 100644 index 0000000..b4b1ca6 --- /dev/null +++ b/src/lib/verify-probe.test.ts @@ -0,0 +1,114 @@ +import { describe, expect, it } from 'vitest'; +import { classifyRls, classifyTruth, isLikelyEmail, isReadOnlyQuery, isSafeIdentifier } from './verify-probe.js'; + +describe('classifyRls', () => { + it('flags rls_leak when B reads any of A\'s rows', () => { + const r = classifyRls({ bReadRowsOfA: 3, aReadOwnRows: 5, anonReadRows: 0 }); + expect(r.type).toBe('rls_leak'); + expect(r.evidence.user_b_read_rows_of_a).toBe(3); + }); + + it('flags rls_leak when anonymous reads any rows', () => { + expect(classifyRls({ bReadRowsOfA: 0, aReadOwnRows: 5, anonReadRows: 2 }).type).toBe('rls_leak'); + }); + + it('flags rls_overrestrict when A cannot read its own rows (positive control empty)', () => { + expect(classifyRls({ bReadRowsOfA: 0, aReadOwnRows: 0, anonReadRows: 0 }).type).toBe('rls_overrestrict'); + }); + + it('passes (none) when B=0, anon=0, and A sees its own rows', () => { + expect(classifyRls({ bReadRowsOfA: 0, aReadOwnRows: 5, anonReadRows: 0 
}).type).toBe('none'); + }); + + it('prioritises a real leak over the positive-control check', () => { + // B leaks AND A sees nothing — the leak is the more severe finding to surface + expect(classifyRls({ bReadRowsOfA: 4, aReadOwnRows: 0, anonReadRows: 0 }).type).toBe('rls_leak'); + }); +}); + +describe('classifyTruth', () => { + it('flags false_pass when the DB value differs from what the UI claimed', () => { + const r = classifyTruth(1, '3'); + expect(r.type).toBe('false_pass'); + expect(r.evidence).toEqual({ ui_claimed: '3', db_actual: 1 }); + }); + + it('passes when the DB value matches (number vs string normalised)', () => { + expect(classifyTruth(3, '3').type).toBe('none'); + expect(classifyTruth('3', '3').type).toBe('none'); + expect(classifyTruth(' 3 ', '3').type).toBe('none'); + }); + + it('treats null/undefined as empty and mismatching a non-empty expectation', () => { + expect(classifyTruth(null, '3').type).toBe('false_pass'); + expect(classifyTruth(undefined, '0').type).toBe('false_pass'); + }); + + it('passes when both sides are empty', () => { + expect(classifyTruth(null, '').type).toBe('none'); + }); +}); + +describe('isReadOnlyQuery', () => { + it('allows SELECT / WITH (any case, leading whitespace, trailing semicolon)', () => { + expect(isReadOnlyQuery('select 1')).toBe(true); + expect(isReadOnlyQuery('SELECT * FROM t')).toBe(true); + expect(isReadOnlyQuery(' with c as (select 1) select * from c')).toBe(true); + expect(isReadOnlyQuery('select 1;')).toBe(true); + }); + + it('rejects writes / DDL', () => { + expect(isReadOnlyQuery('delete from users')).toBe(false); + expect(isReadOnlyQuery('UPDATE accounts SET balance = 0')).toBe(false); + expect(isReadOnlyQuery('insert into t values (1)')).toBe(false); + expect(isReadOnlyQuery('drop table t')).toBe(false); + }); + + it('rejects statement chaining', () => { + expect(isReadOnlyQuery('select 1; delete from users')).toBe(false); + expect(isReadOnlyQuery('select 1; update t set x = 1')).toBe(false); + 
}); + + it('rejects DML hidden inside a CTE (WITH … DELETE/UPDATE/INSERT/MERGE … SELECT)', () => { + expect(isReadOnlyQuery('with x as (delete from users returning id) select id from x')).toBe(false); + expect(isReadOnlyQuery('WITH x AS (UPDATE t SET c = 1 RETURNING id) SELECT * FROM x')).toBe(false); + expect(isReadOnlyQuery('with x as (insert into t values (1) returning id) select id from x')).toBe(false); + expect(isReadOnlyQuery('with x as (merge into t using s on t.id = s.id returning t.id) select id from x')).toBe(false); + }); + + it('rejects SELECT … INTO because it creates a table', () => { + expect(isReadOnlyQuery('select * into evil_copy from auth.users')).toBe(false); + expect(isReadOnlyQuery('SELECT id INTO TEMP t FROM users')).toBe(false); + expect(isReadOnlyQuery('with x as (select 1) select * into newtbl from x')).toBe(false); + }); +}); + +describe('isSafeIdentifier', () => { + it('allows bare table/column names', () => { + expect(isSafeIdentifier('user_id')).toBe(true); + expect(isSafeIdentifier('_private')).toBe(true); + expect(isSafeIdentifier('Orders2')).toBe(true); + }); + + it('rejects PostgREST-parameter / injection shapes', () => { + expect(isSafeIdentifier('user_id&select=secret')).toBe(false); + expect(isSafeIdentifier('user_id=eq.1')).toBe(false); + expect(isSafeIdentifier('2cols')).toBe(false); + expect(isSafeIdentifier('a b')).toBe(false); + expect(isSafeIdentifier('')).toBe(false); + }); +}); + +describe('isLikelyEmail', () => { + it('allows ordinary emails', () => { + expect(isLikelyEmail('verify-a@example.com')).toBe(true); + expect(isLikelyEmail('a.b+tag@sub.domain.io')).toBe(true); + }); + + it('rejects quotes / spaces / chaining used for SQL injection', () => { + expect(isLikelyEmail("x' OR '1'='1")).toBe(false); + expect(isLikelyEmail('a@b.com; drop table users')).toBe(false); + expect(isLikelyEmail('not-an-email')).toBe(false); + expect(isLikelyEmail('a @b.com')).toBe(false); + }); +}); diff --git a/src/lib/verify-probe.ts 
// b/src/lib/verify-probe.ts new file mode 100644 index 0000000..f434001 --- /dev/null +++ b/src/lib/verify-probe.ts @@ -0,0 +1,145 @@
// Deterministic verify probes for `insforge verify rls/truth`.
//
// The verdict logic is pure + unit-tested; the fetch wiring is a thin layer on
// top. Findings are emitted via `trackVerifyFinding` (src/lib/analytics.ts) so the
// recording is in the tool, not in agent prose.

export type RlsFindingType = 'rls_leak' | 'rls_overrestrict' | 'none';
export type TruthFindingType = 'false_pass' | 'none';

/**
 * Classify a cross-user RLS isolation probe from its row counts. Deterministic:
 *  - B reading A's rows (or anon reading any)                  -> rls_leak
 *  - A failing to read its own rows (positive control empty)   -> rls_overrestrict
 *    (catches a policy that silently empties a real user's data — the break no
 *    scanner catches, since it returns 200 + [])
 *  - otherwise                                                 -> none
 *
 * Leak checks run first, so a leak wins over an empty positive control.
 *
 * The counts are compared with `> 0` / `=== 0`; a count that is not a real number
 * (NaN, undefined smuggled past the types) matches neither and falls through to
 * `none`, so callers must pass genuine row counts.
 *
 * @param input row counts observed by user B, user A and the anonymous caller
 * @returns the finding type plus the raw counts as evidence
 */
export function classifyRls(input: {
  bReadRowsOfA: number;
  aReadOwnRows: number;
  anonReadRows: number;
}): { type: RlsFindingType; evidence: Record<string, number> } {
  const evidence = {
    user_b_read_rows_of_a: input.bReadRowsOfA,
    user_a_read_own_rows: input.aReadOwnRows,
    anon_read_rows: input.anonReadRows,
  };
  if (input.bReadRowsOfA > 0) return { type: 'rls_leak', evidence };
  if (input.anonReadRows > 0) return { type: 'rls_leak', evidence };
  if (input.aReadOwnRows === 0) return { type: 'rls_overrestrict', evidence };
  return { type: 'none', evidence };
}

/** Render a value as a trimmed string; null/undefined become the empty string. */
function normalizeScalar(v: unknown): string {
  if (v === null || v === undefined) return '';
  return String(v).trim();
}

/**
 * Classify a backend-truth check. The UI claimed `expected`; the DB returned
 * `dbValue`. A mismatch is a false pass (the write returned 200 + optimistic UI
 * but never persisted, or persisted the wrong value). Compared as normalized
 * scalars so `3` and `"3"` agree.
 *
 * @param dbValue  the value read back from the database (any JSON scalar)
 * @param expected the value the UI displayed
 * @returns `none` when both sides normalize to the same string, else `false_pass`;
 *          the evidence carries both values un-normalized
 */
export function classifyTruth(
  dbValue: unknown,
  expected: string,
): { type: TruthFindingType; evidence: Record<string, unknown> } {
  const evidence = { ui_claimed: expected, db_actual: dbValue };
  return {
    type: normalizeScalar(dbValue) === normalizeScalar(expected) ? 'none' : 'false_pass',
    evidence,
  };
}

/**
 * Best-effort guard that rejects the obvious destructive shapes before a `verify truth`
 * query runs with the admin key: it must start with SELECT/WITH, chain no further statements
 * (a trailing `;` is fine), and contain no DML/DDL keyword (`into` included — `SELECT … INTO
 * new_table` creates a table while still starting with SELECT).
 *
 * This is NOT a real read-only guarantee. A side-effecting function call hidden behind a
 * SELECT — `SELECT setval(…)`, `SELECT pg_terminate_backend(…)` — passes the keyword scan and
 * still executes, because the query runs with the project admin key. A true guarantee needs a
 * server-side read-only transaction; until that lands, this only blocks the common
 * destructive *statement* forms, so callers must not treat it as a sandbox.
 *
 * The scan is purely textual, so it is deliberately conservative: a `;` or a blocked keyword
 * inside a string literal or comment is rejected too.
 */
export function isReadOnlyQuery(query: string): boolean {
  const q = query.trim();
  if (!/^(select|with)\b/i.test(q)) return false;
  // No statement chaining beyond a single trailing semicolon.
  if (q.replace(/;\s*$/, '').includes(';')) return false;
  if (/\b(insert|update|delete|merge|truncate|drop|alter|create|grant|revoke|into)\b/i.test(q)) return false;
  return true;
}

/** True for a bare identifier: a letter or `_`, then letters, digits or `_`. */
export function isSafeIdentifier(s: string): boolean {
  return /^[A-Za-z_][A-Za-z0-9_]*$/.test(s);
}

/**
 * Loose `local@domain.tld` shape check. Whitespace, quotes, `;` and extra `@`
 * are rejected anywhere in the string; this is a shape filter, not an address validator.
 */
export function isLikelyEmail(s: string): boolean {
  return /^[^\s'";@]+@[^\s'";@]+\.[^\s'";@]+$/.test(s);
}

// ---- fetch wiring (not unit-tested; the verdicts above are) ----

const REQUEST_TIMEOUT_MS = 30_000;

/**
 * `fetch` with an abort after REQUEST_TIMEOUT_MS.
 *
 * The timer is cleared as soon as `fetch` settles, so reading the response body
 * afterwards is not bounded by this timeout. A `signal` passed in `init` is
 * replaced by the internal one.
 */
async function fetchWithTimeout(url: string, init: RequestInit = {}): Promise<Response> {
  const ac = new AbortController();
  const timer = setTimeout(() => ac.abort(), REQUEST_TIMEOUT_MS);
  try {
    return await fetch(url, { ...init, signal: ac.signal });
  } finally {
    clearTimeout(timer);
  }
}

/** Pull `accessToken` (top-level, or nested under `data`) out of a login response; '' if absent. */
function extractToken(j: unknown): string {
  const obj = j as { accessToken?: string; data?: { accessToken?: string } };
  return obj?.accessToken ?? obj?.data?.accessToken ?? '';
}

/**
 * Pull the row array out of a data-API response: either the body itself, or the
 * first of `data` / `records` / `rows` that is actually an array.
 *
 * Always returns an array. A non-array `data` (an object, a string) is skipped
 * rather than returned, so `.length` on the result is always a real number and
 * never an `undefined` that the classifier would read as a pass.
 */
function extractRows(j: unknown): unknown[] {
  if (Array.isArray(j)) return j;
  if (j === null || typeof j !== 'object') return [];
  const obj = j as { data?: unknown; records?: unknown; rows?: unknown };
  for (const candidate of [obj.data, obj.records, obj.rows]) {
    if (Array.isArray(candidate)) return candidate;
  }
  return [];
}

/** Throw on a non-2xx response so a backend error (expired key, bad SQL, 500) isn't read
 *  as an empty/zero result — which would masquerade as a passing probe. The message carries
 *  the status and at most the first 200 characters of the body. */
async function assertOk(res: Response, what: string): Promise<void> {
  if (res.ok) return;
  const body = await res.text().catch(() => '');
  throw new Error(`${what} failed (HTTP ${res.status})${body ? `: ${body.slice(0, 200)}` : ''}`);
}

/**
 * Sign in via `POST {baseUrl}/api/auth/sessions` and return the access token.
 *
 * @throws Error on any non-2xx response.
 * @returns the token, or '' when the 2xx body is not JSON or carries no `accessToken`.
 *          Callers should treat '' as a failed login: `recordsCount` sends no
 *          Authorization header for an empty token, i.e. it would probe anonymously.
 */
export async function login(baseUrl: string, email: string, password: string): Promise<string> {
  const res = await fetchWithTimeout(`${baseUrl}/api/auth/sessions`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ email, password }),
  });
  await assertOk(res, `login (${email})`);
  return extractToken(await res.json().catch(() => ({})));
}

/** Count rows from the data API. A 401/403 (RLS/auth blocked) counts as 0 rows — the
 *  expected "can't see it" result; any other non-2xx throws so a transport/server error
 *  isn't read as 0 rows (which would be a false isolation pass).
 *
 *  `table` is URL-encoded; `query` is appended verbatim after `?`, so the caller must
 *  have validated/encoded it. A 2xx body that is not JSON, or that holds no recognizable
 *  row array, counts as 0 rows.
 *
 *  @param token bearer token of the acting user; omitted/empty means an anonymous read
 *  @param anon  the anon API key, always sent as the `apikey` header */
export async function recordsCount(
  baseUrl: string,
  table: string,
  query: string | undefined,
  token: string | undefined,
  anon: string,
): Promise<number> {
  const url = `${baseUrl}/api/database/records/${encodeURIComponent(table)}${query ? `?${query}` : ''}`;
  const headers: Record<string, string> = { apikey: anon };
  if (token) headers.Authorization = `Bearer ${token}`;
  const res = await fetchWithTimeout(url, { headers });
  if (res.status === 401 || res.status === 403) return 0;
  await assertOk(res, `data API read (${table})`);
  return extractRows(await res.json().catch(() => [])).length;
}