InsForge · CarmenDou · Jun 12, 2026 · Jun 12, 2026 · Jun 12, 2026 · Jun 13, 2026
diff --git a/src/commands/deployments/deploy.ts b/src/commands/deployments/deploy.ts
@@ -53,6 +53,9 @@ const EXCLUDE_PATTERNS = [
   '.cache',
   'skills',
   'coverage',
+  'test-results',
+  'playwright-report',
+  '.playwright-mcp',
   IGNORE_FILE_NAME,
 ];
 

diff --git a/src/commands/verify/finding.ts b/src/commands/verify/finding.ts
@@ -0,0 +1,48 @@
+import type { Command } from 'commander';
+import { CLIError, getRootOpts, handleError } from '../../lib/errors.js';
+import { outputJson, outputInfo } from '../../lib/output.js';
+import { shutdownAnalytics, trackVerifyFinding } from '../../lib/analytics.js';
+import { getProjectConfig } from '../../lib/config.js';
+
+/**
+ * Registers `verify finding`: records a "loud" error the browser surfaced during the drive
+ * (a 4xx/5xx, a `column does not exist`, a console exception) that the agent saw via
+ * `browser_console_messages` / `browser_network_requests`. The rls/truth probes only cover
+ * the *silent* findings; this command is how the loud ones reach PostHog too.
+ */
+export function registerVerifyFindingCommand(verify: Command): void {
+  verify
+    .command('finding')
+    .description('Record a loud error surfaced during the drive (4xx/5xx, column-not-found, console) as a finding (experimental)')
+    .requiredOption('--kind <kind>', 'short error kind, e.g. pgrst_column_not_found, http_500, console_error')
+    .option('--type <type>', 'finding type', 'error')
+    .option('--status <n>', 'HTTP status, if any', (raw) => parseInt(raw, 10))
+    .option('--endpoint <path>', 'the endpoint/URL that errored')
+    .option('--message <text>', 'the error message the page showed')
+    .option('--table <name>', 'related table, if known')
+    .action(async (opts, cmd) => {
+      const { json } = getRootOpts(cmd);
+      try {
+        const project = getProjectConfig();
+        if (!project) throw new CLIError('No linked project found — run `insforge link` first.');
+        // A non-numeric --status parses to NaN; treat it the same as an omitted status.
+        const status = Number.isNaN(opts.status) ? undefined : (opts.status as number | undefined);
+        const finding = {
+          type: opts.type as string,
+          kind: opts.kind as string,
+          status,
+          endpoint: opts.endpoint as string | undefined,
+          message: opts.message as string | undefined,
+          table: opts.table as string | undefined,
+        };
+        trackVerifyFinding(finding, project);
+        await shutdownAnalytics(); // flush the PostHog event before exit
+        const statusSuffix = finding.status ? ` (${finding.status})` : '';
+        const messageSuffix = finding.message ? ` — ${finding.message}` : '';
+        if (json) outputJson({ recorded: true, finding });
+        else outputInfo(`📝 recorded ${finding.type} finding: ${finding.kind}${statusSuffix}${messageSuffix}`);
+      } catch (e) {
+        handleError(e, json);
+      }
+    });
+}
diff --git a/src/commands/verify/index.ts b/src/commands/verify/index.ts
@@ -0,0 +1,14 @@
+// src/commands/verify/index.ts
+import type { Command } from 'commander';
+import { registerVerifyRlsCommand } from './rls.js';
+import { registerVerifyTruthCommand } from './truth.js';
+import { registerVerifyFindingCommand } from './finding.js';
+
+/** Mounts the hidden, experimental `verify` group on `program` and attaches its rls, truth and finding subcommands. */
+export function registerVerifyCommands(program: Command): void {
+  const group = program.command('verify', { hidden: true });
+  group.description('[experimental] Backend-truth & RLS probes + loud-error recording for insforge-verify');
+  for (const attach of [registerVerifyRlsCommand, registerVerifyTruthCommand, registerVerifyFindingCommand]) {
+    attach(group);
+  }
+}
diff --git a/src/commands/verify/rls.ts b/src/commands/verify/rls.ts
@@ -0,0 +1,88 @@
+import type { Command } from 'commander';
+import { CLIError, getRootOpts, handleError } from '../../lib/errors.js';
+import { getProjectConfig } from '../../lib/config.js';
+import { outputJson, outputInfo } from '../../lib/output.js';
+import { shutdownAnalytics, trackVerifyFinding } from '../../lib/analytics.js';
+import {
+  classifyRls,
+  getAnonKey,
+  isLikelyEmail,
+  isSafeIdentifier,
+  login,
+  rawsqlRows,
+  recordsCount,
+} from '../../lib/verify-probe.js';
+
+/**
+ * Registers `verify rls`: logs in as seeded users A and B, then counts rows of `--table` whose
+ * `--owner` equals A's id as B and as A, plus a count with neither filter nor user token. The
+ * `classifyRls` verdict is tracked and printed; exit code is 0 only when its type is `none`.
+ */
+export function registerVerifyRlsCommand(verify: Command): void {
+  verify
+    .command('rls')
+    .description('Cross-user RLS isolation probe — checks B cannot read A, A can read own (experimental)')
+    .requiredOption('--table <name>', 'user-scoped table to probe')
+    .requiredOption('--owner <column>', 'owner column on the table (e.g. user_id)')
+    .option('--user-a <email>', 'seeded user A email', 'verify-a@example.com')
+    .option('--user-b <email>', 'seeded user B email', 'verify-b@example.com')
+    .option('--password <pw>', 'seeded users password', 'Test1234!pass')
+    .action(async (opts, cmd) => {
+      const { json } = getRootOpts(cmd);
+      try {
+        const config = getProjectConfig();
+        if (!config) throw new CLIError('No linked project found — run `insforge link` first.');
+        const { oss_host: baseUrl, api_key: adminKey } = config;
+
+        // --table/--owner are interpolated into a PostgREST resource path and filter; keep
+        // them to bare identifiers so a value like `user_id&select=secret` can't inject extra
+        // params. --user-a/-b go into a raw SQL lookup; require an email shape (the single-
+        // quote escaping below already blocks string-literal injection — this removes the rest).
+        if (!isSafeIdentifier(String(opts.table))) {
+          throw new CLIError(`--table must be a bare table name (got ${JSON.stringify(opts.table)}).`);
+        }
+        if (!isSafeIdentifier(String(opts.owner))) {
+          throw new CLIError(`--owner must be a bare column name (got ${JSON.stringify(opts.owner)}).`);
+        }
+        if (!isLikelyEmail(String(opts.userA)) || !isLikelyEmail(String(opts.userB))) {
+          throw new CLIError('--user-a and --user-b must be valid email addresses.');
+        }
+
+        const aToken = await login(baseUrl, opts.userA, opts.password);
+        const bToken = await login(baseUrl, opts.userB, opts.password);
+        const anon = await getAnonKey(baseUrl, adminKey);
+        if (!aToken || !bToken || !anon) {
+          throw new CLIError(
+            'Login or anon-key fetch returned empty — seed BOTH users first. An empty token turns every probe into an anonymous request that silently "passes" isolation.',
+          );
+        }
+
+        const emailLiteral = String(opts.userA).replace(/'/g, "''"); // double any single quote for the SQL literal
+        const rows = await rawsqlRows(baseUrl, adminKey, `select id from auth.users where email='${emailLiteral}'`);
+        const aId = (rows[0] as { id?: string })?.id;
+        if (!aId) throw new CLIError(`Could not find user A (${opts.userA}) — seed it first.`);
+        const filter = `${opts.owner}=eq.${encodeURIComponent(aId)}`;
+        const bReadRowsOfA = await recordsCount(baseUrl, opts.table, filter, bToken, anon);
+        const aReadOwnRows = await recordsCount(baseUrl, opts.table, filter, aToken, anon);
+        const anonReadRows = await recordsCount(baseUrl, opts.table, undefined, undefined, anon);
+
+        const { type, evidence } = classifyRls({ bReadRowsOfA, aReadOwnRows, anonReadRows });
+        const finding = { type, table: opts.table as string, evidence };
+        trackVerifyFinding(finding, config);
+        await shutdownAnalytics(); // flush the PostHog event before exit
+        if (json) {
+          outputJson({ passed: type === 'none', finding });
+        } else if (type === 'rls_leak') {
+          outputInfo(`❌ rls_leak on ${opts.table}: B read ${bReadRowsOfA} of A's rows (anon read ${anonReadRows}).`);
+        } else if (type === 'rls_overrestrict') {
+          outputInfo(`❌ rls_overrestrict on ${opts.table}: A could not read its own rows (positive control empty).`);
+        } else {
+          // Print the measured counts instead of literal zeros so this line cannot disagree with the probe.
+          outputInfo(`✅ isolation holds on ${opts.table}: B=${bReadRowsOfA}, anon=${anonReadRows}, A=${aReadOwnRows}.`);
+        }
+        process.exitCode = type === 'none' ? 0 : 1;
+      } catch (e) {
+        handleError(e, json);
+      }
+    });
+}
diff --git a/src/commands/verify/truth.ts b/src/commands/verify/truth.ts
@@ -0,0 +1,62 @@
+import type { Command } from 'commander';
+import { CLIError, getRootOpts, handleError } from '../../lib/errors.js';
+import { getProjectConfig } from '../../lib/config.js';
+import { outputJson, outputInfo } from '../../lib/output.js';
+import { shutdownAnalytics, trackVerifyFinding } from '../../lib/analytics.js';
+import { classifyTruth, isReadOnlyQuery, rawsqlRows } from '../../lib/verify-probe.js';
+
+/**
+ * Registers `verify truth`: runs one read-only query and compares it with what the UI claimed —
+ * the row count for `--expect-count`, else the first column of the first row for `--expect`.
+ * Flags are validated before the query is sent; exit code is 0 only when the finding is `none`.
+ */
+export function registerVerifyTruthCommand(verify: Command): void {
+  verify
+    .command('truth')
+    .description('Backend-truth cross-check — compare a DB read to what the UI claimed (experimental)')
+    .requiredOption('--query <sql>', 'a read proving what the UI showed; compares the first column of the first row')
+    .option('--expect <value>', 'the value the UI displayed (compared as a scalar)')
+    .option('--expect-count <n>', 'expect this many rows instead of a scalar value')
+    .option('--table <name>', 'table name, for the finding label')
+    .action(async (opts, cmd) => {
+      const { json } = getRootOpts(cmd);
+      try {
+        const config = getProjectConfig();
+        if (!config) throw new CLIError('No linked project found — run `insforge link` first.');
+        if (!isReadOnlyQuery(opts.query)) {
+          throw new CLIError(
+            'verify truth runs a single read-only query — it must start with SELECT or WITH and not chain statements.',
+          );
+        }
+        // Settle which expectation applies before touching the database, so a usage error never runs SQL.
+        const wantsScalar = opts.expect !== undefined;
+        const wantsCount = opts.expectCount !== undefined;
+        if (wantsScalar && wantsCount) throw new CLIError('Provide either --expect <value> or --expect-count <n>, not both.');
+        if (!wantsScalar && !wantsCount) throw new CLIError('Provide --expect <value> (scalar) or --expect-count <n> (row count).');
+
+        const rows = await rawsqlRows(config.oss_host, config.api_key, opts.query);
+        const first = rows[0];
+        const dbValue = first && typeof first === 'object' ? Object.values(first as Record<string, unknown>)[0] : first;
+        const result: { type: 'false_pass' | 'none'; evidence: Record<string, unknown> } = wantsCount
+          ? classifyTruth(rows.length, String(opts.expectCount))
+          : classifyTruth(dbValue, String(opts.expect));
+
+        const finding = { type: result.type, table: opts.table as string | undefined, evidence: result.evidence };
+        trackVerifyFinding(finding, config);
+        await shutdownAnalytics(); // flush the PostHog event before exit
+
+        if (json) {
+          outputJson({ passed: result.type === 'none', finding });
+        } else if (result.type === 'false_pass') {
+          outputInfo(
+            `❌ false_pass${opts.table ? ` on ${opts.table}` : ''}: UI claimed ${JSON.stringify(result.evidence.ui_claimed)} but DB has ${JSON.stringify(result.evidence.db_actual)}.`,
+          );
+        } else {
+          outputInfo(`✅ backend truth matches: ${JSON.stringify(result.evidence.db_actual)}.`);
+        }
+        process.exitCode = result.type === 'none' ? 0 : 1;
+      } catch (e) {
+        handleError(e, json);
+      }
+    });
+}
diff --git a/src/index.ts b/src/index.ts
@@ -11,6 +11,7 @@ import { registerWhoamiCommand } from './commands/whoami.js';
 import { registerOrgsCommands } from './commands/orgs/list.js';
 import { registerProjectsCommands } from './commands/projects/list.js';
 import { registerBranchCommands } from './commands/branch/index.js';
+import { registerVerifyCommands } from './commands/verify/index.js';
 import { registerProjectLinkCommand } from './commands/projects/link.js';
 import { registerDbCommands } from './commands/db/query.js';
 import { registerDbTablesCommand } from './commands/db/tables.js';
@@ -135,6 +136,9 @@ registerProjectsCommands(projectsCmd);
 // Branch commands
 registerBranchCommands(program);
 
+// Verify probe commands (experimental, hidden from --help)
+registerVerifyCommands(program);
+
 // Database commands
 const dbCmd = program.command('db').description('Database operations');
 registerDbCommands(dbCmd);

diff --git a/src/lib/analytics.ts b/src/lib/analytics.ts
@@ -128,3 +128,41 @@ export async function shutdownAnalytics(): Promise<void> {
     // ignore
   }
 }
+
+export interface VerifyFinding {
+  type: string; // finding type; `none` is reported as passed
+  table?: string; // related table, when known
+  kind?: string; // short error kind, e.g. `http_500`
+  status?: number; // HTTP status, if any
+  endpoint?: string; // endpoint/URL that errored
+  message?: string; // error message the page showed
+  evidence?: Record<string, unknown>; // probe details; `db_actual`/`ui_claimed` are stripped before sending
+}
+
+// `verify truth` evidence carries the raw values being compared (`db_actual`/`ui_claimed`),
+// which can be PII (a name, email, balance). Drop those before sending — finding rate only
+// needs the type + table, not the value. RLS evidence (row counts) is not sensitive and stays.
+const SENSITIVE_EVIDENCE_KEYS = new Set(['db_actual', 'ui_claimed']);
+
+/**
+ * Emits a verify finding to PostHog — the central, cross-user rail (finding rate + what
+ * broke), like the other track* helpers here — rather than the per-project
+ * `oss_host/api/usage/mcp` table, which only stores `(tool_name, success)` and drops the
+ * finding. Recording lives in the tool, so a finding is captured because the probe ran, not
+ * because the agent remembered to. Best-effort; callers flush via `shutdownAnalytics()`.
+ */
+export function trackVerifyFinding(finding: VerifyFinding, config: ProjectConfig): void {
+  const evidence = finding.evidence ?? {};
+  const { type, table, kind, status, endpoint, message } = finding;
+  const keptEvidence = Object.entries(evidence).filter(([key]) => !SENSITIVE_EVIDENCE_KEYS.has(key));
+  captureEvent(config.project_id, 'verify_finding', {
+    ...Object.fromEntries(keptEvidence), // spread first so the named fields below win on a key clash
+    finding_type: type,
+    passed: type === 'none',
+    table,
+    kind,
+    status,
+    endpoint,
+    message,
+  });
+}
diff --git a/src/lib/browser-mcp.test.ts b/src/lib/browser-mcp.test.ts
@@ -0,0 +1,109 @@
+import { mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from 'node:fs';
+import { dirname, join } from 'node:path';
+import { tmpdir } from 'node:os';
+import { afterEach, beforeEach, describe, expect, it } from 'vitest';
+import { ensureCodexToml, mergeJsonMcp } from './browser-mcp.js';
+
+const HEADLESS_SERVER = { // server entry handed to mergeJsonMcp and expected back under the `playwright` key
+  command: 'npx',
+  args: ['@playwright/mcp@latest', '--headless'],
+};
+
+// Contract pinned below: mergeJsonMcp returns true when it writes the server entry and false when an identical one exists.
+describe('mergeJsonMcp', () => {
+  let sandbox: string;
+  let configPath: string;
+  const readJson = (path: string) => JSON.parse(readFileSync(path, 'utf-8'));
+  beforeEach(() => {
+    sandbox = mkdtempSync(join(tmpdir(), 'insforge-mcp-'));
+    configPath = join(sandbox, '.cursor', 'mcp.json');
+  });
+  afterEach(() => {
+    rmSync(sandbox, { recursive: true, force: true });
+  });
+
+  it('creates the file (and parent dirs) with the server under mcpServers', () => {
+    expect(mergeJsonMcp(configPath, 'mcpServers', HEADLESS_SERVER)).toBe(true);
+    expect(readJson(configPath).mcpServers['playwright']).toEqual(HEADLESS_SERVER);
+  });
+
+  it('merges without clobbering other servers', () => {
+    const cfgPath = join(sandbox, 'cfg.json');
+    writeFileSync(cfgPath, JSON.stringify({ mcpServers: { other: { command: 'x' } } }));
+    expect(mergeJsonMcp(cfgPath, 'mcpServers', HEADLESS_SERVER)).toBe(true);
+    expect(readJson(cfgPath).mcpServers.other).toEqual({ command: 'x' });
+    expect(readJson(cfgPath).mcpServers['playwright']).toBeDefined();
+  });
+
+  it('is idempotent — returns false when already present and identical', () => {
+    mergeJsonMcp(configPath, 'mcpServers', HEADLESS_SERVER);
+    expect(mergeJsonMcp(configPath, 'mcpServers', HEADLESS_SERVER)).toBe(false);
+  });
+
+  it('recovers from malformed JSON by starting fresh', () => {
+    const brokenPath = join(sandbox, 'bad.json');
+    writeFileSync(brokenPath, '{ not valid json');
+    expect(mergeJsonMcp(brokenPath, 'mcpServers', HEADLESS_SERVER)).toBe(true);
+    expect(readJson(brokenPath).mcpServers['playwright']).toBeDefined();
+  });
+
+  it('supports the VS Code `servers` key', () => {
+    expect(mergeJsonMcp(configPath, 'servers', HEADLESS_SERVER)).toBe(true);
+    expect(readJson(configPath).servers['playwright']).toEqual(HEADLESS_SERVER);
+  });
+
+  it('starts fresh on valid-but-non-object JSON (array / null / primitive)', () => {
+    ['[1,2,3]', 'null', '"a string"', '42'].forEach((raw) => {
+      const target = join(sandbox, `${raw.replace(/\W/g, '')}.json`);
+      writeFileSync(target, raw);
+      expect(mergeJsonMcp(target, 'mcpServers', HEADLESS_SERVER)).toBe(true);
+      // No crash, no silent loss — the server lands under a fresh object.
+      expect(readJson(target).mcpServers['playwright']).toEqual(HEADLESS_SERVER);
+    });
+  });
+
+  it('normalizes a non-object value under the section key (e.g. {"mcpServers": "x"})', () => {
+    ['"x"', '[1,2]', '5'].forEach((rawSection) => {
+      const target = join(sandbox, `section${rawSection.replace(/\W/g, '')}.json`);
+      writeFileSync(target, `{"mcpServers": ${rawSection}}`);
+      // Would otherwise throw (assigning a prop on a string in strict ESM) or drop the server.
+      expect(mergeJsonMcp(target, 'mcpServers', HEADLESS_SERVER)).toBe(true);
+      expect(readJson(target).mcpServers['playwright']).toEqual(HEADLESS_SERVER);
+    });
+  });
+});
+
+describe('ensureCodexToml', () => {
+  let sandbox: string;
+  let tomlPath: string;
+  const readToml = () => readFileSync(tomlPath, 'utf-8'); // reads back the config file under test
+  beforeEach(() => {
+    sandbox = mkdtempSync(join(tmpdir(), 'insforge-codex-'));
+    tomlPath = join(sandbox, '.codex', 'config.toml');
+  });
+  afterEach(() => {
+    rmSync(sandbox, { recursive: true, force: true });
+  });
+
+  it('appends a [mcp_servers.playwright] block when absent', () => {
+    expect(ensureCodexToml(tomlPath)).toBe(true);
+    const written = readToml();
+    for (const fragment of ['[mcp_servers.playwright]', 'command = "npx"', '"--headless"']) {
+      expect(written).toContain(fragment);
+    }
+  });
+
+  it('is idempotent — returns false when the block already exists', () => {
+    ensureCodexToml(tomlPath);
+    expect(ensureCodexToml(tomlPath)).toBe(false);
+  });
+
+  it('preserves existing TOML content', () => {
+    mkdirSync(dirname(tomlPath), { recursive: true });
+    writeFileSync(tomlPath, '[some_other_section]\nkey = "value"\n');
+    expect(ensureCodexToml(tomlPath)).toBe(true);
+    const merged = readToml();
+    expect(merged).toContain('[some_other_section]');
+    expect(merged).toContain('[mcp_servers.playwright]');
+  });
+});