Skip to content
Closed
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/commands/deployments/deploy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ const EXCLUDE_PATTERNS = [
'.cache',
'skills',
'coverage',
'test-results',
'playwright-report',
'.playwright-mcp',
IGNORE_FILE_NAME,
];

Expand Down
48 changes: 48 additions & 0 deletions src/commands/verify/finding.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import type { Command } from 'commander';
import { CLIError, getRootOpts, handleError } from '../../lib/errors.js';
import { outputJson, outputInfo } from '../../lib/output.js';
import { shutdownAnalytics, trackVerifyFinding } from '../../lib/analytics.js';
import { getProjectConfig } from '../../lib/config.js';

// Registers `verify finding`: the agent's way to report a "loud" error that the
// browser surfaced during the drive — a 4xx/5xx, a `column does not exist`, a
// console exception — which it noticed through `browser_console_messages` /
// `browser_network_requests`. The rls/truth probes only report the *silent*
// findings; this subcommand sends the loud ones to PostHog as well.
export function registerVerifyFindingCommand(verify: Command): void {
  // `--status` arrives as text; a non-numeric value parses to NaN and is dropped below.
  const parseStatus = (raw: string): number => parseInt(raw, 10);

  verify
    .command('finding')
    .description('Record a loud error surfaced during the drive (4xx/5xx, column-not-found, console) as a finding (experimental)')
    .requiredOption('--kind <kind>', 'short error kind, e.g. pgrst_column_not_found, http_500, console_error')
    .option('--type <type>', 'finding type', 'error')
    .option('--status <n>', 'HTTP status, if any', parseStatus)
    .option('--endpoint <path>', 'the endpoint/URL that errored')
    .option('--message <text>', 'the error message the page showed')
    .option('--table <name>', 'related table, if known')
    .action(async (opts, cmd) => {
      const { json } = getRootOpts(cmd);
      try {
        const config = getProjectConfig();
        if (!config) {
          throw new CLIError('No linked project found — run `insforge link` first.');
        }

        const status = Number.isNaN(opts.status) ? undefined : (opts.status as number | undefined);
        const finding = {
          type: opts.type as string,
          kind: opts.kind as string,
          status,
          endpoint: opts.endpoint as string | undefined,
          message: opts.message as string | undefined,
          table: opts.table as string | undefined,
        };

        trackVerifyFinding(finding, config);
        // Flush the PostHog event before the process exits.
        await shutdownAnalytics();

        if (json) {
          outputJson({ recorded: true, finding });
          return;
        }

        // Human-readable line: status and message are appended only when present.
        const statusSuffix = finding.status ? ` (${finding.status})` : '';
        const messageSuffix = finding.message ? ` — ${finding.message}` : '';
        outputInfo(`📝 recorded ${finding.type} finding: ${finding.kind}${statusSuffix}${messageSuffix}`);
      } catch (e) {
        handleError(e, json);
      }
    });
}
14 changes: 14 additions & 0 deletions src/commands/verify/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// src/commands/verify/index.ts
import type { Command } from 'commander';
import { registerVerifyRlsCommand } from './rls.js';
import { registerVerifyTruthCommand } from './truth.js';
import { registerVerifyFindingCommand } from './finding.js';

/**
 * Adds the `verify` command group to the CLI and attaches its `rls`, `truth`
 * and `finding` subcommands. The group is created with `hidden: true`.
 *
 * @param program The root commander program.
 */
export function registerVerifyCommands(program: Command): void {
  const verify = program.command('verify', { hidden: true });
  verify.description('[experimental] Backend-truth & RLS probes + loud-error recording for insforge-verify');

  const registrars = [registerVerifyRlsCommand, registerVerifyTruthCommand, registerVerifyFindingCommand];
  for (const register of registrars) {
    register(verify);
  }
}
66 changes: 66 additions & 0 deletions src/commands/verify/rls.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import type { Command } from 'commander';
import { CLIError, getRootOpts, handleError } from '../../lib/errors.js';
import { getProjectConfig } from '../../lib/config.js';
import { outputJson, outputInfo } from '../../lib/output.js';
import { shutdownAnalytics, trackVerifyFinding } from '../../lib/analytics.js';
import { classifyRls, getAnonKey, login, rawsqlRows, recordsCount } from '../../lib/verify-probe.js';

/**
 * Registers `verify rls`: a cross-user RLS isolation probe.
 *
 * The action logs in as users A and B, looks up A's id in `auth.users`, and then
 * counts rows of `--table` whose `--owner` column equals A's id three ways:
 * as B, as A (the positive control), and with neither a filter nor a user token
 * (the anon read). `classifyRls` turns the three counts into a finding, which is
 * sent to analytics and printed. The process exit code is 0 when the finding
 * type is `none` and 1 otherwise.
 *
 * @param verify The parent `verify` command the subcommand is attached to.
 */
export function registerVerifyRlsCommand(verify: Command): void {
  verify
    .command('rls')
    .description('Cross-user RLS isolation probe — checks B cannot read A, A can read own (experimental)')
    .requiredOption('--table <name>', 'user-scoped table to probe')
    .requiredOption('--owner <column>', 'owner column on the table (e.g. user_id)')
    .option('--user-a <email>', 'seeded user A email', 'verify-a@example.com')
    .option('--user-b <email>', 'seeded user B email', 'verify-b@example.com')
    .option('--password <pw>', 'seeded users password', 'Test1234!pass')
    .action(async (opts, cmd) => {
      const { json } = getRootOpts(cmd);
      try {
        const config = getProjectConfig();
        if (!config) throw new CLIError('No linked project found — run `insforge link` first.');
        const baseUrl = config.oss_host;
        const adminKey = config.api_key;

        const aToken = await login(baseUrl, opts.userA, opts.password);
        const bToken = await login(baseUrl, opts.userB, opts.password);
        const anon = await getAnonKey(baseUrl, adminKey);
        // Refuse to continue on any empty credential: the probe would otherwise
        // run anonymously and report a pass it never actually tested.
        if (!aToken || !bToken || !anon) {
          throw new CLIError(
            'Login or anon-key fetch returned empty — seed BOTH users first. An empty token turns every probe into an anonymous request that silently "passes" isolation.',
          );
        }

        // Single quotes in the email are doubled so the value stays inside the SQL literal.
        const rows = await rawsqlRows(
          baseUrl,
          adminKey,
          `select id from auth.users where email='${String(opts.userA).replace(/'/g, "''")}'`,
        );
        const aId = (rows[0] as { id?: string })?.id;
        if (!aId) throw new CLIError(`Could not find user A (${opts.userA}) — seed it first.`);

        // Percent-encode both halves of the filter. PostgREST parses the query
        // string as key=op.value pairs, so an unencoded, agent-generated owner
        // such as `user_id&limit=0` would inject an extra parameter, force zero
        // rows, hide a real rls_leak and yield a false "isolation holds" verdict.
        const filter = `${encodeURIComponent(String(opts.owner))}=eq.${encodeURIComponent(aId)}`;

        const bReadRowsOfA = await recordsCount(baseUrl, opts.table, filter, bToken, anon);
        const aReadOwnRows = await recordsCount(baseUrl, opts.table, filter, aToken, anon);
        const anonReadRows = await recordsCount(baseUrl, opts.table, undefined, undefined, anon);

        const { type, evidence } = classifyRls({ bReadRowsOfA, aReadOwnRows, anonReadRows });
        const finding = { type, table: opts.table as string, evidence };
        trackVerifyFinding(finding, config);
        await shutdownAnalytics(); // flush the PostHog event before exit

        if (json) {
          outputJson({ passed: type === 'none', finding });
        } else if (type === 'rls_leak') {
          outputInfo(`❌ rls_leak on ${opts.table}: B read ${bReadRowsOfA} of A's rows (anon read ${anonReadRows}).`);
        } else if (type === 'rls_overrestrict') {
          outputInfo(`❌ rls_overrestrict on ${opts.table}: A could not read its own rows (positive control empty).`);
        } else {
          outputInfo(`✅ isolation holds on ${opts.table}: B=0, anon=0, A=${aReadOwnRows}.`);
        }
        // Non-zero exit for any finding so scripts can gate on the probe.
        process.exitCode = type === 'none' ? 0 : 1;
      } catch (e) {
        handleError(e, json);
      }
    });
}
54 changes: 54 additions & 0 deletions src/commands/verify/truth.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import type { Command } from 'commander';
import { CLIError, getRootOpts, handleError } from '../../lib/errors.js';
import { getProjectConfig } from '../../lib/config.js';
import { outputJson, outputInfo } from '../../lib/output.js';
import { shutdownAnalytics, trackVerifyFinding } from '../../lib/analytics.js';
import { classifyTruth, rawsqlRows } from '../../lib/verify-probe.js';

/**
 * Registers `verify truth`: runs `--query` against the linked project and
 * compares the result with what the UI claimed.
 *
 * With `--expect-count` the number of returned rows is compared; otherwise, with
 * `--expect`, the first column of the first row is compared. `--expect-count`
 * takes precedence when both are given, and supplying neither raises a
 * `CLIError` (after the query has run). The finding is sent to analytics and
 * printed; the exit code is 0 when the finding type is `none`, 1 otherwise.
 *
 * @param verify The parent `verify` command the subcommand is attached to.
 */
export function registerVerifyTruthCommand(verify: Command): void {
  // First column of a row object; non-object (or missing) rows are passed through as-is.
  const firstColumnOf = (row: unknown): unknown =>
    row && typeof row === 'object' ? Object.values(row as Record<string, unknown>)[0] : row;

  verify
    .command('truth')
    .description('Backend-truth cross-check — compare a DB read to what the UI claimed (experimental)')
    .requiredOption('--query <sql>', 'a read proving what the UI showed; compares the first column of the first row')
    .option('--expect <value>', 'the value the UI displayed (compared as a scalar)')
    .option('--expect-count <n>', 'expect this many rows instead of a scalar value')
    .option('--table <name>', 'table name, for the finding label')
    .action(async (opts, cmd) => {
      const { json } = getRootOpts(cmd);
      try {
        const config = getProjectConfig();
        if (!config) {
          throw new CLIError('No linked project found — run `insforge link` first.');
        }

        const rows = await rawsqlRows(config.oss_host, config.api_key, opts.query);

        const compareCount = opts.expectCount !== undefined;
        if (!compareCount && opts.expect === undefined) {
          throw new CLIError('Provide --expect <value> (scalar) or --expect-count <n> (row count).');
        }

        const result: { type: 'false_pass' | 'none'; evidence: Record<string, unknown> } = compareCount
          ? classifyTruth(rows.length, String(opts.expectCount))
          : classifyTruth(firstColumnOf(rows[0]), String(opts.expect));

        const table = opts.table as string | undefined;
        const finding = { type: result.type, table, evidence: result.evidence };
        trackVerifyFinding(finding, config);
        // Flush the PostHog event before the process exits.
        await shutdownAnalytics();

        const passed = result.type === 'none';
        if (json) {
          outputJson({ passed, finding });
        } else if (result.type === 'false_pass') {
          const where = opts.table ? ` on ${opts.table}` : '';
          const claimed = JSON.stringify(result.evidence.ui_claimed);
          const actual = JSON.stringify(result.evidence.db_actual);
          outputInfo(`❌ false_pass${where}: UI claimed ${claimed} but DB has ${actual}.`);
        } else {
          outputInfo(`✅ backend truth matches: ${JSON.stringify(result.evidence.db_actual)}.`);
        }
        process.exitCode = passed ? 0 : 1;
      } catch (e) {
        handleError(e, json);
      }
    });
}
4 changes: 4 additions & 0 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import { registerWhoamiCommand } from './commands/whoami.js';
import { registerOrgsCommands } from './commands/orgs/list.js';
import { registerProjectsCommands } from './commands/projects/list.js';
import { registerBranchCommands } from './commands/branch/index.js';
import { registerVerifyCommands } from './commands/verify/index.js';
import { registerProjectLinkCommand } from './commands/projects/link.js';
import { registerDbCommands } from './commands/db/query.js';
import { registerDbTablesCommand } from './commands/db/tables.js';
Expand Down Expand Up @@ -135,6 +136,9 @@ registerProjectsCommands(projectsCmd);
// Branch commands
registerBranchCommands(program);

// Verify probe commands (experimental, hidden from --help)
registerVerifyCommands(program);

// Database commands
const dbCmd = program.command('db').description('Database operations');
registerDbCommands(dbCmd);
Expand Down
30 changes: 30 additions & 0 deletions src/lib/analytics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -128,3 +128,33 @@ export async function shutdownAnalytics(): Promise<void> {
// ignore
}
}

/** A single result produced by one of the `verify` subcommands. */
export interface VerifyFinding {
  /** Finding type, e.g. `none`, `error`, `rls_leak`, `rls_overrestrict`, `false_pass`. */
  type: string;
  /** Short error kind, e.g. `pgrst_column_not_found`, `http_500`, `console_error`. */
  kind?: string;
  /** Related table, when known. */
  table?: string;
  /** The endpoint/URL that errored. */
  endpoint?: string;
  /** HTTP status, if any. */
  status?: number;
  /** The error message the page showed. */
  message?: string;
  /** Probe-specific details; merged into the analytics event properties. */
  evidence?: Record<string, unknown>;
}

/**
 * Emit a verify finding to PostHog — the central, cross-user rail (finding rate + what
 * broke), same as the other track* helpers here. NOT the per-project `oss_host/api/usage/mcp`
 * table, which only stores `(tool_name, success)` and drops the finding. The recording lives
 * in the tool — a finding is recorded because the probe ran, not because the agent remembered
 * to. Best-effort; the caller flushes via `shutdownAnalytics()` before exit.
 *
 * Property precedence, lowest to highest: `finding.evidence` keys, then the named
 * fields (`table`, `kind`, `status`, `endpoint`, `message`) that are defined, then
 * `finding_type` and `passed`. Evidence can therefore add detail but can never
 * overwrite the canonical fields of the event.
 *
 * @param finding The finding to record.
 * @param config The linked project's config; its `project_id` identifies the event.
 */
export function trackVerifyFinding(finding: VerifyFinding, config: ProjectConfig): void {
  // Start from the evidence so everything assigned afterwards takes precedence.
  const properties: Record<string, unknown> = { ...finding.evidence };

  const namedFields: Record<string, unknown> = {
    table: finding.table,
    kind: finding.kind,
    status: finding.status,
    endpoint: finding.endpoint,
    message: finding.message,
  };
  for (const [key, value] of Object.entries(namedFields)) {
    // Skip unset fields so they do not blank out an evidence key of the same name.
    if (value !== undefined) properties[key] = value;
  }

  properties.finding_type = finding.type;
  properties.passed = finding.type === 'none';

  captureEvent(config.project_id, 'verify_finding', properties);
}
12 changes: 10 additions & 2 deletions src/lib/api/platform.ts
Original file line number Diff line number Diff line change
Expand Up @@ -312,8 +312,16 @@ export async function getBranchApi(branchId: string, apiUrl?: string): Promise<B
return data.branch;
}

export async function deleteBranchApi(branchId: string, apiUrl?: string): Promise<void> {
await platformFetch(`/projects/v1/branches/${branchId}`, { method: 'DELETE' }, apiUrl);
/**
 * Deletes a branch by issuing a `DELETE` to `/projects/v1/branches/<branchId>`
 * through `platformFetch`.
 *
 * @param branchId Id of the branch to delete.
 * @param apiUrl Optional API base URL, forwarded to `platformFetch`.
 * @param opts When `ignoreNotFound` is truthy, `passThroughStatuses: [404]` is added
 *   to the request options — presumably so a missing branch is not treated as a
 *   failure; confirm against `platformFetch`.
 */
export async function deleteBranchApi(
  branchId: string,
  apiUrl?: string,
  opts?: { ignoreNotFound?: boolean },
): Promise<void> {
  const path = `/projects/v1/branches/${branchId}`;
  const notFoundOptions = opts?.ignoreNotFound ? { passThroughStatuses: [404] } : {};
  await platformFetch(path, { method: 'DELETE', ...notFoundOptions }, apiUrl);
}

/**
Expand Down
89 changes: 89 additions & 0 deletions src/lib/browser-mcp.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import { mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from 'node:fs';
import { dirname, join } from 'node:path';
import { tmpdir } from 'node:os';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { ensureCodexToml, mergeJsonMcp } from './browser-mcp.js';

// Server entry that the mergeJsonMcp tests write into each config file.
const HEADLESS_SERVER = {
  command: 'npx',
  args: ['@playwright/mcp@latest', '--headless'],
};

// Each test runs against a fresh temp directory that is removed afterwards.
describe('mergeJsonMcp', () => {
  let dir: string;
  let file: string;
  const readJson = (path: string) => JSON.parse(readFileSync(path, 'utf-8'));

  beforeEach(() => {
    dir = mkdtempSync(join(tmpdir(), 'insforge-mcp-'));
    // The `.cursor` parent directory does not exist yet.
    file = join(dir, '.cursor', 'mcp.json');
  });

  afterEach(() => {
    rmSync(dir, { recursive: true, force: true });
  });

  it('creates the file (and parent dirs) with the server under mcpServers', () => {
    const changed = mergeJsonMcp(file, 'mcpServers', HEADLESS_SERVER);
    expect(changed).toBe(true);
    expect(readJson(file).mcpServers['playwright']).toEqual(HEADLESS_SERVER);
  });

  it('merges without clobbering other servers', () => {
    const cfgPath = join(dir, 'cfg.json');
    writeFileSync(cfgPath, JSON.stringify({ mcpServers: { other: { command: 'x' } } }));

    expect(mergeJsonMcp(cfgPath, 'mcpServers', HEADLESS_SERVER)).toBe(true);

    const { mcpServers } = readJson(cfgPath);
    expect(mcpServers.other).toEqual({ command: 'x' });
    expect(mcpServers['playwright']).toBeDefined();
  });

  it('is idempotent — returns false when already present and identical', () => {
    mergeJsonMcp(file, 'mcpServers', HEADLESS_SERVER);
    const changedAgain = mergeJsonMcp(file, 'mcpServers', HEADLESS_SERVER);
    expect(changedAgain).toBe(false);
  });

  it('recovers from malformed JSON by starting fresh', () => {
    const badPath = join(dir, 'bad.json');
    writeFileSync(badPath, '{ not valid json');

    expect(mergeJsonMcp(badPath, 'mcpServers', HEADLESS_SERVER)).toBe(true);
    expect(readJson(badPath).mcpServers['playwright']).toBeDefined();
  });

  it('supports the VS Code `servers` key', () => {
    const changed = mergeJsonMcp(file, 'servers', HEADLESS_SERVER);
    expect(changed).toBe(true);
    expect(readJson(file).servers['playwright']).toEqual(HEADLESS_SERVER);
  });
});

// Each test runs against a fresh temp directory that is removed afterwards.
describe('ensureCodexToml', () => {
  let dir: string;
  let file: string;
  const readToml = () => readFileSync(file, 'utf-8');

  beforeEach(() => {
    dir = mkdtempSync(join(tmpdir(), 'insforge-codex-'));
    file = join(dir, '.codex', 'config.toml');
  });

  afterEach(() => {
    rmSync(dir, { recursive: true, force: true });
  });

  it('appends a [mcp_servers.playwright] block when absent', () => {
    const changed = ensureCodexToml(file);
    expect(changed).toBe(true);

    const contents = readToml();
    for (const fragment of ['[mcp_servers.playwright]', 'command = "npx"', '"--headless"']) {
      expect(contents).toContain(fragment);
    }
  });

  it('is idempotent — returns false when the block already exists', () => {
    ensureCodexToml(file);
    const changedAgain = ensureCodexToml(file);
    expect(changedAgain).toBe(false);
  });

  it('preserves existing TOML content', () => {
    // Seed the config with an unrelated section before the block is added.
    mkdirSync(dirname(file), { recursive: true });
    writeFileSync(file, '[some_other_section]\nkey = "value"\n');

    expect(ensureCodexToml(file)).toBe(true);

    const contents = readToml();
    expect(contents).toContain('[some_other_section]');
    expect(contents).toContain('[mcp_servers.playwright]');
  });
});
Loading
Loading