From 1cd473d81139052c3e9d937c8e78b9e67440935d Mon Sep 17 00:00:00 2001
From: shubh24 <shubhankar24@gmail.com>
Date: Fri, 26 Jun 2026 23:54:00 -0700
Subject: [PATCH 1/3] Add record-and-replay skill

Record a human browser flow on a Browserbase cloud session and replay it
deterministically through the browse CLI, with accessibility-snapshot
selector healing.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 README.md                                   |   1 +
 skills/record-and-replay/LICENSE.txt        |  21 +++
 skills/record-and-replay/SKILL.md           |  97 ++++++++++
 skills/record-and-replay/evals/evals.json   |  17 ++
 skills/record-and-replay/package.json       |  10 ++
 skills/record-and-replay/scripts/inject.js  |  94 ++++++++++
 skills/record-and-replay/scripts/record.mjs |  94 ++++++++++
 skills/record-and-replay/scripts/replay.mjs | 187 ++++++++++++++++++++
 8 files changed, 521 insertions(+)
 create mode 100644 skills/record-and-replay/LICENSE.txt
 create mode 100644 skills/record-and-replay/SKILL.md
 create mode 100644 skills/record-and-replay/evals/evals.json
 create mode 100644 skills/record-and-replay/package.json
 create mode 100644 skills/record-and-replay/scripts/inject.js
 create mode 100644 skills/record-and-replay/scripts/record.mjs
 create mode 100644 skills/record-and-replay/scripts/replay.mjs

diff --git a/README.md b/README.md
index 6b06aea7..ae71d95a 100644
--- a/README.md
+++ b/README.md
@@ -25,6 +25,7 @@ This plugin includes the following skills (see `skills/` for details):
 | [company-research](skills/company-research/SKILL.md) | Discover target companies matching your ICP using the Browserbase Search API, deep-research each one, and score fit into a research report and CSV |
 | [event-prospecting](skills/event-prospecting/SKILL.md) | Extract speakers from a conference page, filter their companies against your ICP, and deep-research the best-fit people into a person-first prospecting report |
 | [competitor-analysis](skills/competitor-analysis/SKILL.md) | Auto-discover a company's competitors via the Browserbase Search API, deep-research each across marketing, signal, benchmark, and strategic-diff lanes, and compile a browsable HTML report with an overview, per-competitor deep dives, a feature/pricing matrix, and a mentions feed |
+| [record-and-replay](skills/record-and-replay/SKILL.md) | Record a human browser flow on a Browserbase cloud session and replay it deterministically through the `browse` CLI with accessibility-snapshot selector healing — "show, don't prompt" a bug or turn a flow into a self-healing test |
 
 ## Installation
 
diff --git a/skills/record-and-replay/LICENSE.txt b/skills/record-and-replay/LICENSE.txt
new file mode 100644
index 00000000..f2f43974
--- /dev/null
+++ b/skills/record-and-replay/LICENSE.txt
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2026 Browserbase, Inc.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/skills/record-and-replay/SKILL.md b/skills/record-and-replay/SKILL.md
new file mode 100644
index 00000000..b5e444e1
--- /dev/null
+++ b/skills/record-and-replay/SKILL.md
@@ -0,0 +1,97 @@
+---
+name: record-and-replay
+description: Record a human browser flow on a Browserbase session and replay it as a self-healing test. Use when you want to "show, don't prompt" a bug or workflow — capture clicks/typing/scrolls in a live cloud browser, save them as a Chrome DevTools Recorder file, then re-run them (with optional healing) to verify a flow still works. Triggers on "record this flow", "replay the recording", "record and replay", "turn this into a browser test".
+compatibility: "Requires Node 18+ and the browse CLI (`npm install -g browse`). Cloud sessions need `BROWSERBASE_API_KEY` and `BROWSERBASE_PROJECT_ID`. Replay is zero-dependency (it drives the browse CLI); record uses `@browserbasehq/sdk` + `playwright-core`, so run `npm install` in this skill dir first."
+license: MIT
+allowed-tools: Bash, Read
+---
+
+# Record & Replay
+
+"Show the bug instead of prompting it." Capture a human browser flow, get back a
+portable recording, then replay it deterministically with selector healing.
+Sessions run on Browserbase cloud browsers with a live, interactive view.
+
+## Fundamental logic
+
+- **Record** — attach to a Browserbase session over CDP, inject a listener that
+  captures each human action as a *semantic step* (a priority-ordered list of
+  selectors: `aria/Name` → `text/Label` → CSS path → XPath — plus value and
+  timestamp). The fallback list IS the healing.
+- **Replay** — drive every step through deterministic `browse` CLI subcommands,
+  resolving each step highest-confidence-first:
+  1. **semantic** — a recorded `aria/` or `text/` selector matched to a live
+     accessibility-snapshot ref (survives dynamic-id churn, e.g. Google's `#cNNN`),
+  2. **recorded** — the recorded XPath, then CSS, each *verified* with
+     `browse get visible` before acting (`browse click` reports success even on a
+     no-match, so a selector is never trusted blind),
+  3. **heal** (`RR_HEAL=1`) — match the step's typed value (or the value typed
+     just before) to a snapshot ref; this is what rescues unlabeled autocomplete
+     picks whose only recorded selector was a dynamic id,
+  4. **coords** (`RR_HEAL=1`) — last resort: `browse get box` a recorded selector
+     and click its center.
+
+Output is **Chrome DevTools Recorder** compatible, so recordings are also runnable
+by `@puppeteer/replay`.
+
+## Setup (once)
+
+```bash
+cd skills/record-and-replay && npm install   # only needed for `record`
+npm install -g browse                         # the replay/record driver CLI
+export BROWSERBASE_API_KEY=...  BROWSERBASE_PROJECT_ID=...
+```
+
+## Record
+
+```bash
+RR_URL="https://www.saucedemo.com" RR_OUT=/tmp/rec.json RR_TITLE="login flow" \
+  node --env-file=.env scripts/record.mjs
+```
+
+1. The script prints a **live view URL** — open it and perform the flow (click, type, scroll).
+2. Stop and save by pressing **ENTER** (interactive), creating the stop file
+   (`touch /tmp/rr-stop`, lets an agent stop it conversationally), or setting
+   `RR_SECONDS=30` to auto-stop.
+3. A recording is written to `RR_OUT`.
+
+| Var | Default | Meaning |
+|-----|---------|---------|
+| `RR_URL` | `https://example.com` | start URL |
+| `RR_OUT` | `/tmp/recording-<ts>.json` | output recording path |
+| `RR_TITLE` | `Recorded flow` | recording title |
+| `RR_STOP` | `/tmp/rr-stop` | create this file to stop recording |
+| `RR_SECONDS` | _(none)_ | auto-stop after N seconds instead of ENTER |
+
+## Replay
+
+```bash
+RR_FILE=/tmp/rec.json RR_HEAL=1 \
+  node --env-file=.env scripts/replay.mjs
+```
+
+Prints a per-step pass/fail report (with the resolution path used per step:
+`semantic` / `xpath` / `css` / `healed`), a best-effort live-view URL, and saves a
+screenshot of every step to `RR_SHOTS`.
+
+| Var | Default | Meaning |
+|-----|---------|---------|
+| `RR_FILE` | _(required)_ | recording to replay |
+| `RR_HEAL` | `0` | `1` = snapshot-ref + coordinate healing on selector miss |
+| `RR_SHOTS` | `/tmp/replay-<ts>` | screenshot output dir |
+| `RR_SESSION` | `rr-<ts>` | browse CLI session name |
+
+## Recording shape
+
+```json
+{
+  "title": "login flow",
+  "source": "browserbase-record-replay",
+  "startUrl": "https://www.saucedemo.com",
+  "steps": [
+    { "type": "navigate", "url": "https://www.saucedemo.com" },
+    { "type": "change", "selectors": [["aria/Username"], ["#user-name"]], "value": "standard_user" },
+    { "type": "click",  "selectors": [["text/Login"], ["#login-button"]] }
+  ]
+}
+```
diff --git a/skills/record-and-replay/evals/evals.json b/skills/record-and-replay/evals/evals.json
new file mode 100644
index 00000000..da40b36a
--- /dev/null
+++ b/skills/record-and-replay/evals/evals.json
@@ -0,0 +1,17 @@
+{
+  "skill": "record-and-replay",
+  "evals": [
+    {
+      "prompt": "Record my login flow on https://www.saucedemo.com so I can replay it later.",
+      "expected": "Runs scripts/record.mjs with RR_URL set to the site, surfaces the live-view URL for the user to perform the flow, and saves a Chrome DevTools Recorder JSON when the user stops."
+    },
+    {
+      "prompt": "Replay /tmp/rec.json and tell me which steps passed.",
+      "expected": "Runs scripts/replay.mjs with RR_FILE=/tmp/rec.json and RR_HEAL=1, then reports the per-step pass/fail scorecard including the resolution path (semantic/xpath/css/healed) and the screenshots directory."
+    },
+    {
+      "prompt": "The recorded flow stopped selecting the right autocomplete option on replay — can you make it self-heal?",
+      "expected": "Explains that replay heals selector misses by matching the typed value to a live accessibility-snapshot ref (RR_HEAL=1), which rescues autocomplete picks recorded only via dynamic ids, and re-runs the replay to verify."
+    }
+  ]
+}
diff --git a/skills/record-and-replay/package.json b/skills/record-and-replay/package.json
new file mode 100644
index 00000000..e3335deb
--- /dev/null
+++ b/skills/record-and-replay/package.json
@@ -0,0 +1,10 @@
+{
+  "name": "record-and-replay",
+  "version": "0.1.0",
+  "private": true,
+  "type": "module",
+  "dependencies": {
+    "@browserbasehq/sdk": "^2.7.0",
+    "playwright-core": "^1.49.0"
+  }
+}
diff --git a/skills/record-and-replay/scripts/inject.js b/skills/record-and-replay/scripts/inject.js
new file mode 100644
index 00000000..f7503c3e
--- /dev/null
+++ b/skills/record-and-replay/scripts/inject.js
@@ -0,0 +1,94 @@
+// Injected into every page/frame of the recording session (via addInitScript).
+// Captures human interactions as SEMANTIC steps (not raw x/y) and buffers them in
+// window.__rr_events (mirrored to localStorage so they survive same-origin
+// navigations). The Node side polls + drains this buffer via page.evaluate.
+// (We avoid page.exposeBinding because it does not wire up over Browserbase CDP.)
+(() => {
+  if (window.__rr_installed) return;
+  window.__rr_installed = true;
+  const KEY = '__rr_buf';
+
+  // restore anything buffered before a navigation
+  window.__rr_events = window.__rr_events || (() => {
+    try { return JSON.parse(localStorage.getItem(KEY) || '[]'); } catch { return []; }
+  })();
+
+  const send = (ev) => {
+    window.__rr_events.push(ev);
+    try { localStorage.setItem(KEY, JSON.stringify(window.__rr_events)); } catch (_) {}
+  };
+  const now = () => Date.now();
+  const esc = (s) => { try { return CSS.escape(s); } catch { return s; } };
+
+  function cssPath(el) {
+    if (!(el instanceof Element)) return '';
+    const parts = [];
+    while (el && el.nodeType === 1 && parts.length < 6) {
+      if (el.id) { parts.unshift('#' + esc(el.id)); break; }
+      let nth = 1, sib = el;
+      while ((sib = sib.previousElementSibling)) if (sib.nodeName === el.nodeName) nth++;
+      parts.unshift(el.nodeName.toLowerCase() + ':nth-of-type(' + nth + ')');
+      el = el.parentElement;
+    }
+    return parts.join(' > ');
+  }
+
+  function xPath(el) {
+    if (el.id) return '//*[@id="' + el.id + '"]';
+    const parts = [];
+    while (el && el.nodeType === 1) {
+      let i = 1, sib = el;
+      while ((sib = sib.previousElementSibling)) if (sib.nodeName === el.nodeName) i++;
+      parts.unshift(el.nodeName.toLowerCase() + '[' + i + ']');
+      el = el.parentElement;
+    }
+    return '/' + parts.join('/');
+  }
+
+  function accName(el) {
+    const g = (a) => (el.getAttribute && el.getAttribute(a)) || '';
+    return (g('aria-label') || g('placeholder') || g('name') || g('title') || '').trim();
+  }
+
+  // Chrome DevTools Recorder format: selectors is an array of selector-groups,
+  // tried in priority order during replay. This list IS the healing.
+  function selectorsFor(el) {
+    const out = [];
+    if (el.id) out.push('#' + esc(el.id));
+    const an = accName(el);
+    if (an) out.push('aria/' + an.slice(0, 80));
+    const txt = (el.innerText || el.textContent || '').trim();
+    if (txt && txt.length <= 60 && ['BUTTON', 'A', 'SUMMARY', 'LABEL', 'SPAN'].includes(el.tagName)) {
+      out.push('text/' + txt);
+    }
+    out.push(cssPath(el));
+    out.push('xpath/' + xPath(el));
+    return out.filter(Boolean).map((s) => [s]);
+  }
+
+  document.addEventListener('click', (e) => {
+    const el = e.target;
+    if (!el || el.nodeType !== 1) return;
+    send({ type: 'click', selectors: selectorsFor(el), url: location.href, ts: now() });
+  }, true);
+
+  // 'change' fires on commit/blur -> captures the final field value, not keystrokes.
+  document.addEventListener('change', (e) => {
+    const el = e.target;
+    if (!el || el.nodeType !== 1) return;
+    const value = ('value' in el) ? el.value : '';
+    send({ type: 'change', selectors: selectorsFor(el), value, url: location.href, ts: now() });
+  }, true);
+
+  document.addEventListener('keydown', (e) => {
+    if (['Enter', 'Tab', 'Escape'].includes(e.key)) {
+      send({ type: 'keyDown', key: e.key, url: location.href, ts: now() });
+    }
+  }, true);
+
+  let st;
+  window.addEventListener('scroll', () => {
+    clearTimeout(st);
+    st = setTimeout(() => send({ type: 'scroll', x: window.scrollX, y: window.scrollY, url: location.href, ts: now() }), 400);
+  }, true);
+})();
diff --git a/skills/record-and-replay/scripts/record.mjs b/skills/record-and-replay/scripts/record.mjs
new file mode 100644
index 00000000..fa878a04
--- /dev/null
+++ b/skills/record-and-replay/scripts/record.mjs
@@ -0,0 +1,94 @@
+// Record a human browser flow on a Browserbase session.
+// Opens a cloud browser, hands you a live-view URL to click around in, captures
+// each interaction as a semantic step, and saves a Chrome DevTools Recorder file.
+//
+//   RR_URL=https://site.com RR_OUT=/tmp/rec.json \
+//     node --env-file=.env scripts/record.mjs
+//
+// Stop with ENTER (interactive) or after RR_SECONDS (non-interactive).
+//
+// NOTE: uses raw Playwright over CDP (not Stagehand; its wrapped page breaks page.exposeBinding).
+// Events are not shipped via exposeBinding either: inject.js buffers them in-page and drain() polls them out.
+import Browserbase from '@browserbasehq/sdk';
+import { chromium } from 'playwright-core';
+import { readFileSync, writeFileSync, existsSync, unlinkSync } from 'node:fs';
+import readline from 'node:readline';
+
+const START_URL = process.env.RR_URL || 'https://example.com';
+const OUT = process.env.RR_OUT || `/tmp/recording-${Date.now()}.json`;
+const TITLE = process.env.RR_TITLE || 'Recorded flow';
+const SECONDS = process.env.RR_SECONDS ? parseInt(process.env.RR_SECONDS, 10) : null;
+// stop the recording by creating this file (lets the agent stop it conversationally)
+const STOP_FILE = process.env.RR_STOP || '/tmp/rr-stop';
+try { unlinkSync(STOP_FILE); } catch (_) {}
+const inject = readFileSync(new URL('./inject.js', import.meta.url), 'utf8');
+
+const bb = new Browserbase({ apiKey: process.env.BROWSERBASE_API_KEY });
+const session = await bb.sessions.create({ projectId: process.env.BROWSERBASE_PROJECT_ID });
+const browser = await chromium.connectOverCDP(session.connectUrl);
+const context = browser.contexts()[0];
+const page = context.pages()[0] ?? (await context.newPage());
+
+const events = [];
+// context-level so it applies to every page/tab the user opens
+await context.addInitScript({ content: inject });
+
+await page.goto(START_URL, { waitUntil: 'domcontentloaded' });
+
+// Drain the in-page buffer across all open tabs. We poll instead of using
+// exposeBinding, which does not deliver over Browserbase's CDP connection.
+async function drain() {
+  for (const p of context.pages()) {
+    try {
+      const evs = await p.evaluate(() => {
+        const e = window.__rr_events || [];
+        window.__rr_events = [];
+        try { localStorage.removeItem('__rr_buf'); } catch (_) {}
+        return e;
+      });
+      if (evs && evs.length) events.push(...evs);
+    } catch (_) { /* context navigating; next tick */ }
+  }
+}
+const poll = setInterval(drain, 600);
+
+let liveUrl = 'https://www.browserbase.com/sessions';
+try {
+  const dbg = await bb.sessions.debug(session.id);
+  liveUrl = dbg.debuggerFullscreenUrl || dbg.debuggerUrl || liveUrl;
+} catch (e) {
+  console.error('(could not fetch live view url:', String(e).slice(0, 120), ')');
+}
+
+console.log('\n=== RECORDING ===');
+console.log('Open this live view and interact (click around, fill forms):');
+console.log('  ' + liveUrl);
+console.log(`Stop by: pressing ENTER, creating ${STOP_FILE}` + (SECONDS ? `, or after ${SECONDS}s.` : '.') + '\n');
+
+await new Promise((resolve) => {
+  let done = false;
+  const fin = () => { if (!done) { done = true; clearInterval(sentinel); resolve(); } };
+  // conversational stop: agent runs `touch /tmp/rr-stop` to end the recording
+  const sentinel = setInterval(() => { if (existsSync(STOP_FILE)) fin(); }, 500);
+  if (process.stdin.isTTY) {
+    const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
+    rl.question('Press ENTER to stop recording... ', () => { rl.close(); fin(); });
+  }
+  if (SECONDS) setTimeout(fin, SECONDS * 1000);
+  else if (!process.stdin.isTTY) setTimeout(fin, 600000); // 10-min safety cap
+});
+try { unlinkSync(STOP_FILE); } catch (_) {}
+
+clearInterval(poll);
+await drain(); // final flush
+
+const recording = {
+  title: TITLE,
+  source: 'browserbase-record-replay',          // Chrome DevTools Recorder compatible
+  startUrl: START_URL,
+  steps: [{ type: 'navigate', url: START_URL }, ...events],
+};
+writeFileSync(OUT, JSON.stringify(recording, null, 2));
+console.log(`\nSaved ${events.length} interaction step(s) -> ${OUT}`);
+
+await browser.close();
diff --git a/skills/record-and-replay/scripts/replay.mjs b/skills/record-and-replay/scripts/replay.mjs
new file mode 100644
index 00000000..cc46a3ac
--- /dev/null
+++ b/skills/record-and-replay/scripts/replay.mjs
@@ -0,0 +1,187 @@
+// Replay a recorded flow against a fresh Browserbase session using the `browse` CLI.
+//
+// Engine: every step is executed through deterministic `browse` subcommands
+// (open / click / fill / select / key / eval / mouse). Selection is resolved
+// "highest-confidence first":
+//   1. semantic    — recorded aria/ or text/ selector -> matched to a live
+//                    snapshot ref (survives Google-style dynamic id churn)
+//   2. recorded    — recorded xpath, then css, each VERIFIED with `get visible`
+//                    before acting (browse click reports success even on a
+//                    no-match, so we never trust it blind)
+//   3. heal        — snapshot-match the step's value / prior typed value to a
+//                    ref (this is what rescues unlabeled autocomplete picks)
+//   4. coords      — last resort: `get box` a recorded selector, click center
+// Passes 3-4 are gated behind RR_HEAL=1.
+//
+//   RR_FILE=/tmp/rec.json [RR_HEAL=1] [RR_SHOTS=/tmp/shots] \
+//     node --env-file=.env scripts/replay.mjs
+import { execFileSync } from 'node:child_process';
+import { readFileSync, writeFileSync, mkdirSync } from 'node:fs';
+
+const FILE = process.env.RR_FILE;
+if (!FILE) { console.error('Set RR_FILE=path/to/recording.json'); process.exit(1); }
+const HEAL = process.env.RR_HEAL === '1';
+const SHOTS = process.env.RR_SHOTS || `/tmp/replay-${Date.now()}`;
+const SESSION = process.env.RR_SESSION || `rr-${Date.now()}`;
+mkdirSync(SHOTS, { recursive: true });
+const rec = JSON.parse(readFileSync(FILE, 'utf8'));
+
+// --- browse CLI wrapper -----------------------------------------------------
+// Runs `browse <args> --remote -s <session>`, strips the npm update banner, and
+// best-effort parses the JSON body. Never throws: a failed/empty call returns
+// { ok:false }, which the resolver treats as "this selector didn't resolve".
+function browse(args, { json = true } = {}) {
+  let raw = '';
+  try {
+    raw = execFileSync('browse', [...args, '--remote', '-s', SESSION], {
+      encoding: 'utf8', stdio: ['ignore', 'pipe', 'pipe'], env: process.env,
+    });
+  } catch (e) {
+    raw = (e.stdout || '') + (e.stderr || '');
+    if (!raw) return { ok: false, raw: String(e).slice(0, 160) };
+  }
+  if (!json) return { ok: true, raw };
+  const a = raw.indexOf('{'), b = raw.lastIndexOf('}');
+  if (a === -1 || b <= a) return { ok: false, raw }; // e.g. "Could not find an element..."
+  try { return { ok: true, json: JSON.parse(raw.slice(a, b + 1)), raw }; }
+  catch { return { ok: false, raw }; }
+}
+
+// --- selector handling ------------------------------------------------------
+function classify(sel) {
+  if (sel.startsWith('aria/')) return { kind: 'aria', label: sel.slice(5), sel };
+  if (sel.startsWith('text/')) return { kind: 'text', label: sel.slice(5), sel };
+  if (sel.startsWith('xpath/')) return { kind: 'xpath', value: sel.slice(6), sel: sel.slice(6) };
+  if (sel.startsWith('pierce/')) return { kind: 'css', value: sel.slice(7), sel: sel.slice(7) };
+  return { kind: 'css', value: sel, sel };
+}
+function candidates(step) {
+  const seen = new Set(), out = [];
+  for (const g of step.selectors || []) for (const s of g) {
+    if (typeof s === 'string' && !seen.has(s)) { seen.add(s); out.push(classify(s)); }
+  }
+  return out;
+}
+// `get visible` resolves css/xpath/ref and returns {visible:true|false}; a
+// no-match falls out as ok:false. This is our trustworthy existence probe.
+const resolves = (sel) => { const r = browse(['get', 'visible', sel]); return r.ok && r.json?.visible === true; };
+
+// Match a human label to a live snapshot ref. --compact yields lines like
+// "  [0-41] combobox: Where to?"; we score interactive nodes by name overlap.
+const INTERACTIVE = ['option', 'button', 'link', 'menuitem', 'menuitemradio', 'tab', 'combobox', 'listitem', 'checkbox', 'radio', 'cell', 'gridcell'];
+function snapshotRef(label) {
+  if (!label) return null;
+  const want = label.toLowerCase().trim();
+  const tokens = want.split(/\s+/).filter((t) => t.length > 1);
+  for (const filter of [label, tokens[0]].filter(Boolean)) {
+    const r = browse(['snapshot', '--compact', '--filter', filter]);
+    const tree = r.json?.tree;
+    if (!tree) continue;
+    let best = null, bestScore = 0;
+    for (const line of tree.split('\n')) {
+      const m = line.match(/\[(\d+-\d+)\]\s+([^:\n]+?)(?::\s*(.*))?\s*$/);
+      if (!m) continue;
+      const [, ref, roleRaw, nameRaw] = m;
+      const role = roleRaw.trim().toLowerCase(), name = (nameRaw || '').toLowerCase().trim();
+      if (!name) continue;
+      let score = 0;
+      if (name === want) score = 100;
+      else if (name.startsWith(want)) score = 80;
+      else if (name.includes(want)) score = 60;
+      else if (tokens.some((t) => name.includes(t))) score = 40;
+      if (!score) continue;
+      if (INTERACTIVE.includes(role)) score += 10;
+      if (score > bestScore) { bestScore = score; best = `@${ref}`; }
+    }
+    if (best) return best;
+  }
+  return null;
+}
+
+// --- per-step execution -----------------------------------------------------
+const act = (type, sel, value) =>
+  type === 'change' ? browse(['fill', sel, value ?? '']) : browse(['click', sel]);
+
+function doStep(step, ctx) {
+  const cands = candidates(step);
+  const arias = cands.filter((c) => c.kind === 'aria' || c.kind === 'text');
+  const xpaths = cands.filter((c) => c.kind === 'xpath');
+  const csses = cands.filter((c) => c.kind === 'css');
+
+  // 1. semantic: recorded aria/text label -> live snapshot ref
+  for (const c of arias) {
+    const ref = snapshotRef(c.label);
+    if (ref) { act(step.type, ref, step.value); return { status: 'ok', via: 'semantic', selector: c.sel, ref }; }
+  }
+  // 2. recorded selectors, verified before acting (xpath is anchored; css last)
+  for (const c of [...xpaths, ...csses]) {
+    if (resolves(c.sel)) { act(step.type, c.sel, step.value); return { status: 'ok', via: c.kind, selector: c.sel }; }
+  }
+  if (!HEAL) return { status: 'failed', reason: 'no selector matched (try RR_HEAL=1)' };
+
+  // 3. heal: match this step's value, or the value typed just before (rescues
+  //    autocomplete suggestion clicks whose only recorded selector was a
+  //    dynamic #id) to a snapshot ref.
+  for (const label of [step.value, ctx.lastValue].filter(Boolean)) {
+    const ref = snapshotRef(label);
+    if (ref) { act(step.type, ref, step.value); return { status: 'healed', via: 'value-snapshot', label, ref }; }
+  }
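+  // 4. coords: a recorded selector that exists but wasn't "visible" — click the (x, y) that `get box` reports (NOTE(review): presumably the box center — confirm; if it is the top-left corner, add half the width/height)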
+  for (const c of [...xpaths, ...csses]) {
+    const r = browse(['get', 'box', c.sel]);
+    const box = r.json;
+    if (box && typeof box.x === 'number') {
+      browse(['mouse', 'click', String(Math.round(box.x)), String(Math.round(box.y))]);
+      return { status: 'healed', via: 'coords', selector: c.sel };
+    }
+  }
+  return { status: 'failed', reason: 'unresolvable after heal' };
+}
+
+// --- run --------------------------------------------------------------------
+const sleep = (ms) => Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, ms);
+const results = [];
+let i = 0, ctx = { lastValue: null };
+
+for (const step of rec.steps) {
+  i++;
+  try {
+    if (step.type === 'navigate') {
+      const r = browse(['open', step.url]);
+      results.push({ step: i, type: 'navigate', status: r.ok ? 'ok' : 'error', url: step.url });
+    } else if (step.type === 'scroll') {
+      browse(['eval', `window.scrollBy(${step.x || 0}, ${step.y || 0}); 'ok'`]);
+      results.push({ step: i, type: 'scroll', status: 'ok' });
+    } else if (step.type === 'keyDown' || step.type === 'keyUp') {
+      if (step.type === 'keyDown') browse(['key', step.key]);
+      results.push({ step: i, type: step.type, status: 'ok', key: step.key });
+    } else if (step.type === 'click' || step.type === 'change') {
+      results.push({ step: i, type: step.type, ...doStep(step, ctx) });
+      if (step.type === 'change' && step.value) ctx.lastValue = step.value;
+    } else {
+      results.push({ step: i, type: step.type, status: 'skipped' });
+    }
+  } catch (e) {
+    results.push({ step: i, type: step.type, status: 'error', error: String(e).slice(0, 200) });
+  }
+  // autocomplete/menus need a beat to render before the next step snapshots
+  sleep(step.type === 'change' ? 900 : 400);
+  browse(['screenshot', '-p', `${SHOTS}/step-${String(i).padStart(2, '0')}-${step.type}.png`]);
+}
+
+// best-effort live-view link (newest running session is the one we just drove)
+let liveView = null;
+try {
+  const ls = browse(['cloud', 'sessions', 'list', '--status', 'RUNNING', '--json'], { json: false });
+  const id = JSON.parse(ls.raw.slice(ls.raw.indexOf('['), ls.raw.lastIndexOf(']') + 1))[0]?.id;
+  if (id) {
+    const dbg = browse(['cloud', 'sessions', 'debug', id]);
+    liveView = dbg.json?.debuggerFullscreenUrl || null;
+  }
+} catch { /* live view is optional */ }
+
+try { browse(['stop'], { json: false }); } catch { /* daemon may already be gone */ }
+
+const ok = results.filter((r) => r.status === 'ok' || r.status === 'healed').length;
+console.log(JSON.stringify({ file: FILE, session: SESSION, liveView, screenshots: SHOTS, results }, null, 2));
+console.log(`\nReplay: ${ok}/${results.length} steps succeeded. Screenshots -> ${SHOTS}`);

From ba0ce1eead20e2e11ae3032dac8624163cac2d58 Mon Sep 17 00:00:00 2001
From: shubh24 <shubhankar24@gmail.com>
Date: Sat, 27 Jun 2026 00:19:43 -0700
Subject: [PATCH 2/3] Rework record-and-replay around an intent-distilling
 teacher agent
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Shift from deterministic selector-replay to "capture wide, reason narrow":

- inject.js: capture each step's accessible name + role (ungated), so an
  autocomplete suggestion ("New York") is recorded even when its only
  selector is a dynamic id — this is the intent signal.
- record.mjs: per-step screenshots (intent evidence + replay oracle) and an
  RR_CONNECT_URL attach mode so the recorder can join a browser-trace
  keep-alive session and share the full CDP firehose.
- Distillation is now an agent, not a script (removed distill.mjs). The
  teacher agent reads the interaction stream + screenshots + trace and
  reconstructs intent — collapsing self-corrections, dropping abandoned
  actions, parameterizing inputs — then authors a task skill. See
  references/distill.md.
- SKILL.md rewritten around record -> trace -> distill -> task skill;
  deterministic replay.mjs demoted to an optional CI fast path.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 README.md                                     |   2 +-
 skills/record-and-replay/SKILL.md             | 143 ++++++++++--------
 skills/record-and-replay/evals/evals.json     |  16 +-
 .../record-and-replay/references/distill.md   |  59 ++++++++
 skills/record-and-replay/scripts/inject.js    |  22 ++-
 skills/record-and-replay/scripts/record.mjs   |  47 ++++--
 6 files changed, 212 insertions(+), 77 deletions(-)
 create mode 100644 skills/record-and-replay/references/distill.md

diff --git a/README.md b/README.md
index ae71d95a..05c6cd1f 100644
--- a/README.md
+++ b/README.md
@@ -25,7 +25,7 @@ This plugin includes the following skills (see `skills/` for details):
 | [company-research](skills/company-research/SKILL.md) | Discover target companies matching your ICP using the Browserbase Search API, deep-research each one, and score fit into a research report and CSV |
 | [event-prospecting](skills/event-prospecting/SKILL.md) | Extract speakers from a conference page, filter their companies against your ICP, and deep-research the best-fit people into a person-first prospecting report |
 | [competitor-analysis](skills/competitor-analysis/SKILL.md) | Auto-discover a company's competitors via the Browserbase Search API, deep-research each across marketing, signal, benchmark, and strategic-diff lanes, and compile a browsable HTML report with an overview, per-competitor deep dives, a feature/pricing matrix, and a mentions feed |
-| [record-and-replay](skills/record-and-replay/SKILL.md) | Record a human browser flow on a Browserbase cloud session and replay it deterministically through the `browse` CLI with accessibility-snapshot selector healing — "show, don't prompt" a bug or turn a flow into a self-healing test |
+| [record-and-replay](skills/record-and-replay/SKILL.md) | Record a human browser flow (clicks, typing, screenshots, full CDP trace) on a Browserbase session, then let an agent distill what the human *meant* — collapsing corrections, dropping abandoned actions — into a reusable, parameterized task skill that replays against the live page |
 
 ## Installation
 
diff --git a/skills/record-and-replay/SKILL.md b/skills/record-and-replay/SKILL.md
index b5e444e1..1f9d5de2 100644
--- a/skills/record-and-replay/SKILL.md
+++ b/skills/record-and-replay/SKILL.md
@@ -1,97 +1,122 @@
 ---
 name: record-and-replay
-description: Record a human browser flow on a Browserbase session and replay it as a self-healing test. Use when you want to "show, don't prompt" a bug or workflow — capture clicks/typing/scrolls in a live cloud browser, save them as a Chrome DevTools Recorder file, then re-run them (with optional healing) to verify a flow still works. Triggers on "record this flow", "replay the recording", "record and replay", "turn this into a browser test".
-compatibility: "Requires Node 18+ and the browse CLI (`npm install -g browse`). Cloud sessions need `BROWSERBASE_API_KEY` and `BROWSERBASE_PROJECT_ID`. Replay is zero-dependency (it drives the browse CLI); record uses `@browserbasehq/sdk` + `playwright-core`, so run `npm install` in this skill dir first."
+description: Turn a recorded human browser flow into a reusable, parameterized task skill. Capture clicks/typing/screenshots plus a full CDP trace on a Browserbase session, then let an agent distill what the human *meant* (collapsing corrections, dropping abandoned actions) into an intent-level SKILL.md that replays against the live page. Use for "show, don't prompt" — record a flow once and reuse it. Triggers on "record this flow", "turn this into a skill", "record and replay", "replay the recording".
+compatibility: "Requires Node 18+ and the browse CLI (`npm install -g browse`), plus `BROWSERBASE_API_KEY` and `BROWSERBASE_PROJECT_ID`. Record uses `@browserbasehq/sdk` + `playwright-core` — run `npm install` in this skill dir. Pairs with the `browser-trace` skill for the full CDP firehose."
 license: MIT
-allowed-tools: Bash, Read
+allowed-tools: Bash, Read, Grep
 ---
 
 # Record & Replay
 
-"Show the bug instead of prompting it." Capture a human browser flow, get back a
-portable recording, then replay it deterministically with selector healing.
-Sessions run on Browserbase cloud browsers with a live, interactive view.
-
-## Fundamental logic
-
-- **Record** — attach to a Browserbase session over CDP, inject a listener that
-  captures each human action as a *semantic step* (a priority-ordered list of
-  selectors: `aria/Name` → `text/Label` → CSS path → XPath — plus value and
-  timestamp). The fallback list IS the healing.
-- **Replay** — drive every step through deterministic `browse` CLI subcommands,
-  resolving each step highest-confidence-first:
-  1. **semantic** — a recorded `aria/` or `text/` selector matched to a live
-     accessibility-snapshot ref (survives dynamic-id churn, e.g. Google's `#cNNN`),
-  2. **recorded** — the recorded XPath, then CSS, each *verified* with
-     `browse get visible` before acting (`browse click` reports success even on a
-     no-match, so a selector is never trusted blind),
-  3. **heal** (`RR_HEAL=1`) — match the step's typed value (or the value typed
-     just before) to a snapshot ref; this is what rescues unlabeled autocomplete
-     picks whose only recorded selector was a dynamic id,
-  4. **coords** (`RR_HEAL=1`) — last resort: `browse get box` a recorded selector
-     and click its center.
-
-Output is **Chrome DevTools Recorder** compatible, so recordings are also runnable
-by `@puppeteer/replay`.
-
-## Setup (once)
+"Show the bug instead of prompting it." Record a human flow once, then turn it
+into a **reusable task skill** an agent can replay — and parameterize — against
+the live page.
 
-```bash
-cd skills/record-and-replay && npm install   # only needed for `record`
-npm install -g browse                         # the replay/record driver CLI
-export BROWSERBASE_API_KEY=...  BROWSERBASE_PROJECT_ID=...
+The pipeline is **capture wide, reason narrow**:
+
+```
+record (interaction stream + screenshots)              ← semantic spine
+  + browser-trace (CDP firehose: network/console/DOM)  ← full observability
+  → distill = teacher agent reasons about INTENT       ← collapses corrections,
+  → skills/<task>/SKILL.md                                drops abandoned actions
 ```
 
-## Record
+The key idea: a recording is **mechanics** ("typed 'new yo', clicked `#c307`").
+What you want is **intent** ("destination = New York"). Recovering intent —
+including spotting that the user typed San Francisco, erased it, and chose Los
+Angeles, or applied a filter then removed it — is a judgment, so the distiller is
+**an agent, not a script** (see `references/distill.md`).
+
+## 1. Capture
+
+Record produces the **semantic spine**: each click/type with the acted element's
+accessible `name` + `role` + committed value, plus a screenshot per step.
 
 ```bash
 RR_URL="https://www.saucedemo.com" RR_OUT=/tmp/rec.json RR_TITLE="login flow" \
   node --env-file=.env scripts/record.mjs
 ```
 
-1. The script prints a **live view URL** — open it and perform the flow (click, type, scroll).
-2. Stop and save by pressing **ENTER** (interactive), creating the stop file
-   (`touch /tmp/rr-stop`, lets an agent stop it conversationally), or setting
-   `RR_SECONDS=30` to auto-stop.
-3. A recording is written to `RR_OUT`.
+Open the printed **live view URL**, perform the flow, then stop with ENTER,
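+`touch /tmp/rr-stop`, or `RR_SECONDS=30`. Output: `RR_OUT` plus a screenshots dir (`RR_OUT` with its `.json` suffix replaced by `-shots/`, e.g. `/tmp/rec-shots/`; override with `RR_SHOTS`).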
+
+**For full observability**, attach `browser-trace` so the teacher agent can also
+query network/console/DOM. Create one keep-alive session, point both at it:
+
+```bash
+node ../browser-trace/scripts/bb-capture.mjs --new myflow   # session + CDP firehose
+SID=$(jq -r .browserbase.session_id .o11y/myflow/manifest.json)
+CONNECT_URL=$(browse cloud sessions get "$SID" | jq -r .connectUrl)
+RR_CONNECT_URL="$CONNECT_URL" RR_URL="https://site.com" RR_OUT=/tmp/rec.json \
+  node --env-file=.env scripts/record.mjs                   # attaches to same session
+# after stopping the recording:
+node ../browser-trace/scripts/stop-capture.mjs myflow && node ../browser-trace/scripts/bisect-cdp.mjs myflow
+```
 
 | Var | Default | Meaning |
 |-----|---------|---------|
 | `RR_URL` | `https://example.com` | start URL |
 | `RR_OUT` | `/tmp/recording-<ts>.json` | output recording path |
-| `RR_TITLE` | `Recorded flow` | recording title |
-| `RR_STOP` | `/tmp/rr-stop` | create this file to stop recording |
-| `RR_SECONDS` | _(none)_ | auto-stop after N seconds instead of ENTER |
+| `RR_CONNECT_URL` | _(none)_ | attach to an existing session (e.g. browser-trace's) instead of creating one |
+| `RR_TITLE` / `RR_STOP` / `RR_SECONDS` | `Recorded flow` / `/tmp/rr-stop` / _(none)_ | recording title / stop-file (create it to stop recording) / auto-stop after N seconds |
 
-## Replay
+## 2. Distill (the agent does this)
 
-```bash
-RR_FILE=/tmp/rec.json RR_HEAL=1 \
-  node --env-file=.env scripts/replay.mjs
+Read `references/distill.md`, then **act as the teacher agent**: read
+`recording.json` + the screenshots, query the `browser-trace` buckets as needed,
+and reconstruct the *smallest set of intents that explains the session* —
+collapsing corrections, dropping abandoned/undone actions, parameterizing the
+values the user supplied. Write the result as `skills/<task>/SKILL.md`.
+
+Each step's headline is the value the field **committed to** (the acted element's
+`name`), never the keystrokes or a dynamic selector. The committed value is also
+the step's verification check.
+
+### Task skill shape
+
+```markdown
+---
+name: <task>
+description: <what it does + when to fire, with triggers>
+license: MIT
+---
+# <Task>
+Realize each intent against the live UI (don't replay keystrokes). Verify each.
+
+Inputs: origin, destination, depart, return
+1. Set "Where from?" = {origin}.   ✅ field reads {origin}
+2. Set "Where to?"   = {destination}.   ✅ reads {destination}, not "Anywhere"
+3. Set dates = {depart}/{return}; confirm Done.
+4. Click "Search".   ✅ results for {origin}→{destination} load.
+
+Fallback: recording.json (mechanics) · <recording>-shots/step-NN.png (oracle)
 ```
 
-Prints a per-step pass/fail report (with the resolution path used per step:
-`semantic` / `xpath` / `css` / `healed`), a best-effort live-view URL, and saves a
-screenshot of every step to `RR_SHOTS`.
+## 3. Replay
 
-| Var | Default | Meaning |
-|-----|---------|---------|
-| `RR_FILE` | _(required)_ | recording to replay |
-| `RR_HEAL` | `0` | `1` = snapshot-ref + coordinate healing on selector miss |
-| `RR_SHOTS` | `/tmp/replay-<ts>` | screenshot output dir |
-| `RR_SESSION` | `rr-<ts>` | browse CLI session name |
+Replay = **invoke the generated task skill**: the agent realizes each intent via
+`browse`, verifying against committed values / step screenshots. Because it
+replays *intent*, it survives dynamic-id churn and minor layout change.
+
+A deterministic fast path is also available for cheap, reproducible CI checks
+(it executes the recorded selectors directly, with snapshot-ref healing):
+
+```bash
+RR_FILE=/tmp/rec.json RR_HEAL=1 node --env-file=.env scripts/replay.mjs
+```
 
 ## Recording shape
 
 ```json
 {
   "title": "login flow",
-  "source": "browserbase-record-replay",
   "startUrl": "https://www.saucedemo.com",
+  "shots": "/tmp/rec-shots",
   "steps": [
     { "type": "navigate", "url": "https://www.saucedemo.com" },
-    { "type": "change", "selectors": [["aria/Username"], ["#user-name"]], "value": "standard_user" },
-    { "type": "click",  "selectors": [["text/Login"], ["#login-button"]] }
+    { "type": "change", "name": "Username", "role": "textbox", "value": "standard_user",
+      "selectors": [["aria/Username"], ["#user-name"]], "screenshot": "/tmp/rec-shots/step-02.png" },
+    { "type": "click", "name": "Login", "role": "button", "selectors": [["text/Login"], ["#login-button"]] }
   ]
 }
 ```
diff --git a/skills/record-and-replay/evals/evals.json b/skills/record-and-replay/evals/evals.json
index da40b36a..bb6af04b 100644
--- a/skills/record-and-replay/evals/evals.json
+++ b/skills/record-and-replay/evals/evals.json
@@ -2,16 +2,20 @@
   "skill": "record-and-replay",
   "evals": [
     {
-      "prompt": "Record my login flow on https://www.saucedemo.com so I can replay it later.",
-      "expected": "Runs scripts/record.mjs with RR_URL set to the site, surfaces the live-view URL for the user to perform the flow, and saves a Chrome DevTools Recorder JSON when the user stops."
+      "prompt": "Record my flight search on Google Flights and turn it into a reusable skill.",
+      "expected": "Runs scripts/record.mjs (ideally attached to a browser-trace session via RR_CONNECT_URL), surfaces the live-view URL for the user to perform the flow, and saves the interaction stream + per-step screenshots."
     },
     {
-      "prompt": "Replay /tmp/rec.json and tell me which steps passed.",
-      "expected": "Runs scripts/replay.mjs with RR_FILE=/tmp/rec.json and RR_HEAL=1, then reports the per-step pass/fail scorecard including the resolution path (semantic/xpath/css/healed) and the screenshots directory."
+      "prompt": "Now distill that recording into a task skill.",
+      "expected": "Acts as the teacher agent per references/distill.md: reads recording.json + screenshots, queries the CDP trace as needed, recovers intent (committed values, not keystrokes), and writes a parameterized skills/<task>/SKILL.md."
     },
     {
-      "prompt": "The recorded flow stopped selecting the right autocomplete option on replay — can you make it self-heal?",
-      "expected": "Explains that replay heals selector misses by matching the typed value to a live accessibility-snapshot ref (RR_HEAL=1), which rescues autocomplete picks recorded only via dynamic ids, and re-runs the replay to verify."
+      "prompt": "I typed San Francisco, deleted it, then typed Los Angeles — make sure the skill only does Los Angeles.",
+      "expected": "Explains that the teacher-agent distiller collapses self-corrections and drops abandoned actions, so the emitted skill sets origin = Los Angeles only; the intermediate San Francisco state is omitted as noise."
+    },
+    {
+      "prompt": "Replay /tmp/rec.json deterministically and tell me which steps passed.",
+      "expected": "Runs the optional fast path scripts/replay.mjs with RR_HEAL=1, reporting the per-step pass/fail scorecard (resolution path: semantic/xpath/css/healed) and the screenshots directory."
     }
   ]
 }
diff --git a/skills/record-and-replay/references/distill.md b/skills/record-and-replay/references/distill.md
new file mode 100644
index 00000000..bca8b83b
--- /dev/null
+++ b/skills/record-and-replay/references/distill.md
@@ -0,0 +1,59 @@
+# Distill: the teacher agent
+
+The distiller is **an agent, not a script.** Reconstructing what a human *meant*
+from what they *did* is a goal-level judgment — collapsing self-corrections,
+dropping abandoned actions, parameterizing variables — and no deterministic rule
+can do it. This is the same shape as the `autobrowse` teacher loop: there the
+outer agent reads its own run's trace and improves a skill; here it reads a
+*human's* trace and authors one.
+
+## Inputs (capture wide, read selectively)
+
+Give the agent everything the session produced, but let it **query** the trace
+rather than dumping the firehose into context (that's what the bisected buckets
+are for — progressive disclosure):
+
+| source | what it carries | how to read it |
+|--------|-----------------|----------------|
+| `recording.json` | semantic spine: each click/type with the acted element's `name` + `role` + value | read in full (it's small) |
+| `<recording>-shots/step-NN.png` | what the page looked like at each commit | read the ones you need to disambiguate intent |
+| `browser-trace` buckets (`.o11y/<run>/cdp/by-bucket/`) | network, console, DOM dumps, exact event timing | `grep`/`jq`/`query.mjs` on demand — e.g. to confirm a click triggered a request, or that a value committed |
+
+## The job
+
+Produce the **smallest set of intents that explains the session**, then write a
+parameterized task skill. Specifically:
+
+1. **Recover intent, not mechanics.** A step's headline is the value the field
+   *committed to* — read from the acted element's `name` (e.g. the autocomplete
+   suggestion "New York"), not the keystrokes ("new yo") or the dynamic selector
+   (`#c307`).
+2. **Collapse self-corrections.** Typed "San Francisco", cleared it, typed "Los
+   Angeles" → one intent: `origin = Los Angeles`. The intermediate states are noise.
+3. **Drop abandoned actions.** Applied a "window seat" filter then removed it →
+   net zero, omit it entirely. Same for opened-then-closed menus, mis-clicks.
+4. **Parameterize.** The values the user supplied (cities, dates, search terms)
+   become inputs with the recorded value as the example. Structural choices
+   (which button submits) stay fixed.
+5. **Attach a check per step.** The committed value *is* the assertion ("the field
+   reads New York"); for steps with no readable value, point at the step screenshot.
+
+## Output
+
+Write `skills/<task-name>/`:
+- `SKILL.md` — intent-level, parameterized, with per-step verification (see the
+  shape in the parent SKILL.md).
+- `recording.json` + `<recording>-shots/` — carried along as the mechanics
+  fallback and the visual oracle.
+
+## Teacher prompt (starting point)
+
+> You are distilling a recorded browser session into a reusable task skill. You
+> have `recording.json` (semantic click/type stream with element names), the
+> per-step screenshots, and a queryable `browser-trace` (network/console/DOM).
+> Figure out what the human was *trying to accomplish* — not the literal keystrokes.
+> Collapse corrections, drop abandoned/undone actions, and identify which values
+> were user inputs (parameterize them). Emit a parameterized SKILL.md whose steps
+> are intents with a verification check each. When a step is ambiguous, look at its
+> screenshot and query the trace before deciding. Prefer the fewest steps that
+> reliably reproduce the goal.
diff --git a/skills/record-and-replay/scripts/inject.js b/skills/record-and-replay/scripts/inject.js
index f7503c3e..b9586de0 100644
--- a/skills/record-and-replay/scripts/inject.js
+++ b/skills/record-and-replay/scripts/inject.js
@@ -50,6 +50,24 @@
     return (g('aria-label') || g('placeholder') || g('name') || g('title') || '').trim();
   }
 
+  // The INTENT signal: the human-meaningful name of what was acted on, recovered
+  // ungated (not limited to certain tags) so an autocomplete suggestion ("New
+  // York") is captured even when its only selector is a dynamic id. Priority:
+  // aria-label > aria-labelledby text > placeholder > title > alt > <input> value > visible text (capped at 120 chars).
+  function nameOf(el) {
+    const g = (a) => (el.getAttribute && el.getAttribute(a)) || '';
+    let lbl = '';
+    const lb = g('aria-labelledby');
+    if (lb) lbl = lb.split(/\s+/).map((id) => (document.getElementById(id) || {}).innerText || '').join(' ').trim();
+    const text = (el.innerText || el.textContent || '').replace(/\s+/g, ' ').trim();
+    const cand = g('aria-label') || lbl || g('placeholder') || g('title') || g('alt')
+      || (el.tagName === 'INPUT' ? el.value : '') || text;
+    return (cand || '').slice(0, 120);
+  }
+  function roleOf(el) {
+    return ((el.getAttribute && el.getAttribute('role')) || el.tagName || '').toLowerCase();
+  }
+
   // Chrome DevTools Recorder format: selectors is an array of selector-groups,
   // tried in priority order during replay. This list IS the healing.
   function selectorsFor(el) {
@@ -69,7 +87,7 @@
   document.addEventListener('click', (e) => {
     const el = e.target;
     if (!el || el.nodeType !== 1) return;
-    send({ type: 'click', selectors: selectorsFor(el), url: location.href, ts: now() });
+    send({ type: 'click', name: nameOf(el), role: roleOf(el), selectors: selectorsFor(el), url: location.href, ts: now() });
   }, true);
 
   // 'change' fires on commit/blur -> captures the final field value, not keystrokes.
@@ -77,7 +95,7 @@
     const el = e.target;
     if (!el || el.nodeType !== 1) return;
     const value = ('value' in el) ? el.value : '';
-    send({ type: 'change', selectors: selectorsFor(el), value, url: location.href, ts: now() });
+    send({ type: 'change', name: nameOf(el), role: roleOf(el), selectors: selectorsFor(el), value, url: location.href, ts: now() });
   }, true);
 
   document.addEventListener('keydown', (e) => {
diff --git a/skills/record-and-replay/scripts/record.mjs b/skills/record-and-replay/scripts/record.mjs
index fa878a04..f59dbad5 100644
--- a/skills/record-and-replay/scripts/record.mjs
+++ b/skills/record-and-replay/scripts/record.mjs
@@ -11,21 +11,33 @@
 // page breaks page.exposeBinding, which is how we ship captured events to Node.
 import Browserbase from '@browserbasehq/sdk';
 import { chromium } from 'playwright-core';
-import { readFileSync, writeFileSync, existsSync, unlinkSync } from 'node:fs';
+import { readFileSync, writeFileSync, existsSync, unlinkSync, mkdirSync } from 'node:fs';
 import readline from 'node:readline';
 
 const START_URL = process.env.RR_URL || 'https://example.com';
 const OUT = process.env.RR_OUT || `/tmp/recording-${Date.now()}.json`;
 const TITLE = process.env.RR_TITLE || 'Recorded flow';
+// per-step screenshots live beside the recording: <out>-shots/step-NN.png. They
+// are the visual record of intent (what the page looked like at each commit) and
+// double as the replay oracle.
+const SHOTS = process.env.RR_SHOTS || OUT.replace(/\.json$/, '') + '-shots';
+mkdirSync(SHOTS, { recursive: true });
 const SECONDS = process.env.RR_SECONDS ? parseInt(process.env.RR_SECONDS, 10) : null;
 // stop the recording by creating this file (lets the agent stop it conversationally)
 const STOP_FILE = process.env.RR_STOP || '/tmp/rr-stop';
 try { unlinkSync(STOP_FILE); } catch (_) {}
 const inject = readFileSync(new URL('./inject.js', import.meta.url), 'utf8');
 
+// Two modes:
+//  - attach (RR_CONNECT_URL set): join a session someone else owns — e.g. the
+//    keep-alive session browser-trace's `bb-capture.mjs --new` created — so the
+//    CDP firehose and this semantic interaction stream observe the SAME session.
+//  - create (default): spin up our own session (standalone, no trace).
 const bb = new Browserbase({ apiKey: process.env.BROWSERBASE_API_KEY });
-const session = await bb.sessions.create({ projectId: process.env.BROWSERBASE_PROJECT_ID });
-const browser = await chromium.connectOverCDP(session.connectUrl);
+let session = null;
+const connectUrl = process.env.RR_CONNECT_URL
+  || (session = await bb.sessions.create({ projectId: process.env.BROWSERBASE_PROJECT_ID })).connectUrl;
+const browser = await chromium.connectOverCDP(connectUrl);
 const context = browser.contexts()[0];
 const page = context.pages()[0] ?? (await context.newPage());
 
@@ -46,18 +58,31 @@ async function drain() {
         try { localStorage.removeItem('__rr_buf'); } catch (_) {}
         return e;
       });
-      if (evs && evs.length) events.push(...evs);
+      if (evs && evs.length) {
+        events.push(...evs);
+        // screenshot the page that just produced events; attach to the last one
+        try {
+          const shot = `${SHOTS}/step-${String(events.length).padStart(2, '0')}.png`;
+          await p.screenshot({ path: shot });
+          events[events.length - 1].screenshot = shot;
+        } catch (_) { /* page mid-navigation; skip this shot */ }
+      }
     } catch (_) { /* context navigating; next tick */ }
   }
 }
 const poll = setInterval(drain, 600);
 
 let liveUrl = 'https://www.browserbase.com/sessions';
-try {
-  const dbg = await bb.sessions.debug(session.id);
-  liveUrl = dbg.debuggerFullscreenUrl || dbg.debuggerUrl || liveUrl;
-} catch (e) {
-  console.error('(could not fetch live view url:', String(e).slice(0, 120), ')');
+if (session) {
+  try {
+    const dbg = await bb.sessions.debug(session.id);
+    liveUrl = dbg.debuggerFullscreenUrl || dbg.debuggerUrl || liveUrl;
+  } catch (e) {
+    console.error('(could not fetch live view url:', String(e).slice(0, 120), ')');
+  }
+} else {
+  // attach mode: bb-capture already printed the live debugger URL for this session
+  liveUrl = process.env.RR_LIVE_URL || '(use the live URL bb-capture printed)';
 }
 
 console.log('\n=== RECORDING ===');
@@ -86,9 +111,13 @@ const recording = {
   title: TITLE,
   source: 'browserbase-record-replay',          // Chrome DevTools Recorder compatible
   startUrl: START_URL,
+  shots: SHOTS,
   steps: [{ type: 'navigate', url: START_URL }, ...events],
 };
 writeFileSync(OUT, JSON.stringify(recording, null, 2));
 console.log(`\nSaved ${events.length} interaction step(s) -> ${OUT}`);
+console.log(`Screenshots -> ${SHOTS}`);
 
+// detach our CDP client; in attach mode we leave the session running for the
+// trace owner (bb-capture) to stop and finalize.
 await browser.close();

From 266b6c33c9abf65334f0fac2df5f3fb9c48dfe22 Mon Sep 17 00:00:00 2001
From: shubh24 <shubhankar24@gmail.com>
Date: Sat, 27 Jun 2026 00:43:49 -0700
Subject: [PATCH 3/3] Rename record-and-replay -> browser-record; drop
 deterministic replay

- Rename the skill to browser-record (it's a recorder that emits a task
  skill; "replay" is now just invoking that skill).
- Remove replay.mjs: replay is agentic (invoke the generated skill), so the
  deterministic engine is no longer part of the product.
- Generated task skills now bundle a curated screenshots/ folder (the visual
  oracle) referenced per step, and each step names its recorded target
  (accessible name/role) as a hint while granting the agent agency to use
  whatever live element achieves the intent.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 README.md                                     |   2 +-
 .../LICENSE.txt                               |   0
 .../SKILL.md                                  |  54 ++---
 .../evals/evals.json                          |   8 +-
 .../package.json                              |   2 +-
 .../references/distill.md                     |  11 +-
 .../scripts/inject.js                         |   0
 .../scripts/record.mjs                        |   0
 skills/record-and-replay/scripts/replay.mjs   | 187 ------------------
 9 files changed, 42 insertions(+), 222 deletions(-)
 rename skills/{record-and-replay => browser-record}/LICENSE.txt (100%)
 rename skills/{record-and-replay => browser-record}/SKILL.md (63%)
 rename skills/{record-and-replay => browser-record}/evals/evals.json (71%)
 rename skills/{record-and-replay => browser-record}/package.json (83%)
 rename skills/{record-and-replay => browser-record}/references/distill.md (85%)
 rename skills/{record-and-replay => browser-record}/scripts/inject.js (100%)
 rename skills/{record-and-replay => browser-record}/scripts/record.mjs (100%)
 delete mode 100644 skills/record-and-replay/scripts/replay.mjs

diff --git a/README.md b/README.md
index 05c6cd1f..62adde34 100644
--- a/README.md
+++ b/README.md
@@ -25,7 +25,7 @@ This plugin includes the following skills (see `skills/` for details):
 | [company-research](skills/company-research/SKILL.md) | Discover target companies matching your ICP using the Browserbase Search API, deep-research each one, and score fit into a research report and CSV |
 | [event-prospecting](skills/event-prospecting/SKILL.md) | Extract speakers from a conference page, filter their companies against your ICP, and deep-research the best-fit people into a person-first prospecting report |
 | [competitor-analysis](skills/competitor-analysis/SKILL.md) | Auto-discover a company's competitors via the Browserbase Search API, deep-research each across marketing, signal, benchmark, and strategic-diff lanes, and compile a browsable HTML report with an overview, per-competitor deep dives, a feature/pricing matrix, and a mentions feed |
-| [record-and-replay](skills/record-and-replay/SKILL.md) | Record a human browser flow (clicks, typing, screenshots, full CDP trace) on a Browserbase session, then let an agent distill what the human *meant* — collapsing corrections, dropping abandoned actions — into a reusable, parameterized task skill that replays against the live page |
+| [browser-record](skills/browser-record/SKILL.md) | Record a human browser flow (clicks, typing, screenshots, full CDP trace) on a Browserbase session, then let an agent distill what the human *meant* — collapsing corrections, dropping abandoned actions — into a reusable, parameterized task skill that replays against the live page |
 
 ## Installation
 
diff --git a/skills/record-and-replay/LICENSE.txt b/skills/browser-record/LICENSE.txt
similarity index 100%
rename from skills/record-and-replay/LICENSE.txt
rename to skills/browser-record/LICENSE.txt
diff --git a/skills/record-and-replay/SKILL.md b/skills/browser-record/SKILL.md
similarity index 63%
rename from skills/record-and-replay/SKILL.md
rename to skills/browser-record/SKILL.md
index 1f9d5de2..33fcadef 100644
--- a/skills/record-and-replay/SKILL.md
+++ b/skills/browser-record/SKILL.md
@@ -1,16 +1,16 @@
 ---
-name: record-and-replay
-description: Turn a recorded human browser flow into a reusable, parameterized task skill. Capture clicks/typing/screenshots plus a full CDP trace on a Browserbase session, then let an agent distill what the human *meant* (collapsing corrections, dropping abandoned actions) into an intent-level SKILL.md that replays against the live page. Use for "show, don't prompt" — record a flow once and reuse it. Triggers on "record this flow", "turn this into a skill", "record and replay", "replay the recording".
+name: browser-record
+description: Record a human browser flow on a Browserbase session and distill it into a reusable, parameterized task skill. Captures clicks/typing/screenshots (plus an optional full CDP trace), then an agent reasons about what the human *meant* — collapsing corrections, dropping abandoned actions — and writes an intent-level SKILL.md that replays against the live page. Use for "show, don't prompt": record a flow once and turn it into a skill. Triggers on "record this flow", "turn this into a skill", "record a browser workflow", "browser record".
 compatibility: "Requires Node 18+ and the browse CLI (`npm install -g browse`), plus `BROWSERBASE_API_KEY` and `BROWSERBASE_PROJECT_ID`. Record uses `@browserbasehq/sdk` + `playwright-core` — run `npm install` in this skill dir. Pairs with the `browser-trace` skill for the full CDP firehose."
 license: MIT
 allowed-tools: Bash, Read, Grep
 ---
 
-# Record & Replay
+# Browser Record
 
 "Show the bug instead of prompting it." Record a human flow once, then turn it
-into a **reusable task skill** an agent can replay — and parameterize — against
-the live page.
+into a **reusable, parameterized task skill** an agent can replay against the live
+page.
 
 The pipeline is **capture wide, reason narrow**:
 
@@ -66,14 +66,26 @@ Read `references/distill.md`, then **act as the teacher agent**: read
 `recording.json` + the screenshots, query the `browser-trace` buckets as needed,
 and reconstruct the *smallest set of intents that explains the session* —
 collapsing corrections, dropping abandoned/undone actions, parameterizing the
-values the user supplied. Write the result as `skills/<task>/SKILL.md`.
+values the user supplied. Write the result as `skills/<task>/`.
 
 Each step's headline is the value the field **committed to** (the acted element's
 `name`), never the keystrokes or a dynamic selector. The committed value is also
 the step's verification check.
 
+### What the generated task skill must contain
+
+- `SKILL.md` — intent steps (shape below).
+- `screenshots/NN-<label>.png` — the committed-state shot for each intent step,
+  curated from the recording and referenced per step. This is the visual oracle.
+- `recording.json` — the raw mechanics, last-resort fallback only.
+
 ### Task skill shape
 
+Each step states the **intent**, names the **recorded target** (the element's
+accessible name/role, and its selector if useful) as a *hint*, explicitly grants
+the agent **agency to use whatever element achieves the intent**, points at the
+screenshot, and gives a verification check.
+
 ```markdown
 ---
 name: <task>
@@ -81,29 +93,25 @@ description: <what it does + when to fire, with triggers>
 license: MIT
 ---
 # <Task>
-Realize each intent against the live UI (don't replay keystrokes). Verify each.
+Realize each intent against the live UI — do NOT replay keystrokes or dynamic
+selectors. The "recorded target" is a hint; if the live page differs, use any
+element that achieves the intent. Verify each step.
 
-Inputs: origin, destination, depart, return
-1. Set "Where from?" = {origin}.   ✅ field reads {origin}
-2. Set "Where to?"   = {destination}.   ✅ reads {destination}, not "Anywhere"
-3. Set dates = {depart}/{return}; confirm Done.
-4. Click "Search".   ✅ results for {origin}→{destination} load.
+Inputs: origin, destination, depart
 
-Fallback: recording.json (mechanics) · <recording>-shots/step-NN.png (oracle)
+1. Set destination = {destination}.
+   Recorded target: combobox "Where to?" (aria/Where to?) → suggestion option.
+   See screenshots/03-destination.png · ✅ field reads {destination}, not "Anywhere".
+...
+Fallback: screenshots/ (oracle) · recording.json (raw mechanics, last resort)
 ```
 
 ## 3. Replay
 
-Replay = **invoke the generated task skill**: the agent realizes each intent via
-`browse`, verifying against committed values / step screenshots. Because it
-replays *intent*, it survives dynamic-id churn and minor layout change.
-
-A deterministic fast path is also available for cheap, reproducible CI checks
-(it executes the recorded selectors directly, with snapshot-ref healing):
-
-```bash
-RR_FILE=/tmp/rec.json RR_HEAL=1 node --env-file=.env scripts/replay.mjs
-```
+Replay = **invoke the generated task skill** like any skill (a natural-language
+request that matches its triggers). The agent realizes each intent via `browse`,
+using the per-step screenshots as the oracle and verifying committed values.
+Because it replays *intent*, it survives dynamic-id churn and minor layout change.
 
 ## Recording shape
 
diff --git a/skills/record-and-replay/evals/evals.json b/skills/browser-record/evals/evals.json
similarity index 71%
rename from skills/record-and-replay/evals/evals.json
rename to skills/browser-record/evals/evals.json
index bb6af04b..5d27e866 100644
--- a/skills/record-and-replay/evals/evals.json
+++ b/skills/browser-record/evals/evals.json
@@ -1,5 +1,5 @@
 {
-  "skill": "record-and-replay",
+  "skill": "browser-record",
   "evals": [
     {
       "prompt": "Record my flight search on Google Flights and turn it into a reusable skill.",
@@ -7,15 +7,11 @@
     },
     {
       "prompt": "Now distill that recording into a task skill.",
-      "expected": "Acts as the teacher agent per references/distill.md: reads recording.json + screenshots, queries the CDP trace as needed, recovers intent (committed values, not keystrokes), and writes a parameterized skills/<task>/SKILL.md."
+      "expected": "Acts as the teacher agent per references/distill.md: reads recording.json + screenshots, queries the CDP trace as needed, recovers intent (committed values, not keystrokes), and writes skills/<task>/ with a parameterized SKILL.md, a curated screenshots/ folder referenced per step, and recording.json as fallback."
     },
     {
       "prompt": "I typed San Francisco, deleted it, then typed Los Angeles — make sure the skill only does Los Angeles.",
       "expected": "Explains that the teacher-agent distiller collapses self-corrections and drops abandoned actions, so the emitted skill sets origin = Los Angeles only; the intermediate San Francisco state is omitted as noise."
-    },
-    {
-      "prompt": "Replay /tmp/rec.json deterministically and tell me which steps passed.",
-      "expected": "Runs the optional fast path scripts/replay.mjs with RR_HEAL=1, reporting the per-step pass/fail scorecard (resolution path: semantic/xpath/css/healed) and the screenshots directory."
     }
   ]
 }
diff --git a/skills/record-and-replay/package.json b/skills/browser-record/package.json
similarity index 83%
rename from skills/record-and-replay/package.json
rename to skills/browser-record/package.json
index e3335deb..e1d1013a 100644
--- a/skills/record-and-replay/package.json
+++ b/skills/browser-record/package.json
@@ -1,5 +1,5 @@
 {
-  "name": "record-and-replay",
+  "name": "browser-record",
   "version": "0.1.0",
   "private": true,
   "type": "module",
diff --git a/skills/record-and-replay/references/distill.md b/skills/browser-record/references/distill.md
similarity index 85%
rename from skills/record-and-replay/references/distill.md
rename to skills/browser-record/references/distill.md
index bca8b83b..d604f8b4 100644
--- a/skills/record-and-replay/references/distill.md
+++ b/skills/browser-record/references/distill.md
@@ -41,10 +41,13 @@ parameterized task skill. Specifically:
 ## Output
 
 Write `skills/<task-name>/`:
-- `SKILL.md` — intent-level, parameterized, with per-step verification (see the
-  shape in the parent SKILL.md).
-- `recording.json` + `<recording>-shots/` — carried along as the mechanics
-  fallback and the visual oracle.
+- `SKILL.md` — intent-level, parameterized, per-step verification. Each step names
+  the **recorded target** (accessible name/role, plus selector if useful) as a
+  *hint*, and explicitly grants the agent agency to use whatever live element
+  achieves the intent — never bind a step to a dynamic id.
+- `screenshots/NN-<label>.png` — the committed-state shot for each intent step,
+  curated from the recording and referenced per step. The visual oracle.
+- `recording.json` — the raw mechanics, carried as a last-resort fallback.
 
 ## Teacher prompt (starting point)
 
diff --git a/skills/record-and-replay/scripts/inject.js b/skills/browser-record/scripts/inject.js
similarity index 100%
rename from skills/record-and-replay/scripts/inject.js
rename to skills/browser-record/scripts/inject.js
diff --git a/skills/record-and-replay/scripts/record.mjs b/skills/browser-record/scripts/record.mjs
similarity index 100%
rename from skills/record-and-replay/scripts/record.mjs
rename to skills/browser-record/scripts/record.mjs
diff --git a/skills/record-and-replay/scripts/replay.mjs b/skills/record-and-replay/scripts/replay.mjs
deleted file mode 100644
index cc46a3ac..00000000
--- a/skills/record-and-replay/scripts/replay.mjs
+++ /dev/null
@@ -1,187 +0,0 @@
-// Replay a recorded flow against a fresh Browserbase session using the `browse` CLI.
-//
-// Engine: every step is executed through deterministic `browse` subcommands
-// (open / click / fill / select / key / eval / mouse). Selection is resolved
-// "highest-confidence first":
-//   1. semantic    — recorded aria/ or text/ selector -> matched to a live
-//                    snapshot ref (survives Google-style dynamic id churn)
-//   2. recorded    — recorded xpath, then css, each VERIFIED with `get visible`
-//                    before acting (browse click reports success even on a
-//                    no-match, so we never trust it blind)
-//   3. heal        — snapshot-match the step's value / prior typed value to a
-//                    ref (this is what rescues unlabeled autocomplete picks)
-//   4. coords      — last resort: `get box` a recorded selector, click center
-// Passes 3-4 are gated behind RR_HEAL=1.
-//
-//   RR_FILE=/tmp/rec.json [RR_HEAL=1] [RR_SHOTS=/tmp/shots] \
-//     node --env-file=.env scripts/replay.mjs
-import { execFileSync } from 'node:child_process';
-import { readFileSync, writeFileSync, mkdirSync } from 'node:fs';
-
-const FILE = process.env.RR_FILE;
-if (!FILE) { console.error('Set RR_FILE=path/to/recording.json'); process.exit(1); }
-const HEAL = process.env.RR_HEAL === '1';
-const SHOTS = process.env.RR_SHOTS || `/tmp/replay-${Date.now()}`;
-const SESSION = process.env.RR_SESSION || `rr-${Date.now()}`;
-mkdirSync(SHOTS, { recursive: true });
-const rec = JSON.parse(readFileSync(FILE, 'utf8'));
-
-// --- browse CLI wrapper -----------------------------------------------------
-// Runs `browse <args> --remote -s <session>`, strips the npm update banner, and
-// best-effort parses the JSON body. Never throws: a failed/empty call returns
-// { ok:false }, which the resolver treats as "this selector didn't resolve".
-function browse(args, { json = true } = {}) {
-  let raw = '';
-  try {
-    raw = execFileSync('browse', [...args, '--remote', '-s', SESSION], {
-      encoding: 'utf8', stdio: ['ignore', 'pipe', 'pipe'], env: process.env,
-    });
-  } catch (e) {
-    raw = (e.stdout || '') + (e.stderr || '');
-    if (!raw) return { ok: false, raw: String(e).slice(0, 160) };
-  }
-  if (!json) return { ok: true, raw };
-  const a = raw.indexOf('{'), b = raw.lastIndexOf('}');
-  if (a === -1 || b <= a) return { ok: false, raw }; // e.g. "Could not find an element..."
-  try { return { ok: true, json: JSON.parse(raw.slice(a, b + 1)), raw }; }
-  catch { return { ok: false, raw }; }
-}
-
-// --- selector handling ------------------------------------------------------
-function classify(sel) {
-  if (sel.startsWith('aria/')) return { kind: 'aria', label: sel.slice(5), sel };
-  if (sel.startsWith('text/')) return { kind: 'text', label: sel.slice(5), sel };
-  if (sel.startsWith('xpath/')) return { kind: 'xpath', value: sel.slice(6), sel: sel.slice(6) };
-  if (sel.startsWith('pierce/')) return { kind: 'css', value: sel.slice(7), sel: sel.slice(7) };
-  return { kind: 'css', value: sel, sel };
-}
-function candidates(step) {
-  const seen = new Set(), out = [];
-  for (const g of step.selectors || []) for (const s of g) {
-    if (typeof s === 'string' && !seen.has(s)) { seen.add(s); out.push(classify(s)); }
-  }
-  return out;
-}
-// `get visible` resolves css/xpath/ref and returns {visible:true|false}; a
-// no-match falls out as ok:false. This is our trustworthy existence probe.
-const resolves = (sel) => { const r = browse(['get', 'visible', sel]); return r.ok && r.json?.visible === true; };
-
-// Match a human label to a live snapshot ref. --compact yields lines like
-// "  [0-41] combobox: Where to?"; we score interactive nodes by name overlap.
-const INTERACTIVE = ['option', 'button', 'link', 'menuitem', 'menuitemradio', 'tab', 'combobox', 'listitem', 'checkbox', 'radio', 'cell', 'gridcell'];
-function snapshotRef(label) {
-  if (!label) return null;
-  const want = label.toLowerCase().trim();
-  const tokens = want.split(/\s+/).filter((t) => t.length > 1);
-  for (const filter of [label, tokens[0]].filter(Boolean)) {
-    const r = browse(['snapshot', '--compact', '--filter', filter]);
-    const tree = r.json?.tree;
-    if (!tree) continue;
-    let best = null, bestScore = 0;
-    for (const line of tree.split('\n')) {
-      const m = line.match(/\[(\d+-\d+)\]\s+([^:\n]+?)(?::\s*(.*))?\s*$/);
-      if (!m) continue;
-      const [, ref, roleRaw, nameRaw] = m;
-      const role = roleRaw.trim().toLowerCase(), name = (nameRaw || '').toLowerCase().trim();
-      if (!name) continue;
-      let score = 0;
-      if (name === want) score = 100;
-      else if (name.startsWith(want)) score = 80;
-      else if (name.includes(want)) score = 60;
-      else if (tokens.some((t) => name.includes(t))) score = 40;
-      if (!score) continue;
-      if (INTERACTIVE.includes(role)) score += 10;
-      if (score > bestScore) { bestScore = score; best = `@${ref}`; }
-    }
-    if (best) return best;
-  }
-  return null;
-}
-
-// --- per-step execution -----------------------------------------------------
-const act = (type, sel, value) =>
-  type === 'change' ? browse(['fill', sel, value ?? '']) : browse(['click', sel]);
-
-function doStep(step, ctx) {
-  const cands = candidates(step);
-  const arias = cands.filter((c) => c.kind === 'aria' || c.kind === 'text');
-  const xpaths = cands.filter((c) => c.kind === 'xpath');
-  const csses = cands.filter((c) => c.kind === 'css');
-
-  // 1. semantic: recorded aria/text label -> live snapshot ref
-  for (const c of arias) {
-    const ref = snapshotRef(c.label);
-    if (ref) { act(step.type, ref, step.value); return { status: 'ok', via: 'semantic', selector: c.sel, ref }; }
-  }
-  // 2. recorded selectors, verified before acting (xpath is anchored; css last)
-  for (const c of [...xpaths, ...csses]) {
-    if (resolves(c.sel)) { act(step.type, c.sel, step.value); return { status: 'ok', via: c.kind, selector: c.sel }; }
-  }
-  if (!HEAL) return { status: 'failed', reason: 'no selector matched (try RR_HEAL=1)' };
-
-  // 3. heal: match this step's value, or the value typed just before (rescues
-  //    autocomplete suggestion clicks whose only recorded selector was a
-  //    dynamic #id) to a snapshot ref.
-  for (const label of [step.value, ctx.lastValue].filter(Boolean)) {
-    const ref = snapshotRef(label);
-    if (ref) { act(step.type, ref, step.value); return { status: 'healed', via: 'value-snapshot', label, ref }; }
-  }
-  // 4. coords: a recorded selector that exists but wasn't "visible" — click its box center
-  for (const c of [...xpaths, ...csses]) {
-    const r = browse(['get', 'box', c.sel]);
-    const box = r.json;
-    if (box && typeof box.x === 'number') {
-      browse(['mouse', 'click', String(Math.round(box.x)), String(Math.round(box.y))]);
-      return { status: 'healed', via: 'coords', selector: c.sel };
-    }
-  }
-  return { status: 'failed', reason: 'unresolvable after heal' };
-}
-
-// --- run --------------------------------------------------------------------
-const sleep = (ms) => Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, ms);
-const results = [];
-let i = 0, ctx = { lastValue: null };
-
-for (const step of rec.steps) {
-  i++;
-  try {
-    if (step.type === 'navigate') {
-      const r = browse(['open', step.url]);
-      results.push({ step: i, type: 'navigate', status: r.ok ? 'ok' : 'error', url: step.url });
-    } else if (step.type === 'scroll') {
-      browse(['eval', `window.scrollBy(${step.x || 0}, ${step.y || 0}); 'ok'`]);
-      results.push({ step: i, type: 'scroll', status: 'ok' });
-    } else if (step.type === 'keyDown' || step.type === 'keyUp') {
-      if (step.type === 'keyDown') browse(['key', step.key]);
-      results.push({ step: i, type: step.type, status: 'ok', key: step.key });
-    } else if (step.type === 'click' || step.type === 'change') {
-      results.push({ step: i, type: step.type, ...doStep(step, ctx) });
-      if (step.type === 'change' && step.value) ctx.lastValue = step.value;
-    } else {
-      results.push({ step: i, type: step.type, status: 'skipped' });
-    }
-  } catch (e) {
-    results.push({ step: i, type: step.type, status: 'error', error: String(e).slice(0, 200) });
-  }
-  // autocomplete/menus need a beat to render before the next step snapshots
-  sleep(step.type === 'change' ? 900 : 400);
-  browse(['screenshot', '-p', `${SHOTS}/step-${String(i).padStart(2, '0')}-${step.type}.png`]);
-}
-
-// best-effort live-view link (newest running session is the one we just drove)
-let liveView = null;
-try {
-  const ls = browse(['cloud', 'sessions', 'list', '--status', 'RUNNING', '--json'], { json: false });
-  const id = JSON.parse(ls.raw.slice(ls.raw.indexOf('['), ls.raw.lastIndexOf(']') + 1))[0]?.id;
-  if (id) {
-    const dbg = browse(['cloud', 'sessions', 'debug', id]);
-    liveView = dbg.json?.debuggerFullscreenUrl || null;
-  }
-} catch { /* live view is optional */ }
-
-try { browse(['stop'], { json: false }); } catch { /* daemon may already be gone */ }
-
-const ok = results.filter((r) => r.status === 'ok' || r.status === 'healed').length;
-console.log(JSON.stringify({ file: FILE, session: SESSION, liveView, screenshots: SHOTS, results }, null, 2));
-console.log(`\nReplay: ${ok}/${results.length} steps succeeded. Screenshots -> ${SHOTS}`);