diff --git a/.github/workflows/read-perf.yml b/.github/workflows/read-perf.yml new file mode 100644 index 00000000000000..b28011efce8bd7 --- /dev/null +++ b/.github/workflows/read-perf.yml @@ -0,0 +1,77 @@ +name: Read perf + +on: + pull_request: + paths: + - 'src/**' + - 'examples/jsm/**' + - 'package.json' + - 'utils/build/**' + - 'test/e2e/perf-**' + +# Read-only: safely operates on untrusted PR code. +# The matching report-perf.yml runs with pull-requests:write and posts the comment. +permissions: + contents: read + +jobs: + read-perf: + name: Perf regression + runs-on: ubuntu-latest + timeout-minutes: 15 + steps: + - name: Git checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + fetch-depth: 0 # needed for `git worktree add ` + - name: Install Node + uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6 + with: + node-version: 24 + cache: 'npm' + - name: Install Vulkan drivers and xvfb + run: | + sudo apt-get update + sudo apt-get install -y mesa-vulkan-drivers xvfb + - name: Install dependencies + run: npm ci + + - name: === Perf regression === + id: perf + # Exit 2 = regression detected (we still want to post the comment). + # Exit 1 = crash. continue-on-error lets both cases proceed to the + # artifact upload step so the comment still lands on regressions; + # the overall workflow status still reflects the failure, so GitHub + # shows a red check on regressions or crashes. + continue-on-error: true + run: | + xvfb-run -a node test/e2e/perf-regression-orchestrator.js \ + webgpu_backdrop_water \ + --baseline=${{ github.event.pull_request.base.sha }} + + - name: Attach PR number to summary + # Always run. If the orchestrator crashed we won't have a summary — + # skip gracefully; the crash is already visible in the perf step log. + if: always() + run: | + SUMMARY=test/e2e/perf-regression-webgpu_backdrop_water.summary.json + if [ ! 
-f "$SUMMARY" ]; then + echo "::warning::No summary JSON found — orchestrator likely crashed. See the 'Perf regression' step log." + exit 0 + fi + PR=${{ github.event.pull_request.number }} + node -e " + const fs=require('fs'); + const p='$SUMMARY'; + const s=JSON.parse(fs.readFileSync(p,'utf8')); + s.pr=$PR; + fs.writeFileSync(p, JSON.stringify(s)); + " + + - name: Upload artifact + if: always() + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7 + with: + name: perf-summary + path: test/e2e/perf-regression-*.summary.json + if-no-files-found: ignore diff --git a/.github/workflows/report-perf.yml b/.github/workflows/report-perf.yml new file mode 100644 index 00000000000000..ec98bf297f3c60 --- /dev/null +++ b/.github/workflows/report-perf.yml @@ -0,0 +1,92 @@ +name: Report perf + +on: + workflow_run: + workflows: ["Read perf"] + types: + - completed + +# This workflow needs "pull-requests: write" to comment on the PR. It does NOT +# check out PR code — it only reads the artifact produced by Read perf. 
+# Reference: https://securitylab.github.com/research/github-actions-preventing-pwn-requests/ +permissions: + pull-requests: write + +jobs: + report-perf: + name: Comment on PR + runs-on: ubuntu-latest + if: github.event.workflow_run.event == 'pull_request' && + github.event.workflow_run.conclusion == 'success' + steps: + # Using actions/download-artifact doesn't work across workflow_run + # https://github.com/actions/download-artifact/issues/60 + - name: Download artifact + uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 + id: download + with: + result-encoding: string + script: | + const fs = require('fs/promises'); + const artifacts = await github.rest.actions.listWorkflowRunArtifacts({ + owner: context.repo.owner, + repo: context.repo.repo, + run_id: context.payload.workflow_run.id, + }); + const match = artifacts.data.artifacts.find( a => a.name === 'perf-summary' ); + if (!match) throw new Error('perf-summary artifact not found'); + const download = await github.rest.actions.downloadArtifact({ + owner: context.repo.owner, + repo: context.repo.repo, + artifact_id: match.id, + archive_format: 'zip', + }); + await fs.writeFile('perf-summary.zip', Buffer.from(download.data)); + await exec.exec('unzip -o perf-summary.zip -d perf-summary'); + const files = await fs.readdir('perf-summary'); + const jsonFile = files.find( f => f.endsWith( '.summary.json' ) ); + if (!jsonFile) throw new Error('summary.json not found in artifact'); + const json = await fs.readFile('perf-summary/' + jsonFile, 'utf8'); + return JSON.stringify({ jsonFile, json }); + + # Need the formatter script from the base branch. We explicitly check out + # the base ref (never the PR head) so malicious PRs can't substitute + # their own formatter. 
+ - name: Git checkout (base ref) + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + ref: ${{ github.event.workflow_run.pull_requests[0].base.ref || 'dev' }} + + - name: Format comment + id: format + run: | + mkdir -p .perf + node -e " + const fs=require('fs'); + const { jsonFile, json } = ${{ steps.download.outputs.result }}; + fs.writeFileSync('.perf/summary.json', json); + console.log('PR=' + JSON.parse(json).pr); + " | tee -a $GITHUB_OUTPUT + node test/e2e/perf-format-comment.js .perf/summary.json > .perf/comment.md + # Expose body via an env var for the create-or-update-comment step. + { + echo 'BODY<> $GITHUB_ENV + + - name: Find existing comment + uses: peter-evans/find-comment@b30e6a3c0ed37e7c023ccd3f1db5c6c0b0c23aad # v4 + id: find-comment + with: + issue-number: ${{ steps.format.outputs.PR }} + comment-author: 'github-actions[bot]' + body-includes: 'Perf regression' + + - name: Comment on PR + uses: peter-evans/create-or-update-comment@e8674b075228eee787fea43ef493e45ece1004c9 # v5 + with: + issue-number: ${{ steps.format.outputs.PR }} + comment-id: ${{ steps.find-comment.outputs.comment-id }} + edit-mode: replace + body: ${{ env.BODY }} diff --git a/test/e2e/perf-collector.js b/test/e2e/perf-collector.js new file mode 100644 index 00000000000000..541467eea47958 --- /dev/null +++ b/test/e2e/perf-collector.js @@ -0,0 +1,449 @@ +// Shared perf-collection primitives used by perf-regression.js (single-run) +// and perf-regression-orchestrator.js (multi-iteration, persistent browser). 
+// +// Exports: +// injectedClientJS — function serialized into evaluateOnNewDocument +// attachPerfInjection(page) +// collectIteration(page, { url, warmup, duration, heapSampleMs }) +// buildSummary(raw, before, after, meta) + +export async function attachPerfInjection( page ) { + + await page.evaluateOnNewDocument( `(${ injectedClientJS.toString() })();` ); + +} + +export async function collectIteration( page, { url, warmup, duration, heapSampleMs = 50, onLog = () => {} } ) { + + onLog( `loading ${ url }` ); + // `load` fires once the document + all sub-resources have finished loading + // (or errored out). Safer than `networkidle2` for WebGPU examples that + // keep incidental connections alive. + await page.goto( url, { waitUntil: 'load', timeout: 60_000 } ); + + onLog( `warmup ${ warmup }ms` ); + await new Promise( r => setTimeout( r, warmup ) ); + + onLog( `sampling ${ duration }ms` ); + await page.evaluate( ( ms ) => window.__perfStart( ms ), heapSampleMs ); + + const before = await page.evaluate( () => window.__wgpuSnapshot() ); + await new Promise( r => setTimeout( r, duration ) ); + const after = await page.evaluate( () => window.__wgpuSnapshot() ); + const raw = await page.evaluate( () => window.__perfStop() ); + + // Sanity check: if zero frames were recorded the renderer never started. + // Common causes: WebGPU adapter request failed (Linux headless GPU path), + // a page error before any rAF tick, or a misconfigured example. Fail loudly + // here rather than silently producing a summary of zeros. + if ( raw.frames.length === 0 ) { + + throw new Error( `no frames recorded during ${ duration }ms sample window — the renderer did not start. Check console errors above (WebGPU adapter null? WebGL context failed?).` ); + + } + + return { raw, before, after }; + +} + +export function buildSummary( raw, before, after, meta ) { + + const { label, example, durationMs, warmupMs, gcDropThresholdPct = 3 } = meta; + + const pct = ( arr, p ) => arr.length ? 
arr[ Math.min( arr.length - 1, Math.floor( arr.length * p / 100 ) ) ] : 0; + const mean = arr => arr.length ? arr.reduce( ( s, v ) => s + v, 0 ) / arr.length : 0; + + const frames = raw.frames.slice().sort( ( a, b ) => a - b ); + const used = raw.heap.map( h => h.used ); + + const gcEvents = []; + for ( let i = 1; i < raw.heap.length; i ++ ) { + + const drop = raw.heap[ i - 1 ].used - raw.heap[ i ].used; + if ( drop > raw.heap[ i - 1 ].used * ( gcDropThresholdPct / 100 ) ) gcEvents.push( { t: raw.heap[ i ].t, drop } ); + + } + + const cmdSubmits = after.cmdSubmits - before.cmdSubmits; + const renderPasses = after.renderPasses - before.renderPasses; + const computePasses = after.computePasses - before.computePasses; + const framesInWindow = raw.frames.length; + + return { + label, + example, + durationMs, + warmupMs, + frames: framesInWindow, + fps: framesInWindow ? 1000 / mean( raw.frames ) : 0, + frameTimeMs: { + mean: mean( frames ), + p50: pct( frames, 50 ), + p95: pct( frames, 95 ), + p99: pct( frames, 99 ), + max: frames[ frames.length - 1 ] || 0 + }, + jsHeapBytes: { + samples: used.length, + min: used.length ? Math.min( ...used ) : 0, + max: used.length ? 
Math.max( ...used ) : 0, + mean: mean( used ), + start: used[ 0 ] || 0, + end: used[ used.length - 1 ] || 0, + growth: ( used[ used.length - 1 ] || 0 ) - ( used[ 0 ] || 0 ) + }, + gc: { + events: gcEvents.length, + totalFreedBytes: gcEvents.reduce( ( s, e ) => s + e.drop, 0 ), + eventsPerSec: gcEvents.length / ( durationMs / 1000 ) + }, + webgpu: { + liveBuffersBefore: before.liveBuffers, + liveBuffersAfter: after.liveBuffers, + liveBuffersDelta: after.liveBuffers - before.liveBuffers, + liveTexturesBefore: before.liveTextures, + liveTexturesAfter: after.liveTextures, + liveTexturesDelta: after.liveTextures - before.liveTextures, + estimatedVRAMBefore: before.estimatedVRAMBytes, + estimatedVRAMAfter: after.estimatedVRAMBytes, + estimatedVRAMDelta: after.estimatedVRAMBytes - before.estimatedVRAMBytes, + shaderModules: after.shaderModules, + renderPipelines: after.renderPipelines, + computePipelines: after.computePipelines, + bindGroupsTotalBefore: before.bindGroupsTotal, + bindGroupsTotalAfter: after.bindGroupsTotal, + bindGroupsTotalDelta: after.bindGroupsTotal - before.bindGroupsTotal, + samplersBefore: before.samplers, + samplersAfter: after.samplers, + samplersDelta: after.samplers - before.samplers, + cmdSubmits, + renderPasses, + computePasses, + cmdSubmitsPerFrame: framesInWindow ? cmdSubmits / framesInWindow : 0, + renderPassesPerFrame: framesInWindow ? renderPasses / framesInWindow : 0, + computePassesPerFrame: framesInWindow ? computePasses / framesInWindow : 0, + errors: after.errors - before.errors + } + }; + +} + +// --------------------------------------------------------------------------- +// Client-side injection. Installed via evaluateOnNewDocument so it wraps +// GPUDevice on first acquire. Safe to install once per browser context. 
+// --------------------------------------------------------------------------- + +export function injectedClientJS() { + + if ( window.__perfInstalled ) return; + window.__perfInstalled = true; + + const stats = { + liveBuffers: 0, + liveTextures: 0, + estimatedVRAMBytes: 0, + shaderModules: 0, + renderPipelines: 0, + computePipelines: 0, + bindGroupsTotal: 0, + samplers: 0, + cmdSubmits: 0, + renderPasses: 0, + computePasses: 0, + errors: 0 + }; + + window.__wgpuSnapshot = () => ( { ...stats } ); + + function formatBytesPerPixel( fmt ) { + + if ( ! fmt ) return 4; + + if ( fmt.endsWith( '32float' ) || fmt.endsWith( '32uint' ) || fmt.endsWith( '32sint' ) ) { + + const ch = fmt.startsWith( 'rgba' ) ? 4 : fmt.startsWith( 'rg' ) ? 2 : 1; + return ch * 4; + + } + + if ( fmt.endsWith( '16float' ) || fmt.endsWith( '16uint' ) || fmt.endsWith( '16sint' ) || fmt.endsWith( '16unorm' ) || fmt.endsWith( '16snorm' ) ) { + + const ch = fmt.startsWith( 'rgba' ) ? 4 : fmt.startsWith( 'rg' ) ? 2 : 1; + return ch * 2; + + } + + if ( fmt.indexOf( 'depth24plus-stencil8' ) >= 0 ) return 4; + if ( fmt.indexOf( 'depth32float-stencil8' ) >= 0 ) return 5; + if ( fmt.indexOf( 'depth24plus' ) >= 0 ) return 4; + if ( fmt.indexOf( 'depth16unorm' ) >= 0 ) return 2; + if ( fmt.indexOf( 'depth32float' ) >= 0 ) return 4; + if ( fmt.indexOf( 'stencil8' ) >= 0 ) return 1; + + if ( fmt.startsWith( 'bc' ) || fmt.startsWith( 'etc' ) || fmt.startsWith( 'astc' ) ) return 1; + + if ( fmt.startsWith( 'rgba' ) || fmt.startsWith( 'bgra' ) ) return 4; + if ( fmt.startsWith( 'rg' ) ) return 2; + return 1; + + } + + function estimateTextureBytes( desc ) { + + const w = ( desc.size && ( desc.size.width ?? desc.size[ 0 ] ) ) || 0; + const h = ( desc.size && ( desc.size.height ?? desc.size[ 1 ] ) ) || 1; + const d = ( desc.size && ( desc.size.depthOrArrayLayers ?? 
desc.size[ 2 ] ) ) || 1; + const bpp = formatBytesPerPixel( desc.format || 'rgba8unorm' ); + const mips = desc.mipLevelCount || 1; + const samples = desc.sampleCount || 1; + let bytes = 0; + for ( let m = 0; m < mips; m ++ ) { + + bytes += Math.max( 1, w >> m ) * Math.max( 1, h >> m ) * d * bpp; + + } + + return bytes * samples; + + } + + function wrapDevice( device ) { + + if ( device.__perfWrapped ) return; + device.__perfWrapped = true; + + const origCreateBuffer = device.createBuffer.bind( device ); + device.createBuffer = function ( desc ) { + + const size = desc.size || 0; + const buf = origCreateBuffer( desc ); + stats.liveBuffers ++; + stats.estimatedVRAMBytes += size; + const origDestroy = buf.destroy.bind( buf ); + let destroyed = false; + buf.destroy = function () { + + if ( ! destroyed ) { + + destroyed = true; + stats.liveBuffers = Math.max( 0, stats.liveBuffers - 1 ); + stats.estimatedVRAMBytes = Math.max( 0, stats.estimatedVRAMBytes - size ); + + } + + return origDestroy(); + + }; + + return buf; + + }; + + const origCreateTexture = device.createTexture.bind( device ); + device.createTexture = function ( desc ) { + + const bytes = estimateTextureBytes( desc ); + const tex = origCreateTexture( desc ); + stats.liveTextures ++; + stats.estimatedVRAMBytes += bytes; + const origDestroy = tex.destroy.bind( tex ); + let destroyed = false; + tex.destroy = function () { + + if ( ! destroyed ) { + + destroyed = true; + stats.liveTextures = Math.max( 0, stats.liveTextures - 1 ); + stats.estimatedVRAMBytes = Math.max( 0, stats.estimatedVRAMBytes - bytes ); + + } + + return origDestroy(); + + }; + + return tex; + + }; + + const origCreateShaderModule = device.createShaderModule.bind( device ); + device.createShaderModule = function ( desc ) { + + stats.shaderModules ++; + return origCreateShaderModule( desc ); + + }; + + [ 'createRenderPipeline', 'createRenderPipelineAsync' ].forEach( name => { + + if ( ! 
device[ name ] ) return; + const orig = device[ name ].bind( device ); + device[ name ] = function ( desc ) { + + stats.renderPipelines ++; + return orig( desc ); + + }; + + } ); + + [ 'createComputePipeline', 'createComputePipelineAsync' ].forEach( name => { + + if ( ! device[ name ] ) return; + const orig = device[ name ].bind( device ); + device[ name ] = function ( desc ) { + + stats.computePipelines ++; + return orig( desc ); + + }; + + } ); + + const origCreateBindGroup = device.createBindGroup.bind( device ); + device.createBindGroup = function ( desc ) { + + stats.bindGroupsTotal ++; + return origCreateBindGroup( desc ); + + }; + + if ( device.createSampler ) { + + const origCreateSampler = device.createSampler.bind( device ); + device.createSampler = function ( desc ) { + + stats.samplers ++; + return origCreateSampler( desc ); + + }; + + } + + const origCreateCE = device.createCommandEncoder.bind( device ); + device.createCommandEncoder = function ( desc ) { + + const enc = origCreateCE( desc ); + const origBeginRP = enc.beginRenderPass.bind( enc ); + enc.beginRenderPass = function ( rpDesc ) { + + stats.renderPasses ++; + return origBeginRP( rpDesc ); + + }; + + const origBeginCP = enc.beginComputePass.bind( enc ); + enc.beginComputePass = function ( cpDesc ) { + + stats.computePasses ++; + return origBeginCP( cpDesc ); + + }; + + return enc; + + }; + + if ( device.queue ) { + + const origSubmit = device.queue.submit.bind( device.queue ); + device.queue.submit = function ( buffers ) { + + stats.cmdSubmits ++; + return origSubmit( buffers ); + + }; + + } + + try { + + device.addEventListener( 'uncapturederror', () => { + + stats.errors ++; + + } ); + + } catch ( _ ) {} + + } + + if ( navigator.gpu && navigator.gpu.__proto__ ) { + + const origRequestAdapter = navigator.gpu.requestAdapter.bind( navigator.gpu ); + navigator.gpu.requestAdapter = async function ( ...args ) { + + const adapter = await origRequestAdapter( ...args ); + if ( adapter && ! 
adapter.__perfWrapped ) { + + adapter.__perfWrapped = true; + const origRequestDevice = adapter.requestDevice.bind( adapter ); + adapter.requestDevice = async function ( ...devArgs ) { + + const device = await origRequestDevice( ...devArgs ); + wrapDevice( device ); + return device; + + }; + + } + + return adapter; + + }; + + } + + // --- JS heap + frame-time sampler --- + + window.__perf = { frames: [], heap: [], startT: null, lastRAF: null, heapTimerId: null }; + + const origRAF = window.requestAnimationFrame.bind( window ); + window.requestAnimationFrame = ( cb ) => origRAF( ( t ) => { + + if ( window.__perf.startT !== null ) { + + const now = performance.now(); + if ( window.__perf.lastRAF !== null ) window.__perf.frames.push( now - window.__perf.lastRAF ); + window.__perf.lastRAF = now; + + } + + cb( t ); + + } ); + + window.__perfStart = ( heapSampleMs ) => { + + window.__perf.startT = performance.now(); + window.__perf.lastRAF = null; + window.__perf.frames.length = 0; + window.__perf.heap.length = 0; + window.__perf.heapTimerId = setInterval( () => { + + if ( performance.memory ) { + + window.__perf.heap.push( { + t: performance.now(), + used: performance.memory.usedJSHeapSize, + total: performance.memory.totalJSHeapSize + } ); + + } + + }, heapSampleMs || 50 ); + + }; + + window.__perfStop = () => { + + clearInterval( window.__perf.heapTimerId ); + window.__perf.startT = null; + return { + frames: window.__perf.frames.slice(), + heap: window.__perf.heap.slice() + }; + + }; + +} diff --git a/test/e2e/perf-format-comment.js b/test/e2e/perf-format-comment.js new file mode 100644 index 00000000000000..06f231c915271e --- /dev/null +++ b/test/e2e/perf-format-comment.js @@ -0,0 +1,93 @@ +// Format a perf-regression summary JSON into a GitHub-flavored markdown +// comment matching the style of the Bundle size / Tree-shaking reports. 
+// +// Usage: +// node test/e2e/perf-format-comment.js > comment.md + +import * as fs from 'node:fs/promises'; + +const [ , , jsonPath ] = process.argv; + +if ( ! jsonPath ) { + + console.error( 'usage: node test/e2e/perf-format-comment.js ' ); + process.exit( 1 ); + +} + +const s = JSON.parse( await fs.readFile( jsonPath, 'utf8' ) ); + +const mb = v => ( v / 1024 / 1024 ).toFixed( 2 ) + ' MB'; +const ms = v => v.toFixed( 2 ) + ' ms'; +const num = v => Number.isInteger( v ) ? String( v ) : v.toFixed( 2 ); + +function fmtPct( pct ) { + + if ( ! Number.isFinite( pct ) ) return 'new'; + const rounded = pct.toFixed( 1 ); + // Anything that rounds to exactly zero is shown as a dot — "+0.0%" + // adds visual noise for the common case of an identical metric. + if ( rounded === '0.0' || rounded === '-0.0' ) return '·'; + const sign = pct >= 0 ? '+' : ''; + return `${ sign }${ rounded }%`; + +} + +function verdictBadge( v ) { + + if ( v === 'REGRESS' ) return '🔴 **regress**'; + if ( v === 'improve' ) return '🟢 improve'; + return 'stable'; + +} + +// Whitelist + display order. Only these rows appear in the PR comment; +// the JSON artifact still contains the full metric set for anyone digging in. +const DISPLAY = [ + { name: 'fps', label: 'FPS (uncapped)', fmt: num }, + { name: 'frameTimeMs.p50', label: 'Frame time (median)', fmt: ms }, + { name: 'jsHeapBytes.mean', label: 'JS heap (mean)', fmt: mb }, + { name: 'webgpu.estimatedVRAMAfter', label: 'WebGPU VRAM', fmt: mb }, + { name: 'webgpu.cmdSubmitsPerFrame', label: 'Submits/frame', fmt: num } +]; + +const rowsByName = new Map( s.rows.map( r => [ r.name, r ] ) ); + +// Regression state only reflects gated metrics (row.isRegression is already +// false for non-gated rows by construction in the orchestrator). +const regressions = s.rows.filter( r => r.isRegression ); +const heading = regressions.length + ? 
`### 🔴 Perf regression (${ s.example })` + : `### 🟢 Perf regression (${ s.example })`; + +let out = ''; +out += `${ heading }\n\n`; +out += `_Median across ${ s.iterationsMeasured } measured iteration${ s.iterationsMeasured === 1 ? '' : 's' }`; +out += ` (1 warmup dropped${ s.stoppedEarly ? ', early-stopped' : '' }), `; +out += `gated at k=${ s.k }·MAD. Lavapipe (software WebGPU), vsync disabled — FPS reflects renderer throughput, not display refresh._\n\n`; + +out += `| Metric | Baseline | Candidate | Δ | Verdict |\n`; +out += `|:--|:-:|:-:|:-:|:-:|\n`; + +for ( const item of DISPLAY ) { + + const row = rowsByName.get( item.name ); + if ( ! row ) continue; + const label = row.gate === false ? `${ item.label } (info)` : item.label; + const baseStr = item.fmt( row.b.median ); + const candStr = item.fmt( row.c.median ); + const pctStr = fmtPct( row.pct ); + // Non-gated rows never show the red regress badge — only neutral indicators. + const badge = row.gate === false + ? ( row.verdict === 'improve' ? '🟢 improve' : 'info' ) + : verdictBadge( row.verdict ); + out += `| ${ label } | ${ baseStr } | ${ candStr } | ${ pctStr } | ${ badge } |\n`; + +} + +out += `\n`; +out += `Baseline: \`${ s.baselineRef }\` @ \`${ s.baselineSha.slice( 0, 8 ) }\``; +out += ` · Candidate: \`${ s.candidateSha.slice( 0, 8 ) }\`${ s.candidateDirty ? ' (uncommitted)' : '' }`; +out += ` · Duration: ${ s.duration }ms · Warmup: ${ s.warmup }ms\n`; + +process.stdout.write( out ); diff --git a/test/e2e/perf-regression-compare.js b/test/e2e/perf-regression-compare.js new file mode 100644 index 00000000000000..dcc845fdc51f94 --- /dev/null +++ b/test/e2e/perf-regression-compare.js @@ -0,0 +1,127 @@ +// Compare two perf-regression JSON runs and print a clean A/B table. 
+// +// Usage: +// node test/e2e/perf-regression-compare.js +// +// Reads: +// test/e2e/perf-regression--.json +// test/e2e/perf-regression--.json + +import * as fs from 'node:fs/promises'; + +const [ , , baselineLabel, candidateLabel, example ] = process.argv; + +if ( ! baselineLabel || ! candidateLabel || ! example ) { + + console.error( 'usage: node test/e2e/perf-regression-compare.js ' ); + process.exit( 1 ); + +} + +const baseline = JSON.parse( await fs.readFile( `test/e2e/perf-regression-${ baselineLabel }-${ example }.json`, 'utf8' ) ); +const candidate = JSON.parse( await fs.readFile( `test/e2e/perf-regression-${ candidateLabel }-${ example }.json`, 'utf8' ) ); + +const mb = v => v / 1024 / 1024; +const pad = ( s, n ) => String( s ).padStart( n ); + +function fmtPct( base, cand ) { + + if ( base === cand ) return '·'; + if ( base === 0 ) return 'new'; + return ( ( cand - base ) / Math.abs( base ) * 100 ).toFixed( 1 ) + '%'; + +} + +function fmt( v, kind ) { + + if ( typeof v !== 'number' || ! Number.isFinite( v ) ) return String( v ); + if ( kind === 'bytes-mb' ) return mb( v ).toFixed( 2 ); + if ( kind === 'ms' ) return v.toFixed( 2 ); + if ( kind === 'int' ) return String( Math.round( v ) ); + return v.toFixed( 2 ); + +} + +function arrow( base, cand ) { + + if ( base === cand ) return '·'; + return cand < base ? '▼' : '▲'; + +} + +function row( name, path, kind = 'num' ) { + + const b = path.split( '.' ).reduce( ( a, k ) => a && a[ k ], baseline ); + const c = path.split( '.' ).reduce( ( a, k ) => a && a[ k ], candidate ); + const delta = c - b; + const deltaStr = ( delta >= 0 ? 
'+' : '' ) + fmt( delta, kind ); + console.log( + pad( name, 36 ), ' | ', + pad( fmt( b, kind ), 12 ), ' | ', + pad( fmt( c, kind ), 12 ), ' | ', + pad( deltaStr + ' ' + arrow( b, c ), 14 ), ' | ', + pad( fmtPct( b, c ), 8 ) + ); + +} + +function section( title ) { + + console.log( '\n-- ' + title + ' --' ); + +} + +console.log( `\nperf-regression: ${ example }` ); +console.log( ` baseline: ${ baselineLabel }` ); +console.log( ` candidate: ${ candidateLabel }` ); +console.log( ` duration: ${ baseline.durationMs } ms (baseline), ${ candidate.durationMs } ms (candidate)\n` ); + +console.log( pad( 'metric', 36 ), ' | ', pad( baselineLabel, 12 ), ' | ', pad( candidateLabel, 12 ), ' | ', pad( 'Δ', 14 ), ' | ', pad( 'Δ %', 8 ) ); +console.log( '-'.repeat( 36 ), '-+-', '-'.repeat( 12 ), '-+-', '-'.repeat( 12 ), '-+-', '-'.repeat( 14 ), '-+-', '-'.repeat( 8 ) ); + +section( 'Frame timing' ); +row( 'fps (rAF)', 'fps' ); +row( 'mean (ms)', 'frameTimeMs.mean', 'ms' ); +row( 'p50 (ms)', 'frameTimeMs.p50', 'ms' ); +row( 'p95 (ms)', 'frameTimeMs.p95', 'ms' ); +row( 'p99 (ms)', 'frameTimeMs.p99', 'ms' ); +row( 'max (ms)', 'frameTimeMs.max', 'ms' ); + +section( 'JS heap' ); +row( 'min (MB)', 'jsHeapBytes.min', 'bytes-mb' ); +row( 'mean (MB)', 'jsHeapBytes.mean', 'bytes-mb' ); +row( 'max (MB)', 'jsHeapBytes.max', 'bytes-mb' ); +row( 'growth (MB)', 'jsHeapBytes.growth', 'bytes-mb' ); + +section( 'GC' ); +row( 'events', 'gc.events', 'int' ); +row( 'events/s', 'gc.eventsPerSec' ); +row( 'bytes freed (MB)', 'gc.totalFreedBytes', 'bytes-mb' ); + +section( 'WebGPU VRAM (estimated)' ); +row( 'buffers+textures before (MB)', 'webgpu.estimatedVRAMBefore', 'bytes-mb' ); +row( 'buffers+textures after (MB)', 'webgpu.estimatedVRAMAfter', 'bytes-mb' ); +row( 'Δ over window (MB)', 'webgpu.estimatedVRAMDelta', 'bytes-mb' ); + +section( 'WebGPU resources (delta over window)' ); +row( 'live buffers Δ', 'webgpu.liveBuffersDelta', 'int' ); +row( 'live textures Δ', 'webgpu.liveTexturesDelta', 'int' ); 
+row( 'bind groups (cumulative) Δ', 'webgpu.bindGroupsTotalDelta', 'int' ); +row( 'samplers Δ', 'webgpu.samplersDelta', 'int' ); + +section( 'WebGPU resources (totals)' ); +row( 'shader modules', 'webgpu.shaderModules', 'int' ); +row( 'render pipelines', 'webgpu.renderPipelines', 'int' ); +row( 'compute pipelines', 'webgpu.computePipelines', 'int' ); +row( 'live buffers after', 'webgpu.liveBuffersAfter', 'int' ); +row( 'live textures after', 'webgpu.liveTexturesAfter', 'int' ); + +section( 'WebGPU command rate (per frame)' ); +row( 'submits/frame', 'webgpu.cmdSubmitsPerFrame' ); +row( 'render passes/frame', 'webgpu.renderPassesPerFrame' ); +row( 'compute passes/frame', 'webgpu.computePassesPerFrame' ); + +section( 'Errors' ); +row( 'uncaptured WebGPU errors', 'webgpu.errors', 'int' ); + +console.log(); diff --git a/test/e2e/perf-regression-orchestrator.js b/test/e2e/perf-regression-orchestrator.js new file mode 100644 index 00000000000000..34e9124e38ef1f --- /dev/null +++ b/test/e2e/perf-regression-orchestrator.js @@ -0,0 +1,598 @@ +// Perf regression orchestrator. +// +// Usage: +// node test/e2e/perf-regression-orchestrator.js \ +// [--baseline=dev] [--min-iterations=4] [--max-iterations=5] \ +// [--duration=8000] [--warmup=3000] [--port-base=1240] [--k=3] \ +// [--force-build] [--no-build] +// +// Example: +// node test/e2e/perf-regression-orchestrator.js _perf_backdrop_water_noinspector +// +// Candidate = the live working tree (whatever you've got checked out, including +// uncommitted changes). Baseline = a detached worktree pinned at --baseline. +// +// Speed-oriented design: +// - Parallel baseline-worktree setup + builds (runs concurrently with candidate build). +// - Build-SHA cache: reruns against the same baseline SHA skip `npm run build`. +// - Persistent browsers (one per side) + in-process servers (one per root). +// Only a new Page is created per iteration — no Chromium relaunch, no Node spawn. 
+// - Adaptive iteration count: fast-path exit at n=1/n=2, MAD gate at n>=3. +// - Iteration 1 per side is warmup (cold JIT / pipeline cache) and dropped. +// +// Stability knobs (all carried over from test/e2e/puppeteer.js): +// - Lavapipe software WebGPU via VK_DRIVER_FILES. +// - Shared CI flag set. +// - Deterministic Math.random seed (no RAF / now stub — we need real timing). +// - Interleaved iterations (base, cand, base, cand, …) to cancel drift. + +import puppeteer from 'puppeteer'; +import { spawn } from 'node:child_process'; +import * as fs from 'node:fs/promises'; +import * as path from 'node:path'; +import * as os from 'node:os'; +import { createServer } from '../../utils/server.js'; +import { attachPerfInjection, collectIteration, buildSummary } from './perf-collector.js'; + +// Surface any unhandled rejection with a clear stack and a distinctive prefix, +// so CI logs point at the real cause instead of just "Process completed with +// exit code 1" and a missing summary file downstream. +process.on( 'unhandledRejection', ( reason ) => { + + console.error( '\n[orch] FATAL unhandled rejection:' ); + console.error( reason && reason.stack ? reason.stack : reason ); + process.exit( 1 ); + +} ); +process.on( 'uncaughtException', ( err ) => { + + console.error( '\n[orch] FATAL uncaught exception:' ); + console.error( err.stack || err ); + process.exit( 1 ); + +} ); + +// --- args ------------------------------------------------------------------- + +const argv = process.argv.slice( 2 ); +const positional = argv.filter( a => ! a.startsWith( '--' ) ); +const flags = Object.fromEntries( argv.filter( a => a.startsWith( '--' ) ) + .map( a => a.replace( /^--/, '' ).split( '=' ) ) + .map( ( [ k, v ] ) => [ k, v === undefined ? 
true : v ] ) ); + +const baselineRef = flags.baseline || 'dev'; + +if ( positional.length === 0 ) { + + console.error( 'usage: node test/e2e/perf-regression-orchestrator.js [--baseline=dev] [--min-iterations=N] [--max-iterations=N] [--duration=ms] [--warmup=ms] [--k=3] [--force-build] [--no-build]' ); + process.exit( 1 ); + +} + +if ( positional.length > 1 ) { + + // The old CLI accepted ` `. Silently + // treating positional[0] as the example leads to "no such HTML file" later, + // after an expensive build. Fail early with a migration hint instead. + console.error( `ERROR: too many positional args: ${ JSON.stringify( positional ) }` ); + console.error( `The CLI now takes only . Baseline is set via --baseline= (default "dev").` ); + console.error( `Candidate is always the live working tree.` ); + console.error( `Example: node test/e2e/perf-regression-orchestrator.js ${ positional[ positional.length - 1 ] } --baseline=${ positional[ 0 ] }` ); + process.exit( 1 ); + +} + +const [ example ] = positional; + +// Verify the example HTML exists in the live tree before spending time on +// builds + iterations. (In CI the live tree is the PR branch; locally it's cwd.) +try { + + await fs.access( path.join( process.cwd(), 'examples', `${ example }.html` ) ); + +} catch ( _ ) { + + console.error( `ERROR: examples/${ example }.html does not exist.` ); + console.error( `Did you mean one of the webgpu_*.html files under examples/? e.g. 'webgpu_backdrop_water'.` ); + process.exit( 1 ); + +} + +const minIterations = parseInt( flags[ 'min-iterations' ], 10 ) || 4; // 1 warmup + 3 measured +const maxIterations = parseInt( flags[ 'max-iterations' ], 10 ) || 7; // 1 warmup + 6 measured +// Fast-exit percent thresholds when MAD is not yet meaningful (n < 3 measured). +// Set conservatively — frame-time percentiles can legitimately swing 40-50% +// run-to-run on identical code due to GC and scheduler stalls. 
These +// thresholds are "the code is clearly on fire" territory, not subtle regressions. +const fastExitPctAtN1 = parseFloat( flags[ 'fast-exit-pct1' ] ) || 75; +const fastExitPctAtN2 = parseFloat( flags[ 'fast-exit-pct2' ] ) || 30; +const duration = parseInt( flags.duration, 10 ) || 8_000; +const warmup = parseInt( flags.warmup, 10 ) || 3_000; +const portBase = parseInt( flags[ 'port-base' ], 10 ) || 1240; +const k = parseFloat( flags.k ) || 3; +const forceBuild = flags[ 'force-build' ] === true || flags[ 'force-build' ] === 'true'; +const noBuild = flags[ 'no-build' ] === true || flags[ 'no-build' ] === 'true'; + +const root = process.cwd(); +const wtRoot = path.join( root, '.perf-wt' ); +const wtBase = path.join( wtRoot, 'base' ); + +// --- shell helpers ---------------------------------------------------------- + +function sh( cmd, args, opts = {} ) { + + return new Promise( ( resolve, reject ) => { + + const p = spawn( cmd, args, { stdio: 'inherit', ...opts } ); + p.on( 'exit', code => code === 0 ? resolve() : reject( new Error( `${ cmd } ${ args.join( ' ' ) } exited ${ code }` ) ) ); + + } ); + +} + +function shCapture( cmd, args, opts = {} ) { + + return new Promise( ( resolve, reject ) => { + + const p = spawn( cmd, args, { stdio: [ 'ignore', 'pipe', 'inherit' ], ...opts } ); + let out = ''; + p.stdout.on( 'data', d => out += d ); + p.on( 'exit', code => code === 0 ? 
resolve( out.trim() ) : reject( new Error( `${ cmd } exited ${ code }` ) ) ); + + } ); + +} + +async function resolveRef( ref ) { + + try { + + return await shCapture( 'git', [ 'rev-parse', '--verify', `${ ref }^{commit}` ] ); + + } catch ( _ ) { + + throw new Error( `ref '${ ref }' does not resolve — try 'dev', 'HEAD', or a SHA.` ); + + } + +} + +async function dirExists( p ) { + + try { await fs.access( p ); return true; } catch ( _ ) { return false; } + +} + +// --- live-tree build (candidate) ------------------------------------------- + +async function ensureLiveBuild() { + + if ( noBuild ) { + + console.log( `[orch] --no-build set; using existing build/ in live tree` ); + return; + + } + + console.log( `[orch] building live tree (candidate)` ); + await sh( 'npm', [ 'run', 'build' ], { cwd: root } ); + +} + +// --- baseline worktree (no build — three.js commits build/ on the default branch) -- + +async function ensureBaselineWorktree( ref, dir ) { + + const target = await resolveRef( ref ); + + let reusable = false; + if ( await dirExists( dir ) ) { + + try { + + const head = await shCapture( 'git', [ '-C', dir, 'rev-parse', 'HEAD' ] ); + if ( head === target ) reusable = true; + + } catch ( _ ) {} + + if ( ! reusable ) { + + console.log( `[orch] baseline worktree ${ dir } is on a different SHA; replacing` ); + try { await sh( 'git', [ 'worktree', 'remove', '--force', dir ] ); } + catch ( _ ) { await fs.rm( dir, { recursive: true, force: true } ); } + + } + + } + + if ( ! reusable ) { + + await fs.mkdir( wtRoot, { recursive: true } ); + await sh( 'git', [ 'worktree', 'add', '--detach', dir, ref ] ); + + } + + // The baseline worktree uses the committed build/ artifacts from the ref. + // If they're missing (some refs drop build/ from the tree), fall back to + // rebuilding. Pass --force-build to rebuild anyway. Presence is detected + // via build/three.webgpu.js below. + const committedBuildExists = await dirExists( path.join( dir, 'build/three.webgpu.js' ) ); + if ( forceBuild || ! 
committedBuildExists ) { + + console.log( `[orch] building baseline in ${ dir } (${ forceBuild ? '--force-build' : 'missing build/' })` ); + await sh( 'npm', [ 'run', 'build' ], { cwd: dir } ); + + } else { + + console.log( `[orch] baseline ${ ref } (${ target.slice( 0, 8 ) }): using committed build/` ); + + } + + return { dir, sha: target }; + +} + +// --- per-side runtime (server + browser, persistent) ----------------------- + +const CHROME_FLAGS = [ + '--hide-scrollbars', + '--enable-unsafe-webgpu', + '--enable-features=Vulkan', + '--disable-vulkan-surface', + '--ignore-gpu-blocklist', + '--disable-gpu-driver-bug-workarounds', + '--no-sandbox', + '--enable-precise-memory-info', + // Uncap frame rate so rAF reports the renderer's true throughput + // (e.g. "potential fps" ~300 on a fast scene) instead of being clamped + // to the compositor's 60Hz vsync. + '--disable-frame-rate-limit', + '--disable-gpu-vsync', + // Noise reduction: + '--disable-background-timer-throttling', + '--disable-renderer-backgrounding', + '--disable-backgrounding-occluded-windows', + '--disable-ipc-flooding-protection', + '--disable-features=CalculateNativeWinOcclusion,TranslateUI', + '--autoplay-policy=no-user-gesture-required' +]; + +async function startSide( { name, dir, port } ) { + + // In-process server scoped to the worktree root. Serves examples/<example>.html + // and build/*.js out of the worktree's own checkout. + const server = createServer( { root: dir } ); + await new Promise( ( resolve, reject ) => { + + server.once( 'error', reject ); + server.listen( port, () => { server.off( 'error', reject ); resolve(); } ); + + } ); + + const browser = await puppeteer.launch( { + // On CI Linux, headless 'new' has a broken GPU path — WebGPU adapter + // returns null and WebGL context creation fails too. `false` makes + // Chromium attach to the xvfb virtual display (the workflow runs us + // under xvfb-run), which restores both backends. 
+ // Locally (no CI env, no xvfb), keep true-headless for speed. + headless: ( 'CI' in process.env || process.env.VISIBLE ) ? false : 'new', + args: CHROME_FLAGS, + env: { + ...process.env, + VK_DRIVER_FILES: process.env.VK_DRIVER_FILES || '/usr/share/vulkan/icd.d/lvp_icd.x86_64.json' + }, + defaultViewport: { width: 1280, height: 720 }, + protocolTimeout: 0, + handleSIGINT: false, + userDataDir: path.join( wtRoot, `.profile-${ name }` ) + } ); + + return { name, dir, port, server, browser, url: `http://localhost:${ port }/examples/${ example }.html` }; + +} + +async function stopSide( side ) { + + try { await side.browser.close(); } catch ( _ ) {} + try { side.server.close(); } catch ( _ ) {} + +} + +async function runIteration( side, label, { warmupMs } ) { + + const page = await side.browser.newPage(); + page.on( 'pageerror', err => console.error( `[${ side.name } page error] ${ err.message }` ) ); + page.on( 'console', msg => { if ( msg.type() === 'error' ) console.error( `[${ side.name } console.error] ${ msg.text() }` ); } ); + page.on( 'response', resp => { + + const status = resp.status(); + if ( status >= 400 ) console.error( `[${ side.name } ${ status }] ${ resp.url() }` ); + + } ); + + await attachPerfInjection( page ); + + const { raw, before, after } = await collectIteration( page, { + url: side.url, + warmup: warmupMs, + duration, + onLog: m => console.log( `[${ label }] ${ m }` ) + } ); + + await page.close(); + + return buildSummary( raw, before, after, { + label, example, durationMs: duration, warmupMs + } ); + +} + +// --- stats (median / MAD — robust to outlier frames) ------------------------- + +function median( arr ) { + + const s = arr.slice().sort( ( a, b ) => a - b ); + const n = s.length; + if ( n === 0 ) return 0; + return n % 2 ? 
s[ ( n - 1 ) / 2 ] : ( s[ n / 2 - 1 ] + s[ n / 2 ] ) / 2; + +} + +function mad( arr ) { + + const m = median( arr ); + return median( arr.map( v => Math.abs( v - m ) ) ); + +} + +function pickAt( obj, dotted ) { + + return dotted.split( '.' ).reduce( ( a, kk ) => a && a[ kk ], obj ); + +} + +// `gate: true` → contributes to the pass/fail exit code. Stable metrics only. +// `gate: false` → shown in the report but not gated. Noisy but informative. `dir` marks which direction counts as an improvement. +// Heap growth and GC counters swing wildly based on when GC fires relative to +// the sample window — displayed for humans, never block a CI build. +const METRICS = [ + { name: 'fps', path: 'fps', dir: 'higher', gate: true }, + { name: 'frameTimeMs.p50', path: 'frameTimeMs.p50', dir: 'lower', gate: true }, + { name: 'frameTimeMs.p95', path: 'frameTimeMs.p95', dir: 'lower', gate: true }, + { name: 'frameTimeMs.p99', path: 'frameTimeMs.p99', dir: 'lower', gate: true }, + { name: 'jsHeapBytes.mean', path: 'jsHeapBytes.mean', dir: 'lower', gate: true }, + { name: 'jsHeapBytes.growth', path: 'jsHeapBytes.growth', dir: 'lower', gate: false }, + { name: 'gc.events', path: 'gc.events', dir: 'lower', gate: false }, + { name: 'gc.totalFreedBytes', path: 'gc.totalFreedBytes', dir: 'lower', gate: false }, + { name: 'webgpu.estimatedVRAMAfter', path: 'webgpu.estimatedVRAMAfter', dir: 'lower', gate: true }, + { name: 'webgpu.cmdSubmitsPerFrame', path: 'webgpu.cmdSubmitsPerFrame', dir: 'lower', gate: true }, + { name: 'webgpu.errors', path: 'webgpu.errors', dir: 'lower', gate: true } +]; + +function summarize( samples ) { + + const out = {}; + for ( const m of METRICS ) { + + const vals = samples.map( r => pickAt( r, m.path ) ).filter( v => typeof v === 'number' && Number.isFinite( v ) ); + out[ m.name ] = { median: median( vals ), mad: mad( vals ), n: vals.length, samples: vals }; + + } + + return out; + +} + +// Per-metric verdict. Both significance and confidence are n-aware: +// n < 3 measured: MAD is not reliable (MAD=0 at n=1). 
Use |Δ%| thresholds. +// n=1: |Δ%| > fastExitPctAtN1 (default 75%) +// n=2: |Δ%| > fastExitPctAtN2 (default 30%) +// n >= 3 measured: MAD gate. Significance = |Δ| > k·MAD. +function classify( b, c, dir, n ) { + + const delta = c.median - b.median; + const noise = Math.max( b.mad, c.mad ); + const pct = b.median === 0 ? ( c.median === 0 ? 0 : Infinity ) : ( delta / Math.abs( b.median ) * 100 ); + + let sig, confident; + if ( n < 3 ) { + + const threshold = n === 1 ? fastExitPctAtN1 : fastExitPctAtN2; + sig = Math.abs( pct ) > threshold; + confident = sig; // small-n: no "clearly noise" verdict — we just don't know yet + + } else { + + sig = Math.abs( delta ) > k * noise; + confident = Math.abs( delta ) > 2 * k * noise || Math.abs( delta ) < 0.3 * k * noise; + + } + + const regressedDir = dir === 'lower' ? ( delta > 0 ) : ( delta < 0 ); + const isRegression = sig && regressedDir; + + return { delta, noise, pct, significant: sig, isRegression, confident }; + +} + +// Adaptive-stop checks only consider gated metrics — non-gated rows are +// reported for humans but never influence when to stop or whether to fail. +const GATED = METRICS.filter( m => m.gate ); + +function allConfident( base, cand, n ) { + + return GATED.every( m => classify( base[ m.name ], cand[ m.name ], m.dir, n ).confident ); + +} + +function anyCatastrophicRegression( base, cand, n ) { + + return GATED.some( m => classify( base[ m.name ], cand[ m.name ], m.dir, n ).isRegression ); + +} + +// --- main ------------------------------------------------------------------- + +// Resolve candidate SHA from the live tree. May be dirty (uncommitted changes), +// in which case the SHA still points at HEAD but the tree doesn't match it. 
+async function liveTreeMeta() { + + const sha = await shCapture( 'git', [ 'rev-parse', 'HEAD' ] ); + const dirty = ( await shCapture( 'git', [ 'status', '--porcelain' ] ) ).length > 0; + return { sha, dirty }; + +} + +// Baseline worktree + candidate (live-tree) build run in parallel. +console.log( `[orch] preparing baseline (${ baselineRef }) + building candidate (live tree) in parallel` ); +const [ baseInfo, candMeta ] = await Promise.all( [ + ensureBaselineWorktree( baselineRef, wtBase ), + Promise.all( [ ensureLiveBuild(), liveTreeMeta() ] ).then( ( [ , meta ] ) => meta ) +] ); + +// Start both sides (server + browser) once. +console.log( `[orch] launching browsers + servers` ); +const [ base, cand ] = await Promise.all( [ + startSide( { name: 'base', dir: baseInfo.dir, port: portBase } ), + startSide( { name: 'cand', dir: root, port: portBase + 1 } ) +] ); + +const samples = { base: [], cand: [] }; +let iter = 0; +let stoppedEarly = false; + +try { + + while ( iter < maxIterations ) { + + iter ++; + + // Iteration 1 per side is warmup (cold JIT, cold pipeline cache). + const isWarmup = iter === 1; + const warmupMs = isWarmup ? warmup : Math.min( warmup, 1000 ); + + console.log( `\n[orch] iteration ${ iter }/${ maxIterations }${ isWarmup ? ' (warmup, dropped)' : '' } — base` ); + const bRun = await runIteration( base, `base${ iter }`, { warmupMs } ); + console.log( `\n[orch] iteration ${ iter }/${ maxIterations }${ isWarmup ? ' (warmup, dropped)' : '' } — cand` ); + const cRun = await runIteration( cand, `cand${ iter }`, { warmupMs } ); + + if ( ! isWarmup ) { + + samples.base.push( bRun ); + samples.cand.push( cRun ); + + } + + // Adaptive stop: + // - n < 3 measured: fast-exit only on an obvious regression (any + // single metric crossing the %-threshold in the wrong direction — + // fastExitPctAtN1/N2, 75%/30% by default). + // We never fast-pass at small n — too easy for the result to flip + // once real samples arrive. + // - n >= 3 measured: the MAD gate is usable. 
Stop once every metric + // is either clearly moving or clearly noise. + const n = samples.base.length; + if ( n >= 1 ) { + + const b = summarize( samples.base ); + const c = summarize( samples.cand ); + let shouldStop = false; + let reason = ''; + if ( n < 3 ) { + + if ( anyCatastrophicRegression( b, c, n ) ) { + + shouldStop = true; + reason = `catastrophic regression at n=${ n }`; + + } + + } else if ( iter >= minIterations && allConfident( b, c, n ) ) { + + shouldStop = true; + reason = `all metrics confident at n=${ n }`; + + } + + if ( shouldStop ) { + + console.log( `\n[orch] early stop — ${ reason }` ); + stoppedEarly = true; + break; + + } + + } + + } + +} finally { + + await Promise.all( [ stopSide( base ), stopSide( cand ) ] ); + +} + +// --- aggregate & gate (exit code 2 signals a gated regression) --------------- + +const baseSum = summarize( samples.base ); +const candSum = summarize( samples.cand ); + +const pad = ( s, n ) => String( s ).padStart( n ); +const fmt = v => ( typeof v === 'number' && Number.isFinite( v ) ) ? v.toPrecision( 4 ) : String( v ); + +console.log( `\nperf-regression-orchestrator: ${ example }` ); +console.log( ` baseline: ${ baselineRef } (${ baseInfo.sha.slice( 0, 8 ) })` ); +console.log( ` candidate: live tree @ ${ candMeta.sha.slice( 0, 8 ) }${ candMeta.dirty ? ' (uncommitted changes)' : '' }` ); +console.log( ` iterations: ${ iter } total, ${ samples.base.length } measured (1 warmup dropped)${ stoppedEarly ? 
' [early stop]' : '' }` );` );
+console.log( ` gate: |Δmedian| > ${ k } · max(MAD_base, MAD_cand)\n` ); + +console.log( pad( 'metric', 32 ), ' | ', pad( 'base (med±MAD)', 22 ), ' | ', pad( 'cand (med±MAD)', 22 ), ' | ', pad( 'Δ%', 8 ), ' | ', pad( 'verdict', 8 ) ); +console.log( '-'.repeat( 32 ), '-+-', '-'.repeat( 22 ), '-+-', '-'.repeat( 22 ), '-+-', '-'.repeat( 8 ), '-+-', '-'.repeat( 8 ) ); + +let regressed = 0; +const rows = []; + +for ( const m of METRICS ) { + + const b = baseSum[ m.name ]; + const c = candSum[ m.name ]; + const { delta, pct, significant, isRegression } = classify( b, c, m.dir, samples.base.length ); + + // Only gated metrics contribute to the pass/fail exit code. Info rows are + // printed but never fail the build. + const gatedRegression = m.gate && isRegression; + if ( gatedRegression ) regressed ++; + + const verdict = gatedRegression ? 'REGRESS' : ( significant ? 'improve' : 'noise' ); + rows.push( { name: m.name, gate: m.gate, b, c, delta, pct, verdict, isRegression: gatedRegression } ); + + const nameDisplay = m.gate ? m.name : m.name + ' (info)'; + console.log( + pad( nameDisplay, 32 ), ' | ', + pad( `${ fmt( b.median ) } ± ${ fmt( b.mad ) }`, 22 ), ' | ', + pad( `${ fmt( c.median ) } ± ${ fmt( c.mad ) }`, 22 ), ' | ', + pad( ( pct >= 0 ? 
'+' : '' ) + pct.toFixed( 1 ) + '%', 8 ), ' | ', + pad( verdict, 8 ) + ); + +} + +const outPath = path.join( root, `test/e2e/perf-regression-${ example }.summary.json` ); +await fs.writeFile( outPath, JSON.stringify( { + example, + baselineRef, baselineSha: baseInfo.sha, + candidateSha: candMeta.sha, candidateDirty: candMeta.dirty, + iterationsTotal: iter, iterationsMeasured: samples.base.length, + duration, warmup, k, + stoppedEarly, + host: { platform: process.platform, arch: process.arch, cpus: os.cpus().length, mem: os.totalmem() }, + base: baseSum, cand: candSum, rows +}, null, 2 ) ); +console.log( `\n[orch] wrote ${ outPath }` ); + +if ( regressed > 0 ) { + + console.error( `\n[orch] FAIL — ${ regressed } metric(s) regressed beyond ${ k }·MAD gate` ); + process.exit( 2 ); + +} + +console.log( `\n[orch] PASS — no regression beyond ${ k }·MAD gate` ); +process.exit( 0 ); diff --git a/test/e2e/perf-regression.js b/test/e2e/perf-regression.js new file mode 100644 index 00000000000000..77acb07bf3354a --- /dev/null +++ b/test/e2e/perf-regression.js @@ -0,0 +1,79 @@ +// Perf/VRAM regression harness for WebGPU examples (single-run mode). +// +// Usage: +// node test/e2e/perf-regression.js