From fc921ef98944b70f8f54a0a023d20be2d0bc7b9b Mon Sep 17 00:00:00 2001 From: Patrick Dubroy Date: Tue, 10 Mar 2026 13:41:14 +0100 Subject: [PATCH 01/10] wasm: CstReader -> CstView --- packages/compiler/scripts/parseLiquid.js | 53 +--- packages/runtime/package.json | 6 +- .../runtime/src/{cstReader.ts => cstView.ts} | 276 +++++++++--------- packages/runtime/src/miniohm.ts | 24 +- packages/runtime/tsdown.config.ts | 2 +- 5 files changed, 180 insertions(+), 181 deletions(-) rename packages/runtime/src/{cstReader.ts => cstView.ts} (62%) diff --git a/packages/compiler/scripts/parseLiquid.js b/packages/compiler/scripts/parseLiquid.js index bd1d4798..5afb8c34 100644 --- a/packages/compiler/scripts/parseLiquid.js +++ b/packages/compiler/scripts/parseLiquid.js @@ -18,7 +18,7 @@ import {Bench} from 'tinybench'; import {Compiler} from '../src/Compiler.ts'; import {unparse, toWasmGrammar} from '../test/_helpers.js'; -import {createReader} from '../../runtime/src/cstReader.ts'; +import {CstView} from '../../runtime/src/cstView.ts'; const __dirname = dirname(fileURLToPath(import.meta.url)); const datadir = join(__dirname, '../test/data'); @@ -33,8 +33,7 @@ const positionalArgs = process.argv.slice(2).filter(a => !a.startsWith('--')); // https://matklad.github.io/2024/03/22/basic-things.html const smallSize = flags.has('--small-size'); const includeUnparse = flags.has('--include-unparse'); -const useCstReader = flags.has('--cst-reader'); -const useCstReaderPacked = flags.has('--cst-reader-packed'); +const useCstReader = flags.has('--cst-reader') || flags.has('--cst-reader-packed'); // Get pattern from command line arguments const pattern = positionalArgs[0]; @@ -102,52 +101,26 @@ const pattern = positionalArgs[0]; opts ); - // Walk CST using CstReader (raw handles), collecting terminal text. - function unparseCstReaderRaw(matchResult) { - const reader = createReader(matchResult); - const inp = reader.input; - let ans = ''; - function walk(handle, startIdx) { - if (reader.isTerminal(handle)) { - ans += inp.slice(startIdx, startIdx + reader.matchLength(handle)); - return; - } - reader.forEachChild( - handle, - (child, _leadingSpaces, offset) => { - walk(child, startIdx + offset); - }, - startIdx - ); - } - walk(reader.rootHandle, reader.rootStartIdx); - return ans; - } - - // Walk CST using CstReader (handles with startIdx), collecting terminal text. - function unparseCstReaderPacked(matchResult) { - const reader = createReader(matchResult, {packStartIdx: true}); + // Walk CST using CstView (handles with startIdx), collecting terminal text. + function unparseCstView(matchResult) { + const view = CstView.from(matchResult); let ans = ''; function walk(handle) { - if (reader.isTerminal(handle)) { - ans += reader.sourceString(handle); + if (view.isTerminal(handle)) { + ans += view.sourceString(handle); return; } - reader.forEachChild(handle, (child, _leadingSpaces) => { + view.forEachChild(handle, (child, _leadingSpaces) => { walk(child); }); } - walk(reader.root); + walk(view.root); return ans; } const wasmLabel = includeUnparse ? 'Wasm parse+unparse' : 'Wasm parse'; bench.add( - useCstReaderPacked - ? `${wasmLabel} (CstReader packed)` - : useCstReader - ? `${wasmLabel} (CstReader)` - : wasmLabel, + useCstReader ? `${wasmLabel} (CstView)` : wasmLabel, () => { let overriddenDuration = 0; for (const {input} of files) { @@ -164,11 +137,7 @@ const pattern = positionalArgs[0]; peakWasmMemoryBytes, exports.memory.buffer.byteLength ); - return useCstReaderPacked - ? unparseCstReaderPacked(m) - : useCstReader - ? unparseCstReaderRaw(m) - : unparse(g); + return useCstReader ? unparseCstView(m) : unparse(g); }); if (includeUnparse) overriddenDuration += bench.now() - start; } diff --git a/packages/runtime/package.json b/packages/runtime/package.json index db8d1eb0..06acd807 100644 --- a/packages/runtime/package.json +++ b/packages/runtime/package.json @@ -24,9 +24,9 @@ "types": "./dist/src/unstableDebug.d.ts", "default": "./dist/src/unstableDebug.js" }, - "./cstReader": { - "types": "./dist/src/cstReader.d.ts", - "default": "./dist/src/cstReader.js" + "./cstView": { + "types": "./dist/src/cstView.d.ts", + "default": "./dist/src/cstView.js" } }, "files": ["dist"], diff --git a/packages/runtime/src/cstReader.ts b/packages/runtime/src/cstView.ts similarity index 62% rename from packages/runtime/src/cstReader.ts rename to packages/runtime/src/cstView.ts index f497f8d8..31a11def 100644 --- a/packages/runtime/src/cstReader.ts +++ b/packages/runtime/src/cstView.ts @@ -13,6 +13,20 @@ const MASK = SHIFT - 1; // 0x7FFFFFF */ export const NULL_HANDLE = 0; +/** + * Special kind values for non-nonterminal nodes. + * For nonterminal nodes, `kind()` returns the rule name (a string). + */ +export const CstKind = { + Terminal: '_terminal', + List: '_list', + Optional: '_opt', + Spaces: 'spaces', +} as const; + +export type CstKindValue = (typeof CstKind)[keyof typeof CstKind]; +export type CstKind = CstKindValue | string; + /** * Spaces handle encoding: (spacesLen << 2) | 0b10. * Bit 1 set, bit 0 clear — distinct from heap pointers (low 2 bits = 00) @@ -21,7 +35,7 @@ export const NULL_HANDLE = 0; * In packed mode, startIdx is embedded in the upper bits just like other * handles: startIdx * SHIFT + rawSpacesHandle. * - * These are internal — consumers use the normal accessor methods on CstReader. + * These are internal — consumers use the normal accessor methods on CstView. */ function makeSpacesHandle(spacesLen: number): number { return (spacesLen << 2) | 2; @@ -55,21 +69,18 @@ export function unpackStartIdx(handle: number): number { /** * Zero-allocation access to the CST stored in Wasm linear memory. * - * Accessor methods (isTerminal, matchLength, childCount, ctorName, details) - * accept either a raw handle or a handle with startIdx. + * All handles have startIdx packed in. * - * forEachChild(handle, fn) iterates visible children. The callback receives - * (childHandle, leadingSpaces, pos, index). leadingSpaces is a handle for - * the leading spaces node (use accessor methods to inspect), or NULL_HANDLE - * if none. + * Use `kind(handle)` to get the node kind — returns the rule name for + * nonterminals, or a CstKind constant for other node types. */ -export class CstReader { +export class CstView { /** @internal */ private _ctx: MatchContext; - /** @internal — whether handles have startIdx packed in. */ - private _packed: boolean; + /** @internal — precomputed rule ctor names (without parameterization). */ + private _ruleCtorNames: string[]; - /** Handle for the root node (with startIdx packed in if packStartIdx was set). */ + /** Handle for the root node (with startIdx packed in). */ readonly root: number; /** Handle for leading spaces before the root, or NULL_HANDLE if none. */ readonly rootLeadingSpaces: number; @@ -84,11 +95,42 @@ export class CstReader { } /** @internal */ - constructor(ctx: MatchContext, root: number, rootLeadingSpaces: number, packed: boolean) { + constructor( + ctx: MatchContext, + root: number, + rootLeadingSpaces: number, + ruleCtorNames: string[] + ) { this._ctx = ctx; this.root = root; this.rootLeadingSpaces = rootLeadingSpaces; - this._packed = packed; + this._ruleCtorNames = ruleCtorNames; + } + + /** Create a CstView from a successful match result. */ + static from(result: SucceededMatchResult): CstView { + const exports = (result.grammar as any)._instance.exports; + const ctx = result._ctx; + + const heapTop = exports.__offset.value; + if (heapTop >= SHIFT) { + throw new Error( + `Wasm heap too large for CstView: ${heapTop} bytes exceeds ${HANDLE_BITS}-bit limit (${SHIFT} bytes)` + ); + } + const startIdxLimit = 2 ** (53 - HANDLE_BITS); + if (ctx.input.length >= startIdxLimit) { + throw new Error( + `Input too long for CstView: ${ctx.input.length} chars exceeds ${53 - HANDLE_BITS}-bit limit (${startIdxLimit} chars)` + ); + } + + const ruleCtorNames = (result.grammar as any)._ruleCtorNames as string[]; + + const spacesLen = Math.max(0, exports.getSpacesLenAt(0)); + const rootPtr = exports.bindingsAt(0); + const rootLeadingSpaces = spacesLen > 0 ? 0 * SHIFT + makeSpacesHandle(spacesLen) : 0; + return new CstView(ctx, pack(rootPtr, spacesLen), rootLeadingSpaces, ruleCtorNames); } /** Extract the startIdx from a handle. */ @@ -96,6 +138,83 @@ export class CstReader { return unpackStartIdx(handle); } + /** + * Node kind. Returns the rule name (without parameterization) for + * nonterminals, or one of the CstKind constants ('_terminal', '_list', + * '_opt', 'spaces') for other node types. + */ + kind(handle: number): CstKind { + const raw = handle & MASK; + if (isSpacesHandle(raw)) return CstKind.Spaces; + if (isTaggedTerminal(raw)) return CstKind.Terminal; + const type = (this._ctx.view.getInt32(raw + 8, true) & + MATCH_RECORD_TYPE_MASK) as MatchRecordType; + switch (type) { + case MatchRecordType.NONTERMINAL: { + const ruleId = this._ctx.view.getInt32(raw + 8, true) >>> 2; + return this._ruleCtorNames[ruleId]; + } + case MatchRecordType.TERMINAL: + return CstKind.Terminal; + case MatchRecordType.ITER_FLAG: + return CstKind.List; + case MatchRecordType.OPTIONAL: + return CstKind.Optional; + } + } + + /** Children per list item. Only valid when kind() === CstKind.List. */ + listArity(handle: number): number { + const raw = handle & MASK; + return this._ctx.view.getInt32(raw + 8, true) >>> 2; + } + + /** + * Iterate list items grouped by arity. The callback receives the child + * handles for one iteration as arguments. + * Only valid when kind() === CstKind.List. + */ + mapList(handle: number, cb: (...itemChildren: number[]) => T): T[] { + const arity = this.listArity(handle); + const results: T[] = []; + if (arity <= 1) { + this.forEachChild(handle, child => { + results.push(cb(child)); + }); + } else { + const group: number[] = []; + this.forEachChild(handle, child => { + group.push(child); + if (group.length === arity) { + results.push(cb(...group)); + group.length = 0; + } + }); + } + return results; + } + + /** + * Unwrap an optional node. If present, calls `present` with the children. + * If absent, calls `absent` (or returns undefined). + * Only valid when kind() === CstKind.Optional. + */ + mapOpt( + handle: number, + present: (...children: number[]) => T, + absent?: () => T + ): T | undefined { + const count = this.childCount(handle); + if (count === 0) { + return absent ? absent() : undefined; + } + const children: number[] = []; + this.forEachChild(handle, child => { + children.push(child); + }); + return present(...children); + } + isTerminal(handle: number): boolean { const raw = handle & MASK; if (isSpacesHandle(raw)) return false; @@ -149,25 +268,6 @@ export class CstReader { return this._ctx.view.getUint32(raw + 4, true); } - /** - * Constructor name. For nonterminals, the rule name (without parameterization). - * For other types: '_terminal', '_list', '_opt'. - */ - ctorName(handle: number): string { - const raw = handle & MASK; - if (isSpacesHandle(raw)) return 'spaces'; - if (isTaggedTerminal(raw)) return '_terminal'; - const type = (this._ctx.view.getInt32(raw + 8, true) & - MATCH_RECORD_TYPE_MASK) as MatchRecordType; - if (type === MatchRecordType.NONTERMINAL) { - const ruleId = this._ctx.view.getInt32(raw + 8, true) >>> 2; - return this._ctx.ruleNames[ruleId].split('<')[0]; - } - if (type === MatchRecordType.TERMINAL) return '_terminal'; - if (type === MatchRecordType.ITER_FLAG) return '_list'; - return '_opt'; - } - /** * Upper bits of typeAndDetails. For NONTERMINAL: the ruleId. * For ITER_FLAG: the arity (children per iteration). @@ -184,14 +284,11 @@ export class CstReader { return this._ctx.view.getUint32(raw + 16 + i * 4, true); } - /** Source string for a node. If startIdx is omitted, it is extracted from the handle. */ - sourceString(handle: number, startIdx?: number): string { - if (startIdx === undefined) { - const raw = handle & MASK; - startIdx = (handle - raw) / SHIFT; - handle = raw; - } - return this._ctx.input.slice(startIdx, startIdx + this.matchLength(handle)); + /** Source string for a node. startIdx is extracted from the handle. */ + sourceString(handle: number): string { + const raw = handle & MASK; + const si = (handle - raw) / SHIFT; + return this._ctx.input.slice(si, si + this.matchLength(raw)); } /** The full input string that was parsed. */ @@ -203,61 +300,9 @@ export class CstReader { * Iterate over children. The callback receives (childHandle, leadingSpaces, * pos, index). leadingSpaces is a handle for the leading spaces node * (works with accessor methods like matchLength, ctorName, sourceString), - * or NULL_HANDLE (0) if none. - * - * The meaning of `pos` depends on the mode: - * - Raw mode: offset from the parent's start position. - * `parentStartIdx` is the parent's absolute position, needed to - * query the memo table for spaces lengths. - * - Packed mode: the child's absolute startIdx (parentStartIdx is - * ignored since startIdx is embedded in the handle). + * or NULL_HANDLE (0) if none. `pos` is the child's absolute startIdx. */ forEachChild( - handle: number, - fn: (child: number, leadingSpaces: number, pos: number, index: number) => void, - parentStartIdx = 0 - ): void { - if (this._packed) { - this._forEachChildPacked(handle, fn); - } else { - this._forEachChildRaw(handle, parentStartIdx, fn); - } - } - - /** Check whether a raw child handle has parent-level space skipping. */ - private _hasParentSpaces(rawChild: number): boolean { - if (isTaggedTerminal(rawChild)) return true; - const type = (this._ctx.view.getInt32(rawChild + 8, true) & - MATCH_RECORD_TYPE_MASK) as MatchRecordType; - return type === MatchRecordType.NONTERMINAL || type === MatchRecordType.TERMINAL; - } - - private _forEachChildRaw( - handle: number, - parentStartIdx: number, - fn: (child: number, leadingSpaces: number, offset: number, index: number) => void - ): void { - if (isTaggedTerminal(handle)) return; - const count = this._ctx.view.getUint32(handle, true); - const {getSpacesLenAt} = this._ctx; - let offset = 0; - for (let i = 0; i < count; i++) { - const child = this._ctx.view.getUint32(handle + 16 + i * 4, true); - const rawSpacesLen = - getSpacesLenAt && this._hasParentSpaces(child) - ? Math.max(0, getSpacesLenAt(parentStartIdx + offset)) - : 0; - const leadingSpaces = rawSpacesLen > 0 ? makeSpacesHandle(rawSpacesLen) : 0; - offset += rawSpacesLen; - const len = isTaggedTerminal(child) - ? child >>> 1 - : this._ctx.view.getUint32(child + 4, true); - fn(child, leadingSpaces, offset, i); - offset += len; - } - } - - private _forEachChildPacked( handle: number, fn: (child: number, leadingSpaces: number, pos: number, index: number) => void ): void { @@ -285,45 +330,12 @@ export class CstReader { childStart += len; } } -} - -interface CreateReaderOptions { - /** If true, handles include startIdx (see pack()). Default: false. */ - packStartIdx?: boolean; -} -export function createReader( - result: SucceededMatchResult, - options?: CreateReaderOptions -): CstReader { - const exports = (result.grammar as any)._instance.exports; - const ctx = result._ctx; - const doPack = options?.packStartIdx ?? false; - - if (doPack) { - const heapTop = exports.__offset.value; - if (heapTop >= SHIFT) { - throw new Error( - `Wasm heap too large for CstReader: ${heapTop} bytes exceeds ${HANDLE_BITS}-bit limit (${SHIFT} bytes)` - ); - } - const startIdxLimit = 2 ** (53 - HANDLE_BITS); - if (ctx.input.length >= startIdxLimit) { - throw new Error( - `Input too long for CstReader: ${ctx.input.length} chars exceeds ${53 - HANDLE_BITS}-bit limit (${startIdxLimit} chars)` - ); - } + /** Check whether a raw child handle has parent-level space skipping. */ + private _hasParentSpaces(rawChild: number): boolean { + if (isTaggedTerminal(rawChild)) return true; + const type = (this._ctx.view.getInt32(rawChild + 8, true) & + MATCH_RECORD_TYPE_MASK) as MatchRecordType; + return type === MatchRecordType.NONTERMINAL || type === MatchRecordType.TERMINAL; } - - const spacesLen = Math.max(0, exports.getSpacesLenAt(0)); - const rootPtr = exports.bindingsAt(0); - const p = doPack ? pack : (h: number, _s: number) => h; - // Pack the spaces handle with startIdx=0 (root leading spaces always start at 0). - const rootLeadingSpaces = - spacesLen > 0 - ? doPack - ? 0 * SHIFT + makeSpacesHandle(spacesLen) - : makeSpacesHandle(spacesLen) - : 0; - return new CstReader(ctx, p(rootPtr, spacesLen), rootLeadingSpaces, doPack); } diff --git a/packages/runtime/src/miniohm.ts b/packages/runtime/src/miniohm.ts index 4e8b905a..8a11ccec 100644 --- a/packages/runtime/src/miniohm.ts +++ b/packages/runtime/src/miniohm.ts @@ -1,4 +1,5 @@ import {assert, checkNotNull} from './assert.ts'; +import {CstView} from './cstView.ts'; import {getLineAndColumn, getLineAndColumnMessage} from './extras.ts'; export const MATCH_RECORD_TYPE_MASK = 0b11; @@ -142,6 +143,8 @@ export class Grammar { /** @internal */ private _instance?: WebAssembly.Instance = undefined; + /** @internal — precomputed rule constructor names (without parameterization). */ + _ruleCtorNames: string[] = []; /** @internal */ private _imports = { // System-level AssemblyScript imports. @@ -317,6 +320,10 @@ export class Grammar { this._ruleIds.set(ruleName, this._ruleIds.size); this._ruleNames.push(ruleName); } + this._ruleCtorNames = this._ruleNames.map(n => { + const i = n.indexOf('<'); + return i === -1 ? n : n.slice(0, i); + }); for (const str of parseStringTable(module, 'strings')) { this._strings.push(str); } @@ -1064,8 +1071,10 @@ function createMatchResult( } export class SucceededMatchResult extends MatchResult { - /** @internal */ - _cst: CstNode; + /** @internal — lazily materialized CstNode tree (compatibility adapter). */ + private _cstRoot?: CstNode; + /** @internal — lazily created CstView. */ + private _cstView?: CstView; /** @internal */ protected constructor( @@ -1075,7 +1084,16 @@ export class SucceededMatchResult extends MatchResult { succeeded: boolean ) { super(grammar, startExpr, ctx, succeeded); - this._cst = grammar._getCstRoot(ctx); + } + + /** Zero-allocation handle-based view of the CST. */ + get cst(): CstView { + return (this._cstView ??= CstView.from(this)); + } + + /** @internal — for backward compat: lazily access _cst. */ + get _cst(): CstNode { + return (this._cstRoot ??= this.grammar._getCstRoot(this._ctx)); } getCstRoot(): CstNode { diff --git a/packages/runtime/tsdown.config.ts b/packages/runtime/tsdown.config.ts index f50d49f1..6f60fdac 100644 --- a/packages/runtime/tsdown.config.ts +++ b/packages/runtime/tsdown.config.ts @@ -4,7 +4,7 @@ export default defineConfig({ entry: { index: 'index.ts', 'src/unstableDebug': 'src/unstableDebug.ts', - 'src/cstReader': 'src/cstReader.ts', + 'src/cstView': 'src/cstView.ts', }, format: 'esm', fixedExtension: false, From 8f8cba3c5fe79b7ba74375fa7854f27e44482ad3 Mon Sep 17 00:00:00 2001 From: Patrick Dubroy Date: Tue, 10 Mar 2026 13:49:30 +0100 Subject: [PATCH 02/10] formatting --- packages/compiler/scripts/parseLiquid.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/compiler/scripts/parseLiquid.js b/packages/compiler/scripts/parseLiquid.js index ffae4fb5..52d29f34 100644 --- a/packages/compiler/scripts/parseLiquid.js +++ b/packages/compiler/scripts/parseLiquid.js @@ -16,9 +16,10 @@ import {fileURLToPath} from 'node:url'; import * as ohm from 'ohm-js-legacy'; import {Bench} from 'tinybench'; -import {Compiler} from '../src/Compiler.ts'; import {unparse} from '../test/_helpers.js'; import {CstView} from '../../runtime/src/cstView.ts'; +import {compileGrammars} from '../src/api.ts'; +import {Grammar} from '../../runtime/src/miniohm.ts'; const __dirname = dirname(fileURLToPath(import.meta.url)); const datadir = join(__dirname, '../test/data'); From 309827a0724d15f3bb86efde07e33bd4440a878e Mon Sep 17 00:00:00 2001 From: Patrick Dubroy Date: Tue, 10 Mar 2026 13:56:08 +0100 Subject: [PATCH 03/10] simplification --- packages/compiler/scripts/parseLiquid.js | 17 +++++------ packages/runtime/src/cstView.ts | 38 ------------------------ 2 files changed, 8 insertions(+), 47 deletions(-) diff --git a/packages/compiler/scripts/parseLiquid.js b/packages/compiler/scripts/parseLiquid.js index 52d29f34..b6372281 100644 --- a/packages/compiler/scripts/parseLiquid.js +++ b/packages/compiler/scripts/parseLiquid.js @@ -17,7 +17,6 @@ import * as ohm from 'ohm-js-legacy'; import {Bench} from 'tinybench'; import {unparse} from '../test/_helpers.js'; -import {CstView} from '../../runtime/src/cstView.ts'; import {compileGrammars} from '../src/api.ts'; import {Grammar} from '../../runtime/src/miniohm.ts'; @@ -35,7 +34,7 @@ const positionalArgs = process.argv.slice(2).filter(a => !a.startsWith('--')); // https://matklad.github.io/2024/03/22/basic-things.html const smallSize = flags.has('--small-size'); const includeUnparse = flags.has('--include-unparse'); -const useCstReader = flags.has('--cst-reader') || flags.has('--cst-reader-packed'); +const useCstView = flags.has('--use-cstview'); // Get pattern from command line arguments const pattern = positionalArgs[0]; @@ -106,24 +105,24 @@ const pattern = positionalArgs[0]; // Walk CST using CstView (handles with startIdx), collecting terminal text. function unparseCstView(matchResult) { - const view = CstView.from(matchResult); + const {cst} = matchResult; let ans = ''; function walk(handle) { - if (view.isTerminal(handle)) { - ans += view.sourceString(handle); + if (cst.kind(handle) === '_terminal') { + ans += cst.sourceString(handle); return; } - view.forEachChild(handle, (child, _leadingSpaces) => { + cst.forEachChild(handle, (child, _leadingSpaces) => { walk(child); }); } - walk(view.root); + walk(cst.root); return ans; } const wasmLabel = includeUnparse ? 'Wasm parse+unparse' : 'Wasm parse'; bench.add( - useCstReader ? `${wasmLabel} (CstView)` : wasmLabel, + useCstView ? `${wasmLabel} (CstView)` : wasmLabel, () => { let overriddenDuration = 0; for (const {input} of files) { @@ -140,7 +139,7 @@ const pattern = positionalArgs[0]; peakWasmMemoryBytes, exports.memory.buffer.byteLength ); - return useCstReader ? unparseCstView(m) : unparse(g); + return useCstView ? unparseCstView(m) : unparse(g); }); if (includeUnparse) overriddenDuration += bench.now() - start; } diff --git a/packages/runtime/src/cstView.ts b/packages/runtime/src/cstView.ts index 31a11def..e5049f40 100644 --- a/packages/runtime/src/cstView.ts +++ b/packages/runtime/src/cstView.ts @@ -215,44 +215,6 @@ export class CstView { return present(...children); } - isTerminal(handle: number): boolean { - const raw = handle & MASK; - if (isSpacesHandle(raw)) return false; - if (isTaggedTerminal(raw)) return true; - return ( - ((this._ctx.view.getInt32(raw + 8, true) & - MATCH_RECORD_TYPE_MASK) as MatchRecordType) === MatchRecordType.TERMINAL - ); - } - - isNonterminal(handle: number): boolean { - const raw = handle & MASK; - if (isSpacesHandle(raw)) return true; - if (isTaggedTerminal(raw)) return false; - return ( - ((this._ctx.view.getInt32(raw + 8, true) & - MATCH_RECORD_TYPE_MASK) as MatchRecordType) === MatchRecordType.NONTERMINAL - ); - } - - isList(handle: number): boolean { - const raw = handle & MASK; - if (isSpacesHandle(raw) || isTaggedTerminal(raw)) return false; - return ( - ((this._ctx.view.getInt32(raw + 8, true) & - MATCH_RECORD_TYPE_MASK) as MatchRecordType) === MatchRecordType.ITER_FLAG - ); - } - - isOptional(handle: number): boolean { - const raw = handle & MASK; - if (isSpacesHandle(raw) || isTaggedTerminal(raw)) return false; - return ( - ((this._ctx.view.getInt32(raw + 8, true) & - MATCH_RECORD_TYPE_MASK) as MatchRecordType) === MatchRecordType.OPTIONAL - ); - } - /** Number of raw children stored in this match record. */ childCount(handle: number): number { const raw = handle & MASK; From c0f3db5e0a226a55600542cbfc9114b524210be3 Mon Sep 17 00:00:00 2001 From: Patrick Dubroy Date: Tue, 10 Mar 2026 14:10:59 +0100 Subject: [PATCH 04/10] api cleanups --- packages/runtime/src/cstView.ts | 94 ++++++++------------------------- 1 file changed, 23 insertions(+), 71 deletions(-) diff --git a/packages/runtime/src/cstView.ts b/packages/runtime/src/cstView.ts index e5049f40..307ba706 100644 --- a/packages/runtime/src/cstView.ts +++ b/packages/runtime/src/cstView.ts @@ -32,9 +32,6 @@ export type CstKind = CstKindValue | string; * Bit 1 set, bit 0 clear — distinct from heap pointers (low 2 bits = 00) * and tagged terminals (bit 0 = 1). * - * In packed mode, startIdx is embedded in the upper bits just like other - * handles: startIdx * SHIFT + rawSpacesHandle. - * * These are internal — consumers use the normal accessor methods on CstView. */ function makeSpacesHandle(spacesLen: number): number { @@ -48,20 +45,13 @@ function isSpacesHandle(raw: number): boolean { /** * Pack a raw CST handle and startIdx into a single Number handle. * Uses 53 of the available integer-precision bits in an IEEE 754 double - * (27 bits for the pointer, 26 bits for startIdx). Accessor methods - * (isTerminal, matchLength, etc.) transparently accept either a raw handle - * or a handle with startIdx — they extract the low 27 bits via `& MASK`, - * which is an identity operation for raw handles (< 2^27). + * (27 bits for the pointer, 26 bits for startIdx). */ -export function pack(rawHandle: number, startIdx: number): number { +function pack(rawHandle: number, startIdx: number): number { return startIdx * SHIFT + rawHandle; } -export function unpackHandle(handle: number): number { - return handle & MASK; -} - -export function unpackStartIdx(handle: number): number { +function unpackStartIdx(handle: number): number { const raw = handle & MASK; return (handle - raw) / SHIFT; } @@ -85,15 +75,6 @@ export class CstView { /** Handle for leading spaces before the root, or NULL_HANDLE if none. */ readonly rootLeadingSpaces: number; - /** Raw handle for the root node (without startIdx). */ - get rootHandle(): number { - return this.root & MASK; - } - /** startIdx for the root node. */ - get rootStartIdx(): number { - return unpackStartIdx(this.root); - } - /** @internal */ constructor( ctx: MatchContext, @@ -163,19 +144,27 @@ export class CstView { } } - /** Children per list item. Only valid when kind() === CstKind.List. */ - listArity(handle: number): number { - const raw = handle & MASK; - return this._ctx.view.getInt32(raw + 8, true) >>> 2; - } - /** - * Iterate list items grouped by arity. The callback receives the child - * handles for one iteration as arguments. - * Only valid when kind() === CstKind.List. + * Map over grouped children. Works for both list and optional nodes. + * + * - List (`_list`): groups children by the list's arity and calls `cb` + * per group. Returns `T[]` with one entry per iteration. + * - Optional (`_opt`): all children form a single group. Returns `T[]` + * with 0 elements (absent) or 1 element (present). */ - mapList(handle: number, cb: (...itemChildren: number[]) => T): T[] { - const arity = this.listArity(handle); + mapItems(handle: number, cb: (...children: number[]) => T): T[] { + const count = this.childCount(handle); + if (count === 0) return []; + + const raw = handle & MASK; + const type = (this._ctx.view.getInt32(raw + 8, true) & + MATCH_RECORD_TYPE_MASK) as MatchRecordType; + // For lists, arity is in the details field; for optionals, all children are one group. + const arity = + type === MatchRecordType.ITER_FLAG + ? this._ctx.view.getInt32(raw + 8, true) >>> 2 + : count; + const results: T[] = []; if (arity <= 1) { this.forEachChild(handle, child => { @@ -194,27 +183,6 @@ export class CstView { return results; } - /** - * Unwrap an optional node. If present, calls `present` with the children. - * If absent, calls `absent` (or returns undefined). - * Only valid when kind() === CstKind.Optional. - */ - mapOpt( - handle: number, - present: (...children: number[]) => T, - absent?: () => T - ): T | undefined { - const count = this.childCount(handle); - if (count === 0) { - return absent ? absent() : undefined; - } - const children: number[] = []; - this.forEachChild(handle, child => { - children.push(child); - }); - return present(...children); - } - /** Number of raw children stored in this match record. */ childCount(handle: number): number { const raw = handle & MASK; @@ -230,22 +198,6 @@ export class CstView { return this._ctx.view.getUint32(raw + 4, true); } - /** - * Upper bits of typeAndDetails. For NONTERMINAL: the ruleId. - * For ITER_FLAG: the arity (children per iteration). - */ - details(handle: number): number { - const raw = handle & MASK; - if (isSpacesHandle(raw) || isTaggedTerminal(raw)) return 0; - return this._ctx.view.getInt32(raw + 8, true) >>> 2; - } - - /** Handle (Wasm pointer) of the i-th raw child. */ - childAt(handle: number, i: number): number { - const raw = handle & MASK; - return this._ctx.view.getUint32(raw + 16 + i * 4, true); - } - /** Source string for a node. startIdx is extracted from the handle. */ sourceString(handle: number): string { const raw = handle & MASK; @@ -261,7 +213,7 @@ export class CstView { /** * Iterate over children. The callback receives (childHandle, leadingSpaces, * pos, index). leadingSpaces is a handle for the leading spaces node - * (works with accessor methods like matchLength, ctorName, sourceString), + * (works with accessor methods like matchLength, kind, sourceString), * or NULL_HANDLE (0) if none. `pos` is the child's absolute startIdx. */ forEachChild( From 3f6c7077dffcd572dcfe828c986fdb76725b4b8e Mon Sep 17 00:00:00 2001 From: Patrick Dubroy Date: Tue, 10 Mar 2026 14:17:05 +0100 Subject: [PATCH 05/10] attempt to minimize allocation --- packages/runtime/src/cstView.ts | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/packages/runtime/src/cstView.ts b/packages/runtime/src/cstView.ts index 307ba706..b5728fbd 100644 --- a/packages/runtime/src/cstView.ts +++ b/packages/runtime/src/cstView.ts @@ -165,20 +165,26 @@ export class CstView { ? this._ctx.view.getInt32(raw + 8, true) >>> 2 : count; - const results: T[] = []; + // Collect all packed child handles in one pass into a pre-sized array. + const children = new Array(count); + let idx = 0; + this.forEachChild(handle, child => { + children[idx++] = child; + }); + + const numItems = arity <= 1 ? count : (count / arity) | 0; + const results = new Array(numItems); if (arity <= 1) { - this.forEachChild(handle, child => { - results.push(cb(child)); - }); + for (let i = 0; i < numItems; i++) { + results[i] = cb(children[i]); + } } else { - const group: number[] = []; - this.forEachChild(handle, child => { - group.push(child); - if (group.length === arity) { - results.push(cb(...group)); - group.length = 0; - } - }); + // Reusable args buffer — one allocation, not one per group. + const args = new Array(arity); + for (let i = 0, r = 0; r < numItems; i += arity, r++) { + for (let j = 0; j < arity; j++) args[j] = children[i + j]; + results[r] = cb.apply(undefined, args); + } } return results; } From bf2c91454c566a036a1a1de7634b2a372ace5e54 Mon Sep 17 00:00:00 2001 From: Patrick Dubroy Date: Tue, 10 Mar 2026 14:29:19 +0100 Subject: [PATCH 06/10] implement forEachItem --- packages/runtime/src/cstView.ts | 53 ++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 21 deletions(-) diff --git a/packages/runtime/src/cstView.ts b/packages/runtime/src/cstView.ts index b5728fbd..cddb8256 100644 --- a/packages/runtime/src/cstView.ts +++ b/packages/runtime/src/cstView.ts @@ -145,16 +145,32 @@ export class CstView { } /** - * Map over grouped children. Works for both list and optional nodes. + * Iterate over logical items. Works for both list and optional nodes. * * - List (`_list`): groups children by the list's arity and calls `cb` - * per group. Returns `T[]` with one entry per iteration. - * - Optional (`_opt`): all children form a single group. Returns `T[]` - * with 0 elements (absent) or 1 element (present). + * per group. + * - Optional (`_opt`): all children form a single group. Calls `cb` + * once if present, not at all if absent. + */ + forEachItem(handle: number, cb: (...children: number[]) => void): void { + this._iterItems(handle, cb); + } + + /** + * Like `forEachItem`, but collects the return values into an array. */ mapItems(handle: number, cb: (...children: number[]) => T): T[] { + const results: T[] = []; + this._iterItems(handle, (...args) => { + results.push(cb(...args)); + }); + return results; + } + + /** @internal — shared implementation for forEachItem / mapItems. */ + private _iterItems(handle: number, cb: (...children: number[]) => void): void { const count = this.childCount(handle); - if (count === 0) return []; + if (count === 0) return; const raw = handle & MASK; const type = (this._ctx.view.getInt32(raw + 8, true) & @@ -165,28 +181,23 @@ export class CstView { ? this._ctx.view.getInt32(raw + 8, true) >>> 2 : count; - // Collect all packed child handles in one pass into a pre-sized array. - const children = new Array(count); - let idx = 0; - this.forEachChild(handle, child => { - children[idx++] = child; - }); - - const numItems = arity <= 1 ? count : (count / arity) | 0; - const results = new Array(numItems); if (arity <= 1) { - for (let i = 0; i < numItems; i++) { - results[i] = cb(children[i]); - } + this.forEachChild(handle, child => { + cb(child); + }); } else { - // Reusable args buffer — one allocation, not one per group. + // Collect all packed child handles, then invoke cb in groups. + const children = new Array(count); + let idx = 0; + this.forEachChild(handle, child => { + children[idx++] = child; + }); const args = new Array(arity); - for (let i = 0, r = 0; r < numItems; i += arity, r++) { + for (let i = 0; i < count; i += arity) { for (let j = 0; j < arity; j++) args[j] = children[i + j]; - results[r] = cb.apply(undefined, args); + cb.apply(undefined, args); } } - return results; } /** Number of raw children stored in this match record. */ From 4c73ab2ec8e9b73201243136f669d9daa4ad651b Mon Sep 17 00:00:00 2001 From: Patrick Dubroy Date: Tue, 10 Mar 2026 16:39:57 +0100 Subject: [PATCH 07/10] items -> chunks --- packages/runtime/src/cstView.ts | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/packages/runtime/src/cstView.ts b/packages/runtime/src/cstView.ts index cddb8256..b53ca610 100644 --- a/packages/runtime/src/cstView.ts +++ b/packages/runtime/src/cstView.ts @@ -145,30 +145,30 @@ export class CstView { } /** - * Iterate over logical items. Works for both list and optional nodes. + * Iterate over logical chunks. Works for both list and optional nodes. * * - List (`_list`): groups children by the list's arity and calls `cb` - * per group. - * - Optional (`_opt`): all children form a single group. Calls `cb` + * per chunk. + * - Optional (`_opt`): all children form a single chunk. Calls `cb` * once if present, not at all if absent. */ - forEachItem(handle: number, cb: (...children: number[]) => void): void { - this._iterItems(handle, cb); + forEachChunk(handle: number, cb: (...children: number[]) => void): void { + this._iterChunks(handle, cb); } /** - * Like `forEachItem`, but collects the return values into an array. + * Like `forEachChunk`, but collects the return values into an array. */ - mapItems(handle: number, cb: (...children: number[]) => T): T[] { + mapChunks(handle: number, cb: (...children: number[]) => T): T[] { const results: T[] = []; - this._iterItems(handle, (...args) => { + this._iterChunks(handle, (...args) => { results.push(cb(...args)); }); return results; } - /** @internal — shared implementation for forEachItem / mapItems. */ - private _iterItems(handle: number, cb: (...children: number[]) => void): void { + /** @internal — shared implementation for forEachChunk / mapChunks. */ + private _iterChunks(handle: number, cb: (...children: number[]) => void): void { const count = this.childCount(handle); if (count === 0) return; From 7c196ece1cbae031debbe7e0b88696614500fc1a Mon Sep 17 00:00:00 2001 From: Patrick Dubroy Date: Tue, 10 Mar 2026 21:14:31 +0100 Subject: [PATCH 08/10] implement unpack + test --- packages/compiler/test/test-cstView.js | 194 +++++++++++++++++++++++++ packages/runtime/src/cstView.ts | 18 +++ 2 files changed, 212 insertions(+) create mode 100644 packages/compiler/test/test-cstView.js diff --git a/packages/compiler/test/test-cstView.js b/packages/compiler/test/test-cstView.js new file mode 100644 index 00000000..675ec5b1 --- /dev/null +++ b/packages/compiler/test/test-cstView.js @@ -0,0 +1,194 @@ +import test from 'ava'; +import {readFileSync} from 'node:fs'; + +import {CstKind} from 'ohm-js/cstView'; + +import {compileAndLoad, scriptRel} from './_helpers.js'; + +const jsonSource = readFileSync(scriptRel('../../lang-json/json.ohm'), 'utf-8'); + +/** + * Walk the CST via CstView and reconstruct a JS value, exercising unpack, + * forEachChunk, mapChunks, and the basic accessors (kind, sourceString). + */ +function toJS(cst, handle) { + const k = cst.kind(handle); + switch (k) { + case 'Object_empty': + return {}; + + case 'Object_nonEmpty': + // "{" Pair ("," Pair)* "}" + return cst.unpack(handle, (_open, firstPair, restPairs, _close) => { + const obj = {}; + const [key, val] = parsePair(cst, firstPair); + obj[key] = val; + cst.forEachChunk(restPairs, (_comma, pair) => { + const [k2, v2] = parsePair(cst, pair); + obj[k2] = v2; + }); + return obj; + }); + + case 'Array_empty': + return []; + + case 'Array_nonEmpty': + // "[" Value ("," Value)* "]" + return cst.unpack(handle, (_open, firstValue, restValues, _close) => { + const arr = [toJS(cst, firstValue)]; + cst.forEachChunk(restValues, (_comma, value) => { + arr.push(toJS(cst, value)); + }); + return arr; + }); + + case 'stringLiteral': + // "\"" doubleStringCharacter* "\"" + return cst.unpack(handle, (_open, chars, _close) => { + const parts = cst.mapChunks(chars, char => toJS(cst, char)); + return parts.join(''); + }); + + case 'doubleStringCharacter_nonEscaped': + return cst.sourceString(handle); + + case 'doubleStringCharacter_escaped': + // "\\" escapeSequence + return cst.unpack(handle, (_backslash, escSeq) => toJS(cst, escSeq)); + + case 'escapeSequence_doubleQuote': + return '"'; + case 'escapeSequence_reverseSolidus': + return '\\'; + case 'escapeSequence_solidus': + return '/'; + case 'escapeSequence_backspace': + return '\b'; + case 'escapeSequence_formfeed': + return '\f'; + case 'escapeSequence_newline': + return '\n'; + case 'escapeSequence_carriageReturn': + return '\r'; + case 'escapeSequence_horizontalTab': + return '\t'; + case 'escapeSequence_codePoint': + // "u" fourHexDigits + return cst.unpack(handle, (_u, fourHex) => { + return String.fromCharCode(parseInt(cst.sourceString(fourHex), 16)); + }); + + case 'numberLiteral_withExponent': + case 'numberLiteral_withoutExponent': + case 'decimal_withFract': + case 'decimal_withoutFract': + return Number(cst.sourceString(handle)); + + case 'True': + return true; + case 'False': + return false; + case 'Null': + return null; + + default: + // Wrapper nonterminals (Value, Object, Array, String, Number, Pair, + // doubleStringCharacter, escapeSequence, etc.) — pass through to child. + if (k === CstKind.Terminal) { + return cst.sourceString(handle); + } + return cst.unpack(handle, (...children) => { + if (children.length === 1) return toJS(cst, children[0]); + throw new Error(`Unhandled kind: ${k} with ${children.length} children`); + }); + } +} + +function parsePair(cst, handle) { + // Pair = String ":" Value + return cst.unpack(handle, (key, _colon, value) => { + return [toJS(cst, key), toJS(cst, value)]; + }); +} + +function parse(g, input) { + return g.match(input).use(r => { + if (r.failed()) throw new Error(r.message); + return toJS(r.cst, r.cst.root); + }); +} + +let jsonGrammar; + +test.before(async () => { + jsonGrammar = await compileAndLoad(jsonSource); +}); + +test('empty object', t => { + t.deepEqual(parse(jsonGrammar, '{}'), {}); +}); + +test('empty array', t => { + t.deepEqual(parse(jsonGrammar, '[]'), []); +}); + +test('strings', t => { + t.is(parse(jsonGrammar, '"hello"'), 'hello'); + t.is(parse(jsonGrammar, '""'), ''); +}); + +test('numbers', t => { + t.is(parse(jsonGrammar, '0'), 0); + t.is(parse(jsonGrammar, '42'), 42); + t.is(parse(jsonGrammar, '-1'), -1); + t.is(parse(jsonGrammar, '3.14'), 3.14); + t.is(parse(jsonGrammar, '1e10'), 1e10); + t.is(parse(jsonGrammar, '2.5E-3'), 2.5e-3); +}); + +test('booleans and null', t => { + t.is(parse(jsonGrammar, 'true'), true); + t.is(parse(jsonGrammar, 'false'), false); + t.is(parse(jsonGrammar, 'null'), null); +}); + +test('simple object', t => { + t.deepEqual(parse(jsonGrammar, '{"key": "value", "num": 42}'), { + key: 'value', + num: 42, + }); +}); + +test('nested structures', t => { + const input = '{"a": [1, 2, {"b": true}], "c": null}'; + t.deepEqual(parse(jsonGrammar, input), { + a: [1, 2, {b: true}], + c: null, + }); +}); + +test('string escape sequences', t => { + t.is(parse(jsonGrammar, '"hello\\nworld"'), 'hello\nworld'); + t.is(parse(jsonGrammar, '"tab\\there"'), 'tab\there'); + t.is(parse(jsonGrammar, '"quote\\"end"'), 'quote"end'); + t.is(parse(jsonGrammar, '"slash\\\\end"'), 'slash\\end'); + t.is(parse(jsonGrammar, '"\\u0041"'), 'A'); +}); + +test('array with mixed types', t => { + t.deepEqual(parse(jsonGrammar, '[1, "two", true, null, [3]]'), [ + 1, + 'two', + true, + null, + [3], + ]); +}); + +test('deeply nested', t => { + const input = '{"a": {"b": {"c": [1, 2, 3]}}}'; + t.deepEqual(parse(jsonGrammar, input), { + a: {b: {c: [1, 2, 3]}}, + }); +}); diff --git a/packages/runtime/src/cstView.ts b/packages/runtime/src/cstView.ts index b53ca610..aac16415 100644 --- a/packages/runtime/src/cstView.ts +++ b/packages/runtime/src/cstView.ts @@ -156,6 +156,24 @@ export class CstView { this._iterChunks(handle, cb); } + /** + * Unpack a node's children into the callback arguments. + * + * For nonterminals: calls `cb` once with all children, returns the result. + * For optionals: calls `cb` once if present (returns `T`), or returns + * `undefined` if absent. + */ + unpack(handle: number, cb: (...children: number[]) => T): T | undefined { + const count = this.childCount(handle); + if (count === 0) return undefined; + const children = new Array(count); + let idx = 0; + this.forEachChild(handle, child => { + children[idx++] = child; + }); + return cb.apply(undefined, children); + } + /** * Like `forEachChunk`, but collects the return values into an array. */ From ad5251887a968f253f14247c94ae70a7bfbd9c4e Mon Sep 17 00:00:00 2001 From: Patrick Dubroy Date: Tue, 10 Mar 2026 22:18:21 +0100 Subject: [PATCH 09/10] hax --- packages/compiler/test/test-cstView.js | 16 +- packages/runtime/src/cstView.ts | 217 +++++++++++++++++++------ 2 files changed, 172 insertions(+), 61 deletions(-) diff --git a/packages/compiler/test/test-cstView.js b/packages/compiler/test/test-cstView.js index 675ec5b1..fce0b23c 100644 --- a/packages/compiler/test/test-cstView.js +++ b/packages/compiler/test/test-cstView.js @@ -9,7 +9,7 @@ const jsonSource = readFileSync(scriptRel('../../lang-json/json.ohm'), 'utf-8'); /** * Walk the CST via CstView and reconstruct a JS value, exercising unpack, - * forEachChunk, mapChunks, and the basic accessors (kind, sourceString). + * forEachGroup, and the basic accessors (kind, sourceString). */ function toJS(cst, handle) { const k = cst.kind(handle); @@ -23,7 +23,7 @@ function toJS(cst, handle) { const obj = {}; const [key, val] = parsePair(cst, firstPair); obj[key] = val; - cst.forEachChunk(restPairs, (_comma, pair) => { + cst.forEachGroup(restPairs, (_comma, pair) => { const [k2, v2] = parsePair(cst, pair); obj[k2] = v2; }); @@ -37,7 +37,7 @@ function toJS(cst, handle) { // "[" Value ("," Value)* "]" return cst.unpack(handle, (_open, firstValue, restValues, _close) => { const arr = [toJS(cst, firstValue)]; - cst.forEachChunk(restValues, (_comma, value) => { + cst.forEachGroup(restValues, (_comma, value) => { arr.push(toJS(cst, value)); }); return arr; @@ -46,7 +46,10 @@ function toJS(cst, handle) { case 'stringLiteral': // "\"" doubleStringCharacter* "\"" return cst.unpack(handle, (_open, chars, _close) => { - const parts = cst.mapChunks(chars, char => toJS(cst, char)); + const parts = []; + cst.forEachGroup(chars, char => { + parts.push(toJS(cst, char)); + }); return parts.join(''); }); @@ -98,10 +101,7 @@ function toJS(cst, handle) { if (k === CstKind.Terminal) { return cst.sourceString(handle); } - return cst.unpack(handle, (...children) => { - if (children.length === 1) return toJS(cst, children[0]); - throw new Error(`Unhandled kind: ${k} with ${children.length} children`); - }); + return toJS(cst, cst.onlyChild(handle)); } } diff --git a/packages/runtime/src/cstView.ts b/packages/runtime/src/cstView.ts index aac16415..faced694 100644 --- a/packages/runtime/src/cstView.ts +++ b/packages/runtime/src/cstView.ts @@ -69,6 +69,8 @@ export class CstView { private _ctx: MatchContext; /** @internal — precomputed rule ctor names (without parameterization). */ private _ruleCtorNames: string[]; + /** @internal — mutable cursor used by _decodeChild to avoid allocation. */ + private _pos = 0; /** Handle for the root node (with startIdx packed in). */ readonly root: number; @@ -116,6 +118,7 @@ export class CstView { /** Extract the startIdx from a handle. */ startIdx(handle: number): number { + this._checkNull(handle); return unpackStartIdx(handle); } @@ -125,6 +128,7 @@ export class CstView { * '_opt', 'spaces') for other node types. */ kind(handle: number): CstKind { + this._checkNull(handle); const raw = handle & MASK; if (isSpacesHandle(raw)) return CstKind.Spaces; if (isTaggedTerminal(raw)) return CstKind.Terminal; @@ -145,81 +149,158 @@ export class CstView { } /** - * Iterate over logical chunks. Works for both list and optional nodes. + * Iterate over logical groups. Works for both list and optional nodes. * * - List (`_list`): groups children by the list's arity and calls `cb` - * per chunk. - * - Optional (`_opt`): all children form a single chunk. Calls `cb` + * per group. + * - Optional (`_opt`): all children form a single group. Calls `cb` * once if present, not at all if absent. */ - forEachChunk(handle: number, cb: (...children: number[]) => void): void { - this._iterChunks(handle, cb); + forEachGroup(handle: number, cb: (...children: number[]) => void): void { + this._checkNull(handle); + const k = this.kind(handle); + if (k !== CstKind.List && k !== CstKind.Optional) { + throw new Error( + `forEachGroup() is only valid for list and optional nodes, got ${k}` + ); + } + const raw = handle & MASK; + const count = this._ctx.view.getUint32(raw, true); + if (count === 0) return; + + const type = (this._ctx.view.getInt32(raw + 8, true) & + MATCH_RECORD_TYPE_MASK) as MatchRecordType; + // For lists, arity is in the details field; for optionals, all children are one group. + const arity = + type === MatchRecordType.ITER_FLAG + ? this._ctx.view.getInt32(raw + 8, true) >>> 2 + : count; + + const savedPos = this._pos; + this._pos = (handle - raw) / SHIFT; + try { + // Stream groups directly — fast paths for common arities. + // Decode all children in a group before calling cb, then + // save/restore _pos around the callback for reentrancy safety. + switch (arity) { + case 1: + for (let i = 0; i < count; i++) { + const c0 = this._decodeChild(raw, i); + const p = this._pos; cb(c0); this._pos = p; + } + break; + case 2: + for (let i = 0; i < count; i += 2) { + const c0 = this._decodeChild(raw, i), c1 = this._decodeChild(raw, i + 1); + const p = this._pos; cb(c0, c1); this._pos = p; + } + break; + case 3: + for (let i = 0; i < count; i += 3) { + const c0 = this._decodeChild(raw, i), c1 = this._decodeChild(raw, i + 1), + c2 = this._decodeChild(raw, i + 2); + const p = this._pos; cb(c0, c1, c2); this._pos = p; + } + break; + case 4: + for (let i = 0; i < count; i += 4) { + const c0 = this._decodeChild(raw, i), c1 = this._decodeChild(raw, i + 1), + c2 = this._decodeChild(raw, i + 2), c3 = this._decodeChild(raw, i + 3); + const p = this._pos; cb(c0, c1, c2, c3); this._pos = p; + } + break; + default: { + const args = new Array(arity); + for (let i = 0; i < count; i += arity) { + for (let j = 0; j < arity; j++) args[j] = this._decodeChild(raw, i + j); + const p = this._pos; cb.apply(undefined, args); this._pos = p; + } + } + } + } finally { + this._pos = savedPos; + } } /** * Unpack a node's children into the callback arguments. * - * For nonterminals: calls `cb` once with all children, returns the result. + * For nonterminals: always calls `cb` with all children (even if zero), + * returns the result. * For optionals: calls `cb` once if present (returns `T`), or returns * `undefined` if absent. + * + * Not valid for terminals or lists — use `sourceString` for terminals + * and `forEachGroup` for lists. + * + * Zero-allocation for nodes with up to 6 children. For zero-allocation + * traversal of any arity, use `forEachChild` directly. */ unpack(handle: number, cb: (...children: number[]) => T): T | undefined { - const count = this.childCount(handle); - if (count === 0) return undefined; - const children = new Array(count); - let idx = 0; - this.forEachChild(handle, child => { - children[idx++] = child; - }); - return cb.apply(undefined, children); + this._checkNull(handle); + const k = this.kind(handle); + if (k === CstKind.Terminal || k === CstKind.Spaces) { + throw new Error(`unpack() is not valid for ${k} nodes; use sourceString() instead`); + } + if (k === CstKind.List) { + throw new Error(`unpack() is not valid for list nodes; use forEachGroup() instead`); + } + const raw = handle & MASK; + const count = this._ctx.view.getUint32(raw, true); + // For optionals, absent means no callback. + if (k === CstKind.Optional && count === 0) return undefined; + if (cb.length > count) { + throw new Error( + `unpack(): callback expects ${cb.length} args but ${k} has ${count} children` + ); + } + this._pos = (handle - raw) / SHIFT; + // Fast paths for common arities — no array allocation. + switch (count) { + case 0: return (cb as () => T)(); + case 1: return (cb as any)(this._decodeChild(raw, 0)); + case 2: return (cb as any)(this._decodeChild(raw, 0), this._decodeChild(raw, 1)); + case 3: return (cb as any)( + this._decodeChild(raw, 0), this._decodeChild(raw, 1), + this._decodeChild(raw, 2)); + case 4: return (cb as any)( + this._decodeChild(raw, 0), this._decodeChild(raw, 1), + this._decodeChild(raw, 2), this._decodeChild(raw, 3)); + case 5: return (cb as any)( + this._decodeChild(raw, 0), this._decodeChild(raw, 1), + this._decodeChild(raw, 2), this._decodeChild(raw, 3), + this._decodeChild(raw, 4)); + case 6: return (cb as any)( + this._decodeChild(raw, 0), this._decodeChild(raw, 1), + this._decodeChild(raw, 2), this._decodeChild(raw, 3), + this._decodeChild(raw, 4), this._decodeChild(raw, 5)); + default: { + const children = new Array(count); + for (let i = 0; i < count; i++) children[i] = this._decodeChild(raw, i); + return cb.apply(undefined, children); + } + } } /** - * Like `forEachChunk`, but collects the return values into an array. + * Return the only child of a wrapper nonterminal. Throws if the node + * doesn't have exactly one child. */ - mapChunks(handle: number, cb: (...children: number[]) => T): T[] { - const results: T[] = []; - this._iterChunks(handle, (...args) => { - results.push(cb(...args)); - }); - return results; - } - - /** @internal — shared implementation for forEachChunk / mapChunks. */ - private _iterChunks(handle: number, cb: (...children: number[]) => void): void { - const count = this.childCount(handle); - if (count === 0) return; - + onlyChild(handle: number): number { const raw = handle & MASK; - const type = (this._ctx.view.getInt32(raw + 8, true) & - MATCH_RECORD_TYPE_MASK) as MatchRecordType; - // For lists, arity is in the details field; for optionals, all children are one group. - const arity = - type === MatchRecordType.ITER_FLAG - ? this._ctx.view.getInt32(raw + 8, true) >>> 2 - : count; - - if (arity <= 1) { - this.forEachChild(handle, child => { - cb(child); - }); - } else { - // Collect all packed child handles, then invoke cb in groups. - const children = new Array(count); - let idx = 0; - this.forEachChild(handle, child => { - children[idx++] = child; - }); - const args = new Array(arity); - for (let i = 0; i < count; i += arity) { - for (let j = 0; j < arity; j++) args[j] = children[i + j]; - cb.apply(undefined, args); - } + const count = this._ctx.view.getUint32(raw, true); + if (count !== 1) { + throw new Error( + `onlyChild(): expected 1 child, got ${count} (kind: ${this.kind(handle)})` + ); } + this._pos = (handle - raw) / SHIFT; + return this._decodeChild(raw, 0); } /** Number of raw children stored in this match record. */ childCount(handle: number): number { + this._checkNull(handle); const raw = handle & MASK; if (isSpacesHandle(raw) || isTaggedTerminal(raw)) return 0; return this._ctx.view.getUint32(raw, true); @@ -227,6 +308,7 @@ export class CstView { /** Length of matched input (in UTF-16 code units). */ matchLength(handle: number): number { + this._checkNull(handle); const raw = handle & MASK; if (isSpacesHandle(raw)) return raw >>> 2; if (isTaggedTerminal(raw)) return raw >>> 1; @@ -235,6 +317,7 @@ export class CstView { /** Source string for a node. startIdx is extracted from the handle. */ sourceString(handle: number): string { + this._checkNull(handle); const raw = handle & MASK; const si = (handle - raw) / SHIFT; return this._ctx.input.slice(si, si + this.matchLength(raw)); @@ -255,8 +338,9 @@ export class CstView { handle: number, fn: (child: number, leadingSpaces: number, pos: number, index: number) => void ): void { + this._checkNull(handle); const raw = handle & MASK; - if (isTaggedTerminal(raw)) return; + if (isSpacesHandle(raw) || isTaggedTerminal(raw)) return; const count = this._ctx.view.getUint32(raw, true); let childStart = (handle - raw) / SHIFT; const {getSpacesLenAt} = this._ctx; @@ -280,6 +364,33 @@ export class CstView { } } + /** + * @internal — decode child at slot `i` of raw pointer `raw`. + * Reads and advances `this._pos`. Must be called sequentially for i = 0, 1, 2, ... + */ + private _decodeChild(raw: number, i: number): number { + const rawChild = this._ctx.view.getUint32(raw + 16 + i * 4, true); + const {getSpacesLenAt} = this._ctx; + const rawSpacesLen = + getSpacesLenAt && this._hasParentSpaces(rawChild) + ? Math.max(0, getSpacesLenAt(this._pos)) + : 0; + this._pos += rawSpacesLen; + const childHandle = this._pos * SHIFT + rawChild; + const len = isTaggedTerminal(rawChild) + ? rawChild >>> 1 + : this._ctx.view.getUint32(rawChild + 4, true); + this._pos += len; + return childHandle; + } + + /** @internal — throw on NULL_HANDLE to catch bugs early. */ + private _checkNull(handle: number): void { + if (handle === NULL_HANDLE) { + throw new Error('NULL_HANDLE passed to CstView method'); + } + } + /** Check whether a raw child handle has parent-level space skipping. */ private _hasParentSpaces(rawChild: number): boolean { if (isTaggedTerminal(rawChild)) return true; From 7e0356a127bd09419e555e96c832bb82f3a19cf2 Mon Sep 17 00:00:00 2001 From: Patrick Dubroy Date: Tue, 10 Mar 2026 22:38:29 +0100 Subject: [PATCH 10/10] simplify --- packages/compiler/test/test-cstView.js | 8 +-- packages/runtime/src/cstView.ts | 78 ++++---------------------- 2 files changed, 11 insertions(+), 75 deletions(-) diff --git a/packages/compiler/test/test-cstView.js b/packages/compiler/test/test-cstView.js index fce0b23c..58016810 100644 --- a/packages/compiler/test/test-cstView.js +++ b/packages/compiler/test/test-cstView.js @@ -177,13 +177,7 @@ test('string escape sequences', t => { }); test('array with mixed types', t => { - t.deepEqual(parse(jsonGrammar, '[1, "two", true, null, [3]]'), [ - 1, - 'two', - true, - null, - [3], - ]); + t.deepEqual(parse(jsonGrammar, '[1, "two", true, null, [3]]'), [1, 'two', true, null, [3]]); }); test('deeply nested', t => { diff --git a/packages/runtime/src/cstView.ts b/packages/runtime/src/cstView.ts index faced694..de3d0d60 100644 --- a/packages/runtime/src/cstView.ts +++ b/packages/runtime/src/cstView.ts @@ -160,9 +160,7 @@ export class CstView { this._checkNull(handle); const k = this.kind(handle); if (k !== CstKind.List && k !== CstKind.Optional) { - throw new Error( - `forEachGroup() is only valid for list and optional nodes, got ${k}` - ); + throw new Error(`forEachGroup() is only valid for list and optional nodes, got ${k}`); } const raw = handle & MASK; const count = this._ctx.view.getUint32(raw, true); @@ -179,43 +177,12 @@ export class CstView { const savedPos = this._pos; this._pos = (handle - raw) / SHIFT; try { - // Stream groups directly — fast paths for common arities. - // Decode all children in a group before calling cb, then - // save/restore _pos around the callback for reentrancy safety. - switch (arity) { - case 1: - for (let i = 0; i < count; i++) { - const c0 = this._decodeChild(raw, i); - const p = this._pos; cb(c0); this._pos = p; - } - break; - case 2: - for (let i = 0; i < count; i += 2) { - const c0 = this._decodeChild(raw, i), c1 = this._decodeChild(raw, i + 1); - const p = this._pos; cb(c0, c1); this._pos = p; - } - break; - case 3: - for (let i = 0; i < count; i += 3) { - const c0 = this._decodeChild(raw, i), c1 = this._decodeChild(raw, i + 1), - c2 = this._decodeChild(raw, i + 2); - const p = this._pos; cb(c0, c1, c2); this._pos = p; - } - break; - case 4: - for (let i = 0; i < count; i += 4) { - const c0 = this._decodeChild(raw, i), c1 = this._decodeChild(raw, i + 1), - c2 = this._decodeChild(raw, i + 2), c3 = this._decodeChild(raw, i + 3); - const p = this._pos; cb(c0, c1, c2, c3); this._pos = p; - } - break; - default: { - const args = new Array(arity); - for (let i = 0; i < count; i += arity) { - for (let j = 0; j < arity; j++) args[j] = this._decodeChild(raw, i + j); - const p = this._pos; cb.apply(undefined, args); this._pos = p; - } - } + const args = new Array(arity); + for (let i = 0; i < count; i += arity) { + for (let j = 0; j < arity; j++) args[j] = this._decodeChild(raw, i + j); + const p = this._pos; + cb.apply(undefined, args); + this._pos = p; } } finally { this._pos = savedPos; @@ -232,9 +199,6 @@ export class CstView { * * Not valid for terminals or lists — use `sourceString` for terminals * and `forEachGroup` for lists. - * - * Zero-allocation for nodes with up to 6 children. For zero-allocation - * traversal of any arity, use `forEachChild` directly. */ unpack(handle: number, cb: (...children: number[]) => T): T | undefined { this._checkNull(handle); @@ -255,31 +219,9 @@ export class CstView { ); } this._pos = (handle - raw) / SHIFT; - // Fast paths for common arities — no array allocation. - switch (count) { - case 0: return (cb as () => T)(); - case 1: return (cb as any)(this._decodeChild(raw, 0)); - case 2: return (cb as any)(this._decodeChild(raw, 0), this._decodeChild(raw, 1)); - case 3: return (cb as any)( - this._decodeChild(raw, 0), this._decodeChild(raw, 1), - this._decodeChild(raw, 2)); - case 4: return (cb as any)( - this._decodeChild(raw, 0), this._decodeChild(raw, 1), - this._decodeChild(raw, 2), this._decodeChild(raw, 3)); - case 5: return (cb as any)( - this._decodeChild(raw, 0), this._decodeChild(raw, 1), - this._decodeChild(raw, 2), this._decodeChild(raw, 3), - this._decodeChild(raw, 4)); - case 6: return (cb as any)( - this._decodeChild(raw, 0), this._decodeChild(raw, 1), - this._decodeChild(raw, 2), this._decodeChild(raw, 3), - this._decodeChild(raw, 4), this._decodeChild(raw, 5)); - default: { - const children = new Array(count); - for (let i = 0; i < count; i++) children[i] = this._decodeChild(raw, i); - return cb.apply(undefined, children); - } - } + const children = new Array(count); + for (let i = 0; i < count; i++) children[i] = this._decodeChild(raw, i); + return cb.apply(undefined, children); } /**