Skip to content

Commit b25e8bc

Browse files
committed
wasm: Add skeleton of toAST, with test, and test from Shopify repo
1 parent b616254 commit b25e8bc

File tree

7 files changed

+2244
-10
lines changed

7 files changed

+2244
-10
lines changed

packages/miniohm-js/index.js

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ class CstNode {
193193
}
194194

195195
isNonterminal() {
196-
return this.type >= 0;
196+
return this._type >= 0;
197197
}
198198

199199
isTerminal() {
@@ -204,6 +204,10 @@ class CstNode {
204204
return this._type === -2;
205205
}
206206

207+
isOptional() {
208+
return false; // TODO
209+
}
210+
207211
get ruleName() {
208212
const id = this._view.getInt32(this._base + 8, true);
209213
return this._ruleNames[id];
@@ -222,7 +226,23 @@ class CstNode {
222226
return t < 0 ? t : 0;
223227
}
224228

229+
get childrenNoSpaces() {
230+
// TODO: Lazily compute this once only.
231+
const children = [];
232+
for (let i = 0; i < this.count; i++) {
233+
const slotOffset = this._base + 16 + i * 4;
234+
const child = new CstNode(
235+
this._ruleNames,
236+
this._view,
237+
this._view.getUint32(slotOffset, true),
238+
);
239+
if (child.ruleName !== '$spaces') children.push(child);
240+
}
241+
return children;
242+
}
243+
225244
get children() {
245+
// TODO: Lazily compute this once only.
226246
const children = [];
227247
for (let i = 0; i < this.count; i++) {
228248
const slotOffset = this._base + 16 + i * 4;
@@ -232,4 +252,23 @@ class CstNode {
232252
}
233253
return children;
234254
}
255+
256+
sourceString(offset) {
257+
const bytes = new Uint8Array(
258+
this._view.buffer,
259+
INPUT_BUFFER_OFFSET + offset,
260+
this.matchLength,
261+
);
262+
const ans = utf8.decode(bytes);
263+
return ans;
264+
}
265+
266+
isSyntactic(ruleName) {
267+
const firstChar = this.ruleName[0];
268+
return firstChar === firstChar.toUpperCase();
269+
}
270+
271+
isLexical(ruleName) {
272+
return !this.isSyntactic(ruleName);
273+
}
235274
}

packages/miniohm-js/toAST.js

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
const assert = (cond, msg = 'Assertion failed') => {
2+
throw new Error(msg);
3+
};
4+
5+
/**
 * Default handler for `listOf` / `ListOf`: delegates to the single child's
 * own `toAST`, passing along the mapping found on `this.args`.
 * Must be invoked with a `this` that exposes `args.mapping`.
 */
function handleListOf(child) {
  const {mapping} = this.args;
  return child.toAST(mapping);
}
8+
9+
/**
 * Default handler for `emptyListOf` / `EmptyListOf`: an empty list maps to a
 * new empty array on every call.
 */
function handleEmptyListOf() {
  const noItems = [];
  return noItems;
}
12+
13+
/**
 * Default handler for `nonemptyListOf` / `NonemptyListOf`: the AST of `first`
 * followed by whatever `rest.toAST` yields (arrays are flattened one level by
 * `concat`). The separator argument is ignored.
 * Must be invoked with a `this` that exposes `args.mapping`.
 */
function handleNonemptyListOf(first, sep, rest) {
  const {mapping} = this.args;
  const head = first.toAST(mapping);
  const tail = rest.toAST(mapping);
  return [head].concat(tail);
}
16+
17+
// Baseline mapping that toAST() merges user-supplied entries over. Each list
// handler is registered under both the lower-case and the capitalized
// spelling of its rule name.
const defaultMapping = {
  // Whole list: forward to the child.
  listOf: handleListOf,
  ListOf: handleListOf,

  // Empty list: [].
  emptyListOf: handleEmptyListOf,
  EmptyListOf: handleEmptyListOf,

  // Non-empty list: first element followed by the rest.
  nonemptyListOf: handleNonemptyListOf,
  NonemptyListOf: handleNonemptyListOf,
};
27+
28+
/**
 * Walks a CST and produces a plain-object AST, guided by `mapping`.
 *
 * Nodes do not carry their own input position, so every visit method takes
 * the input offset at which the node starts and hands each child the offset
 * at which that child starts.
 */
class Visitor {
  /**
   * @param {object} mapping - Per-rule customizations, keyed by rule name.
   */
  constructor(mapping) {
    this.mapping = mapping;
  }

  /**
   * Dispatches on the node kind.
   *
   * @param node - CST node to convert.
   * @param {number} offset - Input offset at which `node` starts.
   * @throws {Error} If the node is neither terminal, nonterminal nor iteration.
   */
  visit(node, offset) {
    if (node.isTerminal()) {
      return this.visitTerminal(node, offset);
    } else if (node.isNonterminal()) {
      return this.visitNonterminal(node, offset);
    } else if (node.isIter()) {
      return this.visitIter(node, offset);
    } else {
      throw new Error(`Unknown node type: ${node._type}`);
    }
  }

  /** A terminal maps to the text it matched. */
  visitTerminal(node, offset) {
    return node.sourceString(offset);
  }

  /**
   * Start offsets of `node`'s children with `$spaces` nonterminals left out,
   * so the result lines up index-for-index with `node.childrenNoSpaces`.
   * Skipped `$spaces` children still advance the running offset.
   * Assumes the children are laid out back to back starting at `offset`.
   */
  _childOffsets(node, offset) {
    const offsets = [];
    let currOffset = offset;
    for (const c of node.children) {
      const isSpaces = c.isNonterminal() && c.ruleName === '$spaces';
      if (!isSpaces) offsets.push(currOffset);
      currOffset += c.matchLength;
    }
    return offsets;
  }

  /**
   * Converts a nonterminal node.
   *
   * Without a mapping entry: lexical rules become their source text, and a
   * node with exactly one non-terminal child is replaced by that child's AST.
   * Otherwise an object `{type: ruleName, ...}` is built from the mapping
   * entry, or from the children themselves when there is none.
   */
  visitNonterminal(node, offset) {
    const {ruleName} = node;
    const children = node.childrenNoSpaces;
    const {mapping} = this;
    const childOffsets = this._childOffsets(node, offset);

    // without customization
    if (!Object.hasOwn(mapping, ruleName)) {
      // lexical rule
      if (node.isLexical()) {
        return node.sourceString(offset);
      }

      // singular node (e.g. only surrounded by literals or lookaheads)
      const realChildren = children.filter(c => !c.isTerminal());
      if (realChildren.length === 1) {
        const idx = children.indexOf(realChildren[0]);
        return this.visit(realChildren[0], childOffsets[idx]);
      }

      // rest: terms with multiple children
    }
    // direct forward
    if (typeof mapping[ruleName] === 'number') {
      assert(false, 'not handled: direct forward');
      const idx = mapping[ruleName];
      return this.visit(children[idx], childOffsets[idx]);
    }

    // named/mapped children or unnamed children ('0', '1', '2', ...)
    const propMap = mapping[ruleName] || children;
    const ans = {
      type: ruleName,
    };
    // eslint-disable-next-line guard-for-in
    for (const prop in propMap) {
      const mappedProp = mapping[ruleName] && mapping[ruleName][prop];
      if (typeof mappedProp === 'number') {
        // direct forward
        ans[prop] = this.visit(children[mappedProp], childOffsets[mappedProp]);
      } else if (
        typeof mappedProp === 'string' ||
        typeof mappedProp === 'boolean' ||
        mappedProp === null
      ) {
        // primitive value
        ans[prop] = mappedProp;
      } else if (typeof mappedProp === 'object' && mappedProp instanceof Number) {
        // primitive number (must be unboxed)
        ans[prop] = Number(mappedProp);
      } else if (typeof mappedProp === 'function') {
        // computed value
        ans[prop] = mappedProp.call(node, children);
      } else if (mappedProp === undefined) {
        if (children[prop] && !children[prop].isTerminal()) {
          // The child needs its own start offset, otherwise any source text
          // it produces is read from the wrong position.
          ans[prop] = this.visit(children[prop], childOffsets[prop]);
        } else {
          // delete predefined properties, like 'type', if explicitly removed
          delete ans[prop];
        }
      }
    }
    return ans;
  }

  /**
   * Converts an iteration node: `null` or the single child's AST for an
   * optional, otherwise an array with one AST per child.
   *
   * @param node - Iteration node.
   * @param {number} offset - Input offset at which `node` starts.
   */
  visitIter(node, offset) {
    const children = node.childrenNoSpaces;
    const childOffsets = this._childOffsets(node, offset);
    if (node.isOptional()) {
      if (children.length === 0) {
        return null;
      } else {
        return this.visit(children[0], childOffsets[0]);
      }
    }

    return children.map((c, i) => this.visit(c, childOffsets[i]));
  }
}
132+
133+
// Returns a plain JavaScript object that represents an abstract syntax tree (AST)
// for the concrete syntax tree (CST) whose root is `matcher.getCstRoot()`.
// The optional `mapping` parameter can be used to customize how the nodes of the CST
// are mapped to the AST (see /doc/extras.md#toastmatchresult-mapping); its entries
// take precedence over the ones in `defaultMapping`.
export function toAST(matcher, mapping) {
  const effectiveMapping = Object.assign({}, defaultMapping, mapping);
  // pd: A separate `operation` table (function-valued mapping entries moved out
  // of the mapping) is intentionally not built here; unclear if/how it is
  // actually being used.
  const visitor = new Visitor(effectiveMapping);
  return visitor.visit(matcher.getCstRoot(), 0);
}

packages/wasm/package.json

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,21 +22,20 @@
2222
"packageManager": "[email protected]",
2323
"devDependencies": {
2424
"@ohm-js/miniohm-js": "workspace:^",
25+
"@shopify/liquid-html-parser": "^2.8.2",
2526
"@thi.ng/leb128": "^3.1.48",
2627
"assemblyscript": "^0.27.36",
2728
"ava": "^6.2.0",
2829
"esbuild": "^0.25.5",
2930
"fast-check": "^4.2.0",
3031
"fast-glob": "^3.3.3",
31-
"liquid-html-parser": "link:@shopify/liquid-html-parser",
3232
"mitata": "^1.0.34",
3333
"wabt": "1.0.37-nightly.20250428"
3434
},
3535
"peerDependencies": {
3636
"ohm-js": "^17.1.0"
3737
},
3838
"dependencies": {
39-
"@shopify/liquid-html-parser": "^2.8.2",
4039
"@wasmgroundup/emit": "^1.0.2"
4140
}
4241
}

0 commit comments

Comments
 (0)