Skip to content

Commit aabaf90

Browse files
committed
wasm: remove "special" rules, use orig version of Liquid grammar
1 parent 8ef17b0 commit aabaf90

File tree

5 files changed

+51
-45
lines changed

5 files changed

+51
-45
lines changed

packages/ohm-js/src/InputStream.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ export class InputStream {
4646
4747
This is intended to be a locale-invariant comparison, which means it may not obey
4848
locale-specific expectations (e.g. "i" => "İ").
49+
50+
See also https://unicode.org/faq/casemap_charprop.html#casemap
4951
*/
5052
for (idx = 0; idx < s.length; idx++) {
5153
const actual = this.next();

packages/wasm/src/index.js

Lines changed: 41 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -683,14 +683,10 @@ export class Compiler {
683683
// It is *not* the same as the function index of the rule's eval function.
684684
this.ruleIdByName = new IndexedSet();
685685

686-
this._specialRules = ['spaces', 'alnum', 'any'];
687-
688686
// Ensure default start rule has id 0; $term, 1; and $spaces, 2.
689687
this._ensureRuleId(grammar.defaultStartRule);
690688
this._ensureRuleId('$term');
691-
this._specialRules.forEach(name => {
692-
this._ensureRuleId(name);
693-
});
689+
this._ensureRuleId('$spaces');
694690

695691
this.rules = undefined;
696692
this._nextLiftedId = 0;
@@ -833,20 +829,18 @@ export class Compiler {
833829
simplifyApplications() {
834830
const {grammar} = this;
835831

836-
const lookUpRule = name => {
837-
assert(name in grammar.rules);
838-
return {...grammar.rules[name], isSyntactic: isSyntacticRule(name)};
839-
};
840-
841-
// Begin with all the rules in the grammar + all "special" rules.
842-
const rules = Object.entries(this.grammar.rules).map(([name, info]) => {
843-
const isSyntactic = isSyntacticRule(name);
844-
return [name, {...info, isSyntactic}];
845-
});
846-
this._specialRules.forEach(name => {
847-
rules.push([name, {...lookUpRule(name)}]);
832+
const lookUpRule = name => ({
833+
...checkNotNull(grammar.rules[name]),
834+
isSyntactic: isSyntacticRule(name),
848835
});
849836

837+
// Begin with all the rules directly defined in the grammar.
838+
const ownRuleNames = Object.keys(grammar.rules).filter(name =>
839+
Object.hasOwn(grammar.rules, name),
840+
);
841+
const rules = ownRuleNames.map(name => [name, lookUpRule(name)]);
842+
rules.push(['spaces', lookUpRule('spaces')]); // Ensure 'spaces' is always present.
843+
850844
const liftedTerminals = new IndexedSet();
851845

852846
const liftTerminal = ({obj}) => {
@@ -880,14 +874,21 @@ export class Compiler {
880874
if (exp === pexprs.any) return ir.any();
881875
if (exp === pexprs.end) return ir.end();
882876
switch (exp.constructor) {
883-
case pexprs.Apply:
884-
rules.push([exp.ruleName, checkNotNull(lookUpRule(exp.ruleName))]);
877+
case pexprs.Apply: {
878+
const ruleInfo = lookUpRule(exp.ruleName);
879+
880+
// Replace an application of the built-in caseInsensitive rule with
881+
// an inlined case-insensitive terminal.
882+
if (ruleInfo.body instanceof pexprs.CaseInsensitiveTerminal) {
883+
assert(exp.args.length === 1 && exp.args[0] instanceof pexprs.Terminal);
884+
return ir.terminal(exp.args[0].obj, true);
885+
}
886+
rules.push([exp.ruleName, ruleInfo]);
885887
return ir.apply(
886888
exp.ruleName,
887889
exp.args.map(arg => simplifyArg(arg, isSyntactic)),
888890
);
889-
case pexprs.CaseInsensitive:
890-
return ir.caseInsensitive(exp.obj);
891+
}
891892
case pexprs.Lex:
892893
return ir.lex(simplify(exp.expr, true));
893894
case pexprs.Lookahead:
@@ -1026,9 +1027,17 @@ export class Compiler {
10261027
ir.rewrite(exp, {
10271028
Apply: app => {
10281029
const {ruleName, children} = app;
1030+
const ruleInfo = getNotNull(rules, ruleName);
1031+
1032+
// Inline any applications of the built-in caseInsensitive rule.
1033+
if (ruleInfo.body instanceof pexprs.CaseInsensitiveTerminal) {
1034+
assert(children.length === 1 && children[0] instanceof pexprs.Terminal);
1035+
return ir.terminal(children[0], true);
1036+
}
1037+
10291038
// Inline these. TODO: Handle this elsewhere.
1030-
if (['caseInsensitive', 'liquidRawTagImpl', 'liquidTagRule'].includes(ruleName)) {
1031-
const ruleInfo = getNotNull(rules, ruleName);
1039+
// We need this to avoid having >256 rules in the Liquid grammar.
1040+
if (['liquidRawTagImpl', 'liquidTagRule'].includes(ruleName)) {
10321041
return specialize(ir.substituteParams(ruleInfo.body, children));
10331042
}
10341043

@@ -1038,7 +1047,6 @@ export class Compiler {
10381047
// If not yet seen, recursively visit the body of the specialized
10391048
// rule. Note that this also applies to non-parameterized rules!
10401049
if (!newRules.has(specializedName)) {
1041-
const ruleInfo = getNotNull(rules, ruleName);
10421050
newRules.set(specializedName, {}); // Prevent infinite recursion.
10431051

10441052
// Visit the body with the parameter substituted, to ensure we
@@ -1066,15 +1074,16 @@ export class Compiler {
10661074
},
10671075
});
10681076
specialize(ir.apply(this.grammar.defaultStartRule));
1069-
this._specialRules.forEach(name => {
1070-
specialize(ir.apply(name));
1071-
});
1072-
this.rules = newRules;
10731077

10741078
// Make a special rule for implicit space skipping, with the same body
10751079
// as the real `spaces` rule.
1076-
this._ensureRuleId('$spaces', {notMemoized: true});
1077-
newRules.set('$spaces', getNotNull(newRules, 'spaces'));
1080+
const spacesInfo = getNotNull(rules, 'spaces');
1081+
newRules.set('$spaces', {
1082+
...spacesInfo,
1083+
body: specialize(spacesInfo.body),
1084+
});
1085+
1086+
this.rules = newRules;
10781087

10791088
if (EMIT_GENERALIZED_RULES) {
10801089
const insertDispatches = (exp, patterns) =>
@@ -1602,9 +1611,10 @@ export class Compiler {
16021611
);
16031612
}
16041613

1605-
emitTerminal({value}) {
1614+
emitTerminal({value, caseInsensitive}) {
16061615
const {asm} = this;
16071616
asm.emit(JSON.stringify(value));
1617+
assert(!caseInsensitive || [...value].every(c => c <= '\x7f'), 'no unicode');
16081618
this.wrapTerminalLike(() => {
16091619
// TODO:
16101620
// - proper UTF-8!

packages/wasm/src/ir.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,9 +156,14 @@ export const star = (child: Expr): Star => ({type: 'Star', child});
156156
export interface Terminal {
157157
type: 'Terminal';
158158
value: string;
159+
caseInsensitive: boolean;
159160
}
160161

161-
export const terminal = (value: string): Terminal => ({type: 'Terminal', value});
162+
export const terminal = (value: string, caseInsensitive = false): Terminal => ({
163+
type: 'Terminal',
164+
value,
165+
caseInsensitive
166+
});
162167

163168
export interface UnicodeChar {
164169
type: 'UnicodeChar';

packages/wasm/test/data/liquid-html.ohm

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,6 @@ Helpers {
33
TextNode = AnyExceptPlus<openControl>
44
openControl = end
55

6-
// Temp overrides for things not yet supported in Wasm grammars
7-
caseInsensitive<t> := t
8-
96
empty = /* nothing */
107
anyExcept<lit> = (~ lit any)
118
anyExceptStar<lit> = (~ lit any)*

packages/wasm/test/test-wasm.js

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -843,11 +843,8 @@ test('specialized rule names', t => {
843843

844844
t.deepEqual([...compiler.rules.keys()].sort(), [
845845
'$spaces',
846-
'alnum',
847-
'any',
848846
'commaSep',
849847
'commaSep<exclaimed<$term$0>>',
850-
'digit',
851848
'emptyListOf',
852849
'emptyListOf<exclaimed<$term$0>,$term$1>',
853850
'exclaimed',
@@ -858,20 +855,15 @@ test('specialized rule names', t => {
858855
'flip<exclaimed<hello2>,hello>',
859856
'hello',
860857
'hello2',
861-
'letter',
862858
'listOf',
863859
'listOf<exclaimed<$term$0>,$term$1>',
864-
'lower',
865860
'nonemptyListOf',
866861
'nonemptyListOf<exclaimed<$term$0>,$term$1>',
867862
'one',
868863
'space',
869-
'spaces',
870864
'start',
871865
'three',
872-
'two',
873-
'unicodeLtmo',
874-
'upper',
866+
'two'
875867
]);
876868
});
877869

@@ -969,7 +961,7 @@ test('unicode built-ins: non-ASII (fast-check)', async t => {
969961
};
970962
const details = fc.check(hasExpectedResult(m), {
971963
includeErrorInReport: true,
972-
interruptAfterTimeLimit: 200,
964+
interruptAfterTimeLimit: 200
973965
});
974966
t.log(`numRuns: ${details.numRuns}`);
975967
t.is(details.failed, false, `${fc.defaultReportMessage(details)}`);

0 commit comments

Comments
 (0)