@@ -683,14 +683,10 @@ export class Compiler {
683683 // It is *not* the same as the function index of the rule's eval function.
684684 this . ruleIdByName = new IndexedSet ( ) ;
685685
686- this . _specialRules = [ 'spaces' , 'alnum' , 'any' ] ;
687-
688686 // Ensure default start rule has id 0; $term, 1; and $spaces, 2.
689687 this . _ensureRuleId ( grammar . defaultStartRule ) ;
690688 this . _ensureRuleId ( '$term' ) ;
691- this . _specialRules . forEach ( name => {
692- this . _ensureRuleId ( name ) ;
693- } ) ;
689+ this . _ensureRuleId ( '$spaces' ) ;
694690
695691 this . rules = undefined ;
696692 this . _nextLiftedId = 0 ;
@@ -833,20 +829,18 @@ export class Compiler {
833829 simplifyApplications ( ) {
834830 const { grammar} = this ;
835831
836- const lookUpRule = name => {
837- assert ( name in grammar . rules ) ;
838- return { ...grammar . rules [ name ] , isSyntactic : isSyntacticRule ( name ) } ;
839- } ;
840-
841- // Begin with all the rules in the grammar + all "special" rules.
842- const rules = Object . entries ( this . grammar . rules ) . map ( ( [ name , info ] ) => {
843- const isSyntactic = isSyntacticRule ( name ) ;
844- return [ name , { ...info , isSyntactic} ] ;
845- } ) ;
846- this . _specialRules . forEach ( name => {
847- rules . push ( [ name , { ...lookUpRule ( name ) } ] ) ;
832+ const lookUpRule = name => ( {
833+ ...checkNotNull ( grammar . rules [ name ] ) ,
834+ isSyntactic : isSyntacticRule ( name ) ,
848835 } ) ;
849836
837+ // Begin with all the rules directly defined in the grammar.
838+ const ownRuleNames = Object . keys ( grammar . rules ) . filter ( name =>
839+ Object . hasOwn ( grammar . rules , name ) ,
840+ ) ;
841+ const rules = ownRuleNames . map ( name => [ name , lookUpRule ( name ) ] ) ;
842+ rules . push ( [ 'spaces' , lookUpRule ( 'spaces' ) ] ) ; // Ensure 'spaces' is always present.
843+
850844 const liftedTerminals = new IndexedSet ( ) ;
851845
852846 const liftTerminal = ( { obj} ) => {
@@ -880,14 +874,21 @@ export class Compiler {
880874 if ( exp === pexprs . any ) return ir . any ( ) ;
881875 if ( exp === pexprs . end ) return ir . end ( ) ;
882876 switch ( exp . constructor ) {
883- case pexprs . Apply :
884- rules . push ( [ exp . ruleName , checkNotNull ( lookUpRule ( exp . ruleName ) ) ] ) ;
877+ case pexprs . Apply : {
878+ const ruleInfo = lookUpRule ( exp . ruleName ) ;
879+
880+ // Replace an application of the built-in caseInsensitive rule with
881+ // an inlined case-insensitive terminal.
882+ if ( ruleInfo . body instanceof pexprs . CaseInsensitiveTerminal ) {
883+ assert ( exp . args . length === 1 && exp . args [ 0 ] instanceof pexprs . Terminal ) ;
884+ return ir . terminal ( exp . args [ 0 ] . obj , true ) ;
885+ }
886+ rules . push ( [ exp . ruleName , ruleInfo ] ) ;
885887 return ir . apply (
886888 exp . ruleName ,
887889 exp . args . map ( arg => simplifyArg ( arg , isSyntactic ) ) ,
888890 ) ;
889- case pexprs . CaseInsensitive :
890- return ir . caseInsensitive ( exp . obj ) ;
891+ }
891892 case pexprs . Lex :
892893 return ir . lex ( simplify ( exp . expr , true ) ) ;
893894 case pexprs . Lookahead :
@@ -1026,9 +1027,17 @@ export class Compiler {
10261027 ir . rewrite ( exp , {
10271028 Apply : app => {
10281029 const { ruleName, children} = app ;
1030+ const ruleInfo = getNotNull ( rules , ruleName ) ;
1031+
1032+ // Inline any applications of the built-in caseInsensitive rule.
1033+ if ( ruleInfo . body instanceof pexprs . CaseInsensitiveTerminal ) {
1034+ assert ( children . length === 1 && children [ 0 ] instanceof pexprs . Terminal ) ;
1035+ return ir . terminal ( children [ 0 ] , true ) ;
1036+ }
1037+
10291038 // Inline these. TODO: Handle this elsewhere.
1030- if ( [ 'caseInsensitive' , 'liquidRawTagImpl' , 'liquidTagRule' ] . includes ( ruleName ) ) {
1031- const ruleInfo = getNotNull ( rules , ruleName ) ;
1039+ // We need this to avoid having >256 rules in the Liquid grammar.
1040+ if ( [ 'liquidRawTagImpl' , 'liquidTagRule' ] . includes ( ruleName ) ) {
10321041 return specialize ( ir . substituteParams ( ruleInfo . body , children ) ) ;
10331042 }
10341043
@@ -1038,7 +1047,6 @@ export class Compiler {
10381047 // If not yet seen, recursively visit the body of the specialized
10391048 // rule. Note that this also applies to non-parameterized rules!
10401049 if ( ! newRules . has ( specializedName ) ) {
1041- const ruleInfo = getNotNull ( rules , ruleName ) ;
10421050 newRules . set ( specializedName , { } ) ; // Prevent infinite recursion.
10431051
10441052 // Visit the body with the parameter substituted, to ensure we
@@ -1066,15 +1074,16 @@ export class Compiler {
10661074 } ,
10671075 } ) ;
10681076 specialize ( ir . apply ( this . grammar . defaultStartRule ) ) ;
1069- this . _specialRules . forEach ( name => {
1070- specialize ( ir . apply ( name ) ) ;
1071- } ) ;
1072- this . rules = newRules ;
10731077
10741078 // Make a special rule for implicit space skipping, with the same body
10751079 // as the real `spaces` rule.
1076- this . _ensureRuleId ( '$spaces' , { notMemoized : true } ) ;
1077- newRules . set ( '$spaces' , getNotNull ( newRules , 'spaces' ) ) ;
1080+ const spacesInfo = getNotNull ( rules , 'spaces' ) ;
1081+ newRules . set ( '$spaces' , {
1082+ ...spacesInfo ,
1083+ body : specialize ( spacesInfo . body ) ,
1084+ } ) ;
1085+
1086+ this . rules = newRules ;
10781087
10791088 if ( EMIT_GENERALIZED_RULES ) {
10801089 const insertDispatches = ( exp , patterns ) =>
@@ -1602,9 +1611,10 @@ export class Compiler {
16021611 ) ;
16031612 }
16041613
1605- emitTerminal ( { value} ) {
1614+ emitTerminal ( { value, caseInsensitive } ) {
16061615 const { asm} = this ;
16071616 asm . emit ( JSON . stringify ( value ) ) ;
1617+ assert ( ! caseInsensitive || [ ...value ] . every ( c => c <= '\x7f' ) , 'no unicode' ) ;
16081618 this . wrapTerminalLike ( ( ) => {
16091619 // TODO:
16101620 // - proper UTF-8!
0 commit comments