Reduce reliance on 'sweet.js' tokenizer algorithm

The 'sweet.js' tokenizer algorithm now runs only in plain-tokenizer or loose mode, so that during regular parsing the parser can use its actual knowledge of the syntax to disambiguate the division operator `/` from regular expression literals. Issue #589. Closes #552.
acornjs · Sep 10, 2018 · effe659 · effe659
1 parent a17399c
commit effe659
Show file tree

Hide file tree

Showing 6 changed files with 30 additions and 9 deletions.
diff --git a/src/expression.js b/src/expression.js
@@ -297,7 +297,9 @@ pp.parseSubscripts = function(base, startPos, startLoc, noCalls) {
 // or `{}`.
 
 pp.parseExprAtom = function(refDestructuringErrors) {
+ this.turnSlashIntoRegexp()
  let node, canBeArrow = this.potentialArrowAt === this.start
+
  switch (this.type) {
  case tt._super:
  if (!this.inFunction)
@@ -527,17 +529,21 @@ pp.parseTemplateElement = function({isTagged}) {
 
 pp.parseTemplate = function({isTagged = false} = {}) {
  let node = this.startNode()
+ this.inTemplate = true
  this.next()
  node.expressions = []
  let curElt = this.parseTemplateElement({isTagged})
  node.quasis = [curElt]
  while (!curElt.tail) {
  if (this.type === tt.eof) this.raise(this.pos, "Unterminated template literal")
+ this.inTemplate = false
  this.expect(tt.dollarBraceL)
  node.expressions.push(this.parseExpression())
+ this.inTemplate = true
  this.expect(tt.braceR)
  node.quasis.push(curElt = this.parseTemplateElement({isTagged}))
  }
+ this.inTemplate = false
  this.next()
  return this.finishNode(node, "TemplateLiteral")
 }
@@ -857,6 +863,7 @@ pp.parseYield = function() {
 
  let node = this.startNode()
  this.next()
+ this.turnSlashIntoRegexp()
  if (this.type === tt.semi || this.canInsertSemicolon() || (this.type !== tt.star && !this.type.startsExpr)) {
  node.delegate = false
  node.argument = null

diff --git a/src/index.js b/src/index.js
@@ -64,7 +64,9 @@ export function parseExpressionAt(input, pos, options) {
 // The `tokenizer` export provides an interface to the tokenizer.
 
 export function tokenizer(input, options) {
- return new Parser(options, input)
+ let parser = new Parser(options, input)
+ parser.exprAllowed = true
+ return parser
 }
 
 // This is a terrible kludge to support the existing, pre-ES6

diff --git a/src/loose/expression.js b/src/loose/expression.js
@@ -340,13 +340,16 @@ lp.parseTemplateElement = function() {
 
 lp.parseTemplate = function() {
  let node = this.startNode()
+ this.toks.inTemplate = true
  this.next()
  node.expressions = []
  let curElt = this.parseTemplateElement()
  node.quasis = [curElt]
  while (!curElt.tail) {
+ this.toks.inTemplate = false
  this.next()
  node.expressions.push(this.parseExpression())
+ this.toks.inTemplate = true
  if (this.expect(tt.braceR)) {
  curElt = this.parseTemplateElement()
  } else {
@@ -357,6 +360,7 @@ lp.parseTemplate = function() {
  }
  node.quasis.push(curElt)
  }
+ this.toks.inTemplate = false
  this.expect(tt.backQuote)
  return this.finishNode(node, "TemplateLiteral")
 }

diff --git a/src/loose/tokenize.js b/src/loose/tokenize.js
@@ -1,4 +1,4 @@
-import {tokTypes as tt, Token, isNewLine, SourceLocation, getLineInfo, lineBreakG} from "../index"
+import {tokTypes as tt, isNewLine, SourceLocation, getLineInfo, lineBreakG} from "../index"
 import {LooseParser} from "./state"
 
 const lp = LooseParser.prototype
@@ -26,14 +26,15 @@ lp.next = function() {
 lp.readToken = function() {
  for (;;) {
  try {
- this.toks.next()
- if (this.toks.type === tt.dot &&
- this.input.substr(this.toks.end, 1) === "." &&
+ let tok = this.toks.getToken()
+ if (tok.type === tt.dot &&
+ this.toks.input.substr(tok.end, 1) === "." &&
  this.options.ecmaVersion >= 6) {
  this.toks.end++
- this.toks.type = tt.ellipsis
+ tok.end++
+ tok.type = tt.ellipsis
  }
- return new Token(this.toks)
+ return tok
  } catch (e) {
  if (!(e instanceof SyntaxError)) throw e
 

diff --git a/src/state.js b/src/state.js
@@ -68,7 +68,7 @@ export class Parser {
  // given position.
  this.context = this.initialContext()
  this.inTemplate = false
- this.exprAllowed = true
+ this.exprAllowed = false
 
  // Figure out if it's a module code.
  this.inModule = options.sourceType === "module"

diff --git a/src/tokenize.js b/src/tokenize.js
@@ -40,11 +40,19 @@ pp.next = function() {
 }
 
 pp.getToken = function() {
+ let prevType = this.type
  this.next()
  this.updateContext(prevType)
  return new Token(this)
 }
 
+pp.turnSlashIntoRegexp = function() {
+ if (this.type === tt.slash || this.type === tt.assign && this.value === "/=") {
+ this.pos = this.start + 1
+ this.readRegexp()
+ }
+}
+
 // If we're in an ES6 environment, make parsers iterable
 if (typeof Symbol !== "undefined")
  pp[Symbol.iterator] = function() {
@@ -170,7 +178,6 @@ pp.skipSpace = function() {
 pp.finishToken = function(type, val) {
  this.end = this.pos
  if (this.options.locations) this.endLoc = this.curPosition()
- let prevType = this.type
  this.type = type
  this.value = val
 }