From effe659825290e63849e17b9d6973149559fe17e Mon Sep 17 00:00:00 2001 From: Marijn Haverbeke Date: Mon, 10 Sep 2018 16:56:10 +0200 Subject: [PATCH] Reduce reliance on 'sweet.js' tokenizer algorithm It'll now only run in plain-tokenizer or loose mode, so that the parser can use its actual knowledge about the syntax to drive disambiguation of / and regexps. Issue #589 Closes #552 --- src/expression.js | 7 +++++++ src/index.js | 4 +++- src/loose/expression.js | 4 ++++ src/loose/tokenize.js | 13 +++++++------ src/state.js | 2 +- src/tokenize.js | 9 ++++++++- 6 files changed, 30 insertions(+), 9 deletions(-) diff --git a/src/expression.js b/src/expression.js index eac0592a0..5fe33644e 100644 --- a/src/expression.js +++ b/src/expression.js @@ -297,7 +297,9 @@ pp.parseSubscripts = function(base, startPos, startLoc, noCalls) { // or `{}`. pp.parseExprAtom = function(refDestructuringErrors) { + this.turnSlashIntoRegexp() let node, canBeArrow = this.potentialArrowAt === this.start + switch (this.type) { case tt._super: if (!this.inFunction) @@ -527,17 +529,21 @@ pp.parseTemplateElement = function({isTagged}) { pp.parseTemplate = function({isTagged = false} = {}) { let node = this.startNode() + this.inTemplate = true this.next() node.expressions = [] let curElt = this.parseTemplateElement({isTagged}) node.quasis = [curElt] while (!curElt.tail) { if (this.type === tt.eof) this.raise(this.pos, "Unterminated template literal") + this.inTemplate = false this.expect(tt.dollarBraceL) node.expressions.push(this.parseExpression()) + this.inTemplate = true this.expect(tt.braceR) node.quasis.push(curElt = this.parseTemplateElement({isTagged})) } + this.inTemplate = false this.next() return this.finishNode(node, "TemplateLiteral") } @@ -857,6 +863,7 @@ pp.parseYield = function() { let node = this.startNode() this.next() + this.turnSlashIntoRegexp() if (this.type === tt.semi || this.canInsertSemicolon() || (this.type !== tt.star && !this.type.startsExpr)) { node.delegate = false node.argument = null diff --git a/src/index.js b/src/index.js index fbe245e33..ee7252a48 100644 --- a/src/index.js +++ b/src/index.js @@ -64,7 +64,9 @@ export function parseExpressionAt(input, pos, options) { // The `tokenizer` export provides an interface to the tokenizer. export function tokenizer(input, options) { - return new Parser(options, input) + let parser = new Parser(options, input) + parser.exprAllowed = true + return parser } // This is a terrible kludge to support the existing, pre-ES6 diff --git a/src/loose/expression.js b/src/loose/expression.js index 1f65ff080..310fa27db 100644 --- a/src/loose/expression.js +++ b/src/loose/expression.js @@ -340,13 +340,16 @@ lp.parseTemplateElement = function() { lp.parseTemplate = function() { let node = this.startNode() + this.toks.inTemplate = true this.next() node.expressions = [] let curElt = this.parseTemplateElement() node.quasis = [curElt] while (!curElt.tail) { + this.toks.inTemplate = false this.next() node.expressions.push(this.parseExpression()) + this.toks.inTemplate = true if (this.expect(tt.braceR)) { curElt = this.parseTemplateElement() } else { @@ -357,6 +360,7 @@ lp.parseTemplate = function() { } node.quasis.push(curElt) } + this.toks.inTemplate = false this.expect(tt.backQuote) return this.finishNode(node, "TemplateLiteral") } diff --git a/src/loose/tokenize.js b/src/loose/tokenize.js index 619b6ee9e..45e30a2f0 100644 --- a/src/loose/tokenize.js +++ b/src/loose/tokenize.js @@ -1,4 +1,4 @@ -import {tokTypes as tt, Token, isNewLine, SourceLocation, getLineInfo, lineBreakG} from "../index" +import {tokTypes as tt, isNewLine, SourceLocation, getLineInfo, lineBreakG} from "../index" import {LooseParser} from "./state" const lp = LooseParser.prototype @@ -26,14 +26,15 @@ lp.next = function() { lp.readToken = function() { for (;;) { try { - this.toks.next() - if (this.toks.type === tt.dot && - this.input.substr(this.toks.end, 1) === "." && + let tok = this.toks.getToken() + if (tok.type === tt.dot && + this.toks.input.substr(tok.end, 1) === "." && this.options.ecmaVersion >= 6) { this.toks.end++ - this.toks.type = tt.ellipsis + tok.end++ + tok.type = tt.ellipsis } - return new Token(this.toks) + return tok } catch (e) { if (!(e instanceof SyntaxError)) throw e diff --git a/src/state.js b/src/state.js index 8697f6295..d007ad9bd 100644 --- a/src/state.js +++ b/src/state.js @@ -68,7 +68,7 @@ export class Parser { // given position. this.context = this.initialContext() this.inTemplate = false - this.exprAllowed = true + this.exprAllowed = false // Figure out if it's a module code. this.inModule = options.sourceType === "module" diff --git a/src/tokenize.js b/src/tokenize.js index 775db67f1..c4a89e88e 100644 --- a/src/tokenize.js +++ b/src/tokenize.js @@ -40,11 +40,19 @@ pp.next = function() { } pp.getToken = function() { + let prevType = this.type this.next() this.updateContext(prevType) return new Token(this) } +pp.turnSlashIntoRegexp = function() { + if (this.type === tt.slash || this.type === tt.assign && this.value === "/=") { + this.pos = this.start + 1 + this.readRegexp() + } +} + // If we're in an ES6 environment, make parsers iterable if (typeof Symbol !== "undefined") pp[Symbol.iterator] = function() { @@ -170,7 +178,6 @@ pp.skipSpace = function() { pp.finishToken = function(type, val) { this.end = this.pos if (this.options.locations) this.endLoc = this.curPosition() - let prevType = this.type this.type = type this.value = val }