feat: add start and end positions of tag nodes (#246)
Closes #134

* feat: Add start and end positions of tag nodes

Improves accuracy of row/col error reporting. Now targets the start of the relevant token instead of the end.

* Simplify language for TagNode and Token

* Update static TagNode.create to ingest setStart() logic

Improves readability of the end position offset for no-attr tags
Alteras1 authored Aug 1, 2024
1 parent 0beab56 commit 4084874
Showing 13 changed files with 931 additions and 388 deletions.
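
Based on the updated core tests in this commit, parsed tag nodes now carry `start` and `end` ranges pointing at the opening and closing tags. A minimal sketch of what a consumer sees, assuming `parse` from `@bbob/parser` returns the same node shape the tests below assert:

```ts
import { parse } from '@bbob/parser';

// Offsets into the source string:
// '[style size="15px"]Large Text[/style]'
// opening tag: indices 0-18, closing tag: indices 29-36 ('to' is exclusive)
const [node] = parse('[style size="15px"]Large Text[/style]');

if (typeof node !== 'string') {
  console.log(node.start); // { from: 0, to: 19 }
  console.log(node.end);   // { from: 29, to: 37 }
}
```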
17 changes: 17 additions & 0 deletions .changeset/ninety-points-explode.md
@@ -0,0 +1,17 @@
+---
+"@bbob/plugin-helper": minor
+"@bbob/parser": minor
+"@bbob/types": minor
+"@bbob/cli": minor
+"@bbob/core": minor
+"@bbob/html": minor
+"@bbob/preset": minor
+"@bbob/preset-html5": minor
+"@bbob/preset-react": minor
+"@bbob/preset-vue": minor
+"@bbob/react": minor
+"@bbob/vue2": minor
+"@bbob/vue3": minor
+---
+
+feat: Add start and end positions of tag nodes
68 changes: 46 additions & 22 deletions packages/bbob-core/test/index.test.ts
@@ -1,5 +1,5 @@
-import { TagNode } from '@bbob/parser'
-import core, { BBobPluginFunction, BBobPlugins } from '../src'
+import { TagNode } from '@bbob/parser';
+import core, { BBobPluginFunction, BBobPlugins } from '../src';
 import { isTagNode } from "@bbob/plugin-helper";
 
 const stringify = (val: unknown) => JSON.stringify(val);
@@ -11,15 +11,17 @@ describe('@bbob/core', () => {
     const res = process([], '[style size="15px"]Large Text[/style]');
     const ast = res.tree;
 
-    expect(res.html).toBe('[{"tag":"style","attrs":{"size":"15px"},"content":["Large"," ","Text"]}]');
+    expect(res.html).toBe('[{"tag":"style","attrs":{"size":"15px"},"content":["Large"," ","Text"],"start":{"from":0,"to":19},"end":{"from":29,"to":37}}]');
     expect(ast).toBeInstanceOf(Array);
     expect(stringify(ast)).toEqual(stringify([
       {
         tag: 'style',
         attrs: { size: '15px' },
-        content: ["Large", " ", "Text"]
+        content: ["Large", " ", "Text"],
+        start: { from: 0, to: 19 },
+        end: { from: 29, to: 37 },
       }
-    ]))
+    ]));
   });
 
   test('plugin walk api node', () => {
@@ -39,11 +41,11 @@ describe('@bbob/core', () => {
 
         }
 
-        return node
+        return node;
       });
 
-      return plugin
-    }
+      return plugin;
+    };
 
     const res = process([testPlugin()], '[mytag size="15px"]Large Text[/mytag]');
     const ast = res.tree;
@@ -61,7 +63,15 @@ describe('@bbob/core', () => {
           ' ',
           'Text',
           'Test'
-        ]
+        ],
+        start: {
+          from: 0,
+          to: 19
+        },
+        end: {
+          from: 29,
+          to: 37
+        }
       }
     ]));
   });
@@ -71,13 +81,13 @@ describe('@bbob/core', () => {
 
       const plugin: BBobPluginFunction = (tree) => tree.walk(node => {
         if (node === ':)') {
-          return TagNode.create('test-tag', {}, [])
+          return TagNode.create('test-tag', {}, []);
         }
 
-        return node
-      })
+        return node;
+      });
 
-      return plugin
+      return plugin;
     };
 
     const res = process([testPlugin()], '[mytag]Large Text :)[/mytag]');
@@ -99,7 +109,15 @@ describe('@bbob/core', () => {
             attrs: {},
             content: [],
           }
-        ]
+        ],
+        start: {
+          from: 0,
+          to: 7
+        },
+        end: {
+          from: 20,
+          to: 28
+        }
       }
     ]));
   });
@@ -109,13 +127,13 @@ describe('@bbob/core', () => {
 
      const plugin: BBobPluginFunction = (tree) => tree.match([{ tag: 'mytag1' }, { tag: 'mytag2' }], node => {
        if (isTagNode(node) && node.attrs) {
-         node.attrs['pass'] = 1
+         node.attrs['pass'] = 1;
        }
 
-       return node
-     })
+       return node;
+     });
 
-     return plugin
+     return plugin;
    };
 
    const res = process([testPlugin()], `[mytag1 size="15"]Tag1[/mytag1][mytag2 size="16"]Tag2[/mytag2][mytag3]Tag3[/mytag3]`);
@@ -132,7 +150,9 @@ },
         },
         content: [
           'Tag1'
-        ]
+        ],
+        start: { from: 0, to: 18 },
+        end: { from: 22, to: 31 }
       },
       {
         tag: 'mytag2',
@@ -142,15 +162,19 @@ describe('@bbob/core', () => {
         },
         content: [
           'Tag2'
-        ]
+        ],
+        start: { from: 31, to: 49 },
+        end: { from: 53, to: 62 }
       },
       {
         tag: 'mytag3',
         attrs: {},
         content: [
           'Tag3'
-        ]
+        ],
+        start: { from: 62, to: 70 },
+        end: { from: 74, to: 83 }
       }
     ]));
-  })
+  });
 });
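
The practical payoff shows up in plugins: with `start`/`end` on each node, a validation error can point at the first character of the offending tag instead of wherever its token ended. A hedged sketch — the plugin, tag name, and message are illustrative, not part of this commit, and it assumes `start` is exposed as an optional field on tag nodes:

```ts
import type { BBobPluginFunction } from '@bbob/core';
import { isTagNode } from '@bbob/plugin-helper';

// Hypothetical validation plugin: reject [script] tags and report where they begin.
const forbidScriptTags: BBobPluginFunction = (tree) => tree.walk((node) => {
  if (isTagNode(node) && node.tag === 'script' && node.start) {
    throw new Error(`[script] is not allowed (offset ${node.start.from})`);
  }
  return node;
});
```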
42 changes: 31 additions & 11 deletions packages/bbob-parser/src/Token.ts
@@ -5,12 +5,14 @@ import {
 } from '@bbob/plugin-helper';
 import type { Token as TokenInterface } from "@bbob/types";
 
-// type, value, line, row,
+// type, value, line, row, start pos, end pos
 
 const TOKEN_TYPE_ID = 't'; // 0;
 const TOKEN_VALUE_ID = 'v'; // 1;
 const TOKEN_COLUMN_ID = 'r'; // 2;
 const TOKEN_LINE_ID = 'l'; // 3;
+const TOKEN_START_POS_ID = 's'; // 4;
+const TOKEN_END_POS_ID = 'e'; // 5;
 
 const TOKEN_TYPE_WORD = 1; // 'word';
 const TOKEN_TYPE_TAG = 2; // 'tag';
@@ -31,11 +33,15 @@ const getTokenLine = (token: Token) => (token && token[TOKEN_LINE_ID]) || 0;
 
 const getTokenColumn = (token: Token) => (token && token[TOKEN_COLUMN_ID]) || 0;
 
+const getStartPosition = (token: Token) => (token && token[TOKEN_START_POS_ID]) || 0;
+
+const getEndPosition = (token: Token) => (token && token[TOKEN_END_POS_ID]) || 0;
+
 const isTextToken = (token: Token) => {
   if (token && typeof token[TOKEN_TYPE_ID] !== 'undefined') {
     return token[TOKEN_TYPE_ID] === TOKEN_TYPE_SPACE
-    || token[TOKEN_TYPE_ID] === TOKEN_TYPE_NEW_LINE
-    || token[TOKEN_TYPE_ID] === TOKEN_TYPE_WORD;
+      || token[TOKEN_TYPE_ID] === TOKEN_TYPE_NEW_LINE
+      || token[TOKEN_TYPE_ID] === TOKEN_TYPE_WORD;
   }
 
   return false;
@@ -88,21 +94,25 @@ const tokenToText = (token: Token) => {
  * @export
  * @class Token
  */
-class Token<TokenValue = string> implements TokenInterface {
-  readonly t: number // type
-  readonly v: string // value
-  readonly l: number // line
-  readonly r: number // row
-
-  constructor(type?: number, value?: TokenValue, row: number = 0, col: number = 0) {
+class Token<TokenValue = string> implements TokenInterface {
+  readonly t: number; // type
+  readonly v: string; // value
+  readonly l: number; // line
+  readonly r: number; // row
+  readonly s: number; // start pos
+  readonly e: number; // end pos
+
+  constructor(type?: number, value?: TokenValue, row: number = 0, col: number = 0, start: number = 0, end: number = 0) {
     this[TOKEN_LINE_ID] = row;
     this[TOKEN_COLUMN_ID] = col;
     this[TOKEN_TYPE_ID] = type || 0;
     this[TOKEN_VALUE_ID] = String(value);
+    this[TOKEN_START_POS_ID] = start;
+    this[TOKEN_END_POS_ID] = end;
   }
 
   get type() {
-    return this[TOKEN_TYPE_ID]
+    return this[TOKEN_TYPE_ID];
   }
 
   isEmpty() {
@@ -149,6 +159,14 @@ class Token<TokenValue = string> implements TokenInterface {
     return getTokenColumn(this);
   }
 
+  getStart() {
+    return getStartPosition(this);
+  }
+
+  getEnd() {
+    return getEndPosition(this);
+  }
+
   toString() {
     return tokenToText(this);
   }
@@ -158,6 +176,8 @@ export const TYPE_ID = TOKEN_TYPE_ID;
 export const VALUE_ID = TOKEN_VALUE_ID;
 export const LINE_ID = TOKEN_LINE_ID;
 export const COLUMN_ID = TOKEN_COLUMN_ID;
+export const START_POS_ID = TOKEN_START_POS_ID;
+export const END_POS_ID = TOKEN_END_POS_ID;
 export const TYPE_WORD = TOKEN_TYPE_WORD;
 export const TYPE_TAG = TOKEN_TYPE_TAG;
 export const TYPE_ATTR_NAME = TOKEN_TYPE_ATTR_NAME;
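
At the token layer the same data is packed into two new single-letter fields, `s` and `e`, alongside the existing `t`/`v`/`l`/`r`. A small sketch of the extended constructor; `Token` is an internal module of `@bbob/parser`, so the import path and export form here are assumptions:

```ts
// Internal module; import path and export form are assumptions.
import { Token, TYPE_TAG } from '@bbob/parser/src/Token';

// type, value, row, col, start pos, end pos
const token = new Token(TYPE_TAG, 'style', 0, 0, 0, 19);

token.getStart(); // 0  (read from the compact key 's')
token.getEnd();   // 19 (read from the compact key 'e')
```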
33 changes: 23 additions & 10 deletions packages/bbob-parser/src/lexer.ts
@@ -20,8 +20,8 @@ import { CharGrabber, createCharGrabber, trimChar, unquote } from './utils';
 // for cases <!-- -->
 const EM = '!';
 
-export function createTokenOfType(type: number, value: string, r = 0, cl = 0) {
-  return new Token(type, value, r, cl)
+export function createTokenOfType(type: number, value: string, r = 0, cl = 0, p = 0, e = 0) {
+  return new Token(type, value, r, cl, p, e);
 }
 
 const STATE_WORD = 0;
@@ -34,6 +34,7 @@ const TAG_STATE_VALUE = 2;
 
 const WHITESPACES = [SPACE, TAB];
 const SPECIAL_CHARS = [EQ, SPACE, TAB];
+const END_POS_OFFSET = 2; // length + start position offset
 
 const isWhiteSpace = (char: string) => (WHITESPACES.indexOf(char) >= 0);
 const isEscapeChar = (char: string) => char === BACKSLASH;
@@ -43,6 +44,7 @@ const unq = (val: string) => unquote(trimChar(val, QUOTEMARK));
 
 export function createLexer(buffer: string, options: LexerOptions = {}): LexerTokenizer {
   let row = 0;
+  let prevCol = 0;
   let col = 0;
 
   let tokenIndex = -1;
@@ -89,16 +91,17 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTokenizer {
    * @param {Number} type
    * @param {String} value
    */
-  function emitToken(type: number, value: string) {
-    const token = createTokenOfType(type, value, row, col);
+  function emitToken(type: number, value: string, startPos?: number, endPos?: number) {
+    const token = createTokenOfType(type, value, row, prevCol, startPos, endPos);
 
     onToken(token);
 
+    prevCol = col;
     tokenIndex += 1;
     tokens[tokenIndex] = token;
   }
 
-  function nextTagState(tagChars: CharGrabber, isSingleValueTag: boolean) {
+  function nextTagState(tagChars: CharGrabber, isSingleValueTag: boolean, masterStartPos: number) {
     if (tagMode === TAG_STATE_ATTR) {
       const validAttrName = (char: string) => !(char === EQ || isWhiteSpace(char));
       const name = tagChars.grabWhile(validAttrName);
@@ -161,6 +164,9 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTokenizer {
       tagChars.skip();
 
       emitToken(TYPE_ATTR_VALUE, unq(name));
+      if (tagChars.getPrev() === QUOTEMARK) {
+        prevCol++;
+      }
 
       if (tagChars.isLast()) {
         return TAG_STATE_NAME;
@@ -169,13 +175,15 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTokenizer {
       return TAG_STATE_ATTR;
     }
 
+    const start = masterStartPos + tagChars.getPos() - 1;
     const validName = (char: string) => !(char === EQ || isWhiteSpace(char) || tagChars.isLast());
     const name = tagChars.grabWhile(validName);
 
-    emitToken(TYPE_TAG, name);
+    emitToken(TYPE_TAG, name, start, masterStartPos + tagChars.getLength() + 1);
     checkContextFreeMode(name);
 
     tagChars.skip();
+    prevCol++;
 
     // in cases when we has [url=someval]GET[/url] and we dont need to parse all
     if (isSingleValueTag) {
@@ -209,11 +217,13 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTokenizer {
     const isClosingTag = substr[0] === SLASH;
 
     if (isNoAttrsInTag || isClosingTag) {
+      const startPos = chars.getPos() - 1;
       const name = chars.grabWhile((char) => char !== closeTag);
+      const endPos = startPos + name.length + END_POS_OFFSET;
 
       chars.skip(); // skip closeTag
 
-      emitToken(TYPE_TAG, name);
+      emitToken(TYPE_TAG, name, startPos, endPos);
       checkContextFreeMode(name, isClosingTag);
 
       return STATE_WORD;
@@ -223,6 +233,7 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTokenizer {
   }
 
   function stateAttrs() {
+    const startPos = chars.getPos();
     const silent = true;
     const tagStr = chars.grabWhile((char) => char !== closeTag, silent);
     const tagGrabber = createCharGrabber(tagStr, { onSkip });
@@ -231,7 +242,7 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTokenizer {
     tagMode = TAG_STATE_NAME;
 
     while (tagGrabber.hasNext()) {
-      tagMode = nextTagState(tagGrabber, !hasSpace);
+      tagMode = nextTagState(tagGrabber, !hasSpace, startPos);
     }
 
     chars.skip(); // skip closeTag
@@ -246,6 +257,7 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTokenizer {
     chars.skip();
 
     col = 0;
+    prevCol = 0;
     row++;
 
     return STATE_WORD;
@@ -276,6 +288,7 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTokenizer {
     emitToken(TYPE_WORD, chars.getCurr());
 
     chars.skip();
+    prevCol++;
 
     return STATE_WORD;
   }
@@ -345,7 +358,7 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTokenizer {
     if (nestedMap.has(value)) {
       return !!nestedMap.get(value);
     } else {
-      const status = (buffer.indexOf(value) > -1)
+      const status = (buffer.indexOf(value) > -1);
 
       nestedMap.set(value, status);
 
@@ -356,5 +369,5 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTokenizer {
   return {
     tokenize,
     isTokenNested,
-  }
+  };
 }
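
For a no-attrs or closing tag, the position arithmetic is easy to check by hand: `startPos` lands on the `[`, and `endPos = startPos + name.length + END_POS_OFFSET` extends just past the `]` (the offset of 2 accounts for both brackets). A quick sketch, assuming `createLexer` can be imported from the parser source and that `tokenize()` returns the token array:

```ts
// Internal module; the import path is an assumption.
import { createLexer } from '@bbob/parser/src/lexer';

const tokens = createLexer('[b]bold[/b]').tokenize();

// Opening tag token 'b': start = 0, end = 0 + 'b'.length + 2 = 3,
// so the range [0, 3) covers '[', the name, and ']'.
tokens[0].getStart(); // 0
tokens[0].getEnd();   // 3
```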