feat: add start and end positions of tag nodes (#246)
Closes #134

* feat: Add start and end positions of tag nodes

Improves accuracy of row/col error reporting. Now targets the start of the relevant token instead of the end.

* Simplify language for TagNode and Token

* Update static TagNode.create to ingest setStart() logic

Improves readability of the end position offset for no-attr tags
Alteras1 authored Aug 1, 2024
1 parent 0beab56 commit 4084874
Showing 13 changed files with 931 additions and 388 deletions.
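
Based on the updated core tests in this commit, parsed tag nodes now carry `start` and `end` ranges pointing at the opening and closing tags. A minimal sketch of what a consumer sees, assuming `parse` from `@bbob/parser` returns the same node shape the tests below assert:

```ts
import { parse } from '@bbob/parser';

// Offsets into the source string:
// '[style size="15px"]Large Text[/style]'
// opening tag: indices 0-18, closing tag: indices 29-36 ('to' is exclusive)
const [node] = parse('[style size="15px"]Large Text[/style]');

if (typeof node !== 'string') {
  console.log(node.start); // { from: 0, to: 19 }
  console.log(node.end);   // { from: 29, to: 37 }
}
```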
17 changes: 17 additions & 0 deletions .changeset/ninety-points-explode.md
@@ -0,0 +1,17 @@
+---
+"@bbob/plugin-helper": minor
+"@bbob/parser": minor
+"@bbob/types": minor
+"@bbob/cli": minor
+"@bbob/core": minor
+"@bbob/html": minor
+"@bbob/preset": minor
+"@bbob/preset-html5": minor
+"@bbob/preset-react": minor
+"@bbob/preset-vue": minor
+"@bbob/react": minor
+"@bbob/vue2": minor
+"@bbob/vue3": minor
+---
+
+feat: Add start and end positions of tag nodes
68 changes: 46 additions & 22 deletions packages/bbob-core/test/index.test.ts
@@ -1,5 +1,5 @@
-import { TagNode } from '@bbob/parser'
-import core, { BBobPluginFunction, BBobPlugins } from '../src'
+import { TagNode } from '@bbob/parser';
+import core, { BBobPluginFunction, BBobPlugins } from '../src';
 import { isTagNode } from "@bbob/plugin-helper";
 
 const stringify = (val: unknown) => JSON.stringify(val);
@@ -11,15 +11,17 @@ describe('@bbob/core', () => {
     const res = process([], '[style size="15px"]Large Text[/style]');
     const ast = res.tree;
 
-    expect(res.html).toBe('[{"tag":"style","attrs":{"size":"15px"},"content":["Large"," ","Text"]}]');
+    expect(res.html).toBe('[{"tag":"style","attrs":{"size":"15px"},"content":["Large"," ","Text"],"start":{"from":0,"to":19},"end":{"from":29,"to":37}}]');
     expect(ast).toBeInstanceOf(Array);
     expect(stringify(ast)).toEqual(stringify([
       {
         tag: 'style',
         attrs: { size: '15px' },
-        content: ["Large", " ", "Text"]
+        content: ["Large", " ", "Text"],
+        start: { from: 0, to: 19 },
+        end: { from: 29, to: 37 },
       }
-    ]))
+    ]));
   });
 
   test('plugin walk api node', () => {
@@ -39,11 +41,11 @@ describe('@bbob/core', () => {
 
         }
 
-        return node
+        return node;
       });
 
-      return plugin
-    }
+      return plugin;
+    };
 
     const res = process([testPlugin()], '[mytag size="15px"]Large Text[/mytag]');
     const ast = res.tree;
@@ -61,7 +63,15 @@ describe('@bbob/core', () => {
           ' ',
           'Text',
           'Test'
-        ]
+        ],
+        start: {
+          from: 0,
+          to: 19
+        },
+        end: {
+          from: 29,
+          to: 37
+        }
       }
     ]));
   });
@@ -71,13 +81,13 @@ describe('@bbob/core', () => {
 
       const plugin: BBobPluginFunction = (tree) => tree.walk(node => {
         if (node === ':)') {
-          return TagNode.create('test-tag', {}, [])
+          return TagNode.create('test-tag', {}, []);
         }
 
-        return node
-      })
+        return node;
+      });
 
-      return plugin
+      return plugin;
     };
 
     const res = process([testPlugin()], '[mytag]Large Text :)[/mytag]');
@@ -99,7 +109,15 @@ describe('@bbob/core', () => {
             attrs: {},
             content: [],
           }
-        ]
+        ],
+        start: {
+          from: 0,
+          to: 7
+        },
+        end: {
+          from: 20,
+          to: 28
+        }
       }
     ]));
   });
@@ -109,13 +127,13 @@ describe('@bbob/core', () => {
 
      const plugin: BBobPluginFunction = (tree) => tree.match([{ tag: 'mytag1' }, { tag: 'mytag2' }], node => {
        if (isTagNode(node) && node.attrs) {
-         node.attrs['pass'] = 1
+         node.attrs['pass'] = 1;
        }
 
-       return node
-     })
+       return node;
+     });
 
-     return plugin
+     return plugin;
    };
 
    const res = process([testPlugin()], `[mytag1 size="15"]Tag1[/mytag1][mytag2 size="16"]Tag2[/mytag2][mytag3]Tag3[/mytag3]`);
@@ -132,7 +150,9 @@ },
         },
         content: [
           'Tag1'
-        ]
+        ],
+        start: { from: 0, to: 18 },
+        end: { from: 22, to: 31 }
       },
       {
         tag: 'mytag2',
@@ -142,15 +162,19 @@ describe('@bbob/core', () => {
         },
         content: [
           'Tag2'
-        ]
+        ],
+        start: { from: 31, to: 49 },
+        end: { from: 53, to: 62 }
       },
       {
         tag: 'mytag3',
         attrs: {},
         content: [
           'Tag3'
-        ]
+        ],
+        start: { from: 62, to: 70 },
+        end: { from: 74, to: 83 }
       }
     ]));
-  })
+  });
 });
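
The practical payoff shows up in plugins: with `start`/`end` on each node, a validation error can point at the first character of the offending tag instead of wherever its token ended. A hedged sketch — the plugin, tag name, and message are illustrative, not part of this commit, and it assumes `start` is exposed as an optional field on tag nodes:

```ts
import type { BBobPluginFunction } from '@bbob/core';
import { isTagNode } from '@bbob/plugin-helper';

// Hypothetical validation plugin: reject [script] tags and report where they begin.
const forbidScriptTags: BBobPluginFunction = (tree) => tree.walk((node) => {
  if (isTagNode(node) && node.tag === 'script' && node.start) {
    throw new Error(`[script] is not allowed (offset ${node.start.from})`);
  }
  return node;
});
```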
42 changes: 31 additions & 11 deletions packages/bbob-parser/src/Token.ts
@@ -5,12 +5,14 @@ import {
 } from '@bbob/plugin-helper';
 import type { Token as TokenInterface } from "@bbob/types";
 
-// type, value, line, row,
+// type, value, line, row, start pos, end pos
 
 const TOKEN_TYPE_ID = 't'; // 0;
 const TOKEN_VALUE_ID = 'v'; // 1;
 const TOKEN_COLUMN_ID = 'r'; // 2;
 const TOKEN_LINE_ID = 'l'; // 3;
+const TOKEN_START_POS_ID = 's'; // 4;
+const TOKEN_END_POS_ID = 'e'; // 5;
 
 const TOKEN_TYPE_WORD = 1; // 'word';
 const TOKEN_TYPE_TAG = 2; // 'tag';
@@ -31,11 +33,15 @@ const getTokenLine = (token: Token) => (token && token[TOKEN_LINE_ID]) || 0;
 
 const getTokenColumn = (token: Token) => (token && token[TOKEN_COLUMN_ID]) || 0;
 
+const getStartPosition = (token: Token) => (token && token[TOKEN_START_POS_ID]) || 0;
+
+const getEndPosition = (token: Token) => (token && token[TOKEN_END_POS_ID]) || 0;
+
 const isTextToken = (token: Token) => {
   if (token && typeof token[TOKEN_TYPE_ID] !== 'undefined') {
     return token[TOKEN_TYPE_ID] === TOKEN_TYPE_SPACE
-    || token[TOKEN_TYPE_ID] === TOKEN_TYPE_NEW_LINE
-    || token[TOKEN_TYPE_ID] === TOKEN_TYPE_WORD;
+      || token[TOKEN_TYPE_ID] === TOKEN_TYPE_NEW_LINE
+      || token[TOKEN_TYPE_ID] === TOKEN_TYPE_WORD;
   }
 
   return false;
@@ -88,21 +94,25 @@ const tokenToText = (token: Token) => {
  * @export
  * @class Token
  */
-class Token<TokenValue = string> implements TokenInterface {
-  readonly t: number // type
-  readonly v: string // value
-  readonly l: number // line
-  readonly r: number // row
-
-  constructor(type?: number, value?: TokenValue, row: number = 0, col: number = 0) {
+class Token<TokenValue = string> implements TokenInterface {
+  readonly t: number; // type
+  readonly v: string; // value
+  readonly l: number; // line
+  readonly r: number; // row
+  readonly s: number; // start pos
+  readonly e: number; // end pos
+
+  constructor(type?: number, value?: TokenValue, row: number = 0, col: number = 0, start: number = 0, end: number = 0) {
     this[TOKEN_LINE_ID] = row;
     this[TOKEN_COLUMN_ID] = col;
     this[TOKEN_TYPE_ID] = type || 0;
     this[TOKEN_VALUE_ID] = String(value);
+    this[TOKEN_START_POS_ID] = start;
+    this[TOKEN_END_POS_ID] = end;
   }
 
   get type() {
-    return this[TOKEN_TYPE_ID]
+    return this[TOKEN_TYPE_ID];
   }
 
   isEmpty() {
@@ -149,6 +159,14 @@ class Token<TokenValue = string> implements TokenInterface {
     return getTokenColumn(this);
   }
 
+  getStart() {
+    return getStartPosition(this);
+  }
+
+  getEnd() {
+    return getEndPosition(this);
+  }
+
   toString() {
     return tokenToText(this);
   }
@@ -158,6 +176,8 @@ export const TYPE_ID = TOKEN_TYPE_ID;
 export const VALUE_ID = TOKEN_VALUE_ID;
 export const LINE_ID = TOKEN_LINE_ID;
 export const COLUMN_ID = TOKEN_COLUMN_ID;
+export const START_POS_ID = TOKEN_START_POS_ID;
+export const END_POS_ID = TOKEN_END_POS_ID;
 export const TYPE_WORD = TOKEN_TYPE_WORD;
 export const TYPE_TAG = TOKEN_TYPE_TAG;
 export const TYPE_ATTR_NAME = TOKEN_TYPE_ATTR_NAME;
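
At the token layer the same data is packed into two new single-letter fields, `s` and `e`, alongside the existing `t`/`v`/`l`/`r`. A small sketch of the extended constructor; `Token` is an internal module of `@bbob/parser`, so the import path and export form here are assumptions:

```ts
// Internal module; import path and export form are assumptions.
import { Token, TYPE_TAG } from '@bbob/parser/src/Token';

// type, value, row, col, start pos, end pos
const token = new Token(TYPE_TAG, 'style', 0, 0, 0, 19);

token.getStart(); // 0  (read from the compact key 's')
token.getEnd();   // 19 (read from the compact key 'e')
```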
33 changes: 23 additions & 10 deletions packages/bbob-parser/src/lexer.ts
@@ -20,8 +20,8 @@ import { CharGrabber, createCharGrabber, trimChar, unquote } from './utils';
 // for cases <!-- -->
 const EM = '!';
 
-export function createTokenOfType(type: number, value: string, r = 0, cl = 0) {
-  return new Token(type, value, r, cl)
+export function createTokenOfType(type: number, value: string, r = 0, cl = 0, p = 0, e = 0) {
+  return new Token(type, value, r, cl, p, e);
 }
 
 const STATE_WORD = 0;
@@ -34,6 +34,7 @@ const TAG_STATE_VALUE = 2;
 
 const WHITESPACES = [SPACE, TAB];
 const SPECIAL_CHARS = [EQ, SPACE, TAB];
+const END_POS_OFFSET = 2; // length + start position offset
 
 const isWhiteSpace = (char: string) => (WHITESPACES.indexOf(char) >= 0);
 const isEscapeChar = (char: string) => char === BACKSLASH;
@@ -43,6 +44,7 @@ const unq = (val: string) => unquote(trimChar(val, QUOTEMARK));
 
 export function createLexer(buffer: string, options: LexerOptions = {}): LexerTokenizer {
   let row = 0;
+  let prevCol = 0;
   let col = 0;
 
   let tokenIndex = -1;
@@ -89,16 +91,17 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTokenizer {
    * @param {Number} type
    * @param {String} value
    */
-  function emitToken(type: number, value: string) {
-    const token = createTokenOfType(type, value, row, col);
+  function emitToken(type: number, value: string, startPos?: number, endPos?: number) {
+    const token = createTokenOfType(type, value, row, prevCol, startPos, endPos);
 
     onToken(token);
 
+    prevCol = col;
     tokenIndex += 1;
     tokens[tokenIndex] = token;
   }
 
-  function nextTagState(tagChars: CharGrabber, isSingleValueTag: boolean) {
+  function nextTagState(tagChars: CharGrabber, isSingleValueTag: boolean, masterStartPos: number) {
     if (tagMode === TAG_STATE_ATTR) {
       const validAttrName = (char: string) => !(char === EQ || isWhiteSpace(char));
       const name = tagChars.grabWhile(validAttrName);
@@ -161,6 +164,9 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTokenizer {
       tagChars.skip();
 
       emitToken(TYPE_ATTR_VALUE, unq(name));
+      if (tagChars.getPrev() === QUOTEMARK) {
+        prevCol++;
+      }
 
       if (tagChars.isLast()) {
         return TAG_STATE_NAME;
@@ -169,13 +175,15 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTokenizer {
       return TAG_STATE_ATTR;
     }
 
+    const start = masterStartPos + tagChars.getPos() - 1;
     const validName = (char: string) => !(char === EQ || isWhiteSpace(char) || tagChars.isLast());
     const name = tagChars.grabWhile(validName);
 
-    emitToken(TYPE_TAG, name);
+    emitToken(TYPE_TAG, name, start, masterStartPos + tagChars.getLength() + 1);
     checkContextFreeMode(name);
 
     tagChars.skip();
+    prevCol++;
 
     // in cases when we has [url=someval]GET[/url] and we dont need to parse all
     if (isSingleValueTag) {
@@ -209,11 +217,13 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTokenizer {
     const isClosingTag = substr[0] === SLASH;
 
     if (isNoAttrsInTag || isClosingTag) {
+      const startPos = chars.getPos() - 1;
       const name = chars.grabWhile((char) => char !== closeTag);
+      const endPos = startPos + name.length + END_POS_OFFSET;
 
       chars.skip(); // skip closeTag
 
-      emitToken(TYPE_TAG, name);
+      emitToken(TYPE_TAG, name, startPos, endPos);
       checkContextFreeMode(name, isClosingTag);
 
       return STATE_WORD;
@@ -223,6 +233,7 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTokenizer {
   }
 
   function stateAttrs() {
+    const startPos = chars.getPos();
     const silent = true;
     const tagStr = chars.grabWhile((char) => char !== closeTag, silent);
     const tagGrabber = createCharGrabber(tagStr, { onSkip });
@@ -231,7 +242,7 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTokenizer {
     tagMode = TAG_STATE_NAME;
 
     while (tagGrabber.hasNext()) {
-      tagMode = nextTagState(tagGrabber, !hasSpace);
+      tagMode = nextTagState(tagGrabber, !hasSpace, startPos);
     }
 
     chars.skip(); // skip closeTag
@@ -246,6 +257,7 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTokenizer {
     chars.skip();
 
     col = 0;
+    prevCol = 0;
     row++;
 
     return STATE_WORD;
@@ -276,6 +288,7 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTokenizer {
     emitToken(TYPE_WORD, chars.getCurr());
 
     chars.skip();
+    prevCol++;
 
     return STATE_WORD;
   }
@@ -345,7 +358,7 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTokenizer {
     if (nestedMap.has(value)) {
       return !!nestedMap.get(value);
     } else {
-      const status = (buffer.indexOf(value) > -1)
+      const status = (buffer.indexOf(value) > -1);
 
       nestedMap.set(value, status);
 
@@ -356,5 +369,5 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTokenizer {
   return {
     tokenize,
     isTokenNested,
-  }
+  };
 }
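
For a no-attrs or closing tag, the position arithmetic is easy to check by hand: `startPos` lands on the `[`, and `endPos = startPos + name.length + END_POS_OFFSET` extends just past the `]` (the offset of 2 accounts for both brackets). A quick sketch, assuming `createLexer` can be imported from the parser source and that `tokenize()` returns the token array:

```ts
// Internal module; the import path is an assumption.
import { createLexer } from '@bbob/parser/src/lexer';

const tokens = createLexer('[b]bold[/b]').tokenize();

// Opening tag token 'b': start = 0, end = 0 + 'b'.length + 2 = 3,
// so the range [0, 3) covers '[', the name, and ']'.
tokens[0].getStart(); // 0
tokens[0].getEnd();   // 3
```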