Skip to content

Commit

Permalink
feat: begin/end ranges for primitive nodes
Browse files Browse the repository at this point in the history
  • Loading branch information
Artawower committed Sep 13, 2022
1 parent c6fdf0d commit 286e3c4
Show file tree
Hide file tree
Showing 3 changed files with 139 additions and 25 deletions.
5 changes: 5 additions & 0 deletions packages/uniorg-parse/src/parse-options.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ export interface ParseOptions {
* matching. Corresponds to `org-match-sexp-depth` in Emacs.
*/
matchSexpDepth: number;
/**
* Add begin/end properties to primitive nodes
*/
positions: boolean;
}

export const defaultOptions: ParseOptions = {
Expand Down Expand Up @@ -167,4 +171,5 @@ export const defaultOptions: ParseOptions = {
'do',
],
matchSexpDepth: 3,
positions: true,
};
148 changes: 123 additions & 25 deletions packages/uniorg-parse/src/parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ import {
} from './utils.js';
import { ParseOptions, defaultOptions } from './parse-options.js';
import { Reader } from './reader.js';
import { Position } from 'unist';

/*
(defun rasen/org-debug ()
Expand Down Expand Up @@ -425,7 +426,10 @@ class Parser {
if (objectBegin !== prevEnd) {
// parse text before object
const value = this.r.substring(prevEnd, objectBegin);
objects.push(u('text', { value }));
const end = prevEnd + value.length;
objects.push(
u('text', { value, position: this.getPosition(prevEnd, end) })
);
}

// @ts-expect-error contentsBegin is not defined for "literals"
Expand All @@ -447,9 +451,13 @@ class Parser {

// handle text after the last object
const text = this.r.rest();
const [begin, end] = [this.r.offset(), this.r.offset() + text.length];

this.r.advance(text.length);
if (text.trim().length) {
objects.push(u('text', { value: text }));
objects.push(
u('text', { value: text, position: this.getPosition(begin, end) })
);
}

return objects;
Expand Down Expand Up @@ -754,6 +762,8 @@ class Parser {
private parseComment(): Comment {
let valueLines = [];
this.r.advance(this.r.forceLookingAt(/^[ \t]*# ?/));
const begin = this.r.offset();

valueLines.push(this.r.advance(this.r.line()));

while (true) {
Expand All @@ -762,17 +772,21 @@ class Parser {

valueLines.push(this.r.advance(this.r.line()));
}

let value = valueLines.join('');
if (value[value.length - 1] === '\n') {
value = value.substring(0, value.length - 1);
}
const end = begin + value.length;

return u('comment', { value: value });
return u('comment', {
value: value,
position: this.getPosition(begin, end),
});
}

private parseFixedWidth(affiliated: AffiliatedKeywords): FixedWidth {
let valueLines = [];
const begin = this.r.offset();
while (true) {
const m = this.r.lookingAt(/^[ \t]*: ?(.*)$/m);
if (!m) break;
Expand All @@ -781,8 +795,13 @@ class Parser {
valueLines.push(m[1]);
}
const value = valueLines.join('\n');
const end = this.r.offset();

return u('fixed-width', { affiliated, value });
return u('fixed-width', {
affiliated,
value,
position: this.getPosition(begin, end),
});
}

private parseCommentBlock(
Expand All @@ -794,7 +813,12 @@ class Parser {
return comment;
}
const value = this.r.substring(comment.contentsBegin, comment.contentsEnd);
return u('comment-block', { affiliated, value });
const [begin, end] = this.getCurrentRange(value);
return u('comment-block', {
affiliated,
value,
position: this.getPosition(begin, end),
});
}

private parseSrcBlock(affiliated: AffiliatedKeywords): SrcBlock | Paragraph {
Expand All @@ -821,9 +845,13 @@ class Parser {
this.r.resetOffset(contentsEnd);
this.r.advance(this.r.line());
this.parseEmptyLines();
const _end = this.r.offset();

return u('src-block', { affiliated, language, value });
return u('src-block', {
affiliated,
language,
value,
position: this.getPosition(contentsBegin, contentsEnd),
});
}

private parseExampleBlock(
Expand All @@ -836,7 +864,11 @@ class Parser {
return block;
}
const value = this.r.substring(block.contentsBegin, block.contentsEnd);
return u('example-block', { affiliated, value });
return u('example-block', {
affiliated,
value,
position: this.getPosition(block.contentsBegin, block.contentsEnd),
});
}

private parseExportBlock(
Expand Down Expand Up @@ -864,7 +896,12 @@ class Parser {
this.parseEmptyLines();
const _end = this.r.offset();

return u('export-block', { affiliated, backend, value });
return u('export-block', {
affiliated,
backend,
value,
position: this.getPosition(contentsBegin, contentsEnd),
});
}

private parseSpecialBlock(
Expand Down Expand Up @@ -952,9 +989,17 @@ class Parser {
const m = this.r.forceLookingAt(/[ \t]*#\+(\S+):(.*)/);
const key = m[1].toUpperCase();
const value = m[2].trim();
const [begin, end] = this.getCurrentRange(m[0]);

this.r.advance(this.r.line());
this.parseEmptyLines();
return u('keyword', { affiliated, key, value });

return u('keyword', {
affiliated,
key,
value,
position: this.getPosition(begin, end),
});
}

private parseLatexEnvironment(
Expand All @@ -977,7 +1022,11 @@ class Parser {

const value = this.r.substring(beginOffset, endOffset);

return u('latex-environment', { affiliated, value });
return u('latex-environment', {
affiliated,
value,
position: this.getPosition(beginOffset, endOffset),
});
}

private parseDrawer(affiliated: AffiliatedKeywords): Drawer | Paragraph {
Expand All @@ -999,9 +1048,12 @@ class Parser {
}

private parseClock(): Clock {
this.r.advance(this.r.forceMatch(/^[ \t]*CLOCK:[ \t]*/));
const begin = this.r.offset();
const parsedClock = this.r.forceMatch(/^[ \t]*CLOCK:[ \t]*/);
this.r.advance(parsedClock);
const value = this.parseTimestamp();

const end = begin + parsedClock.input.length;
this.r.advance(this.r.match(/^[ \t]+=>[ \t]*/));
const durationM = this.r.advance(this.r.lookingAt(/^(\S+)[ \t]*$/m));
const duration = durationM ? durationM[1] : null;
Expand All @@ -1010,16 +1062,28 @@ class Parser {

this.parseEmptyLines();

return u('clock', { value, duration, status });
return u('clock', {
value,
duration,
status,
position: this.getPosition(begin, end),
});
}

private parseNodeProperty(): NodeProperty {
const propertyRe = /^[ \t]*:(?<key>\S+):(?:(?<value1>$)|[ \t]+(?<value2>.*?))[ \t]*$/m;
const begin = this.r.offset();
const propertyRe =
/^[ \t]*:(?<key>\S+):(?:(?<value1>$)|[ \t]+(?<value2>.*?))[ \t]*$/m;
const m = this.r.forceLookingAt(propertyRe);
const key = m.groups!['key'];
const value = m.groups!['value1'] ?? m.groups!['value2'];
this.r.advance(this.r.line());
return u('node-property', { key, value });
const end = this.r.offset();
return u('node-property', {
key,
value,
position: this.getPosition(begin, end),
});
}

private parseParagraph(affiliated: AffiliatedKeywords): Paragraph {
Expand Down Expand Up @@ -1141,10 +1205,16 @@ class Parser {
}

private parseDiarySexp(affiliated: AffiliatedKeywords): DiarySexp {
const begin = this.r.offset();
const value = this.r.forceLookingAt(/^(%%\(.*)[ \t]*$/m)[1];
this.r.advance(this.r.line());
this.parseEmptyLines();
return u('diary-sexp', { affiliated, value });
const end = begin + value.length;
return u('diary-sexp', {
affiliated,
value,
position: this.getPosition(begin, end),
});
}

private parseTable(affiliated: AffiliatedKeywords): Table {
Expand Down Expand Up @@ -1466,7 +1536,8 @@ class Parser {
const contentsBegin = this.r.offset() + m.index + m[1].length + m[3].length;
const contentsEnd = contentsBegin + m[4].length;
this.r.resetOffset(contentsEnd + 1);
return u('code', { value }, []);
const [begin, end] = this.getCurrentRange(value);
return u('code', { value, position: this.getPosition(begin, end) }, []);
}

private parseVerbatim(): Verbatim | null {
Expand Down Expand Up @@ -1505,11 +1576,13 @@ class Parser {
}

private parseEntity(): Entity | null {
const begin = this.r.offset();
const m = this.r.advance(
this.r.lookingAt(
/^\\(?:(?<value1>_ +)|(?<value2>there4|sup[123]|frac[13][24]|[a-zA-Z]+)(?<brackets>$|\{\}|\P{Letter}))/mu
)
);

if (!m) return null;
const hasBrackets = m.groups!.brackets === '{}';
if (!hasBrackets) {
Expand All @@ -1520,7 +1593,12 @@ class Parser {
}
const value = getOrgEntity(m.groups!.value1 ?? m.groups!.value2);
if (!value) return null;
return u('entity', { useBrackets: hasBrackets, ...value });
const end = begin + m[0].length;
return u('entity', {
useBrackets: hasBrackets,
...value,
position: this.getPosition(begin, end),
});
}

private parseLatexFragment(): LatexFragment | null {
Expand Down Expand Up @@ -1574,7 +1652,11 @@ class Parser {
if (begin === end) return null;

const value = this.r.substring(begin, end);
return u('latex-fragment', { value, contents: contents ?? value });
return u('latex-fragment', {
value,
contents: contents ?? value,
position: this.getPosition(begin, end),
});
}

private parseFootnoteReference(): FootnoteReference | null {
Expand Down Expand Up @@ -1633,7 +1715,8 @@ class Parser {
// TODO: Type 1: Text targeted from a radio target.

// Type 2: Standard link.
const linkBracketRe = /\[\[(?<link>([^\[\]]|\\(\\\\)*[\[\]]|\\+[^\[\]])+)\](\[(?<text>[\s\S]+?)\])?\]/m;
const linkBracketRe =
/\[\[(?<link>([^\[\]]|\\(\\\\)*[\[\]]|\\+[^\[\]])+)\](\[(?<text>[\s\S]+?)\])?\]/m;
const bracketM = this.r.advance(this.r.lookingAt(linkBracketRe));
if (bracketM) {
const m = bracketM;
Expand Down Expand Up @@ -1741,6 +1824,7 @@ class Parser {
}

private parseTimestamp(): Timestamp | null {
const begin = this.r.offset();
// org-ts--internal-regexp
const tsInternalRe = '\\d{4}-\\d{2}-\\d{2}(:? .*?)?';
// org-ts-regexp-both
Expand Down Expand Up @@ -1808,14 +1892,13 @@ class Parser {
rawValue,
start,
end,
position: this.getPosition(begin, begin + rawValue.length),
});
}

// Helpers

private static parseDate(
s: string
): {
private static parseDate(s: string): {
year: number;
month: number;
day: number;
Expand Down Expand Up @@ -1857,6 +1940,20 @@ class Parser {
private atHeading(): boolean {
return this.r.lookingAt(/^\*+[ \t]/) !== null;
}

/*
* Return begin and end positions from current cursor position + val length
*/
private getCurrentRange(val: string): [number, number] {
return [this.r.offset(), this.r.offset() + val.length];
}

private getPosition(begin: number, end: number): Position | undefined {
if (!this.options.positions) {
return;
}
return this.r.toPosition(begin, end);
}
}

const drawerRe = /^[ \t]*:((?:\w|[-_])+):[ \t]*$/m;
Expand Down Expand Up @@ -1915,7 +2012,8 @@ const affiliatedRe = new RegExp(
'i'
);

const footnoteRe = /\[fn:(?:(?<label_inline>[-_\w]+)?(?<inline>:)|(?<label>[-_\w]+)\])/;
const footnoteRe =
/\[fn:(?:(?<label_inline>[-_\w]+)?(?<inline>:)|(?<label>[-_\w]+)\])/;
const footnoteDefinitionRe = /^\[fn:([-_\w]+)\]/;
const footnoteDefinitionSeparatorRe = /^\*|^\[fn:([-_\w]+)\]|^([ \t]*\n){2,}/m;

Expand Down
11 changes: 11 additions & 0 deletions packages/uniorg-parse/src/reader.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { Point, Position } from 'unist';
import { VFile } from 'vfile';
import { location } from 'vfile-location';

Expand Down Expand Up @@ -166,4 +167,14 @@ export class Reader {
}
}
}
public toPoint(offset: number): Point {
return this.#location.toPoint(offset);
}

public toPosition(begin: number, end: number): Position {
return {
start: this.toPoint(begin),
end: this.toPoint(end),
};
}
}

0 comments on commit 286e3c4

Please sign in to comment.