Skip to content

Commit

Permalink
refactor(parser): prep to generate static parser
Browse files Browse the repository at this point in the history
Sorry for the commented-out code; as you can see, I copied the lexer
generator and will drop in code to generate the static parser there.

Added some comments noting stuff I want to do.

Renamed `parser/src/parse.ts` to `parser/src/definition.ts` so that I
can write the static parser to `parser/src/parse.ts`.

Also changed `parseWithTable` to take an augmented grammar.
  • Loading branch information
wincent committed Aug 8, 2023
1 parent d6e0a68 commit cc8a4ba
Show file tree
Hide file tree
Showing 17 changed files with 113 additions and 43 deletions.
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ node_modules: yarn.lock $(PACKAGE_JSON)
@yarn
@touch $@

# Runs the compiled parser generator script with node. The script path is
# listed as a prerequisite, so make requires it to exist (or be buildable)
# before the recipe runs.
.PHONY: parser
parser: packages/parser/lib/bin/generate.js
@node packages/parser/lib/bin/generate.js

$(TSC_SENTINEL): $(PACKAGE_JSON) $(TS_CONFIG) $(TS_SRC) node_modules
@yarn run build # runs: tsc --build
@touch $(TSC_SENTINEL)
Expand Down
3 changes: 3 additions & 0 deletions packages/lexer/src/Token.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
// TODO: consider using integer IDs for tokens instead of names, to make
// comparisons cheaper in the parser. Can look up by token ID in an array
// instead of in a map by name.
export default class Token {
name: string;
start: number;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import lex from '@masochist/lexer';
import {promises as fs} from 'fs';
import path from 'path';

import {table, grammar} from '../parse';
import {table, grammar} from '../definition';
import parseWithTable, {makeNode} from '../parseWithTable';

import type {ParseTree} from '../parseWithTable';
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import {unaugmentedGrammar} from '../definition';
import extendedGrammarForItemSets from '../extendedGrammarForItemSets';
import getItemSets from '../getItemSets';
import {grammar} from '../parse';
import stringifyGrammar from '../stringifyGrammar';
import {epsilonGrammar, subsetGrammar, toyGrammar} from './grammars';

Expand Down Expand Up @@ -123,8 +123,11 @@ describe('extendedGrammarForItemSets()', () => {
});

it('returns an extended grammar for the GraphQL grammar', () => {
const itemSets = getItemSets(grammar);
const extendedGrammar = extendedGrammarForItemSets(itemSets, grammar);
const itemSets = getItemSets(unaugmentedGrammar);
const extendedGrammar = extendedGrammarForItemSets(
itemSets,
unaugmentedGrammar,
);
expect('\n' + stringifyGrammar(extendedGrammar)).toMatchInlineSnapshot(`
"
%token 0/FRAGMENT/11
Expand Down
4 changes: 2 additions & 2 deletions packages/parser/src/__tests__/getFirstSets-test.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import {dedent} from '@masochist/common';

import {unaugmentedGrammar} from '../definition';
import extendedGrammarForItemSets from '../extendedGrammarForItemSets';
import getFirstSets from '../getFirstSets';
import getItemSets from '../getItemSets';
import {grammar} from '../parse';
import stringifySymbolSets from '../stringifySymbolSets';
import {epsilonGrammar, subsetGrammar, toyGrammar} from './grammars';

Expand Down Expand Up @@ -97,7 +97,7 @@ describe('getFirstSets()', () => {
});

it('produces first sets for the GraphQL grammar', () => {
expect('\n' + stringifySymbolSets(getFirstSets(grammar)))
expect('\n' + stringifySymbolSets(getFirstSets(unaugmentedGrammar)))
.toMatchInlineSnapshot(`
"
Alias : {FRAGMENT, NAME, ON}
Expand Down
11 changes: 7 additions & 4 deletions packages/parser/src/__tests__/getFollowSets-test.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import {dedent} from '@masochist/common';

import {unaugmentedGrammar} from '../definition';
import extendedGrammarForItemSets from '../extendedGrammarForItemSets';
import getFollowSets from '../getFollowSets';
import getItemSets from '../getItemSets';
import {grammar} from '../parse';
import stringifySymbolSets from '../stringifySymbolSets';
import {epsilonGrammar, subsetGrammar, toyGrammar} from './grammars';

Expand Down Expand Up @@ -97,7 +97,7 @@ describe('getFollowSets()', () => {
});

it('produces follow sets for the GraphQL grammar', () => {
expect('\n' + stringifySymbolSets(getFollowSets(grammar)))
expect('\n' + stringifySymbolSets(getFollowSets(unaugmentedGrammar)))
.toMatchInlineSnapshot(`
"
Alias : {FRAGMENT, NAME, ON}
Expand Down Expand Up @@ -159,8 +159,11 @@ describe('getFollowSets()', () => {
});

it('produces follow sets for the extended GraphQL grammar', () => {
const itemSets = getItemSets(grammar);
const extendedGrammar = extendedGrammarForItemSets(itemSets, grammar);
const itemSets = getItemSets(unaugmentedGrammar);
const extendedGrammar = extendedGrammarForItemSets(
itemSets,
unaugmentedGrammar,
);
expect('\n' + stringifySymbolSets(getFollowSets(extendedGrammar)))
.toMatchInlineSnapshot(`
"
Expand Down
2 changes: 1 addition & 1 deletion packages/parser/src/__tests__/getItemSets-test.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import {dedent} from '@masochist/common';

import {itemSets} from '../definition';
import getItemSets from '../getItemSets';
import {itemSets} from '../parse';
import stringifyItemSets from '../stringifyItemSets';
import {epsilonGrammar, subsetGrammar, toyGrammar} from './grammars';

Expand Down
5 changes: 2 additions & 3 deletions packages/parser/src/__tests__/getParseTable-test.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import {dedent} from '@masochist/common';

import {grammar, table} from '../definition';
import getAugmentedGrammar from '../getAugmentedGrammar';
import getItemSets from '../getItemSets';
import getParseTable from '../getParseTable';
import itemSetsToTransitionTable from '../itemSetsToTransitionTable';
import {grammar, table} from '../parse';
import stringifyGrammar from '../stringifyGrammar';
import stringifyParseTable from '../stringifyParseTable';
import {epsilonGrammar, subsetGrammar, toyGrammar} from './grammars';
Expand Down Expand Up @@ -430,8 +430,7 @@ describe('getParseTable()', () => {
`);

// Just to make the above actually readable...
expect('\n' + stringifyGrammar(getAugmentedGrammar(grammar)))
.toMatchInlineSnapshot(`
expect('\n' + stringifyGrammar(grammar)).toMatchInlineSnapshot(`
"
%token AMPERSAND
%token AT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import {itemSets, unaugmentedGrammar} from '../definition';
import getItemSets from '../getItemSets';
import itemSetsToTransitionTable from '../itemSetsToTransitionTable';
import {itemSets, grammar} from '../parse';
import stringifyTransitionTable from '../stringifyTransitionTable';
import {epsilonGrammar, subsetGrammar, toyGrammar} from './grammars';

Expand Down Expand Up @@ -107,8 +107,8 @@ describe('itemSetsToTransitionTable()', () => {
expect(
'\n' +
stringifyTransitionTable(
itemSetsToTransitionTable(itemSets, grammar),
grammar,
itemSetsToTransitionTable(itemSets, unaugmentedGrammar),
unaugmentedGrammar,
),
).toMatchInlineSnapshot(`
"
Expand Down
12 changes: 8 additions & 4 deletions packages/parser/src/__tests__/parseWithTable-test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import {Token} from '@masochist/lexer';

import getAugmentedGrammar from '../getAugmentedGrammar';
import getItemSets from '../getItemSets';
import getParseTable from '../getParseTable';
import itemSetsToTransitionTable from '../itemSetsToTransitionTable';
Expand All @@ -10,6 +11,7 @@ import type {ParseTree} from '../parseWithTable';

describe('parseWithTable()', () => {
it('parses samples for the toy grammar', () => {
const augmentedGrammar = getAugmentedGrammar(toyGrammar);
const itemSets = getItemSets(toyGrammar);
const transitionTable = itemSetsToTransitionTable(itemSets, toyGrammar);
const table = getParseTable(itemSets, transitionTable, toyGrammar);
Expand All @@ -27,7 +29,7 @@ describe('parseWithTable()', () => {
expect(tokens[3].contents).toBe('10');

expect(
parseWithTable<ParseTree>(table, tokens, toyGrammar, makeNode),
parseWithTable<ParseTree>(table, tokens, augmentedGrammar, makeNode),
).toEqual({
kind: 'S',
children: [
Expand Down Expand Up @@ -66,6 +68,7 @@ describe('parseWithTable()', () => {
});

it('parses samples for the subset grammar', () => {
const augmentedGrammar = getAugmentedGrammar(subsetGrammar);
const itemSets = getItemSets(subsetGrammar);
const transitionTable = itemSetsToTransitionTable(itemSets, subsetGrammar);
const table = getParseTable(itemSets, transitionTable, subsetGrammar);
Expand All @@ -85,7 +88,7 @@ describe('parseWithTable()', () => {
expect(tokens[3].contents).toBe('baz');

expect(
parseWithTable<ParseTree>(table, tokens, subsetGrammar, makeNode),
parseWithTable<ParseTree>(table, tokens, augmentedGrammar, makeNode),
).toEqual({
kind: 'Document',
children: [
Expand Down Expand Up @@ -136,6 +139,7 @@ describe('parseWithTable()', () => {
});

it('parses samples for the epsilon grammar', () => {
const augmentedGrammar = getAugmentedGrammar(epsilonGrammar);
const itemSets = getItemSets(epsilonGrammar);
const transitionTable = itemSetsToTransitionTable(itemSets, epsilonGrammar);
const table = getParseTable(itemSets, transitionTable, epsilonGrammar);
Expand All @@ -156,7 +160,7 @@ describe('parseWithTable()', () => {
expect(tokens[3].contents).toBe('}');

expect(
parseWithTable<ParseTree>(table, tokens, epsilonGrammar, makeNode),
parseWithTable<ParseTree>(table, tokens, augmentedGrammar, makeNode),
).toEqual({
kind: 'S',
children: [
Expand Down Expand Up @@ -192,7 +196,7 @@ describe('parseWithTable()', () => {
expect(tokens[2].contents).toBe('}');

expect(
parseWithTable<ParseTree>(table, tokens, epsilonGrammar, makeNode),
parseWithTable<ParseTree>(table, tokens, augmentedGrammar, makeNode),
).toEqual({
kind: 'S',
children: [
Expand Down
39 changes: 39 additions & 0 deletions packages/parser/src/bin/generate.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/**
* Generates the parser implementation and writes it to src/parse.ts.
*
* Run this with `make parser`.
*/

// import {print} from '@masochist/codegen';
// import path from 'path';
// import {promises as fs} from 'fs';

// import definition from '../definition';
// import build from '../build';
//
// import type {Stats} from '../build';

/**
 * Entry point for the parser generator.
 *
 * Currently a no-op placeholder: the body consists solely of commented-out
 * scaffolding that sketches the intended flow (build an AST, print it to
 * source, and write `src/parse.ts` only when the contents have changed).
 *
 * @returns A promise that resolves once generation has finished.
 */
async function main(): Promise<void> {
  // const stats: Stats = {};
  // const ast = build(definition, stats);
  // const source = print(ast);
  // const file = path.join(__dirname, '..', '..', 'src', 'parse.ts');
  //
  // // We write only if different, for the sake of Make...
  // let current;
  // try {
  //   current = await fs.readFile(file, 'utf8');
  // } catch {
  //   // Doesn't exist.
  // }
  // if (current !== source) {
  //   await fs.writeFile(file, source, 'utf8');
  // }
  //
  // console.table(stats);
}

// Report failures on stderr (rather than stdout) and exit with a non-zero
// status so that callers can detect that generation failed.
main().catch((error: unknown) => {
  console.error(error);
  process.exit(1);
});
28 changes: 28 additions & 0 deletions packages/parser/src/definition.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import fs from 'fs';
import path from 'path';

import getAugmentedGrammar from './getAugmentedGrammar';
import getItemSets from './getItemSets';
import getParseTable from './getParseTable';
import itemSetsToTransitionTable from './itemSetsToTransitionTable';
import parseDSL from './parseDSL';

// Location of the grammar DSL file, resolved relative to this module's
// directory.
const grammarPath = path.join(__dirname, '..', 'src', 'graphql.grammar');

// Raw text of the grammar, read synchronously when this module is loaded.
const grammarDeclaration = fs.readFileSync(grammarPath, 'utf8');

/**
 * The grammar as parsed from the DSL, before augmentation.
 *
 * Numerous tests rely on the unaugmented grammar.
 */
export const unaugmentedGrammar = parseDSL(grammarDeclaration);

/** The result of passing `unaugmentedGrammar` to `getAugmentedGrammar()`. */
export const grammar = getAugmentedGrammar(unaugmentedGrammar);

/** Item sets derived from the unaugmented grammar. */
export const itemSets = getItemSets(unaugmentedGrammar);

/** Transition table derived from `itemSets` and the unaugmented grammar. */
export const transitionTable = itemSetsToTransitionTable(itemSets, unaugmentedGrammar);

/** Parse table derived from `itemSets`, `transitionTable` and the unaugmented grammar. */
export const table = getParseTable(itemSets, transitionTable, unaugmentedGrammar);
2 changes: 1 addition & 1 deletion packages/parser/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
export {grammar, itemSets, transitionTable, table} from './parse';
export {grammar, itemSets, transitionTable, table} from './definition';

export {default as parseWithTable, makeNode} from './parseWithTable';
17 changes: 0 additions & 17 deletions packages/parser/src/parse.ts

This file was deleted.

5 changes: 1 addition & 4 deletions packages/parser/src/parseWithTable.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@ import {StringScanner, invariant} from '@masochist/common';
import {Token} from '@masochist/lexer';
import vm from 'vm';

import getAugmentedGrammar from './getAugmentedGrammar';

import type {ParseTable} from './getParseTable';
import type {Grammar} from './types';

Expand Down Expand Up @@ -54,7 +52,6 @@ export default function parseWithTable<P>(
): P | Token | null {
const EOF = new Token('$', -1, -1, '');

const augmentedGrammar = getAugmentedGrammar(grammar);
const stack: Array<[P | Token | null, number]> = [[null, 0]];
let pointer = 0;

Expand Down Expand Up @@ -87,7 +84,7 @@ export default function parseWithTable<P>(
stack.push([token, action.state]);
pointer++;
} else if (action.kind === 'Reduce') {
const {lhs, rhs, action: code} = augmentedGrammar.rules[action.rule];
const {lhs, rhs, action: code} = grammar.rules[action.rule];
const popped: Array<P | Token | null> = [];
invariant(stack.length > rhs.length);
for (let i = 0; i < rhs.length; i++) {
Expand Down
7 changes: 7 additions & 0 deletions packages/parser/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@ export type Grammar = {
/**
 * A single rule belonging to a `Grammar`.
 */
export type Rule = {
// Left-hand side of the rule (presumably the symbol the rule defines;
// meaning inferred from the name — confirm).
lhs: string;
// Right-hand side of the rule, as a list of symbol names (presumably in
// source order — confirm).
rhs: Array<string>;

/**
* A semantic action (JS code) used to produce an AST node.
*
* Not to be confused with the accept/reduce/shift `Actions` in the
* `ParseTable` type.
*/
action?: string;
};

Expand Down

0 comments on commit cc8a4ba

Please sign in to comment.