Skip to content

Commit

Permalink
Major improvements to transformers (#73)
Browse files Browse the repository at this point in the history
  • Loading branch information
RunDevelopment authored Sep 2, 2023
1 parent 14bbd9b commit 4c6193f
Show file tree
Hide file tree
Showing 50 changed files with 35,391 additions and 30,914 deletions.
4 changes: 2 additions & 2 deletions scripts/perf.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { DFA, JS, NFA, Transformers, combineTransformers, transform } from "../src";
import { CombinedTransformer, DFA, JS, NFA, Transformers, transform } from "../src";
import { PrismRegexes } from "../tests/helper/prism-regex-data";
import { performance } from "perf_hooks";
import { logDurations } from "./util";
Expand Down Expand Up @@ -45,7 +45,7 @@ function perfTest(): void {
measure("toLiteral fast", () => JS.toLiteral(expression, { fastCharacters: true }));

const finalExpression = measure("transformers", () => {
const applyTransformer = combineTransformers([
const applyTransformer = new CombinedTransformer([
Transformers.inline(),
Transformers.removeDeadBranches(),
Transformers.removeUnnecessaryAssertions(),
Expand Down
39 changes: 38 additions & 1 deletion src/ast-analysis.ts
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,43 @@ export function isPotentiallyEmpty(node: NoParent<Node> | NoParent<Concatenation
}
}

/**
 * Checks whether matching the given node(s) is guaranteed to consume at least one character.
 *
 * When an array of alternatives is given, every single alternative has to consume characters. Assertions and
 * unknown nodes are never counted as consuming characters.
 *
 * If no unknown nodes are present, then the result is the same as `!isPotentiallyZeroLength(node)`.
 *
 * @param node
 */
export function alwaysConsumesCharacters(node: NoParent<Node> | NoParent<Concatenation>[]): boolean {
	if (Array.isArray(node)) {
		// A list of alternatives: one alternative that may consume nothing is enough to fail.
		for (const alternative of node) {
			if (!alwaysConsumesCharacters(alternative)) {
				return false;
			}
		}
		return true;
	}

	switch (node.type) {
		case "CharacterClass":
			return true;

		case "Assertion":
		case "Unknown":
			return false;

		case "Alternation":
		case "Expression":
			return alwaysConsumesCharacters(node.alternatives);

		case "Quantifier":
			// A quantifier that may repeat zero times can be skipped without consuming anything.
			return node.min > 0 ? alwaysConsumesCharacters(node.alternatives) : false;

		case "Concatenation":
			// One consuming element is sufficient for the whole sequence.
			for (const element of node.elements) {
				if (alwaysConsumesCharacters(element)) {
					return true;
				}
			}
			return false;

		default:
			assertNever(node);
	}
}

/**
* Returns whether the given assertion will always trivially accept regardless of the input string and other RE AST
* nodes in the regular expression.
Expand Down Expand Up @@ -875,7 +912,7 @@ function firstConsumedCharConcat(iter: Iterable<Readonly<FirstConsumedChar>>, ma
*
* @param first
*/
function firstConsumedToLook(first: Readonly<FirstConsumedChar>): FirstLookChar {
export function firstConsumedToLook(first: Readonly<FirstConsumedChar>): FirstLookChar {
if (first.empty) {
// We have 2 cases:
// (1) (?=a|(?=b))
Expand Down
218 changes: 157 additions & 61 deletions src/ast/transform.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ import { visitAst } from "./visit";
* the given AST.
*/
export interface Transformer {
/**
* An optional name useful for diagnostics.
*/
readonly name?: string;
onAlternation?(node: NoParent<Alternation>, context: TransformContext): void;
onAssertion?(node: NoParent<Assertion>, context: TransformContext): void;
onCharacterClass?(node: NoParent<CharacterClass>, context: TransformContext): void;
Expand All @@ -47,14 +51,81 @@ export interface TransformContext {
readonly signalMutation: () => void;
}

/**
 * The names of all `on*` handler functions a {@link Transformer} can define.
 *
 * `combineTransformers` only collects handlers for the keys listed here, so this list has to contain every
 * handler. A handler missing from this list is silently dropped from the combined transformer.
 */
const TRANSFORMER_KEYS: readonly (keyof Transformer)[] = [
	"onAlternation",
	"onAssertion",
	"onCharacterClass",
	"onConcatenation",
	"onExpression",
	"onQuantifier",
	"onUnknown",
];
/**
 * A transformer that applies all given transformers one after another, in the order in which they were given.
 *
 * If one of the given transformers is itself a `CombinedTransformer`, its members are added in its place.
 */
export class CombinedTransformer implements Transformer {
	readonly name = "CombinedTransformer";
	readonly transformers: readonly Transformer[];

	/**
	 * @param transformers The transformers to combine. The iterable is only read during construction.
	 */
	constructor(transformers: Iterable<Transformer>) {
		const flattened: Transformer[] = [];
		for (const entry of transformers) {
			// Unwrap nested combined transformers so that only their members are stored.
			const members = entry instanceof CombinedTransformer ? entry.transformers : [entry];
			flattened.push(...members);
		}
		this.transformers = flattened;
	}

	onAlternation(node: NoParent<Alternation>, context: TransformContext): void {
		for (const member of this.transformers) {
			if (!member.onAlternation) {
				continue;
			}
			member.onAlternation(node, context);
		}
	}

	onAssertion(node: NoParent<Assertion>, context: TransformContext): void {
		for (const member of this.transformers) {
			if (!member.onAssertion) {
				continue;
			}
			member.onAssertion(node, context);
		}
	}

	onCharacterClass(node: NoParent<CharacterClass>, context: TransformContext): void {
		for (const member of this.transformers) {
			if (!member.onCharacterClass) {
				continue;
			}
			member.onCharacterClass(node, context);
		}
	}

	onConcatenation(node: NoParent<Concatenation>, context: TransformContext): void {
		for (const member of this.transformers) {
			if (!member.onConcatenation) {
				continue;
			}
			member.onConcatenation(node, context);
		}
	}

	onExpression(node: NoParent<Expression>, context: TransformContext): void {
		for (const member of this.transformers) {
			if (!member.onExpression) {
				continue;
			}
			member.onExpression(node, context);
		}
	}

	onQuantifier(node: NoParent<Quantifier>, context: TransformContext): void {
		for (const member of this.transformers) {
			if (!member.onQuantifier) {
				continue;
			}
			member.onQuantifier(node, context);
		}
	}

	onUnknown(node: NoParent<Unknown>, context: TransformContext): void {
		for (const member of this.transformers) {
			if (!member.onUnknown) {
				continue;
			}
			member.onUnknown(node, context);
		}
	}
}
/**
* Creates a new transformer that performs all given transformers in sequential order.
*
Expand All @@ -64,47 +135,29 @@ const TRANSFORMER_KEYS: readonly (keyof Transformer)[] = [
* The given iterable can be changed and reused after this function returns.
*
* @param transformers
* @deprecated Use `new CombinedTransformer(transformers)` instead.
*/
export function combineTransformers(transformers: Iterable<Transformer>): Transformer {
type OnFunction = (path: never, context: TransformContext) => void;
const functionLists: Partial<Record<keyof Transformer, OnFunction[]>> = {};

for (const t of transformers) {
for (const key of TRANSFORMER_KEYS) {
const fn = t[key];
if (fn) {
const list = (functionLists[key] = functionLists[key] ?? []);
list.push(fn.bind(t));
}
}
}

function toFunction(key: keyof Transformer): OnFunction | undefined {
const list = functionLists[key];
if (list === undefined || list.length === 0) {
return undefined;
} else if (list.length === 1) {
return list[0];
} else {
return function (path, context) {
for (const fn of list) {
fn(path, context);
}
};
}
}

return {
onAlternation: toFunction("onAlternation"),
onAssertion: toFunction("onAssertion"),
onCharacterClass: toFunction("onCharacterClass"),
onConcatenation: toFunction("onConcatenation"),
onExpression: toFunction("onExpression"),
onQuantifier: toFunction("onQuantifier"),
onUnknown: toFunction("onUnknown"),
};
export function combineTransformers(transformers: Iterable<Transformer>): CombinedTransformer {
return new CombinedTransformer(transformers);
}

/**
* Optional callbacks for observing the transformation process.
*
* An object of this type can be supplied via the `events` property of {@link TransformOptions}.
* Every callback is optional.
*/
export interface TransformEvents {
/**
* An optional callback that will be called at the start of every pass.
*
* @param ast The AST that will be transformed.
* @param pass The number of the pass that will be performed. Starts at `1`.
*/
onPassStart?: (ast: NoParent<Expression>, pass: number) => void;
/**
* An optional callback that will be called every time a transformer mutates the AST.
*
* @param ast The AST that was transformed.
* @param node The node that was mutated by the transformer. Descendants of this node may have been mutated as well.
* @param transformer The transformer that mutated the AST.
*/
onChange?: (ast: NoParent<Expression>, node: NoParent<Node>, transformer: Transformer) => void;
}
export interface TransformOptions {
/**
* The maximum number of times the transformer will be applied to the AST.
Expand All @@ -115,6 +168,11 @@ export interface TransformOptions {
* @default 10
*/
maxPasses?: number;

/**
* Optional events to observe the transformation process.
*/
events?: TransformEvents;
}

/**
Expand All @@ -131,16 +189,18 @@ export function transform(
ast: NoParent<Expression>,
options?: Readonly<TransformOptions>
): NoParent<Expression> {
options = options ?? {};
let passesLeft = options.maxPasses ?? 10;
const { maxPasses = 10, events } = options ?? {};

const context: Context = {
transformer,
ast,
maxCharacter: determineMaxCharacter(ast),
events: events,
};

for (; passesLeft >= 1; passesLeft--) {
for (let i = 1; i <= maxPasses; i++) {
events?.onPassStart?.(ast, i);

if (!transformPass(context)) {
break;
}
Expand Down Expand Up @@ -174,24 +234,60 @@ interface Context {
transformer: Transformer;
ast: NoParent<Expression>;
maxCharacter: Char;
events: TransformEvents | undefined;
}

function transformPass({ transformer, ast, maxCharacter }: Context): boolean {
function transformPass({ transformer, ast, maxCharacter, events }: Context): boolean {
let changed = false;
const transformerContext: TransformContext = {
maxCharacter,
signalMutation() {
changed = true;
},
let leaveNode: (node: NoParent<Node>) => void;

const transformers = transformer instanceof CombinedTransformer ? transformer.transformers : [transformer];
const byKey: Record<`on${Node["type"]}`, Transformer[]> = {
onAlternation: transformers.filter(t => t.onAlternation),
onAssertion: transformers.filter(t => t.onAssertion),
onCharacterClass: transformers.filter(t => t.onCharacterClass),
onConcatenation: transformers.filter(t => t.onConcatenation),
onExpression: transformers.filter(t => t.onExpression),
onQuantifier: transformers.filter(t => t.onQuantifier),
onUnknown: transformers.filter(t => t.onUnknown),
};

function leaveNode(node: NoParent<Node>): void {
const fnName = "on" + node.type;
if (events?.onChange) {
let changedPrivate = false;
const transformerContext: TransformContext = {
maxCharacter,
signalMutation() {
changed = changedPrivate = true;
},
};

leaveNode = node => {
const fnName = `on${node.type}` as const;

const fn = transformer[fnName as keyof Transformer];
if (fn) {
fn(node as never, transformerContext);
}
for (const t of byKey[fnName]) {
changedPrivate = false;
t[fnName]!(node as never, transformerContext);
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
if (changedPrivate) {
events.onChange!(ast, node, t);
}
}
};
} else {
const transformerContext: TransformContext = {
maxCharacter,
signalMutation() {
changed = true;
},
};

leaveNode = node => {
const fnName = `on${node.type}` as const;

for (const t of byKey[fnName]) {
t[fnName]!(node as never, transformerContext);
}
};
}

visitAst(ast, {
Expand Down
11 changes: 5 additions & 6 deletions src/ast/visit.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,10 @@ export interface VisitNoParentAstHandler {
export function visitAst(node: Node, visitor: VisitAstHandler): void;
export function visitAst(node: NoParent<Node>, visitor: VisitNoParentAstHandler): void;
// eslint-disable-next-line @typescript-eslint/no-explicit-any
export function visitAst(node: NoParent<Node>, visitor: Record<string, any>): void {
type Fn = (node: NoParent<Node>) => void;
const enter: Fn | undefined = visitor["on" + node.type + "Enter"];
export function visitAst(node: NoParent<Node>, visitor: VisitNoParentAstHandler): void {
const enter = visitor[`on${node.type}Enter`];
if (enter) {
enter(node);
enter(node as never);
}

switch (node.type) {
Expand All @@ -80,8 +79,8 @@ export function visitAst(node: NoParent<Node>, visitor: Record<string, any>): vo
break;
}

const leave: Fn | undefined = visitor["on" + node.type + "Leave"];
const leave = visitor[`on${node.type}Leave`];
if (leave) {
leave(node);
leave(node as never);
}
}
Loading

0 comments on commit 4c6193f

Please sign in to comment.