From 4c9ad8f17b92fdad2e2adefadc4b96322f50ad4f Mon Sep 17 00:00:00 2001 From: Samuel Attard Date: Mon, 23 Sep 2024 06:38:27 -0700 Subject: [PATCH] fix: support double quote string enum (#122) --- src/__tests__/markdown-helpers.spec.ts | 129 ++++++++++++++++++ src/markdown-helpers.ts | 180 ++++++++++++++++++++++--- 2 files changed, 292 insertions(+), 17 deletions(-) diff --git a/src/__tests__/markdown-helpers.spec.ts b/src/__tests__/markdown-helpers.spec.ts index 3a5a590..051d13a 100644 --- a/src/__tests__/markdown-helpers.spec.ts +++ b/src/__tests__/markdown-helpers.spec.ts @@ -104,6 +104,76 @@ def fn(): expect(extractStringEnum('wassup')).toBe(null); }); + it('should error helpfully on invalid value separators', () => { + expect(() => extractStringEnum('Can be `x` sometimes `y')) + .toThrowErrorMatchingInlineSnapshot(` + "Unexpected separator token while extracting string enum, expected a comma or "and" or "or" but found "s" + Context: \`x\` sometimes \`y + ^" + `); + }); + + it('should error helpfully on unterminated enum strings', () => { + expect(() => extractStringEnum('Can be `x` or `y')).toThrowErrorMatchingInlineSnapshot(` + "Unexpected early termination of token sequence while extracting string enum, did you forget to close a quote? + Context: \`x\` or \`y" + `); + }); + + describe('mixed ticks', () => { + it('should extract an enum when mixed quotes are used', () => { + const values = extractStringEnum('Can be `x"` or "`y"')!; + expect(values).not.toBe(null); + expect(values).toHaveLength(2); + expect(values[0].value).toBe('x"'); + expect(values[1].value).toBe('`y'); + }); + }); + + describe('deprecated wrappers', () => { + it('should handle strikethrough deprecation wrappers', () => { + const values = extractStringEnum('Can be `x` or ~~`y`~~')!; + expect(values).not.toBe(null); + expect(values).toHaveLength(2); + expect(values[0].value).toBe('x'); + expect(values[1].value).toBe('y'); + }); + }); + + describe('lead-in descriptions', () => { + it('should handle value lists that smoothly lead in to prose with a comma', () => { + const values = extractStringEnum('Can be `x` or `y`, where `x` implies that...')!; + expect(values).not.toBe(null); + expect(values).toHaveLength(2); + expect(values[0].value).toBe('x'); + expect(values[1].value).toBe('y'); + }); + + it('should handle value lists that smoothly lead in to prose with a fullstop', () => { + const values = extractStringEnum('Can be `x` or `y`. The `x` value implies that...')!; + expect(values).not.toBe(null); + expect(values).toHaveLength(2); + expect(values[0].value).toBe('x'); + expect(values[1].value).toBe('y'); + }); + + it('should handle value lists that smoothly lead in to prose with a semicolon', () => { + const values = extractStringEnum('Can be `x` or `y`; the `x` value implies that...')!; + expect(values).not.toBe(null); + expect(values).toHaveLength(2); + expect(values[0].value).toBe('x'); + expect(values[1].value).toBe('y'); + }); + + it('should handle value lists that smoothly lead in to prose with a hyphen', () => { + const values = extractStringEnum('Can be `x` or `y` - the `x` value implies that...')!; + expect(values).not.toBe(null); + expect(values).toHaveLength(2); + expect(values[0].value).toBe('x'); + expect(values[1].value).toBe('y'); + }); + }); + describe('with backticks', () => { it('should extract an enum of the format "can be x"', () => { const values = extractStringEnum('Can be `x`')!; @@ -260,6 +330,65 @@ def fn(): }); }); + describe('with double quotes', () => { + it('should extract an enum of the format "can be x"', () => { + const values = extractStringEnum(`Can be "x"`)!; + expect(values).not.toBe(null); + expect(values).toHaveLength(1); + expect(values[0].value).toBe('x'); + }); + + it('should extract an enum of the format "can be x or y"', () => { + const values = extractStringEnum(`Can be "x" or "y"`)!; + expect(values).not.toBe(null); + expect(values).toHaveLength(2); + expect(values[0].value).toBe('x'); + expect(values[1].value).toBe('y'); + }); + + it('should extract an enum of the format "can be x, y or z"', () => { + const values = extractStringEnum(`Can be "x", "y" or "z"`)!; + expect(values).not.toBe(null); + expect(values).toHaveLength(3); + expect(values[0].value).toBe('x'); + expect(values[1].value).toBe('y'); + expect(values[2].value).toBe('z'); + }); + + it('should extract an enum of the format "can be x, y, or z"', () => { + const values = extractStringEnum(`Can be "x", "y", or "z"`)!; + expect(values).not.toBe(null); + expect(values).toHaveLength(3); + expect(values[0].value).toBe('x'); + expect(values[1].value).toBe('y'); + expect(values[2].value).toBe('z'); + }); + + it('should extract an enum of the format "values include a', () => { + const values = extractStringEnum(`Values include "a"`)!; + expect(values).not.toBe(null); + expect(values).toHaveLength(1); + expect(values[0].value).toBe('a'); + }); + + it('should extract an enum of the format "values include a and b', () => { + const values = extractStringEnum(`Values include "a" and "b"`)!; + expect(values).not.toBe(null); + expect(values).toHaveLength(2); + expect(values[0].value).toBe('a'); + expect(values[1].value).toBe('b'); + }); + + it('should extract an enum of the format "values include a, b and c', () => { + const values = extractStringEnum(`Values include "a", "b" and "c"`)!; + expect(values).not.toBe(null); + expect(values).toHaveLength(3); + expect(values[0].value).toBe('a'); + expect(values[1].value).toBe('b'); + expect(values[2].value).toBe('c'); + }); + }); + describe('rawTypeToTypeInformation()', () => { it('should map a primitive types correctly', () => { expect(rawTypeToTypeInformation('Boolean', '', null)).toMatchSnapshot(); diff --git a/src/markdown-helpers.ts b/src/markdown-helpers.ts index 6413a10..6c6bb1f 100644 --- a/src/markdown-helpers.ts +++ b/src/markdown-helpers.ts @@ -453,28 +453,174 @@ export enum StripReturnTypeBehavior { DO_NOT_STRIP, } +// All possible value separators, sorted by reverse length to ensure +// that we match the longer comma prefix variants first if they are present +const niceSeparators = [',', 'and', 'or', ', and', ', or'].sort((a, b) => b.length - a.length); +// Some string enums can also be objects, the final phrase is "or an object" and we +// should gracefully terminate in that case +const niceTerminators = [', or an Object', 'or an Object'].sort((a, b) => b.length - a.length); +const suffixesToIgnore = ['(Deprecated)']; + export const extractStringEnum = (description: string): PossibleStringValue[] | null => { - const possibleValues: PossibleStringValue[] = []; - - const inlineValuesPattern = /(?:can be|values? includes?) ((?:(?:[`|'][a-zA-Z0-9-_\.:]+[`|'])(?:(, | )?))*(?:(?:or|and) [`|'][a-zA-Z0-9-_\.:]+[`|'])?)/i; - const inlineMatch = inlineValuesPattern.exec(description); - if (inlineMatch) { - const valueString = inlineMatch[1]; - const valuePattern = /[`|']([a-zA-Z0-9-_\.:]+)[`|']/g; - let value = valuePattern.exec(valueString); - - while (value) { - possibleValues.push({ - value: value[1], - description: '', - }); - value = valuePattern.exec(valueString); + const inlineValuesLocatorPattern = /(?:can be|values? includes?) (.+)/i; + const locatorMatch = inlineValuesLocatorPattern.exec(description); + if (!locatorMatch) return null; + + const valuesTokens = locatorMatch[1].split(''); + + const state = { + // Where are we in the valueTokens array + position: 0, + // What values have we found so far + values: [] as string[], + // The current value we are building, was found wrapped by `currentQuoter` + currentValue: '', + // The quote character that we encountered to start building a value + // We won't stop adding characters to `currentValue` until the same character + // is encountered again + currentQuoter: null as null | string, + // In some cases quoted values are wrapped with other markdown indicators, for + // instance strikethrough ~ characters. This handles those to ensure anything + // we allow as a wrapping character is unwrapped after a value is extracted. + currentQuoterWrappers: [] as string[], + // This is set to true after a value is extracted to allow us to parse out a + // nice separator. For instance a "comma", a complete list is in `niceSeparators` + // above. + expectingNiceSeparator: false, + // This is set after the state machine reaches a point that _could_ be the end, + // an invalid token when this is set to true is not a fatal error rather the + // graceful termination of the state machine. + couldBeDone: false, + }; + const lookAhead = (length: number) => { + return valuesTokens.slice(state.position - 1, state.position + length - 1).join(''); + }; + stringEnumTokenLoop: while (state.position < valuesTokens.length) { + const char = valuesTokens[state.position]; + state.position++; + + if (state.currentQuoter) { + // We should never expect a separator inside a quoted value + if (state.expectingNiceSeparator) { + throw new Error('Impossible state encountered while extracting a string enum'); + } + if (char === state.currentQuoter) { + state.currentQuoter = null; + state.values.push(state.currentValue); + state.currentValue = ''; + state.expectingNiceSeparator = true; + } else { + state.currentValue += char; + } + } else { + // Whitespace can be skipped + if (char === ' ') { + continue stringEnumTokenLoop; + } + + // If we're between values we should be expecting one of the above "nice" + // separators. + if (state.expectingNiceSeparator) { + // Before checking for a separator we need to ensure we have unwrapped any wrapping + // chars + if (state.currentQuoterWrappers.length) { + const expectedUnwrap = state.currentQuoterWrappers.pop(); + if (char !== expectedUnwrap) { + throw new Error( + `Unexpected token while extracting string enum. Expected an unwrapping token that matched "${expectedUnwrap}". But found token: ${char}\nContext: "${ + locatorMatch[1] + }"\n${' '.repeat(8 + state.position)}^`, + ); + } + continue stringEnumTokenLoop; + } + + if (char === '.' || char === ';' || char === '-') { + break stringEnumTokenLoop; + } + + for (const suffix of suffixesToIgnore) { + if (lookAhead(suffix.length) === suffix) { + state.position += suffix.length - 1; + continue stringEnumTokenLoop; + } + } + + for (const niceTerminator of niceTerminators) { + if (lookAhead(niceTerminator.length) === niceTerminator) { + state.position += niceTerminator.length - 1; + state.expectingNiceSeparator = false; + state.couldBeDone = true; + continue stringEnumTokenLoop; + } + } + + for (const niceSeparator of niceSeparators) { + if (lookAhead(niceSeparator.length) === niceSeparator) { + state.position += niceSeparator.length - 1; + state.expectingNiceSeparator = false; + if (niceSeparator === ',') { + state.couldBeDone = true; + } + continue stringEnumTokenLoop; + } + } + throw new Error( + `Unexpected separator token while extracting string enum, expected a comma or "and" or "or" but found "${char}"\nContext: ${ + locatorMatch[1] + }\n${' '.repeat(8 + state.position)}^`, + ); + } + + if (['"', "'", '`'].includes(char)) { + // Quote chars start a new value + state.currentQuoter = char; + // A new value has started, we no longer could be done on an invalid char + state.couldBeDone = false; + continue stringEnumTokenLoop; + } + if (['~'].includes(char)) { + // Deprecated string enum values are wrapped with strikethrough + state.currentQuoterWrappers.push(char); + continue stringEnumTokenLoop; + } + // If we are at the very start we should just assume our heuristic found something silly + // and bail, 0 valid characters is skip-able + if (state.position === 1) { + return null; + } + // If the last thing we parsed _could_ have been a termination character + // let's assume an invalid character here confirms that. + if (state.couldBeDone) { + break stringEnumTokenLoop; + } + // Anything else is unexpected + throw new Error( + `Unexpected token while extracting string enum. Token: ${char}\nContext: "${ + locatorMatch[1] + }"\n${' '.repeat(9 + state.position)}^`, + ); } + } + + // Reached the end of the description, we should check + // if we are in a clean state (not inside a quote). + // If so we're good, if not hard error + if (state.currentQuoter || state.currentValue) { + throw new Error( + `Unexpected early termination of token sequence while extracting string enum, did you forget to close a quote?\nContext: ${locatorMatch[1]}`, + ); + } - return possibleValues.length === 0 ? null : possibleValues; + // No options we should just bail, can't have a string enum with 0 options + if (!state.values.length) { + return null; } - return null; + return state.values.map(value => ({ + value, + description: '', + })); }; export const extractReturnType = (