Skip to content

Commit 38dbb1a

Browse files
authored
feat(text): add ability for user to control word comparison function (denoland#5448)
1 parent dff594b commit 38dbb1a

File tree

3 files changed

+42
-29
lines changed

3 files changed

+42
-29
lines changed

text/closest_string.ts

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,30 @@
22
// This module is browser compatible.
33
import { levenshteinDistance } from "./levenshtein_distance.ts";
44

5-
// NOTE: this metric may change in future versions (e.g. better than levenshteinDistance)
6-
const getWordDistance = levenshteinDistance;
5+
/** Options for {@linkcode closestString}. */
6+
export interface ClosestStringOptions {
7+
/**
8+
* Whether the distance should include case.
9+
*
10+
* @default {false}
11+
*/
12+
caseSensitive?: boolean;
13+
/**
14+
* A custom comparison function to use for comparing strings.
15+
*
16+
* @param a The first string for comparison.
17+
* @param b The second string for comparison.
18+
* @returns The distance between the two strings.
19+
* @default {levenshteinDistance}
20+
*/
21+
compareFn?: (a: string, b: string) => number;
22+
}
723

824
/**
925
* Finds the most similar string from an array of strings.
1026
*
11-
* Note: the ordering of words may change with version-updates
12-
* E.g. word-distance metric may change (improve)
13-
* use a named-distance (e.g. levenshteinDistance) to
14-
* guarantee a particular ordering.
27+
* By default, calculates the distance between words using the
28+
* {@link https://en.wikipedia.org/wiki/Levenshtein_distance | Levenshtein distance}.
1529
*
1630
* @example Usage
1731
* ```ts
@@ -26,23 +40,20 @@ const getWordDistance = levenshteinDistance;
2640
*
2741
* @param givenWord The string to measure distance against
2842
* @param possibleWords The string-array to pick the closest string from
29-
* @param options An options bag containing a `caseSensitive` flag indicating
30-
* whether the distance should include case. Default is false.
43+
* @param options The options for the comparison.
3144
* @returns The closest string
3245
*/
3346
export function closestString(
3447
givenWord: string,
3548
possibleWords: ReadonlyArray<string>,
36-
options?: {
37-
caseSensitive?: boolean;
38-
},
49+
options?: ClosestStringOptions,
3950
): string {
4051
if (possibleWords.length === 0) {
4152
throw new TypeError(
4253
"When using closestString(), the possibleWords array must contain at least one word",
4354
);
4455
}
45-
const { caseSensitive } = { ...options };
56+
const { caseSensitive, compareFn = levenshteinDistance } = { ...options };
4657

4758
if (!caseSensitive) {
4859
givenWord = givenWord.toLowerCase();
@@ -52,13 +63,12 @@ export function closestString(
5263
let closestStringDistance = Infinity;
5364
for (const each of possibleWords) {
5465
const distance = caseSensitive
55-
? getWordDistance(givenWord, each)
56-
: getWordDistance(givenWord, each.toLowerCase());
66+
? compareFn(givenWord, each)
67+
: compareFn(givenWord, each.toLowerCase());
5768
if (distance < closestStringDistance) {
5869
nearestWord = each;
5970
closestStringDistance = distance;
6071
}
6172
}
62-
// this distance metric could be swapped/improved in the future
6373
return nearestWord;
6474
}

text/compare_similarity.ts

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,6 @@
22
// This module is browser compatible.
33
import { levenshteinDistance } from "./levenshtein_distance.ts";
44

5-
// Note: this metric may change in future versions (e.g. better than levenshteinDistance)
6-
const getWordDistance = levenshteinDistance;
7-
85
/** Options for {@linkcode compareSimilarity}. */
96
export interface CompareSimilarityOptions {
107
/**
@@ -13,16 +10,23 @@ export interface CompareSimilarityOptions {
1310
* @default {false}
1411
*/
1512
caseSensitive?: boolean;
13+
/**
14+
* A custom comparison function to use for comparing strings.
15+
*
16+
* @param a The first string for comparison.
17+
* @param b The second string for comparison.
18+
* @returns The distance between the two strings.
19+
* @default {levenshteinDistance}
20+
*/
21+
compareFn?: (a: string, b: string) => number;
1622
}
1723

1824
/**
1925
* Takes a string and generates a comparator function to determine which of two
2026
* strings is more similar to the given one.
2127
*
22-
* Note: the ordering of words may change with version-updates
23-
* E.g. word-distance metric may change (improve)
24-
* use a named-distance (e.g. levenshteinDistance) to
25-
* guarantee a particular ordering.
28+
* By default, calculates the distance between words using the
29+
* {@link https://en.wikipedia.org/wiki/Levenshtein_distance | Levenshtein distance}.
2630
*
2731
* @param givenWord The string to measure distance against.
2832
* @param options Options for the sort.
@@ -48,12 +52,13 @@ export function compareSimilarity(
4852
givenWord: string,
4953
options?: CompareSimilarityOptions,
5054
): (a: string, b: string) => number {
55+
const { compareFn = levenshteinDistance } = { ...options };
5156
if (options?.caseSensitive) {
5257
return (a: string, b: string) =>
53-
getWordDistance(givenWord, a) - getWordDistance(givenWord, b);
58+
compareFn(givenWord, a) - compareFn(givenWord, b);
5459
}
5560
givenWord = givenWord.toLowerCase();
5661
return (a: string, b: string) =>
57-
getWordDistance(givenWord, a.toLowerCase()) -
58-
getWordDistance(givenWord, b.toLowerCase());
62+
compareFn(givenWord, a.toLowerCase()) -
63+
compareFn(givenWord, b.toLowerCase());
5964
}

text/word_similarity_sort.ts

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,8 @@ export interface WordSimilaritySortOptions extends CompareSimilarityOptions {}
1111
/**
1212
* Sorts a string-array by similarity to a given string.
1313
*
14-
* Note: the ordering of words may change with version-updates
15-
* E.g. word-distance metric may change (improve)
16-
* use a named-distance (e.g. levenshteinDistance) to
17-
* guarantee a particular ordering.
14+
* By default, calculates the distance between words using the
15+
* {@link https://en.wikipedia.org/wiki/Levenshtein_distance | Levenshtein distance}.
1816
*
1917
* @example Basic usage
2018
*

0 commit comments

Comments
 (0)