From d8e27b2085a6dfeb106007c7b072a4e20c82c81d Mon Sep 17 00:00:00 2001
From: Konstantinos Kalafatis
Date: Thu, 1 Aug 2024 21:44:36 +0300
Subject: [PATCH 1/4] Add implementation of the Damerau-Levenshtein Distance Algorithm

---
 .../Similarity/DamerauLevenshteinDistance.cs  | 112 ++++++++++++++++++
 1 file changed, 112 insertions(+)
 create mode 100644 Algorithms/Strings/Similarity/DamerauLevenshteinDistance.cs

diff --git a/Algorithms/Strings/Similarity/DamerauLevenshteinDistance.cs b/Algorithms/Strings/Similarity/DamerauLevenshteinDistance.cs
new file mode 100644
index 00000000..3ff1f866
--- /dev/null
+++ b/Algorithms/Strings/Similarity/DamerauLevenshteinDistance.cs
@@ -0,0 +1,112 @@
+using System;
+
+namespace Algorithms.Strings.Similarity;
+
+public class DamerauLevenshteinDistance
+{
+    /// <summary>
+    /// Calculates the Damerau-Levenshtein distance between two strings.
+    /// The Damerau-Levenshtein distance is a string metric for measuring the difference between two sequences.
+    /// It is calculated as the minimum number of operations needed to transform one sequence into the other.
+    /// The possible operations are insertion, deletion, substitution, and transposition.
+    /// </summary>
+    /// <remarks>
+    /// This is the restricted variant, also known as optimal string alignment:
+    /// no substring is edited more than once, so "CA" and "ABC" are at distance 3
+    /// rather than the 2 given by the unrestricted Damerau-Levenshtein distance.
+    /// </remarks>
+    /// <param name="left">The first string.</param>
+    /// <param name="right">The second string.</param>
+    /// <returns>The Damerau-Levenshtein distance between the two strings.</returns>
+    /// <exception cref="ArgumentNullException">
+    /// Thrown when <paramref name="left"/> or <paramref name="right"/> is <c>null</c>.
+    /// </exception>
+    public static int Calculate(string left, string right)
+    {
+        if (left is null)
+        {
+            throw new ArgumentNullException(nameof(left));
+        }
+
+        if (right is null)
+        {
+            throw new ArgumentNullException(nameof(right));
+        }
+
+        // Get the lengths of the input strings.
+        var leftSize = left.Length;
+        var rightSize = right.Length;
+
+        // Initialize a matrix of distances between the two strings.
+        var distances = InitializeDistanceArray(leftSize, rightSize);
+
+        // Iterate over each character in the left string.
+        for (var i = 1; i < leftSize + 1; i++)
+        {
+            // Iterate over each character in the right string.
+            for (var j = 1; j < rightSize + 1; j++)
+            {
+                // Calculate the cost of the current operation.
+                // If the characters at the current positions are the same, the cost is 0.
+                // Otherwise, the cost is 1.
+                var cost = left[i - 1] == right[j - 1] ? 0 : 1;
+
+                // Calculate the minimum distance by considering three possible operations:
+                // deletion, insertion, and substitution.
+                distances[i, j] = Math.Min(
+                    Math.Min(
+                        distances[i - 1, j] + 1, // delete the character from the left string
+                        distances[i, j - 1] + 1), // insert the character from the right string
+                    distances[i - 1, j - 1] + cost); // substitute the left character with the right character
+
+                // If the last two characters of the left prefix are the last two characters
+                // of the right prefix in swapped order, a transposition is also possible.
+                // A transposition always counts as exactly one edit.
+                if (i > 1 && j > 1 && left[i - 1] == right[j - 2] && left[i - 2] == right[j - 1])
+                {
+                    distances[i, j] = Math.Min(
+                        distances[i, j], // current minimum distance
+                        distances[i - 2, j - 2] + 1); // transpose the last two characters
+                }
+            }
+        }
+
+        // Return the distance between the two strings.
+        return distances[leftSize, rightSize];
+    }
+
+    /// <summary>
+    /// Initializes a matrix of distances between two string representations.
+    /// </summary>
+    /// <remarks>
+    /// The matrix has one more row than the left string has characters and one more
+    /// column than the right string has characters. Row 0 holds the distances when the
+    /// left string is empty and column 0 holds the distances when the right string is
+    /// empty, so each of those cells equals the length of the other string's prefix.
+    /// </remarks>
+    /// <param name="leftSize">The size of the left string.</param>
+    /// <param name="rightSize">The size of the right string.</param>
+    /// <returns>A matrix of distances.</returns>
+    private static int[,] InitializeDistanceArray(int leftSize, int rightSize)
+    {
+        // Initialize a matrix of distances with dimensions one larger than the input strings.
+        var matrix = new int[leftSize + 1, rightSize + 1];
+
+        // Fill the first column: turning the first i characters of the left string
+        // into an empty right string takes i deletions.
+        for (var i = 1; i < leftSize + 1; i++)
+        {
+            matrix[i, 0] = i;
+        }
+
+        // Fill the first row: turning an empty left string into the first i characters
+        // of the right string takes i insertions.
+        for (var i = 1; i < rightSize + 1; i++)
+        {
+            matrix[0, i] = i;
+        }
+
+        // Return the initialized matrix of distances.
+        return matrix;
+    }
+}

From 66f79a72634639d3939bb201adb8782b831ddca5 Mon Sep 17 00:00:00 2001
From: Konstantinos Kalafatis
Date: Thu, 1 Aug 2024 21:44:56 +0300
Subject: [PATCH 2/4] Add tests for Damerau-Levenshtein algorithm

---
 .../DamerauLevenshteinDistanceTests.cs        | 134 ++++++++++++++++++
 1 file changed, 134 insertions(+)
 create mode 100644 Algorithms.Tests/Strings/Similarity/DamerauLevenshteinDistanceTests.cs

diff --git a/Algorithms.Tests/Strings/Similarity/DamerauLevenshteinDistanceTests.cs b/Algorithms.Tests/Strings/Similarity/DamerauLevenshteinDistanceTests.cs
new file mode 100644
index 00000000..5f522aff
--- /dev/null
+++ b/Algorithms.Tests/Strings/Similarity/DamerauLevenshteinDistanceTests.cs
@@ -0,0 +1,134 @@
+using Algorithms.Strings.Similarity;
+using NUnit.Framework;
+
+namespace Algorithms.Tests.Strings.Similarity;
+
+[TestFixture]
+public class DamerauLevenshteinDistanceTests
+{
+    [Test]
+    public void Calculate_IdenticalStrings_ReturnsZero()
+    {
+        var str1 = "test";
+        var str2 = "test";
+        var result = DamerauLevenshteinDistance.Calculate(str1, str2);
+        Assert.That(result, Is.EqualTo(0), "Identical strings should have a Damerau-Levenshtein distance of 0.");
+    }
+
+    [Test]
+    public void Calculate_CompletelyDifferentStrings_ReturnsLengthOfLongestString()
+    {
+        var str1 = "abc";
+        var str2 = "xyz";
+        var result = DamerauLevenshteinDistance.Calculate(str1, str2);
+        Assert.That(result, Is.EqualTo(3), "Completely different strings should have a Damerau-Levenshtein distance equal to the length of the longest string.");
+    }
+
+    [Test]
+    public void Calculate_OneEmptyString_ReturnsLengthOfOtherString()
+    {
+        var str1 = "test";
+        var str2 = "";
+        var result = DamerauLevenshteinDistance.Calculate(str1, str2);
+        Assert.That(result, Is.EqualTo(4), "One empty string should have a Damerau-Levenshtein distance equal to the length of the other string.");
+    }
+
+    [Test]
+    public void Calculate_BothEmptyStrings_ReturnsZero()
+    {
+        var str1 = "";
+        var str2 = "";
+        var result = DamerauLevenshteinDistance.Calculate(str1, str2);
+        Assert.That(result, Is.EqualTo(0), "Both empty strings should have a Damerau-Levenshtein distance of 0.");
+    }
+
+    [Test]
+    public void Calculate_DifferentLengths_ReturnsCorrectValue()
+    {
+        var str1 = "short";
+        var str2 = "longer";
+        var result = DamerauLevenshteinDistance.Calculate(str1, str2);
+        Assert.That(result, Is.EqualTo(6), "Strings of different lengths should return the correct Damerau-Levenshtein distance.");
+    }
+
+    [Test]
+    public void Calculate_SpecialCharacters_ReturnsCorrectValue()
+    {
+        var str1 = "hello!";
+        var str2 = "hello?";
+        var result = DamerauLevenshteinDistance.Calculate(str1, str2);
+        Assert.That(result, Is.EqualTo(1), "Strings with special characters should return the correct Damerau-Levenshtein distance.");
+    }
+
+    [Test]
+    public void Calculate_DifferentCases_ReturnsCorrectValue()
+    {
+        var str1 = "Hello";
+        var str2 = "hello";
+        var result = DamerauLevenshteinDistance.Calculate(str1, str2);
+        Assert.That(result, Is.EqualTo(1), "Strings with different cases should return the correct Damerau-Levenshtein distance.");
+    }
+
+    [Test]
+    public void Calculate_CommonPrefixes_ReturnsCorrectValue()
+    {
+        var str1 = "prefix";
+        var str2 = "pre";
+        var result = DamerauLevenshteinDistance.Calculate(str1, str2);
+        Assert.That(result, Is.EqualTo(3), "Strings with common prefixes should return the correct Damerau-Levenshtein distance.");
+    }
+
+    [Test]
+    public void Calculate_CommonSuffixes_ReturnsCorrectValue()
+    {
+        var str1 = "suffix";
+        var str2 = "fix";
+        var result = DamerauLevenshteinDistance.Calculate(str1, str2);
+        Assert.That(result, Is.EqualTo(3), "Strings with common suffixes should return the correct Damerau-Levenshtein distance.");
+    }
+
+    [Test]
+    public void Calculate_Transpositions_ReturnsCorrectValue()
+    {
+        var str1 = "abcd";
+        var str2 = "acbd";
+        var result = DamerauLevenshteinDistance.Calculate(str1, str2);
+        Assert.That(result, Is.EqualTo(1), "Strings with transpositions should return the correct Damerau-Levenshtein distance.");
+    }
+
+    [Test]
+    public void Calculate_RepeatedCharacters_ReturnsCorrectValue()
+    {
+        var str1 = "aaa";
+        var str2 = "aaaaa";
+        var result = DamerauLevenshteinDistance.Calculate(str1, str2);
+        Assert.That(result, Is.EqualTo(2), "Strings with repeated characters should return the correct Damerau-Levenshtein distance.");
+    }
+
+    [Test]
+    public void Calculate_UnicodeCharacters_ReturnsCorrectValue()
+    {
+        var str1 = "こんにちは";
+        var str2 = "こんばんは";
+        var result = DamerauLevenshteinDistance.Calculate(str1, str2);
+        Assert.That(result, Is.EqualTo(2), "Strings with Unicode characters should return the correct Damerau-Levenshtein distance.");
+    }
+
+    [Test]
+    public void Calculate_NullLeftString_ThrowsArgumentNullException()
+    {
+        Assert.That(
+            () => DamerauLevenshteinDistance.Calculate(null!, "test"),
+            Throws.ArgumentNullException,
+            "A null left string should be rejected with an ArgumentNullException.");
+    }
+
+    [Test]
+    public void Calculate_NullRightString_ThrowsArgumentNullException()
+    {
+        Assert.That(
+            () => DamerauLevenshteinDistance.Calculate("test", null!),
+            Throws.ArgumentNullException,
+            "A null right string should be rejected with an ArgumentNullException.");
+    }
+}

From a53a0c88db84aac759690728001ffadc88c6f6fa Mon Sep 17 00:00:00 2001
From: Konstantinos Kalafatis
Date: Thu, 1 Aug 2024 21:45:06 +0300
Subject: [PATCH 3/4] Update README

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index c66e20eb..8ed99dc9 100644
--- a/README.md
+++ b/README.md
@@ -179,6 +179,7 @@ find more than one implementation for the same objective but using different alg
   * [String](./Algorithms/Strings)
     * [Similarity](./Algorithms/Strings/Similarity/)
       * [Cosine Similarity](./Algorithms/Strings/Similarity/CosineSimilarity.cs)
+      * [Damerau-Levenshtein Distance](./Algorithms/Strings/Similarity/DamerauLevenshteinDistance.cs)
       * [Hamming Distance](./Algorithms/Strings/Similarity/HammingDistance.cs)
       * [Jaro Similarity](./Algorithms/Strings/Similarity/JaroSimilarity.cs)
       * [Jaro-Winkler Distance](./Algorithms/Strings/Similarity/JaroWinklerDistance.cs)

From 406e5ad7787b314b182ccf0c5046e0a554a27e01 Mon Sep 17 00:00:00 2001
From: Konstantinos Kalafatis
Date: Thu, 1 Aug 2024 22:05:02 +0300
Subject: [PATCH 4/4] Fix Codacy suggestions

---
 Algorithms/Strings/Similarity/DamerauLevenshteinDistance.cs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Algorithms/Strings/Similarity/DamerauLevenshteinDistance.cs b/Algorithms/Strings/Similarity/DamerauLevenshteinDistance.cs
index 3ff1f866..a00bdae6 100644
--- a/Algorithms/Strings/Similarity/DamerauLevenshteinDistance.cs
+++ b/Algorithms/Strings/Similarity/DamerauLevenshteinDistance.cs
@@ -2,7 +2,7 @@
 
 namespace Algorithms.Strings.Similarity;
 
-public class DamerauLevenshteinDistance
+public static class DamerauLevenshteinDistance
 {
     /// <summary>
     /// Calculates the Damerau-Levenshtein distance between two strings.