-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add Optimal String Alignment (OSA) Distance Algorithm (#464)
- Loading branch information
Showing
3 changed files
with
232 additions
and
0 deletions.
There are no files selected for viewing
74 changes: 74 additions & 0 deletions
74
Algorithms.Tests/Strings/Similarity/OptimalStringAlignmentTests.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
using Algorithms.Strings.Similarity; | ||
using FluentAssertions; | ||
using NUnit.Framework; | ||
using System; | ||
|
||
namespace Algorithms.Tests.Strings.Similarity | ||
{ | ||
[TestFixture] | ||
public class OptimalStringAlignmentTests | ||
{ | ||
[Test] | ||
public void Calculate_IdenticalStrings_ReturnsZero() | ||
{ | ||
var result = OptimalStringAlignment.Calculate("example", "example"); | ||
result.Should().Be(0.0); | ||
} | ||
|
||
[Test] | ||
public void Calculate_FirstStringEmpty_ReturnsLengthOfSecondString() | ||
{ | ||
var result = OptimalStringAlignment.Calculate("", "example"); | ||
result.Should().Be("example".Length); | ||
} | ||
|
||
[Test] | ||
public void Calculate_SecondStringEmpty_ReturnsLengthOfFirstString() | ||
{ | ||
var result = OptimalStringAlignment.Calculate("example", ""); | ||
result.Should().Be("example".Length); | ||
} | ||
|
||
[Test] | ||
public void Calculate_BothStringsEmpty_ReturnsZero() | ||
{ | ||
var result = OptimalStringAlignment.Calculate("", ""); | ||
result.Should().Be(0.0); | ||
} | ||
|
||
[Test] | ||
public void Calculate_OneInsertion_ReturnsOne() | ||
{ | ||
var result = OptimalStringAlignment.Calculate("example", "examples"); | ||
result.Should().Be(1.0); | ||
} | ||
|
||
[Test] | ||
public void Calculate_OneDeletion_ReturnsOne() | ||
{ | ||
var result = OptimalStringAlignment.Calculate("examples", "example"); | ||
result.Should().Be(1.0); | ||
} | ||
|
||
[Test] | ||
public void Calculate_OneSubstitution_ReturnsOne() | ||
{ | ||
var result = OptimalStringAlignment.Calculate("example", "exbmple"); | ||
result.Should().Be(1.0); | ||
} | ||
|
||
[Test] | ||
public void Calculate_OneTransposition_ReturnsOne() | ||
{ | ||
var result = OptimalStringAlignment.Calculate("example", "exmaple"); | ||
result.Should().Be(1.0); | ||
} | ||
|
||
[Test] | ||
public void Calculate_MultipleOperations_ReturnsCorrectDistance() | ||
{ | ||
var result = OptimalStringAlignment.Calculate("kitten", "sitting"); | ||
result.Should().Be(3.0); | ||
} | ||
} | ||
} |
157 changes: 157 additions & 0 deletions
157
Algorithms/Strings/Similarity/OptimalStringAlignment.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,157 @@ | ||
using System; | ||
|
||
namespace Algorithms.Strings.Similarity | ||
{ | ||
/// <summary> | ||
/// Provides methods to calculate the Optimal String Alignment distance between two strings. | ||
/// | ||
/// The Optimal String Alignment distance, also known as the restricted Damerau-Levenshtein distance, | ||
/// is a string metric used to measure the difference between two sequences. It is similar to the | ||
/// Levenshtein distance, but it also considers transpositions (swapping of two adjacent characters) | ||
/// as a single operation. This metric is particularly useful when adjacent characters are commonly | ||
/// transposed, such as in typographical errors. | ||
/// | ||
/// The OSA distance between two strings is defined as the minimum number of operations required to | ||
/// transform one string into the other, where the operations include: | ||
/// | ||
/// 1. Insertion: Adding a single character. | ||
/// 2. Deletion: Removing a single character. | ||
/// 3. Substitution: Replacing one character with another. | ||
/// 4. Transposition: Swapping two adjacent characters (this is what distinguishes OSA from the | ||
/// traditional Levenshtein distance). | ||
/// | ||
/// The OSA distance algorithm ensures that no operation is applied more than once to the same | ||
/// character in the same position. This is the main difference between the OSA and the more general | ||
/// Damerau-Levenshtein distance, which does not have this restriction. | ||
/// | ||
/// <example> | ||
/// Example Usage: | ||
/// <code> | ||
/// int distance = OptimalStringAlignmentDistance("example", "exmaple"); | ||
/// Console.WriteLine(distance); // Output: 1 | ||
/// </code> | ||
/// In this example, the strings "example" and "exmaple" differ by one transposition of adjacent characters ('a' and 'm'), | ||
/// so the OSA distance is 1. | ||
/// | ||
/// <code> | ||
/// int distance = OptimalStringAlignmentDistance("kitten", "sitting"); | ||
/// Console.WriteLine(distance); // Output: 3 | ||
/// </code> | ||
/// Here, the strings "kitten" and "sitting" have three differences (substitutions 'k' to 's', 'e' to 'i', and insertion of 'g'), | ||
/// resulting in an OSA distance of 3. | ||
/// </example> | ||
/// </summary> | ||
/// <remarks> | ||
/// This algorithm has a time complexity of O(n * m), where n and m are the lengths of the two input strings. | ||
/// It is efficient for moderate-sized strings but may become computationally expensive for very long strings. | ||
/// </remarks> | ||
public static class OptimalStringAlignment | ||
{ | ||
/// <summary> | ||
/// Calculates the Optimal String Alignment distance between two strings. | ||
/// </summary> | ||
/// <param name="firstString">The first string.</param> | ||
/// <param name="secondString">The second string.</param> | ||
/// <returns>The Optimal String Alignment distance between the two strings.</returns> | ||
/// <exception cref="ArgumentNullException">Thrown when either of the input strings is null.</exception> | ||
public static double Calculate(string firstString, string secondString) | ||
{ | ||
ArgumentNullException.ThrowIfNull(nameof(firstString)); | ||
ArgumentNullException.ThrowIfNull(nameof(secondString)); | ||
|
||
if (firstString == secondString) | ||
{ | ||
return 0.0; | ||
} | ||
|
||
if (firstString.Length == 0) | ||
{ | ||
return secondString.Length; | ||
} | ||
|
||
if (secondString.Length == 0) | ||
{ | ||
return firstString.Length; | ||
} | ||
|
||
var distanceMatrix = GenerateDistanceMatrix(firstString.Length, secondString.Length); | ||
distanceMatrix = CalculateDistance(firstString, secondString, distanceMatrix); | ||
|
||
return distanceMatrix[firstString.Length, secondString.Length]; | ||
} | ||
|
||
/// <summary> | ||
/// Generates the initial distance matrix for the given lengths of the two strings. | ||
/// </summary> | ||
/// <param name="firstLength">The length of the first string.</param> | ||
/// <param name="secondLength">The length of the second string.</param> | ||
/// <returns>The initialized distance matrix.</returns> | ||
private static int[,] GenerateDistanceMatrix(int firstLength, int secondLength) | ||
{ | ||
var distanceMatrix = new int[firstLength + 2, secondLength + 2]; | ||
|
||
for (var i = 0; i <= firstLength; i++) | ||
{ | ||
distanceMatrix[i, 0] = i; | ||
} | ||
|
||
for (var j = 0; j <= secondLength; j++) | ||
{ | ||
distanceMatrix[0, j] = j; | ||
} | ||
|
||
return distanceMatrix; | ||
} | ||
|
||
/// <summary> | ||
/// Calculates the distance matrix for the given strings using the Optimal String Alignment algorithm. | ||
/// </summary> | ||
/// <param name="firstString">The first string.</param> | ||
/// <param name="secondString">The second string.</param> | ||
/// <param name="distanceMatrix">The initial distance matrix.</param> | ||
/// <returns>The calculated distance matrix.</returns> | ||
private static int[,] CalculateDistance(string firstString, string secondString, int[,] distanceMatrix) | ||
{ | ||
for (var i = 1; i <= firstString.Length; i++) | ||
{ | ||
for (var j = 1; j <= secondString.Length; j++) | ||
{ | ||
var cost = 1; | ||
|
||
if (firstString[i - 1] == secondString[j - 1]) | ||
{ | ||
cost = 0; | ||
} | ||
|
||
distanceMatrix[i, j] = Minimum( | ||
distanceMatrix[i - 1, j - 1] + cost, // substitution | ||
distanceMatrix[i, j - 1] + 1, // insertion | ||
distanceMatrix[i - 1, j] + 1); // deletion | ||
|
||
if (i > 1 && j > 1 | ||
&& firstString[i - 1] == secondString[j - 2] | ||
&& firstString[i - 2] == secondString[j - 1]) | ||
{ | ||
distanceMatrix[i, j] = Math.Min( | ||
distanceMatrix[i, j], | ||
distanceMatrix[i - 2, j - 2] + cost); // transposition | ||
} | ||
} | ||
} | ||
|
||
return distanceMatrix; | ||
} | ||
|
||
/// <summary> | ||
/// Returns the minimum of three integers. | ||
/// </summary> | ||
/// <param name="a">The first integer.</param> | ||
/// <param name="b">The second integer.</param> | ||
/// <param name="c">The third integer.</param> | ||
/// <returns>The minimum of the three integers.</returns> | ||
private static int Minimum(int a, int b, int c) | ||
{ | ||
return Math.Min(a, Math.Min(b, c)); | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters