Skip to content

Commit

Permalink
Add Optimal String Alignment (OSA) Distance Algorithm (#464)
Browse files Browse the repository at this point in the history
  • Loading branch information
Kalkwst authored Sep 21, 2024
1 parent 6b37d04 commit 5eb0254
Show file tree
Hide file tree
Showing 3 changed files with 232 additions and 0 deletions.
74 changes: 74 additions & 0 deletions Algorithms.Tests/Strings/Similarity/OptimalStringAlignmentTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
using Algorithms.Strings.Similarity;
using FluentAssertions;
using NUnit.Framework;
using System;

namespace Algorithms.Tests.Strings.Similarity
{
/// <summary>
/// Unit tests for <see cref="OptimalStringAlignment.Calculate(string, string)"/>.
/// Each test feeds a pair of strings to the calculator and checks the reported distance.
/// </summary>
[TestFixture]
public class OptimalStringAlignmentTests
{
    /// <summary>Two equal strings are at distance zero.</summary>
    [Test]
    public void Calculate_IdenticalStrings_ReturnsZero() =>
        OptimalStringAlignment.Calculate("example", "example").Should().Be(0.0);

    /// <summary>An empty first string yields the length of the second string.</summary>
    [Test]
    public void Calculate_FirstStringEmpty_ReturnsLengthOfSecondString() =>
        OptimalStringAlignment.Calculate("", "example").Should().Be("example".Length);

    /// <summary>An empty second string yields the length of the first string.</summary>
    [Test]
    public void Calculate_SecondStringEmpty_ReturnsLengthOfFirstString() =>
        OptimalStringAlignment.Calculate("example", "").Should().Be("example".Length);

    /// <summary>Two empty strings are at distance zero.</summary>
    [Test]
    public void Calculate_BothStringsEmpty_ReturnsZero() =>
        OptimalStringAlignment.Calculate("", "").Should().Be(0.0);

    /// <summary>Appending one character costs a single insertion.</summary>
    [Test]
    public void Calculate_OneInsertion_ReturnsOne() =>
        OptimalStringAlignment.Calculate("example", "examples").Should().Be(1.0);

    /// <summary>Dropping one character costs a single deletion.</summary>
    [Test]
    public void Calculate_OneDeletion_ReturnsOne() =>
        OptimalStringAlignment.Calculate("examples", "example").Should().Be(1.0);

    /// <summary>Replacing one character costs a single substitution.</summary>
    [Test]
    public void Calculate_OneSubstitution_ReturnsOne() =>
        OptimalStringAlignment.Calculate("example", "exbmple").Should().Be(1.0);

    /// <summary>Swapping two adjacent characters costs a single transposition.</summary>
    [Test]
    public void Calculate_OneTransposition_ReturnsOne() =>
        OptimalStringAlignment.Calculate("example", "exmaple").Should().Be(1.0);

    /// <summary>"kitten" to "sitting" needs two substitutions and one insertion.</summary>
    [Test]
    public void Calculate_MultipleOperations_ReturnsCorrectDistance() =>
        OptimalStringAlignment.Calculate("kitten", "sitting").Should().Be(3.0);
}
}
157 changes: 157 additions & 0 deletions Algorithms/Strings/Similarity/OptimalStringAlignment.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
using System;

namespace Algorithms.Strings.Similarity
{
/// <summary>
/// Provides methods to calculate the Optimal String Alignment (OSA) distance between two strings.
///
/// The Optimal String Alignment distance, also known as the restricted Damerau-Levenshtein distance,
/// is a string metric used to measure the difference between two sequences. It is similar to the
/// Levenshtein distance, but it also counts a transposition (swapping of two adjacent characters)
/// as a single operation. This is particularly useful when adjacent characters are commonly
/// transposed, such as in typographical errors.
///
/// The OSA distance between two strings is the minimum number of the following operations needed to
/// transform one string into the other:
///
/// 1. Insertion: adding a single character.
/// 2. Deletion: removing a single character.
/// 3. Substitution: replacing one character with another.
/// 4. Transposition: swapping two adjacent characters (this is what distinguishes OSA from the
///    traditional Levenshtein distance).
///
/// OSA adds the restriction that no substring may be edited more than once: two characters that
/// have been transposed cannot be edited again afterwards. The unrestricted Damerau-Levenshtein
/// distance has no such restriction, so the two can differ; for example the OSA distance between
/// "ca" and "abc" is 3.
/// </summary>
/// <remarks>
/// The algorithm fills a (n + 1) x (m + 1) table, so it needs O(n * m) time and space, where n and m
/// are the lengths of the two input strings. Characters are compared as individual <see cref="char"/>
/// values (UTF-16 code units).
/// </remarks>
/// <example>
/// <code>
/// double distance = OptimalStringAlignment.Calculate("example", "exmaple");
/// Console.WriteLine(distance); // Output: 1
/// </code>
/// The strings "example" and "exmaple" differ by one transposition of adjacent characters
/// ('a' and 'm'), so the OSA distance is 1.
///
/// <code>
/// double distance = OptimalStringAlignment.Calculate("kitten", "sitting");
/// Console.WriteLine(distance); // Output: 3
/// </code>
/// The strings "kitten" and "sitting" have three differences (substitutions 'k' to 's' and
/// 'e' to 'i', and insertion of 'g'), so the OSA distance is 3.
/// </example>
public static class OptimalStringAlignment
{
    /// <summary>
    /// Calculates the Optimal String Alignment distance between two strings.
    /// </summary>
    /// <param name="firstString">The first string.</param>
    /// <param name="secondString">The second string.</param>
    /// <returns>
    /// The Optimal String Alignment distance between the two strings. The value is always a
    /// non-negative whole number, returned as a <see cref="double"/>.
    /// </returns>
    /// <exception cref="ArgumentNullException">Thrown when either of the input strings is null.</exception>
    public static double Calculate(string firstString, string secondString)
    {
        // Validate the arguments themselves. Passing nameof(...) here would check a
        // never-null string literal and silently disable the guard.
        ArgumentNullException.ThrowIfNull(firstString);
        ArgumentNullException.ThrowIfNull(secondString);

        // Ordinal equality: identical strings need no edits.
        if (firstString == secondString)
        {
            return 0.0;
        }

        // Transforming to or from an empty string takes one insertion/deletion per character.
        if (firstString.Length == 0)
        {
            return secondString.Length;
        }

        if (secondString.Length == 0)
        {
            return firstString.Length;
        }

        var distanceMatrix = GenerateDistanceMatrix(firstString.Length, secondString.Length);
        distanceMatrix = CalculateDistance(firstString, secondString, distanceMatrix);

        // The bottom-right cell holds the distance between the complete strings.
        return distanceMatrix[firstString.Length, secondString.Length];
    }

    /// <summary>
    /// Generates the initial distance matrix for the given lengths of the two strings.
    /// Cell [i, 0] is set to i and cell [0, j] to j: the cost of converting a prefix
    /// to or from the empty string.
    /// </summary>
    /// <param name="firstLength">The length of the first string.</param>
    /// <param name="secondLength">The length of the second string.</param>
    /// <returns>The initialized (firstLength + 1) x (secondLength + 1) distance matrix.</returns>
    private static int[,] GenerateDistanceMatrix(int firstLength, int secondLength)
    {
        // Indices 0..length are the only ones ever read or written, so one extra
        // row and column (for the empty prefix) is sufficient.
        var distanceMatrix = new int[firstLength + 1, secondLength + 1];

        for (var i = 0; i <= firstLength; i++)
        {
            distanceMatrix[i, 0] = i;
        }

        for (var j = 0; j <= secondLength; j++)
        {
            distanceMatrix[0, j] = j;
        }

        return distanceMatrix;
    }

    /// <summary>
    /// Fills the distance matrix for the given strings using the Optimal String Alignment recurrence.
    /// After the call, cell [i, j] holds the OSA distance between the first i characters of
    /// <paramref name="firstString"/> and the first j characters of <paramref name="secondString"/>.
    /// </summary>
    /// <param name="firstString">The first string.</param>
    /// <param name="secondString">The second string.</param>
    /// <param name="distanceMatrix">The initial distance matrix; it is modified in place.</param>
    /// <returns>The same matrix instance, fully calculated.</returns>
    private static int[,] CalculateDistance(string firstString, string secondString, int[,] distanceMatrix)
    {
        for (var i = 1; i <= firstString.Length; i++)
        {
            for (var j = 1; j <= secondString.Length; j++)
            {
                // Matching characters can be carried over for free.
                var cost = firstString[i - 1] == secondString[j - 1] ? 0 : 1;

                distanceMatrix[i, j] = Minimum(
                    distanceMatrix[i - 1, j - 1] + cost, // substitution (or match)
                    distanceMatrix[i, j - 1] + 1, // insertion
                    distanceMatrix[i - 1, j] + 1); // deletion

                // The last two characters of both prefixes are each other's mirror image.
                if (i > 1 && j > 1
                    && firstString[i - 1] == secondString[j - 2]
                    && firstString[i - 2] == secondString[j - 1])
                {
                    // A transposition is one edit. When the four characters are all equal the
                    // diagonal path above is already at least as cheap, so a flat cost of 1
                    // never changes the result.
                    distanceMatrix[i, j] = Math.Min(
                        distanceMatrix[i, j],
                        distanceMatrix[i - 2, j - 2] + 1); // transposition
                }
            }
        }

        return distanceMatrix;
    }

    /// <summary>
    /// Returns the minimum of three integers.
    /// </summary>
    /// <param name="a">The first integer.</param>
    /// <param name="b">The second integer.</param>
    /// <param name="c">The third integer.</param>
    /// <returns>The minimum of the three integers.</returns>
    private static int Minimum(int a, int b, int c)
    {
        return Math.Min(a, Math.Min(b, c));
    }
}
}
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ find more than one implementation for the same objective but using different alg
* [Hamming Distance](./Algorithms/Strings/Similarity/HammingDistance.cs)
* [Jaro Similarity](./Algorithms/Strings/Similarity/JaroSimilarity.cs)
* [Jaro-Winkler Distance](./Algorithms/Strings/Similarity/JaroWinklerDistance.cs)
* [Optimal String Alignment](./Algorithms/Strings/Similarity/OptimalStringAlignment.cs)
* [Pattern Matching](./Algorithms/Strings/PatternMatching/)
* [Bitap Pattern Matching](./Algorithms/Strings/PatternMatching/Bitap.cs)
* [Naive String Search](./Algorithms/Strings/PatternMatching/NaiveStringSearch.cs)
Expand Down

0 comments on commit 5eb0254

Please sign in to comment.