-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
27 changed files
with
1,237 additions
and
25 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
AllCops: | ||
TargetRubyVersion: 2.4 | ||
Metrics/AbcSize: | ||
Enabled: false | ||
Metrics/CyclomaticComplexity: | ||
Enabled: false | ||
Metrics/MethodLength: | ||
Enabled: false | ||
Metrics/PerceivedComplexity: | ||
Enabled: false | ||
Metrics/BlockLength: | ||
Exclude: | ||
- "tasks/**/*.rake" | ||
|
||
Style/StringLiterals: | ||
EnforcedStyle: double_quotes | ||
Layout/AlignParameters: | ||
EnforcedStyle: with_fixed_indentation | ||
Layout/MultilineMethodCallIndentation: | ||
EnforcedStyle: indented | ||
Layout/MultilineOperationIndentation: | ||
EnforcedStyle: indented |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
-m markdown - LICENSE.txt |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,8 @@ | ||
# frozen_string_literal: true | ||
|
||
source "https://rubygems.org" | ||
|
||
git_source(:github) {|repo_name| "https://github.com/#{repo_name}" } | ||
git_source(:github) { |repo_name| "https://github.com/#{repo_name}" } | ||
|
||
# Specify your gem's dependencies in edits.gemspec | ||
gemspec |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,10 @@ | ||
# frozen_string_literal: true | ||
|
||
require "bundler/gem_tasks" | ||
require "rspec/core/rake_task" | ||
|
||
Dir["tasks/**/*.rake"].each { |t| load t } | ||
|
||
RSpec::Core::RakeTask.new(:spec) | ||
|
||
task :default => :spec | ||
task default: :spec |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
#!/usr/bin/env ruby | ||
# frozen_string_literal: true | ||
|
||
require "bundler/setup" | ||
require "edits" | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
# coding: utf-8 | ||
# frozen_string_literal: true | ||
|
||
lib = File.expand_path("../lib", __FILE__) | ||
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) | ||
require "edits/version" | ||
|
@@ -9,21 +10,12 @@ Gem::Specification.new do |spec| | |
spec.authors = ["Tom Crouch"] | ||
spec.email = ["[email protected]"] | ||
|
||
spec.summary = %q{TODO: Write a short summary, because Rubygems requires one.} | ||
spec.description = %q{TODO: Write a longer description or delete this line.} | ||
spec.homepage = "TODO: Put your gem's website or public repo URL here." | ||
spec.summary = "A collection of edit distance algorithms." | ||
# spec.description = "TODO: Write a longer description or delete this line." | ||
spec.homepage = "https://github.com/tcrouch/edits" | ||
spec.license = "MIT" | ||
|
||
# Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host' | ||
# to allow pushing to a single host or delete this section to allow pushing to any host. | ||
if spec.respond_to?(:metadata) | ||
spec.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'" | ||
else | ||
raise "RubyGems 2.0 or newer is required to protect against " \ | ||
"public gem pushes." | ||
end | ||
|
||
spec.files = `git ls-files -z`.split("\x0").reject do |f| | ||
spec.files = `git ls-files -z`.split("\x0").reject do |f| | ||
f.match(%r{^(test|spec|features)/}) | ||
end | ||
spec.bindir = "exe" | ||
|
@@ -32,5 +24,8 @@ Gem::Specification.new do |spec| | |
|
||
spec.add_development_dependency "bundler", "~> 1.15" | ||
spec.add_development_dependency "rake", "~> 10.0" | ||
spec.add_development_dependency "rspec", "~> 3.0" | ||
spec.add_development_dependency "rspec", "~> 3.6" | ||
spec.add_development_dependency "benchmark-ips" | ||
spec.add_development_dependency "redcarpet" | ||
spec.add_development_dependency "yard", "~> 0.9.9" | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,15 @@ | ||
# frozen_string_literal: true | ||
|
||
require "edits/version" | ||
|
||
require "edits/damerau_levenshtein" | ||
require "edits/hamming" | ||
require "edits/jaro" | ||
require "edits/jaro_winkler" | ||
require "edits/levenshtein" | ||
require "edits/restricted_edit" | ||
|
||
# A collection of edit distance algorithms. | ||
module Edits | ||
# Namespace only: nothing is defined in this body; this file loads each algorithm from its own file under edits/ via require. | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
# frozen_string_literal: true | ||
|
||
module Edits
  # Implementation of the Damerau/Levenshtein distance algorithm.
  #
  # Determines distance between two strings by counting edits, identifying:
  # * Insertion
  # * Deletion
  # * Substitution
  # * Transposition
  module DamerauLevenshtein
    # Calculate the Damerau/Levenshtein distance of two sequences.
    #
    # Strings are converted to arrays of codepoints before comparison;
    # arrays are compared element by element with `==`, and their elements
    # are also used as Hash keys to remember where each was last seen.
    #
    # @example
    #   DamerauLevenshtein.distance("acer", "earn")
    #   # => 3
    # @param seq1 [String, Array]
    # @param seq2 [String, Array]
    # @return [Integer]
    def self.distance(seq1, seq2)
      # ensure seq1 (the row axis) is never the longer sequence
      seq1, seq2 = seq2, seq1 if seq1.length > seq2.length

      # array of Integer codepoints outperforms String
      seq1 = seq1.codepoints if seq1.is_a? String
      seq2 = seq2.codepoints if seq2.is_a? String

      rows = seq1.length
      cols = seq2.length
      return cols if rows.zero?
      return rows if cols.zero?

      # 'infinite' edit distance for padding cost matrix.
      # Can be any value greater than max[rows, cols]
      inf = rows + cols

      # Initialize first two rows of cost matrix.
      # The full initial state where cols=3, rows=2 (inf=5) would be:
      #   [[5, 5, 5, 5, 5],
      #    [5, 0, 1, 2, 3],
      #    [5, 1, 0, 0, 0],
      #    [5, 2, 0, 0, 0]]
      matrix = [Array.new(cols + 2, inf)]
      matrix << 0.upto(cols).to_a.unshift(inf)

      # element => last row seen (0 means "not seen yet", which points the
      # transposition lookup at the 'infinite' padding row)
      item_history = Hash.new(0)

      1.upto(rows) do |row|
        # generate next row of cost matrix
        new_row = Array.new(cols + 2, 0)
        new_row[0] = inf
        new_row[1] = row
        matrix << new_row

        # last column in this row where the items matched (0 = none yet)
        last_match_col = 0
        seq1_item = seq1[row - 1]

        1.upto(cols) do |col|
          seq2_item = seq2[col - 1]
          last_match_row = item_history[seq2_item]

          sub_cost = seq1_item == seq2_item ? 0 : 1

          # cost of transposing, plus the edits needed for the items
          # skipped between the two transposed positions
          transposition = 1 + matrix[last_match_row][last_match_col]
          transposition += row - last_match_row - 1
          transposition += col - last_match_col - 1

          # TODO: do insertion/deletion need to be considered when
          # seq1_item == seq2_item ?
          deletion = matrix[row][col + 1] + 1
          insertion = matrix[row + 1][col] + 1
          substitution = matrix[row][col] + sub_cost

          # step cost is min of operation costs
          cost = substitution < insertion ? substitution : insertion
          cost = deletion if deletion < cost
          cost = transposition if transposition < cost

          matrix[row + 1][col + 1] = cost

          last_match_col = col if sub_cost.zero?
        end

        item_history[seq1_item] = row
      end

      matrix[rows + 1][cols + 1]
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
# frozen_string_literal: true | ||
|
||
module Edits
  # @see https://en.wikipedia.org/wiki/Hamming_distance
  module Hamming
    # Calculate the Hamming distance between two sequences.
    #
    # Elements are compared position by position over the length of the
    # shorter sequence. When the lengths differ, the difference in length
    # is added to the count, so every surplus element of the longer
    # sequence counts as one mismatch.
    #
    # Both arguments must respond to `length` and `[]`.
    #
    # @note A true distance metric, satisfies triangle inequality.
    #
    # @example
    #   Hamming.distance("karolin", "kathrin")
    #   # => 3
    # @param seq1 [String, Array]
    # @param seq2 [String, Array]
    # @return [Integer] Hamming distance
    def self.distance(seq1, seq2)
      len1 = seq1.length
      len2 = seq2.length

      shared = len1 < len2 ? len1 : len2
      surplus = (len1 - len2).abs

      surplus + shared.times.count { |i| seq1[i] != seq2[i] }
    end
  end
end
Oops, something went wrong.