Skip to content

Commit

Permalink
Add ClosestMatch.id
Browse files Browse the repository at this point in the history
This field keeps IDs unique across multiple invocations of
`ClosestMatch::Add()`.
  • Loading branch information
Aaron Burrow committed Jun 16, 2019
1 parent 1fbe626 commit a83b855
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 6 deletions.
25 changes: 19 additions & 6 deletions closestmatch.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package closestmatch
import (
"compress/gzip"
"encoding/json"
"math"
"math/rand"
"os"
"sort"
Expand All @@ -17,6 +18,7 @@ type ClosestMatch struct {
SubstringSizes []int
SubstringToID map[string]map[uint32]struct{}
ID map[uint32]IDInfo
IDx uint32
mux sync.Mutex
}

Expand All @@ -28,19 +30,25 @@ type IDInfo struct {

// New returns a new structure for performing closest matches
func New(possible []string, subsetSize []int) *ClosestMatch {
if len(possible)-1 > math.MaxUint32 {
panic("List is too long. ClosestMatch::IDx will overflow.")
}

cm := new(ClosestMatch)
cm.SubstringSizes = subsetSize
cm.SubstringToID = make(map[string]map[uint32]struct{})
cm.ID = make(map[uint32]IDInfo)
for i, s := range possible {
cm.IDx = 0
for _, s := range possible {
substrings := cm.splitWord(strings.ToLower(s))
cm.ID[uint32(i)] = IDInfo{Key: s, NumSubstrings: len(substrings)}
cm.ID[cm.IDx] = IDInfo{Key: s, NumSubstrings: len(substrings)}
for substring := range substrings {
if _, ok := cm.SubstringToID[substring]; !ok {
cm.SubstringToID[substring] = make(map[uint32]struct{})
}
cm.SubstringToID[substring][uint32(i)] = struct{}{}
cm.SubstringToID[substring][cm.IDx] = struct{}{}
}
cm.IDx++
}

return cm
Expand All @@ -67,16 +75,21 @@ func Load(filename string) (*ClosestMatch, error) {

// Add more words to ClosestMatch structure
func (cm *ClosestMatch) Add(possible []string) {
if len(possible)-1 > math.MaxUint32 || uint32(len(possible)-1) > math.MaxUint32-cm.IDx {
panic("List is too long. ClosestMatch::IDx will overflow.")
}

cm.mux.Lock()
for i, s := range possible {
for _, s := range possible {
substrings := cm.splitWord(strings.ToLower(s))
cm.ID[uint32(i)] = IDInfo{Key: s, NumSubstrings: len(substrings)}
cm.ID[cm.IDx] = IDInfo{Key: s, NumSubstrings: len(substrings)}
for substring := range substrings {
if _, ok := cm.SubstringToID[substring]; !ok {
cm.SubstringToID[substring] = make(map[uint32]struct{})
}
cm.SubstringToID[substring][uint32(i)] = struct{}{}
cm.SubstringToID[substring][cm.IDx] = struct{}{}
}
cm.IDx++
}
cm.mux.Unlock()
}
Expand Down
23 changes: 23 additions & 0 deletions closestmatch_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,3 +197,26 @@ func TestSaveLoad(t *testing.T) {
t.Errorf("Differing answers: '%s' '%s'", answer1, answer2)
}
}

// TestMultipleAddInvocations feeds words to Add one call at a time and checks
// that a word added by an earlier call is still returned as an exact match.
func TestMultipleAddInvocations(t *testing.T) {
	words := []string{"uppermost", "up", "uppity"}

	cm := New([]string{}, []int{2})
	for i := range words {
		cm.Add([]string{words[i]})
	}

	if got := cm.Closest("uppermost"); got != "uppermost" {
		t.Errorf("Should have been an exact match.")
	}
}

// TestAddAfterLoad saves a structure, loads it back, adds one more word and
// checks that a word from the original list is still an exact match.
func TestAddAfterLoad(t *testing.T) {
	cm := New([]string{"Darth", "Vader", "loves", "Doritos"}, []int{2})
	cm.Save("test/vader.cm.gz")
	cm, err := Load("test/vader.cm.gz")
	if err != nil {
		// Abort immediately: the value returned alongside an error must not
		// be used, and calling Add on it could panic and hide this failure.
		t.Fatalf("Load should succeed: %v", err)
	}
	cm.Add([]string{"Elephant"})
	if cm.Closest("Darth") != "Darth" {
		t.Errorf("Should have been an exact match.")
	}
}

0 comments on commit a83b855

Please sign in to comment.