From 1450a1030a65fa3ef0ce77639b1a875130f1847b Mon Sep 17 00:00:00 2001 From: Aaron Burrow Date: Sun, 16 Jun 2019 03:51:00 -0400 Subject: [PATCH] Add ClosestMatch.IDx This field keeps IDs unique across multiple invocations of `ClosestMatch.Add()`. --- closestmatch.go | 25 +++++++++++++++++++------ closestmatch_test.go | 23 +++++++++++++++++++++++ 2 files changed, 42 insertions(+), 6 deletions(-) diff --git a/closestmatch.go b/closestmatch.go index e933625..87def95 100755 --- a/closestmatch.go +++ b/closestmatch.go @@ -3,6 +3,7 @@ package closestmatch import ( "compress/gzip" "encoding/json" + "math" "math/rand" "os" "sort" @@ -17,6 +18,7 @@ type ClosestMatch struct { SubstringSizes []int SubstringToID map[string]map[uint32]struct{} ID map[uint32]IDInfo + IDx uint32 mux sync.Mutex } @@ -28,19 +30,25 @@ type IDInfo struct { // New returns a new structure for performing closest matches func New(possible []string, subsetSize []int) *ClosestMatch { + if len(possible)-1 > math.MaxUint32 { + panic("List is too long. 
ClosestMatch::id will overflow.") + } + cm := new(ClosestMatch) cm.SubstringSizes = subsetSize cm.SubstringToID = make(map[string]map[uint32]struct{}) cm.ID = make(map[uint32]IDInfo) - for i, s := range possible { + cm.IDx = 0 + for _, s := range possible { substrings := cm.splitWord(strings.ToLower(s)) - cm.ID[uint32(i)] = IDInfo{Key: s, NumSubstrings: len(substrings)} + cm.ID[cm.IDx] = IDInfo{Key: s, NumSubstrings: len(substrings)} for substring := range substrings { if _, ok := cm.SubstringToID[substring]; !ok { cm.SubstringToID[substring] = make(map[uint32]struct{}) } - cm.SubstringToID[substring][uint32(i)] = struct{}{} + cm.SubstringToID[substring][cm.IDx] = struct{}{} } + cm.IDx++ } return cm @@ -67,16 +75,21 @@ func Load(filename string) (*ClosestMatch, error) { // Add more words to ClosestMatch structure func (cm *ClosestMatch) Add(possible []string) { + if len(possible)-1 > math.MaxUint32 || uint32(len(possible)-1) > math.MaxUint32-cm.IDx { + panic("List is too long. ClosestMatch::id will overflow.") + } + cm.mux.Lock() - for i, s := range possible { + for _, s := range possible { substrings := cm.splitWord(strings.ToLower(s)) - cm.ID[uint32(i)] = IDInfo{Key: s, NumSubstrings: len(substrings)} + cm.ID[cm.IDx] = IDInfo{Key: s, NumSubstrings: len(substrings)} for substring := range substrings { if _, ok := cm.SubstringToID[substring]; !ok { cm.SubstringToID[substring] = make(map[uint32]struct{}) } - cm.SubstringToID[substring][uint32(i)] = struct{}{} + cm.SubstringToID[substring][cm.IDx] = struct{}{} } + cm.IDx++ } cm.mux.Unlock() } diff --git a/closestmatch_test.go b/closestmatch_test.go index 1a19789..e77518a 100755 --- a/closestmatch_test.go +++ b/closestmatch_test.go @@ -197,3 +197,26 @@ func TestSaveLoad(t *testing.T) { t.Errorf("Differing answers: '%s' '%s'", answer1, answer2) } } + +func TestMultipleAddInvocations(t *testing.T) { + cm := New([]string{}, []int{2}) + for _, x := range []string{"uppermost", "up", "uppity"} { + cm.Add([]string{x}) + } + 
if cm.Closest("uppermost") != "uppermost" { + t.Errorf("Should have been an exact match.") + } +} + +func TestAddAfterLoad(t *testing.T) { + cm := New([]string{"Darth", "Vader", "loves", "Doritos"}, []int{2}) + cm.Save("test/vader.cm.gz") + cm, err := Load("test/vader.cm.gz") + if err != nil { + t.Errorf("Load should succeed") + } + cm.Add([]string{"Elephant"}) + if cm.Closest("Darth") != "Darth" { + t.Errorf("Should have been an exact match.") + } +}