-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.go
62 lines (51 loc) · 888 Bytes
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
package main
import (
"bufio"
"fmt"
"log"
"os"
"github.com/adrg/strutil"
"github.com/adrg/strutil/metrics"
)
func main() {
f, err := os.Open("m.txt")
if err != nil {
log.Fatal(err)
}
defer f.Close()
rows := make([]string, 0)
scanner := bufio.NewScanner(f)
buf := make([]byte, 0, 64*1024)
scanner.Buffer(buf, 1024*1024)
for scanner.Scan() {
r := scanner.Text()
if len(r) > 50 {
r = r[50:]
}
rows = append(rows, r)
if len(rows) >= 150000 {
break
}
}
if err := scanner.Err(); err != nil {
log.Fatal(err)
}
m := make(map[string]int)
for _, row := range rows {
found := false
for k, v := range m {
similarity := strutil.Similarity(row, k, metrics.NewHamming())
if similarity >= 0.5 {
m[k] = v + 1
found = true
break
}
}
if !found {
m[row] = 1
}
}
for k, v := range m {
fmt.Printf("%d;%s\n", v, k)
}
}