Skip to content

Commit

Permalink
Merge pull request #2 from essentialkaos/develop
Browse files Browse the repository at this point in the history
Version 0.0.2
  • Loading branch information
andyone authored Jan 31, 2020
2 parents b4fcc39 + 29bc20d commit 3fa5a77
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 19 deletions.
6 changes: 5 additions & 1 deletion common/uc.spec
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

Summary: Simple utility for counting unique lines
Name: uc
Version: 0.0.1
Version: 0.0.2
Release: 0%{?dist}
Group: Applications/System
License: EKOL
Expand Down Expand Up @@ -57,5 +57,9 @@ rm -rf %{buildroot}
################################################################################

%changelog
* Fri Jan 31 2020 Anton Novojilov <[email protected]> - 0.0.2-0
- Added option -m/--max for defining maximum unique lines to process
- Added option -d/--dist for printing data distribution

* Thu Jan 30 2020 Anton Novojilov <[email protected]> - 0.0.1-0
- Initial build
109 changes: 91 additions & 18 deletions uc.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"hash/crc64"
"os"
"runtime"
"sort"
"sync"
"time"

Expand All @@ -21,6 +22,7 @@ import (
"pkg.re/essentialkaos/ek.v11/fsutil"
"pkg.re/essentialkaos/ek.v11/options"
"pkg.re/essentialkaos/ek.v11/signal"
"pkg.re/essentialkaos/ek.v11/strutil"
"pkg.re/essentialkaos/ek.v11/usage"
"pkg.re/essentialkaos/ek.v11/usage/completion/bash"
"pkg.re/essentialkaos/ek.v11/usage/completion/fish"
Expand All @@ -33,24 +35,33 @@ import (
// Application basic info
const (
APP = "uc"
VER = "0.0.1"
VER = "0.0.2"
DESC = "Tool for counting unique lines"
)

// Constants with options names
const (
OPT_NO_PROGRESS = "np:no-progress"
OPT_NO_COLOR = "nc:no-color"
OPT_HELP = "h:help"
OPT_VER = "v:version"
OPT_MAX_LINES = "m:max"
OPT_DISTRIBUTION = "d:dist"
OPT_NO_PROGRESS = "np:no-progress"
OPT_NO_COLOR = "nc:no-color"
OPT_HELP = "h:help"
OPT_VER = "v:version"

OPT_COMPLETION = "completion"
)

// ////////////////////////////////////////////////////////////////////////////////// //

// MAX_SAMPLE_SIZE is maximum sample size
const MAX_SAMPLE_SIZE = 512

// ////////////////////////////////////////////////////////////////////////////////// //

// Stats contains data info
type Stats struct {
Data map[uint64]uint32
Counters map[uint64]uint32 // crc64 → num
Samples map[uint64]string // crc64 → sample (512 symbols)
LastReadLines uint64
LastReadBytes float64
TotalReadLines uint64
Expand All @@ -63,12 +74,30 @@ type Stats struct {

// ////////////////////////////////////////////////////////////////////////////////// //

// LineInfo is struct with line info
type LineInfo struct {
CRC uint64
Num uint32
}

type linesSlice []LineInfo

func (s linesSlice) Len() int { return len(s) }
func (s linesSlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
func (s linesSlice) Less(i, j int) bool {
return s[i].Num < s[j].Num
}

// ////////////////////////////////////////////////////////////////////////////////// //

// optMap is map with options
var optMap = options.Map{
OPT_NO_PROGRESS: {Type: options.BOOL},
OPT_NO_COLOR: {Type: options.BOOL},
OPT_HELP: {Type: options.BOOL, Alias: "u:usage"},
OPT_VER: {Type: options.BOOL, Alias: "ver"},
OPT_MAX_LINES: {Type: options.INT},
OPT_DISTRIBUTION: {Type: options.BOOL},
OPT_NO_PROGRESS: {Type: options.BOOL},
OPT_NO_COLOR: {Type: options.BOOL},
OPT_HELP: {Type: options.BOOL, Alias: "u:usage"},
OPT_VER: {Type: options.BOOL, Alias: "ver"},

OPT_COMPLETION: {},
}
Expand Down Expand Up @@ -142,8 +171,8 @@ func processData(input string) {
var r *bufio.Reader

stats = &Stats{
Data: make(map[uint64]uint32),
mx: &sync.Mutex{},
Counters: make(map[uint64]uint32),
mx: &sync.Mutex{},
}

if input == "-" {
Expand All @@ -165,6 +194,12 @@ func processData(input string) {
// readData reads data
func readData(s *bufio.Scanner) {
ct := crc64.MakeTable(crc64.ECMA)
dist := options.GetB(OPT_DISTRIBUTION)
maxLines := options.GetI(OPT_MAX_LINES)

if dist {
stats.Samples = make(map[uint64]string)
}

stats.LastReadDate = time.Now()

Expand All @@ -174,13 +209,31 @@ func readData(s *bufio.Scanner) {

for s.Scan() {
data := s.Bytes()
dataLen := float64(len(data))
dataCrc := crc64.Checksum(data, ct)

stats.mx.Lock()

stats.Data[crc64.Checksum(data, ct)]++
stats.LastReadBytes += float64(len(data))
stats.Counters[dataCrc]++
stats.LastReadBytes += dataLen
stats.LastReadLines++

stats.TotalReadLines++
stats.TotalReadBytes += dataLen

if dist {
_, exist := stats.Samples[dataCrc]

if !exist {
stats.Samples[dataCrc] = strutil.Substr(string(data), 0, MAX_SAMPLE_SIZE)
}
}

if maxLines > 0 && len(stats.Counters) == maxLines {
stats.mx.Unlock()
break
}

stats.mx.Unlock()
}

Expand All @@ -200,9 +253,6 @@ func printProgress() {
dur := now.Sub(stats.LastReadDate)
readSpeed := stats.LastReadBytes / dur.Seconds()

stats.TotalReadLines += stats.LastReadLines
stats.TotalReadBytes += stats.LastReadBytes

fmtc.TPrintf(
"{s}%12s/s {s-}|{s} %-12s {s-}|{s} %12s/s {s-}|{s} %-12s{!}",
fmtutil.PrettyNum(stats.LastReadLines),
Expand All @@ -225,11 +275,30 @@ func printResults() {

stats.Finished = true

fmtc.TPrintln(len(stats.Data))
if options.GetB(OPT_DISTRIBUTION) {
printDistribution()
} else {
fmtc.TPrintln(len(stats.Counters))
}

stats.mx.Unlock()
}

// printDistribution prints distrubution info
func printDistribution() {
var distData linesSlice

for crc, num := range stats.Counters {
distData = append(distData, LineInfo{crc, num})
}

sort.Sort(sort.Reverse(distData))

for _, info := range distData {
fmtc.TPrintf(" %7d %s\n", info.Num, stats.Samples[info.CRC])
}
}

// signalHandler is signal handler
func signalHandler() {
printResults()
Expand All @@ -252,12 +321,16 @@ func showUsage() {
func genUsage() *usage.Info {
info := usage.NewInfo(APP, "file")

info.AddOption(OPT_DISTRIBUTION, "Show number of occurrences for every line")
info.AddOption(OPT_MAX_LINES, "Max number of unique lines {s-}(default: 5000){!}", "num")
info.AddOption(OPT_NO_PROGRESS, "Disable progress output")
info.AddOption(OPT_NO_PROGRESS, "Disable progress output")
info.AddOption(OPT_NO_COLOR, "Disable colors in output")
info.AddOption(OPT_HELP, "Show this help message")
info.AddOption(OPT_VER, "Show version")

info.AddExample("file.txt", "Count unique lines in file.txt")
info.AddExample("-d file.txt", "Show distribution for file.txt")
info.AddRawExample(
"cat file.txt | "+APP+" -",
"Count unique lines in stdin data",
Expand Down

0 comments on commit 3fa5a77

Please sign in to comment.