Skip to content

Commit

Permalink
Merge pull request #91 from essentialkaos/develop
Browse files Browse the repository at this point in the history
Version 3.0.0
  • Loading branch information
andyone authored Feb 19, 2024
2 parents cdc2c3a + 91e9d52 commit b2728b7
Show file tree
Hide file tree
Showing 8 changed files with 156 additions and 39 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ jobs:

strategy:
matrix:
go: [ '1.19.x', '1.20.x' ]
go: [ '1.21.x', '1.22.x' ]

steps:
- name: Checkout
Expand Down
28 changes: 25 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,31 @@

[![demo](https://gh.kaos.st/uc-110.gif)](#usage-demo)

### Benchmarks

```
$ wc -l data.txt
18408096 data.txt
$ hyperfine 'sort -u data.txt | wc -l' 'uc -np data.txt'
Benchmark 1: sort -u data.txt | wc -l
Time (mean ± σ): 16.030 s ± 0.181 s [User: 86.713 s, System: 1.165 s]
Range (min … max): 15.699 s … 16.324 s 10 runs
Benchmark 2: uc -np data.txt
Time (mean ± σ): 2.889 s ± 0.101 s [User: 2.435 s, System: 0.454 s]
Range (min … max): 2.721 s … 3.065 s 10 runs
Summary
uc -np data.txt ran
5.55 ± 0.20 times faster than sort -u data.txt | wc -l
```

### Installation

#### From sources

To build the `uc` from scratch, make sure you have a working Go 1.19+ workspace (_[instructions](https://go.dev/doc/install)_), then:
To build the `uc` from scratch, make sure you have a working Go 1.20+ workspace (_[instructions](https://go.dev/doc/install)_), then:

```
go install github.com/essentialkaos/uc@latest
Expand Down Expand Up @@ -82,10 +102,9 @@ Usage: uc {options} file
Options
--dist, -d Show number of occurrences for every line
--dist, -d format Show number of occurrences for every line (-/simple/table/json)
--max, -m num Max number of unique lines
--no-progress, -np Disable progress output
--no-progress, -np Disable progress output
--no-color, -nc Disable colors in output
--help, -h Show this help message
--version, -v Show version
Expand All @@ -98,6 +117,9 @@ Examples
uc -d file.txt
Show distribution for file.txt
uc --dist=table file.txt
Show distribution as a table for file.txt
uc -d -m 5k file.txt
Show distribution for file.txt with 5,000 uniq lines max
Expand Down
104 changes: 79 additions & 25 deletions cli/cli.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,14 @@ package cli

// ////////////////////////////////////////////////////////////////////////////////// //
// //
// Copyright (c) 2023 ESSENTIAL KAOS //
// Copyright (c) 2024 ESSENTIAL KAOS //
// Apache License, Version 2.0 <https://www.apache.org/licenses/LICENSE-2.0> //
// //
// ////////////////////////////////////////////////////////////////////////////////// //

import (
"bufio"
"fmt"
"hash/crc64"
"os"
"runtime"
"sort"
Expand All @@ -21,7 +20,9 @@ import (

"github.com/essentialkaos/ek/v12/fmtc"
"github.com/essentialkaos/ek/v12/fmtutil"
"github.com/essentialkaos/ek/v12/fmtutil/table"
"github.com/essentialkaos/ek/v12/fsutil"
"github.com/essentialkaos/ek/v12/mathutil"
"github.com/essentialkaos/ek/v12/options"
"github.com/essentialkaos/ek/v12/signal"
"github.com/essentialkaos/ek/v12/strutil"
Expand All @@ -33,6 +34,8 @@ import (
"github.com/essentialkaos/ek/v12/usage/man"
"github.com/essentialkaos/ek/v12/usage/update"

"github.com/cespare/xxhash"

"github.com/essentialkaos/uc/cli/support"
)

Expand All @@ -41,7 +44,7 @@ import (
// Application basic info
const (
APP = "uc"
VER = "2.0.1"
VER = "3.0.0"
DESC = "Tool for counting unique lines"
)

Expand All @@ -68,8 +71,8 @@ const MAX_SAMPLE_SIZE = 512

// Stats contains data info
type Stats struct {
Counters map[uint64]uint32 // crc64 → num
Samples map[uint64]string // crc64 → sample (512 symbols)
Counters map[uint64]uint32 // hash → num
Samples map[uint64][]byte // hash → sample (512 symbols)
LastReadLines uint64
LastReadBytes float64
TotalReadLines uint64
Expand Down Expand Up @@ -101,7 +104,7 @@ func (s linesSlice) Less(i, j int) bool {
// optMap is map with options
var optMap = options.Map{
OPT_MAX_LINES: {Type: options.INT},
OPT_DISTRIBUTION: {Type: options.BOOL},
OPT_DISTRIBUTION: {Type: options.MIXED},
OPT_NO_PROGRESS: {Type: options.BOOL},
OPT_NO_COLOR: {Type: options.BOOL},
OPT_HELP: {Type: options.BOOL},
Expand Down Expand Up @@ -173,6 +176,10 @@ func preConfigureUI() {
fmtc.DisableColors = true
rawMode = true
}

table.FullScreen = false
table.HeaderCapitalize = true
table.BorderSymbol = "–"
}

// configureUI configures user interface
Expand All @@ -195,8 +202,6 @@ func configureUI() {

// processData starts data processing
func processData(args options.Arguments) {
var r *bufio.Reader

stats = &Stats{
Counters: make(map[uint64]uint32),
mx: &sync.Mutex{},
Expand All @@ -205,19 +210,18 @@ func processData(args options.Arguments) {
input := getInput(args)

if input == "-" {
r = bufio.NewReader(os.Stdin)
} else {
fd, err := os.OpenFile(input, os.O_RDONLY, 0)
readData(bufio.NewScanner(os.Stdin))
return
}

if err != nil {
printError(err.Error())
os.Exit(1)
}
fd, err := os.OpenFile(input, os.O_RDONLY, 0)

r = bufio.NewReader(fd)
if err != nil {
printError(err.Error())
os.Exit(1)
}

readData(bufio.NewScanner(r))
readData(bufio.NewScanner(fd))
}

// getInput returns input for reading data
Expand All @@ -239,7 +243,6 @@ func getInput(args options.Arguments) string {

// readData reads data
func readData(s *bufio.Scanner) {
ct := crc64.MakeTable(crc64.ECMA)
dist := options.GetB(OPT_DISTRIBUTION)
maxLines, err := parseMaxLines(options.GetS(OPT_MAX_LINES))

Expand All @@ -249,7 +252,7 @@ func readData(s *bufio.Scanner) {
}

if dist {
stats.Samples = make(map[uint64]string)
stats.Samples = make(map[uint64][]byte)
}

stats.LastReadDate = time.Now()
Expand All @@ -261,7 +264,7 @@ func readData(s *bufio.Scanner) {
for s.Scan() {
data := s.Bytes()
dataLen := float64(len(data))
dataCrc := crc64.Checksum(data, ct)
dataCrc := xxhash.Sum64(data)

stats.mx.Lock()

Expand All @@ -276,7 +279,7 @@ func readData(s *bufio.Scanner) {
_, exist := stats.Samples[dataCrc]

if !exist {
stats.Samples[dataCrc] = strutil.Substr(string(data), 0, MAX_SAMPLE_SIZE)
stats.Samples[dataCrc] = data[:mathutil.Min(len(data), MAX_SAMPLE_SIZE)]
}
}

Expand Down Expand Up @@ -343,11 +346,62 @@ func printDistribution() {
distData = append(distData, LineInfo{crc, num})
}

fmtc.TPrintf("")

sort.Sort(sort.Reverse(distData))

for _, info := range distData {
fmtc.TPrintf(" %7d %s\n", info.Num, stats.Samples[info.CRC])
switch options.GetS(OPT_DISTRIBUTION) {
case "simple":
printDistributionSimple(distData)
case "table":
printDistributionTable(distData)
case "json":
printDistributionJSON(distData)
default:
printDistributionDefault(distData)
}
}

// printDistributionDefault writes every entry as an occurrence counter
// right-aligned in a 7-character column, followed by the stored line sample
func printDistributionDefault(data linesSlice) {
	for i := range data {
		count, sample := data[i].Num, stats.Samples[data[i].CRC]
		fmtc.Printf(" %7d %s\n", count, string(sample))
	}
}

// printDistributionSimple writes every entry as "<count> <sample>" on its own
// line, without any padding
func printDistributionSimple(data linesSlice) {
	for i := 0; i < len(data); i++ {
		entry := data[i]
		sample := string(stats.Samples[entry.CRC])

		fmtc.Printf("%d %s\n", entry.Num, sample)
	}
}

// printDistributionTable renders distribution info as a two-column table:
// the pretty-formatted number of occurrences and the stored line sample
func printDistributionTable(data linesSlice) {
	distTable := table.NewTable("#", "DATA")

	for i := range data {
		prettyCount := fmtutil.PrettyNum(data[i].Num)
		sample := string(stats.Samples[data[i].CRC])

		distTable.Add(prettyCount, sample)
	}

	distTable.Render()
}

// printDistributionJSON prints distribution info in JSON format
//
// The output is an array of objects, one per line of output, each with the
// number of occurrences ("num") and the stored line sample ("data"). Samples
// are escaped, so lines containing quotes, backslashes, control characters or
// broken UTF-8 still produce a valid JSON document.
func printDistributionJSON(data linesSlice) {
	fmt.Println("[")

	for index, info := range data {
		// quoteJSONString returns the sample already wrapped in double quotes
		fmt.Printf(` {"num":%d, "data":%s}`, info.Num, quoteJSONString(stats.Samples[info.CRC]))

		// JSON does not allow a trailing comma after the last array element
		if index+1 != len(data) {
			fmt.Println(",")
		} else {
			fmt.Println("")
		}
	}

	fmt.Println("]")
}

// quoteJSONString converts raw bytes into a double-quoted JSON string literal
func quoteJSONString(data []byte) string {
	const hexDigits = "0123456789abcdef"

	buf := make([]byte, 0, len(data)+2)
	buf = append(buf, '"')

	// Ranging over a string yields U+FFFD for every invalid UTF-8 byte, so
	// a broken sequence (e.g. a multi-byte symbol cut in the middle of a
	// sample) is replaced instead of being written as-is
	for _, r := range string(data) {
		switch {
		case r == '"', r == '\\':
			buf = append(buf, '\\', byte(r))
		case r == '\n':
			buf = append(buf, '\\', 'n')
		case r == '\r':
			buf = append(buf, '\\', 'r')
		case r == '\t':
			buf = append(buf, '\\', 't')
		case r < 0x20:
			// All other control characters must be written as \u00XX
			buf = append(buf, '\\', 'u', '0', '0', hexDigits[r>>4], hexDigits[r&0xF])
		default:
			buf = append(buf, string(r)...)
		}
	}

	buf = append(buf, '"')

	return string(buf)
}

// parseMaxLines parses max line option
Expand Down Expand Up @@ -423,16 +477,16 @@ func genUsage() *usage.Info {

info.AppNameColorTag = colorTagApp

info.AddOption(OPT_DISTRIBUTION, "Show number of occurrences for every line")
info.AddOption(OPT_DISTRIBUTION, "Show number of occurrences for every line {s-}(-/simple/table/json){!}", "?format")
info.AddOption(OPT_MAX_LINES, "Max number of unique lines", "num")
info.AddOption(OPT_NO_PROGRESS, "Disable progress output")
info.AddOption(OPT_NO_PROGRESS, "Disable progress output")
info.AddOption(OPT_NO_COLOR, "Disable colors in output")
info.AddOption(OPT_HELP, "Show this help message")
info.AddOption(OPT_VER, "Show version")

info.AddExample("file.txt", "Count unique lines in file.txt")
info.AddExample("-d file.txt", "Show distribution for file.txt")
info.AddExample("--dist=table file.txt", "Show distribution as a table for file.txt")
info.AddExample("-d -m 5k file.txt", "Show distribution for file.txt with 5,000 uniq lines max")
info.AddRawExample("cat file.txt | "+APP, "Count unique lines in stdin data")

Expand Down
2 changes: 1 addition & 1 deletion cli/support/support.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ package support

// ////////////////////////////////////////////////////////////////////////////////// //
// //
// Copyright (c) 2023 ESSENTIAL KAOS //
// Copyright (c) 2024 ESSENTIAL KAOS //
// Apache License, Version 2.0 <https://www.apache.org/licenses/LICENSE-2.0> //
// //
// ////////////////////////////////////////////////////////////////////////////////// //
Expand Down
38 changes: 36 additions & 2 deletions common/uc.spec
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

Summary: Simple utility for counting unique lines
Name: uc
Version: 2.0.1
Version: 3.0.0
Release: 0%{?dist}
Group: Applications/System
License: Apache License, Version 2.0
Expand All @@ -16,7 +16,7 @@ Source0: https://source.kaos.st/%{name}/%{name}-%{version}.tar.bz2

BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)

BuildRequires: golang >= 1.19
BuildRequires: golang >= 1.20

Provides: %{name} = %{version}-%{release}

Expand Down Expand Up @@ -54,6 +54,34 @@ install -pm 755 %{name}/%{name} %{buildroot}%{_bindir}/
%clean
rm -rf %{buildroot}

%post
if [[ -d %{_sysconfdir}/bash_completion.d ]] ; then
%{name} --completion=bash 1> %{_sysconfdir}/bash_completion.d/%{name} 2>/dev/null
fi

if [[ -d %{_datarootdir}/fish/vendor_completions.d ]] ; then
%{name} --completion=fish 1> %{_datarootdir}/fish/vendor_completions.d/%{name}.fish 2>/dev/null
fi

if [[ -d %{_datadir}/zsh/site-functions ]] ; then
%{name} --completion=zsh 1> %{_datadir}/zsh/site-functions/_%{name} 2>/dev/null
fi

%postun
if [[ $1 == 0 ]] ; then
if [[ -f %{_sysconfdir}/bash_completion.d/%{name} ]] ; then
rm -f %{_sysconfdir}/bash_completion.d/%{name} &>/dev/null || :
fi

if [[ -f %{_datarootdir}/fish/vendor_completions.d/%{name}.fish ]] ; then
rm -f %{_datarootdir}/fish/vendor_completions.d/%{name}.fish &>/dev/null || :
fi

if [[ -f %{_datadir}/zsh/site-functions/_%{name} ]] ; then
rm -f %{_datadir}/zsh/site-functions/_%{name} &>/dev/null || :
fi
fi

################################################################################

%files
Expand All @@ -65,6 +93,12 @@ rm -rf %{buildroot}
################################################################################

%changelog
* Mon Feb 19 2024 Anton Novojilov <[email protected]> - 3.0.0-0
- crc64 replaced by xxhash
- Added different output formats for distribution info
- Code refactoring
- Dependencies update

* Tue Dec 19 2023 Anton Novojilov <[email protected]> - 2.0.1-0
- Dependencies update
- Code refactoring
Expand Down
5 changes: 3 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@ module github.com/essentialkaos/uc
go 1.18

require (
github.com/cespare/xxhash v1.1.0
github.com/essentialkaos/depsy v1.1.0
github.com/essentialkaos/ek/v12 v12.92.0
github.com/essentialkaos/ek/v12 v12.100.0
)

require golang.org/x/sys v0.15.0 // indirect
require golang.org/x/sys v0.17.0 // indirect
Loading

0 comments on commit b2728b7

Please sign in to comment.