From 20a94f2ac2a0159331e1e61a0e0967208bbbcb32 Mon Sep 17 00:00:00 2001 From: filipe oliveira Date: Sat, 17 Oct 2020 15:47:29 +0100 Subject: [PATCH] V2 HdrHistogram Log format (v1.3) (#35) * [add] V2 HdrHistogram Log format (v1.3) * [fix] Added nolint to the examples * [fix] Fixed error message typo on minimum length on bytes for 7,8,and 9 bytelen * [fix] Fixed ExampleHistogram_PercentilesPrint --- .gitignore | 4 + Makefile | 4 + example_hdr_test.go | 52 +++++++++ example_log_writer_test.go | 114 +++++++++++++++++++ hdr.go | 30 +++++ hdr_encoding.go | 87 ++++++-------- hdr_encoding_test.go | 17 +-- hdr_encoding_whitebox_test.go | 62 +--------- hdr_test.go | 3 + log_reader.go | 209 ++++++++++++++++++++++++++++++++++ log_writer.go | 141 +++++++++++++++++++++++ log_writer_test.go | 81 +++++++++++++ window_test.go | 1 + zigzag.go | 133 ++++++++++++++++++++++ zigzag_whitebox_test.go | 81 +++++++++++++ 15 files changed, 892 insertions(+), 127 deletions(-) create mode 100644 example_log_writer_test.go create mode 100644 log_reader.go create mode 100644 log_writer.go create mode 100644 log_writer_test.go create mode 100644 zigzag.go create mode 100644 zigzag_whitebox_test.go diff --git a/.gitignore b/.gitignore index 18c2f3a..7588593 100644 --- a/.gitignore +++ b/.gitignore @@ -14,8 +14,12 @@ coverage.txt # Test binary, built with `go test -c` *.test +# Test example output +example.logV2.hlog + # Output of the go coverage tool, specifically when used with LiteIDE *.out # Dependency directories (remove the comment below to include it) # vendor/ + diff --git a/Makefile b/Makefile index d5c92e3..7eca75a 100644 --- a/Makefile +++ b/Makefile @@ -7,6 +7,7 @@ GOTEST=$(GOCMD) test GOGET=$(GOCMD) get GOMOD=$(GOCMD) mod GOFMT=$(GOCMD) fmt +GODOC=godoc .PHONY: all test coverage all: test coverage @@ -36,3 +37,6 @@ test: get fmt coverage: get test $(GOTEST) -race -coverprofile=coverage.txt -covermode=atomic . +godoc: + $(GODOC) + diff --git a/example_hdr_test.go b/example_hdr_test.go index 9a53a90..27bf272 100644 --- a/example_hdr_test.go +++ b/example_hdr_test.go @@ -1,13 +1,65 @@ package hdrhistogram_test import ( + "fmt" "github.com/HdrHistogram/hdrhistogram-go" "os" ) +// This latency Histogram could be used to track and analyze the counts of +// observed integer values between 0 us and 30000000 us ( 30 secs ) +// while maintaining a value precision of 4 significant digits across that range, +// translating to a value resolution of : +// - 1 microsecond up to 10 milliseconds, +// - 100 microsecond (or better) from 10 milliseconds up to 10 seconds, +// - 300 microsecond (or better) from 10 seconds up to 30 seconds, +// nolint +func ExampleNew() { + lH := hdrhistogram.New(1, 30000000, 4) + input := []int64{ + 459876, 669187, 711612, 816326, 931423, 1033197, 1131895, 2477317, + 3964974, 12718782, + } + + for _, sample := range input { + lH.RecordValue(sample) + } + + fmt.Printf("Percentile 50: %d\n", lH.ValueAtQuantile(50.0)) + + // Output: + // Percentile 50: 931423 +} + +// This latency Histogram could be used to track and analyze the counts of +// observed integer values between 0 us and 30000000 us ( 30 secs ) +// while maintaining a value precision of 3 significant digits across that range, +// translating to a value resolution of : +// - 1 microsecond up to 1 millisecond, +// - 1 millisecond (or better) up to one second, +// - 1 second (or better) up to it's maximum tracked value ( 30 seconds ). +// nolint +func ExampleHistogram_RecordValue() { + lH := hdrhistogram.New(1, 30000000, 3) + input := []int64{ + 459876, 669187, 711612, 816326, 931423, 1033197, 1131895, 2477317, + 3964974, 12718782, + } + + for _, sample := range input { + lH.RecordValue(sample) + } + + fmt.Printf("Percentile 50: %d\n", lH.ValueAtQuantile(50.0)) + + // Output: + // Percentile 50: 931839 +} + // The following example details the creation of an histogram used to track // and analyze the counts of observed integer values between 0 us and 30000000 us ( 30 secs ) // and the printing of the percentile output format +// nolint func ExampleHistogram_PercentilesPrint() { lH := hdrhistogram.New(1, 30000000, 3) input := []int64{ diff --git a/example_log_writer_test.go b/example_log_writer_test.go new file mode 100644 index 0000000..3438a8e --- /dev/null +++ b/example_log_writer_test.go @@ -0,0 +1,114 @@ +package hdrhistogram_test + +import ( + "bytes" + "fmt" + hdrhistogram "github.com/HdrHistogram/hdrhistogram-go" + "io/ioutil" +) + +// The log format encodes into a single file, multiple histograms with optional shared meta data. +// The following example showcases reading a log file into a slice of histograms +// nolint +func ExampleNewHistogramLogReader() { + dat, _ := ioutil.ReadFile("./test/tagged-Log.logV2.hlog") + r := bytes.NewReader(dat) + + // Create a histogram log reader + reader := hdrhistogram.NewHistogramLogReader(r) + var histograms []*hdrhistogram.Histogram = make([]*hdrhistogram.Histogram, 0) + + // Read all histograms in the file + for hist, err := reader.NextIntervalHistogram(); hist != nil && err == nil; hist, err = reader.NextIntervalHistogram() { + histograms = append(histograms, hist) + } + fmt.Printf("Read a total of %d histograms\n", len(histograms)) + + min := reader.RangeObservedMin() + max := reader.RangeObservedMax() + sigdigits := 3 + overallHistogram := hdrhistogram.New(min, max, sigdigits) + + //// We can then merge all histograms into one and retrieve overall metrics + for _, hist := range histograms { + overallHistogram.Merge(hist) + } + fmt.Printf("Overall count: %d samples\n", overallHistogram.TotalCount()) + fmt.Printf("Overall Percentile 50: %d\n", overallHistogram.ValueAtQuantile(50.0)) + + // Output: + // Read a total of 42 histograms + // Overall count: 32290 samples + // Overall Percentile 50: 344319 + +} + +// The log format encodes into a single file, multiple histograms with optional shared meta data. +// The following example showcases writing multiple histograms into a log file and then +// processing them again to confirm a proper encode-decode flow +// nolint +func ExampleNewHistogramLogWriter() { + var buff bytes.Buffer + + // Create a histogram log writer to write to a bytes.Buffer + writer := hdrhistogram.NewHistogramLogWriter(&buff) + + writer.OutputLogFormatVersion() + writer.OutputStartTime(0) + writer.OutputLegend() + + // Lets create 3 distinct histograms to exemply the logwriter features + // each one with a time-frame of 60 secs ( 60000 ms ) + hist1 := hdrhistogram.New(1, 30000000, 3) + hist1.SetStartTimeMs(0) + hist1.SetEndTimeMs(60000) + for _, sample := range []int64{10, 20, 30, 40} { + hist1.RecordValue(sample) + } + hist2 := hdrhistogram.New(1, 3000, 3) + hist1.SetStartTimeMs(60001) + hist1.SetEndTimeMs(120000) + for _, sample := range []int64{50, 70, 80, 60} { + hist2.RecordValue(sample) + } + hist3 := hdrhistogram.New(1, 30000, 3) + hist1.SetStartTimeMs(120001) + hist1.SetEndTimeMs(180000) + for _, sample := range []int64{90, 100} { + hist3.RecordValue(sample) + } + writer.OutputIntervalHistogram(hist1) + writer.OutputIntervalHistogram(hist2) + writer.OutputIntervalHistogram(hist3) + + ioutil.WriteFile("example.logV2.hlog", buff.Bytes(), 0644) + + // read check + // Lets read all again and confirm that the total sample count is 10 + dat, _ := ioutil.ReadFile("example.logV2.hlog") + r := bytes.NewReader(dat) + + // Create a histogram log reader + reader := hdrhistogram.NewHistogramLogReader(r) + var histograms []*hdrhistogram.Histogram = make([]*hdrhistogram.Histogram, 0) + + // Read all histograms in the file + for hist, err := reader.NextIntervalHistogram(); hist != nil && err == nil; hist, err = reader.NextIntervalHistogram() { + histograms = append(histograms, hist) + } + fmt.Printf("Read a total of %d histograms\n", len(histograms)) + + min := reader.RangeObservedMin() + max := reader.RangeObservedMax() + sigdigits := 3 + overallHistogram := hdrhistogram.New(min, max, sigdigits) + + //// We can then merge all histograms into one and retrieve overall metrics + for _, hist := range histograms { + overallHistogram.Merge(hist) + } + fmt.Printf("Overall count: %d samples\n", overallHistogram.TotalCount()) + // Output: + // Read a total of 3 histograms + // Overall count: 10 samples +} diff --git a/hdr.go b/hdr.go index eeea526..55b449b 100644 --- a/hdr.go +++ b/hdr.go @@ -40,6 +40,33 @@ type Histogram struct { countsLen int32 totalCount int64 counts []int64 + startTimeMs int64 + endTimeMs int64 + tag string +} + +func (h *Histogram) Tag() string { + return h.tag +} + +func (h *Histogram) SetTag(tag string) { + h.tag = tag +} + +func (h *Histogram) EndTimeMs() int64 { + return h.endTimeMs +} + +func (h *Histogram) SetEndTimeMs(endTimeMs int64) { + h.endTimeMs = endTimeMs +} + +func (h *Histogram) StartTimeMs() int64 { + return h.startTimeMs +} + +func (h *Histogram) SetStartTimeMs(startTimeMs int64) { + h.startTimeMs = startTimeMs } // New returns a new Histogram instance capable of tracking values in the given @@ -93,6 +120,9 @@ func New(minValue, maxValue int64, sigfigs int) *Histogram { countsLen: countsLen, totalCount: 0, counts: make([]int64, countsLen), + startTimeMs: 0, + endTimeMs: 0, + tag: "", } } diff --git a/hdr_encoding.go b/hdr_encoding.go index 791fa65..44a6f3c 100644 --- a/hdr_encoding.go +++ b/hdr_encoding.go @@ -1,8 +1,7 @@ -//Histograms are encoded using the HdrHistogram V2 format which is based on an adapted ZigZag LEB128 encoding where: -//consecutive zero counters are encoded as a negative number representing the count of consecutive zeros -//non zero counter values are encoded as a positive number -//An empty histogram (all zeros counters) is encoded in exactly 48 bytes regardless of the counter size. -//A typical histogram (2 digits precision 1 usec to 1 day range) can be encoded in less than the typical MTU size of 1500 bytes. +// Histograms are encoded using the HdrHistogram V2 format which is based on an adapted ZigZag LEB128 encoding where: +// consecutive zero counters are encoded as a negative number representing the count of consecutive zeros +// non zero counter values are encoded as a positive number +// A typical histogram (2 digits precision 1 usec to 1 day range) can be encoded in less than the typical MTU size of 1500 bytes. package hdrhistogram import ( @@ -10,7 +9,6 @@ import ( "compress/zlib" "encoding/base64" "encoding/binary" - "errors" "fmt" "io/ioutil" ) @@ -71,7 +69,9 @@ func (h *Histogram) dumpV2CompressedEncoding() (outBuffer []byte, err error) { // final buffer buf := new(bytes.Buffer) err = binary.Write(buf, binary.BigEndian, compressedEncodingCookie) - + if err != nil { + return + } toCompress, err := h.encodeIntoByteBuffer() if err != nil { return @@ -83,13 +83,22 @@ func (h *Histogram) dumpV2CompressedEncoding() (outBuffer []byte, err error) { if err != nil { return } - w.Write(uncompressedBytes) + _, err = w.Write(uncompressedBytes) + if err != nil { + return + } w.Close() // LengthOfCompressedContents compressedContents := b.Bytes() err = binary.Write(buf, binary.BigEndian, int32(len(compressedContents))) + if err != nil { + return + } err = binary.Write(buf, binary.BigEndian, compressedContents) + if err != nil { + return + } outBuffer = []byte(base64.StdEncoding.EncodeToString(buf.Bytes())) return } @@ -159,21 +168,26 @@ func decodeCompressedFormat(compressedContents []byte, headerSize int) (rh *Hist } actualPayloadLen := decompressedSliceLen - int32(headerSize) if PayloadLength != actualPayloadLen { - err = errors.New(fmt.Sprintf("PayloadLength should have the same size of the actual payload. Got %d want %d", actualPayloadLen, PayloadLength)) + err = fmt.Errorf("PayloadLength should have the same size of the actual payload. Got %d want %d", actualPayloadLen, PayloadLength) return } rh = New(LowestTrackableValue, HighestTrackableValue, int(NumberOfSignificantValueDigits)) payload := decompressedSlice[headerSize:] - fillCountsArrayFromSourceBuffer(payload, rh) + err = fillCountsArrayFromSourceBuffer(payload, rh) return rh, err } -func fillCountsArrayFromSourceBuffer(payload []byte, rh *Histogram) { +func fillCountsArrayFromSourceBuffer(payload []byte, rh *Histogram) (err error) { var payloadSlicePos = 0 var dstIndex int64 = 0 + var n int + var count int64 + var zerosCount int64 for payloadSlicePos < len(payload) { - var zerosCount int64 = 0 - count, n := zig_zag_decode_i64(payload[payloadSlicePos:]) + count, n, err = zig_zag_decode_i64(payload[payloadSlicePos:]) + if err != nil { + return + } payloadSlicePos += n if count < 0 { zerosCount = -count @@ -183,6 +197,7 @@ func fillCountsArrayFromSourceBuffer(payload []byte, rh *Histogram) { dstIndex += 1 } } + return } func (rh *Histogram) fillBufferFromCountsArray() (buffer []byte, err error) { @@ -206,54 +221,20 @@ func (rh *Histogram) fillBufferFromCountsArray() (buffer []byte, err error) { } if zeros > 1 { err = binary.Write(buf, binary.BigEndian, zig_zag_encode_i64(-zeros)) + if err != nil { + return + } } else { err = binary.Write(buf, binary.BigEndian, zig_zag_encode_i64(count)) + if err != nil { + return + } } } buffer = buf.Bytes() return } -// Read an LEB128 ZigZag encoded long value from the given buffer -func zig_zag_decode_i64(buf []byte) (signedValue int64, n int) { - var value uint64 = 0 - for shift := uint(0); ; shift += 7 { - if n >= len(buf) { - return 0, 0 - } - b := uint64(buf[n]) - n++ - value |= (b & 0x7f) << shift - if (b & 0x80) == 0 { - break - } - } - signedValue = int64((value >> 1) ^ -(value & 1)) - return -} - -// Writes a int64_t value to the given buffer in LEB128 ZigZag encoded format -// ZigZag encoding maps signed integers to unsigned integers so that numbers with a small -// absolute value (for instance, -1) have a small varint encoded value too. -// It does this in a way that "zig-zags" back and forth through the positive and negative integers, -// so that -1 is encoded as 1, 1 is encoded as 2, -2 is encoded as 3, and so on. -func zig_zag_encode_i64(signedValue int64) (buffer []byte) { - buffer = make([]byte, 0) - var value uint64 = uint64((signedValue << 1) ^ (signedValue >> 63)) - for { - c := byte(value & 0x7f) - value >>= 7 - if value != 0 { - c |= 0x80 - } - buffer = append(buffer, c) - if c&0x80 == 0 { - break - } - } - return -} - func decodeDeCompressedHeaderFormat(decoded []byte) (Cookie int32, PayloadLength int32, NormalizingIndexOffSet int32, NumberOfSignificantValueDigits int32, LowestTrackableValue int64, HighestTrackableValue int64, IntegerToDoubleConversionRatio float64, err error) { rbuf := bytes.NewBuffer(decoded[0:40]) r32 := make([]int32, 4) diff --git a/hdr_encoding_test.go b/hdr_encoding_test.go index 6dd26fa..ec5daf9 100644 --- a/hdr_encoding_test.go +++ b/hdr_encoding_test.go @@ -1,7 +1,7 @@ package hdrhistogram_test import ( - "github.com/HdrHistogram/hdrhistogram-go" + hdrhistogram "github.com/HdrHistogram/hdrhistogram-go" "github.com/stretchr/testify/assert" "testing" ) @@ -20,18 +20,6 @@ func TestHistogram_Load(t *testing.T) { assert.Equal(t, int64(1), rh.TotalCount()) assert.Equal(t, float64(42.0), rh.Mean()) - // Failing test - //bufferOut, err := rh.Encode(hdrhistogram.V2CompressedEncodingCookieBase) - //if diff := cmp.Diff(inputBase64, bufferOut); diff != "" { - // t.Errorf("The input and decoded->encoded representations differ:\n"+ - // "Original base64 input: %s\n"+ - // "Output base64 encoded: %s\n"+ - // "Differences (-got +want):\n%s", - // string(inputBase64), - // string(bufferOut), - // diff) - //} - rh, err = hdrhistogram.Decode([]byte("HISTFAAAAB94nJNpmSzMwMDABMSMQMzMAAGMUJoJxg9mAgA1TQGm")) assert.Nil(t, err) assert.Equal(t, int64(1), rh.TotalCount()) @@ -63,9 +51,6 @@ func TestHistogram_Dump_empty(t *testing.T) { assert.Equal(t, int64(1000), loadedHist.HighestTrackableValue()) assert.Equal(t, int64(3), loadedHist.SignificantFigures()) - //Uncomment for failing test - //outBuffer, err := loadedHist.Encode(hdrhistogram.V2CompressedEncodingCookieBase) - //assert.Equal(t, empty, outBuffer) } func TestHistogram_Dump_Load_Merge(t *testing.T) { diff --git a/hdr_encoding_whitebox_test.go b/hdr_encoding_whitebox_test.go index da0a777..010806c 100644 --- a/hdr_encoding_whitebox_test.go +++ b/hdr_encoding_whitebox_test.go @@ -3,14 +3,13 @@ package hdrhistogram import ( "github.com/google/go-cmp/cmp" "github.com/stretchr/testify/assert" - "math" - "reflect" "testing" ) func TestHistogram_encodeIntoByteBuffer(t *testing.T) { hist := New(1, 9007199254740991, 2) - hist.RecordValue(42) + err := hist.RecordValue(42) + assert.Nil(t, err) buffer, err := hist.encodeIntoByteBuffer() assert.Nil(t, err) assert.Equal(t, 42, buffer.Len()) @@ -19,7 +18,8 @@ func TestHistogram_encodeIntoByteBuffer(t *testing.T) { func TestHistogram_DumpLoadWhiteBox(t *testing.T) { hist := New(1, 100000, 3) for i := 1; i <= 100; i++ { - hist.RecordValue(int64(i)) + err := hist.RecordValue(int64(i)) + assert.Nil(t, err) } dumpedHistogram, err := hist.Encode(V2CompressedEncodingCookieBase) assert.Nil(t, err) @@ -31,57 +31,3 @@ func TestHistogram_DumpLoadWhiteBox(t *testing.T) { t.Errorf("counts differs: (-got +want)\n%s", diff) } } - -func Test_zig_zag_decode_i64(t *testing.T) { - largeV := int64(math.Exp2(50)) - type args struct { - buffer []byte - } - tests := []struct { - name string - args args - wantSignedValue int64 - wantBytesRead int - }{ - {"56", args{zig_zag_encode_i64(56)}, 56, 1}, - {"-1515", args{zig_zag_encode_i64(-1515)}, -1515, 2}, - {"456", args{zig_zag_encode_i64(456)}, 456, 2}, - {"largeV", args{zig_zag_encode_i64(largeV)}, largeV, 8}, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - gotSignedValue, gotBytesRead := zig_zag_decode_i64(tt.args.buffer) - if gotSignedValue != tt.wantSignedValue { - t.Errorf("zig_zag_decode_i64() gotSignedValue = %v, want %v", gotSignedValue, tt.wantSignedValue) - } - if gotBytesRead != tt.wantBytesRead { - t.Errorf("zig_zag_decode_i64() gotBytesRead = %v, want %v", gotBytesRead, tt.wantBytesRead) - } - }) - } -} - -func Test_zig_zag_encode_i64(t *testing.T) { - largeV := int64(math.Exp2(50)) - type args struct { - value int64 - } - tests := []struct { - name string - args args - wantBuffer []byte - }{ - {"56", args{56}, []byte{112}}, - {"-56", args{-56}, []byte{111}}, - {"456", args{456}, []byte{144, 7}}, - {"-456", args{-456}, []byte{143, 7}}, - {"2^50", args{largeV}, []byte{128, 128, 128, 128, 128, 128, 128, 4}}, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if gotBuffer := zig_zag_encode_i64(tt.args.value); !reflect.DeepEqual(gotBuffer, tt.wantBuffer) { - t.Errorf("zig_zag_encode_i64() = %v, want %v", gotBuffer, tt.wantBuffer) - } - }) - } -} diff --git a/hdr_test.go b/hdr_test.go index e9947dc..10b822c 100644 --- a/hdr_test.go +++ b/hdr_test.go @@ -7,6 +7,7 @@ import ( "testing" ) +// nolint func TestHighSigFig(t *testing.T) { input := []int64{ 459876, 669187, 711612, 816326, 931423, 1033197, 1131895, 2477317, @@ -280,6 +281,7 @@ func TestHighestTrackableValue(t *testing.T) { } } +// nolint func BenchmarkHistogramRecordValue(b *testing.B) { h := hdrhistogram.New(1, 10000000, 3) for i := 0; i < 1000000; i++ { @@ -310,6 +312,7 @@ func TestUnitMagnitudeOverflow(t *testing.T) { } } +// nolint func TestSubBucketMaskOverflow(t *testing.T) { hist := hdrhistogram.New(2e7, 1e8, 5) for _, sample := range [...]int64{1e8, 2e7, 3e7} { diff --git a/log_reader.go b/log_reader.go new file mode 100644 index 0000000..cf60c22 --- /dev/null +++ b/log_reader.go @@ -0,0 +1,209 @@ +package hdrhistogram + +import ( + "bufio" + "io" + "math" + "regexp" + "strconv" + "strings" +) + +type HistogramLogReader struct { + log *bufio.Reader + startTimeSec float64 + observedStartTime bool + baseTimeSec float64 + observedBaseTime bool + + // scanner handling state + absolute bool + rangeStartTimeSec float64 + rangeEndTimeSec float64 + observedMax bool + rangeObservedMax int64 + observedMin bool + rangeObservedMin int64 + reStartTime *regexp.Regexp + reBaseTime *regexp.Regexp + reHistogramInterval *regexp.Regexp +} + +func (hlr *HistogramLogReader) ObservedMin() bool { + return hlr.observedMin +} + +func (hlr *HistogramLogReader) ObservedMax() bool { + return hlr.observedMax +} + +// Returns the overall observed max limit ( up to the current point ) of the read histograms +func (hlr *HistogramLogReader) RangeObservedMax() int64 { + return hlr.rangeObservedMax +} + +// Returns the overall observed min limit ( up to the current point ) of the read histograms +func (hlr *HistogramLogReader) RangeObservedMin() int64 { + return hlr.rangeObservedMin +} + +func NewHistogramLogReader(log io.Reader) *HistogramLogReader { + //# "#[StartTime: %f (seconds since epoch), %s]\n" + reStartTime, _ := regexp.Compile(`#\[StartTime: ([\d\.]*)`) + + //# "#[BaseTime: %f (seconds since epoch)]\n" + reBaseTime, _ := regexp.Compile(`#\[BaseTime: ([\d\.]*)`) + + //# 0.127,1.007,2.769,HISTFAAAAEV42pNpmSz... + //# Tag=A,0.127,1.007,2.769,HISTFAAAAEV42pNpmSz + //# "%f,%f,%f,%s\n" + reHistogramInterval, _ := regexp.Compile(`([\d\.]*),([\d\.]*),([\d\.]*),(.*)`) + // + reader := bufio.NewReader(log) + + return &HistogramLogReader{log: reader, + startTimeSec: 0.0, + observedStartTime: false, + baseTimeSec: 0.0, + observedBaseTime: false, + reStartTime: reStartTime, + reBaseTime: reBaseTime, + reHistogramInterval: reHistogramInterval, + rangeObservedMin: math.MaxInt64, + observedMin: false, + rangeObservedMax: math.MinInt64, + observedMax: false, + } +} + +func (hlr *HistogramLogReader) NextIntervalHistogram() (histogram *Histogram, err error) { + return hlr.NextIntervalHistogramWithRange(0.0, math.MaxFloat64, true) +} + +func (hlr *HistogramLogReader) NextIntervalHistogramWithRange(rangeStartTimeSec, rangeEndTimeSec float64, absolute bool) (histogram *Histogram, err error) { + hlr.rangeStartTimeSec = rangeStartTimeSec + hlr.rangeEndTimeSec = rangeEndTimeSec + hlr.absolute = absolute + return hlr.decodeNextIntervalHistogram() +} + +func (hlr *HistogramLogReader) decodeNextIntervalHistogram() (histogram *Histogram, err error) { + var line string + var tag string = "" + var logTimeStampInSec float64 + var intervalLengthSec float64 + for { + line, err = hlr.log.ReadString('\n') + if err != nil { + if err == io.EOF { + err = nil + break + } + break + } + if line[0] == '#' { + matchRes := hlr.reStartTime.FindStringSubmatch(line) + if len(matchRes) > 0 { + hlr.startTimeSec, err = strconv.ParseFloat(matchRes[1], 64) + if err != nil { + return + } + hlr.observedStartTime = true + continue + } + matchRes = hlr.reBaseTime.FindStringSubmatch(line) + if len(matchRes) > 0 { + hlr.baseTimeSec, err = strconv.ParseFloat(matchRes[1], 64) + if err != nil { + return + } + hlr.observedBaseTime = true + continue + } + continue + } + + if strings.HasPrefix(line, "Tag=") { + commaPos := strings.Index(line, ",") + tag = line[4:commaPos] + line = line[commaPos+1:] + } + + matchRes := hlr.reHistogramInterval.FindStringSubmatch(line) + if len(matchRes) >= 1 { + // Decode: startTimestamp, intervalLength, maxTime, histogramPayload + // Timestamp is expected to be in seconds + logTimeStampInSec, err = strconv.ParseFloat(matchRes[1], 64) + if err != nil { + return + } + intervalLengthSec, err = strconv.ParseFloat(matchRes[2], 64) + if err != nil { + return + } + cpayload := matchRes[4] + + // No explicit start time noted. Use 1st observed time: + + if !hlr.observedStartTime { + hlr.startTimeSec = logTimeStampInSec + hlr.observedStartTime = true + } + + // No explicit base time noted. + // Deduce from 1st observed time (compared to start time): + if !hlr.observedBaseTime { + // Criteria Note: if log timestamp is more than a year in + // the past (compared to StartTime), + // we assume that timestamps in the log are not absolute + if logTimeStampInSec < (hlr.startTimeSec - (365 * 24 * 3600.0)) { + hlr.baseTimeSec = hlr.startTimeSec + } else { + hlr.baseTimeSec = 0.0 + } + hlr.observedBaseTime = true + } + + absoluteStartTimeStampSec := logTimeStampInSec + hlr.baseTimeSec + offsetStartTimeStampSec := absoluteStartTimeStampSec + hlr.startTimeSec + + // Timestamp length is expect to be in seconds + absoluteEndTimeStampSec := absoluteStartTimeStampSec + intervalLengthSec + + var startTimeStampToCheckRangeOn float64 + if hlr.absolute { + startTimeStampToCheckRangeOn = absoluteStartTimeStampSec + } else { + startTimeStampToCheckRangeOn = offsetStartTimeStampSec + } + + if startTimeStampToCheckRangeOn < hlr.rangeStartTimeSec { + continue + } + + if startTimeStampToCheckRangeOn > hlr.rangeEndTimeSec { + return + } + histogram, err = Decode([]byte(cpayload)) + if err != nil { + return + } + + if histogram.Max() > hlr.rangeObservedMax { + hlr.rangeObservedMax = histogram.Max() + } + + if histogram.Min() < hlr.rangeObservedMin { + hlr.rangeObservedMin = histogram.Min() + } + + histogram.SetStartTimeMs(int64(absoluteStartTimeStampSec * 1000.0)) + histogram.SetEndTimeMs(int64(absoluteEndTimeStampSec * 1000.0)) + if tag != "" { + histogram.SetTag(tag) + } + return + } + } + return +} diff --git a/log_writer.go b/log_writer.go new file mode 100644 index 0000000..a5cb1c6 --- /dev/null +++ b/log_writer.go @@ -0,0 +1,141 @@ +//The log format encodes into a single file, multiple histograms with optional shared meta data. +package hdrhistogram + +import ( + "fmt" + "io" + "regexp" + "time" +) + +const HISTOGRAM_LOG_FORMAT_VERSION = "1.3" +const MsToNsRatio float64 = 1000000.0 + +type HistogramLogOptions struct { + startTimeStampSec float64 + endTimeStampSec float64 + maxValueUnitRatio float64 +} + +func DefaultHistogramLogOptions() *HistogramLogOptions { + return &HistogramLogOptions{0, 0, MsToNsRatio} +} + +type HistogramLogWriter struct { + baseTime int64 + log io.Writer +} + +// Return the current base time offset +func (lw *HistogramLogWriter) BaseTime() int64 { + return lw.baseTime +} + +// Set a base time to subtract from supplied histogram start/end timestamps when +// logging based on histogram timestamps. +// baseTime is expected to be in msec since the epoch, as histogram start/end times +// are typically stamped with absolute times in msec since the epoch. +func (lw *HistogramLogWriter) SetBaseTime(baseTime int64) { + lw.baseTime = baseTime +} + +func NewHistogramLogWriter(log io.Writer) *HistogramLogWriter { + return &HistogramLogWriter{baseTime: 0, log: log} +} + +// Output an interval histogram, using the start/end timestamp indicated in the histogram, and the [optional] tag associated with the histogram. +// The histogram start and end timestamps are assumed to be in msec units +// +// By convention, histogram start/end time are generally stamped with absolute times in msec +// since the epoch. For logging with absolute time stamps, the base time would remain zero ( default ). +// For logging with relative time stamps (time since a start point), the base time should be set with SetBaseTime(baseTime int64) +// +// The max value in the histogram will be reported scaled down by a default maxValueUnitRatio of 1000000.0 (which is the msec : nsec ratio). +// If you need to specify a different start/end timestamp or a different maxValueUnitRatio you should use OutputIntervalHistogramWithLogOptions(histogram *Histogram, logOptions *HistogramLogOptions) +func (lw *HistogramLogWriter) OutputIntervalHistogram(histogram *Histogram) (err error) { + return lw.OutputIntervalHistogramWithLogOptions(histogram, nil) +} + +// Output an interval histogram, with the given timestamp information and the [optional] tag associated with the histogram +// +// If you specify non-nil logOptions, and non-zero start timestamp, the the specified timestamp information will be used, and the start timestamp information in the actual histogram will be ignored. +// If you specify non-nil logOptions, and non-zero start timestamp, the the specified timestamp information will be used, and the end timestamp information in the actual histogram will be ignored. +// If you specify non-nil logOptions, The max value reported with the interval line will be scaled by the given maxValueUnitRatio, +// otherwise a default maxValueUnitRatio of 1,000,000 (which is the msec : nsec ratio) will be used. +// +// By convention, histogram start/end time are generally stamped with absolute times in msec +// since the epoch. For logging with absolute time stamps, the base time would remain zero ( default ). +// For logging with relative time stamps (time since a start point), the base time should be set with SetBaseTime(baseTime int64) +func (lw *HistogramLogWriter) OutputIntervalHistogramWithLogOptions(histogram *Histogram, logOptions *HistogramLogOptions) (err error) { + tag := histogram.Tag() + var match bool + tagStr := "" + if tag != "" { + match, err = regexp.MatchString(".[, \\r\\n].", tag) + if err != nil { + return + } + if match { + err = fmt.Errorf("Tag string cannot contain commas, spaces, or line breaks. Used tag: %s", tag) + return + } + tagStr = fmt.Sprintf("Tag=%s,", tag) + } + var usedStartTime float64 = float64(histogram.StartTimeMs()) + var usedEndTime float64 = float64(histogram.EndTimeMs()) + var maxValueUnitRatio float64 = MsToNsRatio + if logOptions != nil { + if logOptions.startTimeStampSec != 0 { + usedStartTime = logOptions.startTimeStampSec + } + if logOptions.endTimeStampSec != 0 { + usedEndTime = logOptions.endTimeStampSec + } + maxValueUnitRatio = logOptions.maxValueUnitRatio + } + startTime := usedStartTime - float64(lw.baseTime)/1000.0 + endTime := usedEndTime - float64(lw.baseTime)/1000.0 + maxValueAsDouble := float64(histogram.Max()) / maxValueUnitRatio + cpayload, err := histogram.Encode(V2CompressedEncodingCookieBase) + if err != nil { + return + } + _, err = lw.log.Write([]byte(fmt.Sprintf("%s%f,%f,%f,%s\n", tagStr, startTime, endTime, maxValueAsDouble, string(cpayload)))) + return +} + +// Log a start time in the log. +// Start time is represented as seconds since epoch with up to 3 decimal places. Line starts with the leading text '#[StartTime:' +func (lw *HistogramLogWriter) OutputStartTime(start_time_msec int64) (err error) { + secs := start_time_msec / 1000 + iso_str := time.Unix(secs, start_time_msec%int64(1000)*int64(1000000000)).Format(time.RFC3339) + _, err = lw.log.Write([]byte(fmt.Sprintf("#[StartTime: %d (seconds since epoch), %s]\n", secs, iso_str))) + return +} + +// Log a base time in the log. +// Base time is represented as seconds since epoch with up to 3 decimal places. Line starts with the leading text '#[BaseTime:' +func (lw *HistogramLogWriter) OutputBaseTime(base_time_msec int64) (err error) { + secs := base_time_msec / 1000 + _, err = lw.log.Write([]byte(fmt.Sprintf("#[Basetime: %d (seconds since epoch)]\n", secs))) + return +} + +// Log a comment to the log. +// A comment is any line that leads with '#' that is not matched by the BaseTime or StartTime formats. Comments are ignored when parsed. +func (lw *HistogramLogWriter) OutputComment(comment string) (err error) { + _, err = lw.log.Write([]byte(fmt.Sprintf("#%s\n", comment))) + return +} + +// Output a legend line to the log. +// Human readable column headers. Ignored when parsed. +func (lw *HistogramLogWriter) OutputLegend() (err error) { + _, err = lw.log.Write([]byte("\"StartTimestamp\",\"Interval_Length\",\"Interval_Max\",\"Interval_Compressed_Histogram\"\n")) + return +} + +// Output a log format version to the log. +func (lw *HistogramLogWriter) OutputLogFormatVersion() (err error) { + return lw.OutputComment(fmt.Sprintf("[Histogram log format version %s]", HISTOGRAM_LOG_FORMAT_VERSION)) +} diff --git a/log_writer_test.go b/log_writer_test.go new file mode 100644 index 0000000..41c8be0 --- /dev/null +++ b/log_writer_test.go @@ -0,0 +1,81 @@ +package hdrhistogram + +import ( + "bytes" + "github.com/stretchr/testify/assert" + "io/ioutil" + "testing" +) + +func TestHistogramLogWriter_empty(t *testing.T) { + var b bytes.Buffer + writer := NewHistogramLogWriter(&b) + err := writer.OutputLogFormatVersion() + assert.Nil(t, err) + var startTimeWritten int64 = 1000 + err = writer.OutputStartTime(startTimeWritten) + assert.Nil(t, err) + err = writer.OutputLogFormatVersion() + assert.Nil(t, err) + err = writer.OutputLegend() + assert.Nil(t, err) + got, _ := b.ReadString('\n') + want := "#[Histogram log format version 1.3]\n" + assert.Equal(t, want, got) + got, _ = b.ReadString('\n') + // avoid failing tests due to GMT time differences ( so we want all to be equal up until the first + ) + want = "#[StartTime: 1 (seconds since epoch), 1970-01-01" + assert.Contains(t, got, want) +} + +func TestHistogramLogWriterReader(t *testing.T) { + var b bytes.Buffer + writer := NewHistogramLogWriter(&b) + err := writer.OutputLogFormatVersion() + assert.Equal(t, nil, err) + var startTimeWritten int64 = 1000 + err = writer.OutputStartTime(startTimeWritten) + assert.Nil(t, err) + err = writer.OutputLogFormatVersion() + assert.Nil(t, err) + err = writer.OutputLegend() + assert.Nil(t, err) + histogram := New(1, 1000, 3) + for i := 0; i < 10; i++ { + err = histogram.RecordValue(int64(i)) + assert.Nil(t, err) + } + err = writer.OutputIntervalHistogram(histogram) + assert.Equal(t, nil, err) + r := bytes.NewReader(b.Bytes()) + reader := NewHistogramLogReader(r) + outHistogram, err := reader.NextIntervalHistogram() + assert.Equal(t, nil, err) + assert.Equal(t, histogram.TotalCount(), outHistogram.TotalCount()) + assert.Equal(t, histogram.LowestTrackableValue(), outHistogram.LowestTrackableValue()) + assert.Equal(t, histogram.HighestTrackableValue(), outHistogram.HighestTrackableValue()) +} + +func TestHistogramLogReader_logV2(t *testing.T) { + dat, err := ioutil.ReadFile("./test/jHiccup-2.0.7S.logV2.hlog") + assert.Equal(t, nil, err) + r := bytes.NewReader(dat) + reader := NewHistogramLogReader(r) + for i := 0; i < 61; i++ { + outHistogram, err := reader.NextIntervalHistogram() + assert.Equal(t, nil, err) + assert.NotNil(t, outHistogram) + } +} + +func TestHistogramLogReader_tagged_log(t *testing.T) { + dat, err := ioutil.ReadFile("./test/tagged-Log.logV2.hlog") + assert.Equal(t, nil, err) + r := bytes.NewReader(dat) + reader := NewHistogramLogReader(r) + for i := 0; i < 42; i++ { + outHistogram, err := reader.NextIntervalHistogram() + assert.Equal(t, nil, err) + assert.NotNil(t, outHistogram) + } +} diff --git a/window_test.go b/window_test.go index 34db582..6a9cf42 100644 --- a/window_test.go +++ b/window_test.go @@ -5,6 +5,7 @@ import ( "testing" ) +// nolint func TestWindowedHistogram(t *testing.T) { w := hdrhistogram.NewWindowed(2, 1, 1000, 3) diff --git a/zigzag.go b/zigzag.go new file mode 100644 index 0000000..f977873 --- /dev/null +++ b/zigzag.go @@ -0,0 +1,133 @@ +package hdrhistogram + +import "fmt" + +const truncatedErrStr = "Truncated compressed histogram decode. Expected minimum length of %d bytes and got %d." + +// Read an LEB128 ZigZag encoded long value from the given buffer +func zig_zag_decode_i64(buf []byte) (signedValue int64, n int, err error) { + buflen := len(buf) + if buflen < 1 { + return 0, 0, nil + } + var value = uint64(buf[0]) & 0x7f + n = 1 + if (buf[0] & 0x80) != 0 { + if buflen < 2 { + err = fmt.Errorf(truncatedErrStr, 2, buflen) + return + } + value |= uint64(buf[1]) & 0x7f << 7 + n = 2 + if (buf[1] & 0x80) != 0 { + if buflen < 3 { + err = fmt.Errorf(truncatedErrStr, 3, buflen) + return + } + value |= uint64(buf[2]) & 0x7f << 14 + n = 3 + if (buf[2] & 0x80) != 0 { + if buflen < 4 { + err = fmt.Errorf(truncatedErrStr, 4, buflen) + return + } + value |= uint64(buf[3]) & 0x7f << 21 + n = 4 + if (buf[3] & 0x80) != 0 { + if buflen < 5 { + err = fmt.Errorf(truncatedErrStr, 5, buflen) + return + } + value |= uint64(buf[4]) & 0x7f << 28 + n = 5 + if (buf[4] & 0x80) != 0 { + if buflen < 6 { + err = fmt.Errorf(truncatedErrStr, 6, buflen) + return + } + value |= uint64(buf[5]) & 0x7f << 35 + n = 6 + if (buf[5] & 0x80) != 0 { + if buflen < 7 { + err = fmt.Errorf(truncatedErrStr, 7, buflen) + return + } + value |= uint64(buf[6]) & 0x7f << 42 + n = 7 + if (buf[6] & 0x80) != 0 { + if buflen < 8 { + err = fmt.Errorf(truncatedErrStr, 8, buflen) + return + } + value |= uint64(buf[7]) & 0x7f << 49 + n = 8 + if (buf[7] & 0x80) != 0 { + if buflen < 9 { + err = fmt.Errorf(truncatedErrStr, 9, buflen) + return + } + value |= uint64(buf[8]) << 56 + n = 9 + } + } + } + } + } + } + } + } + signedValue = int64((value >> 1) ^ -(value & 1)) + return +} + +// Writes a int64_t value to the given buffer in LEB128 ZigZag encoded format +// ZigZag encoding maps signed integers to unsigned integers so that numbers with a small +// absolute value (for instance, -1) have a small varint encoded value too. +// It does this in a way that "zig-zags" back and forth through the positive and negative integers, +// so that -1 is encoded as 1, 1 is encoded as 2, -2 is encoded as 3, and so on. +func zig_zag_encode_i64(signedValue int64) (buffer []byte) { + buffer = make([]byte, 0) + var value = uint64((signedValue << 1) ^ (signedValue >> 63)) + if value>>7 == 0 { + buffer = append(buffer, byte(value)) + } else { + buffer = append(buffer, byte((value&0x7F)|0x80)) + if value>>14 == 0 { + buffer = append(buffer, byte(value>>7)) + } else { + buffer = append(buffer, byte((value>>7)|0x80)) + if value>>21 == 0 { + buffer = append(buffer, byte(value>>14)) + } else { + buffer = append(buffer, byte((value>>14)|0x80)) + if value>>28 == 0 { + buffer = append(buffer, byte(value>>21)) + } else { + buffer = append(buffer, byte((value>>21)|0x80)) + if value>>35 == 0 { + buffer = append(buffer, byte(value>>28)) + } else { + buffer = append(buffer, byte((value>>28)|0x80)) + if value>>42 == 0 { + buffer = append(buffer, byte(value>>35)) + } else { + buffer = append(buffer, byte((value>>35)|0x80)) + if value>>49 == 0 { + buffer = append(buffer, byte(value>>42)) + } else { + buffer = append(buffer, byte((value>>42)|0x80)) + if value>>56 == 0 { + buffer = append(buffer, byte(value>>49)) + } else { + buffer = append(buffer, byte((value>>49)|0x80)) + buffer = append(buffer, byte(value>>56)) + } + } + } + } + } + } + } + } + return +} diff --git a/zigzag_whitebox_test.go b/zigzag_whitebox_test.go new file mode 100644 index 0000000..1f69694 --- /dev/null +++ b/zigzag_whitebox_test.go @@ -0,0 +1,81 @@ +package hdrhistogram + +import ( + "math" + "reflect" + "testing" +) + +func Test_zig_zag_decode_i64(t *testing.T) { + largeV := int64(math.Exp2(50)) + type args struct { + buffer []byte + } + tests := []struct { + name string + args args + wantSignedValue int64 + wantBytesRead int + wantErr bool + }{ + {"empty", args{[]byte{}}, 0, 0, false}, + {"1", args{[]byte{1}}, -1, 1, false}, + {"2", args{[]byte{2}}, 1, 1, false}, + {"3", args{[]byte{3}}, -2, 1, false}, + {"4", args{[]byte{4}}, 2, 1, false}, + {"truncated 2nd byte", args{[]byte{128}}, 0, 1, true}, + {"truncated 3rd byte", args{[]byte{128, 128}}, 0, 2, true}, + {"truncated 4th byte", args{[]byte{128, 128, 128}}, 0, 3, true}, + {"truncated 5th byte", args{[]byte{128, 128, 128, 128}}, 0, 4, true}, + {"truncated 6th byte", args{[]byte{128, 128, 128, 128, 128}}, 0, 5, true}, + {"truncated 7th byte", args{[]byte{128, 128, 128, 128, 128, 128}}, 0, 6, true}, + {"truncated 8th byte", args{[]byte{128, 128, 128, 128, 128, 128, 128}}, 0, 7, true}, + {"truncated 9th byte", args{[]byte{128, 128, 128, 128, 128, 128, 128, 128}}, 0, 8, true}, + {"56", args{zig_zag_encode_i64(56)}, 56, 1, false}, + {"-1515", args{zig_zag_encode_i64(-1515)}, -1515, 2, false}, + {"456", args{zig_zag_encode_i64(456)}, 456, 2, false}, + {"largeV", args{zig_zag_encode_i64(largeV)}, largeV, 8, false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gotSignedValue, gotBytesRead, gotErr := zig_zag_decode_i64(tt.args.buffer) + if gotSignedValue != tt.wantSignedValue { + t.Errorf("zig_zag_decode_i64() gotSignedValue = %v, want %v", gotSignedValue, tt.wantSignedValue) + } + if gotBytesRead != tt.wantBytesRead { + t.Errorf("zig_zag_decode_i64() gotBytesRead = %v, want %v", gotBytesRead, tt.wantBytesRead) + } + if gotErr == nil && tt.wantErr { + t.Errorf("zig_zag_decode_i64() gotErr = %v, wanted error", gotErr) + } + if tt.wantErr == false && gotErr != nil { + t.Errorf("zig_zag_decode_i64() gotErr = %v, wanted nil", gotErr) + } + }) + } +} + +func Test_zig_zag_encode_i64(t *testing.T) { + largeV := int64(math.Exp2(50)) + type args struct { + value int64 + } + tests := []struct { + name string + args args + wantBuffer []byte + }{ + {"56", args{56}, []byte{112}}, + {"-56", args{-56}, []byte{111}}, + {"456", args{456}, []byte{144, 7}}, + {"-456", args{-456}, []byte{143, 7}}, + {"2^50", args{largeV}, []byte{128, 128, 128, 128, 128, 128, 128, 4}}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if gotBuffer := zig_zag_encode_i64(tt.args.value); !reflect.DeepEqual(gotBuffer, tt.wantBuffer) { + t.Errorf("zig_zag_encode_i64() = %v, want %v", gotBuffer, tt.wantBuffer) + } + }) + } +}