From 0bda2d6b6efbcf3b0f41d0a93d2277a463b61617 Mon Sep 17 00:00:00 2001 From: filipecosta90 Date: Sun, 22 Aug 2021 01:12:50 +0100 Subject: [PATCH 1/7] [wip] WIP on percentile calculation --- hdr.go | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/hdr.go b/hdr.go index df6abf4..bd871e6 100644 --- a/hdr.go +++ b/hdr.go @@ -331,21 +331,15 @@ func (h *Histogram) ValueAtPercentile(percentile float64) int64 { percentile = 100 } - total := int64(0) countAtPercentile := int64(((percentile / 100) * float64(h.totalCount)) + 0.5) i := h.iterator() - for i.nextCountAtIdx() { - total += i.countAtIdx - if total >= countAtPercentile { - if percentile == 0.0 { - return h.lowestEquivalentValue(i.valueFromIdx) - } - return h.highestEquivalentValue(i.valueFromIdx) - } + for i.nextCountAtIdx(countAtPercentile) { } - - return 0 + if percentile == 0.0 { + return h.lowestEquivalentValue(i.valueFromIdx) + } + return h.highestEquivalentValue(i.valueFromIdx) } // ValueAtPercentiles, given an slice of percentiles returns a map containing for each passed percentile, @@ -372,7 +366,7 @@ func (h *Histogram) ValueAtPercentiles(percentiles []float64) (values map[float6 total := int64(0) currentQuantileSlicePos := 0 i := h.iterator() - for currentQuantileSlicePos < totalQuantilesToCalculate && i.nextCountAtIdx() { + for currentQuantileSlicePos < totalQuantilesToCalculate && i.nextCountAtIdx(h.totalCount) { total += i.countAtIdx for currentQuantileSlicePos < totalQuantilesToCalculate && total >= countAtPercentiles[currentQuantileSlicePos] { currentPercentile := percentiles[currentQuantileSlicePos] @@ -622,8 +616,8 @@ type iterator struct { } // nextCountAtIdx does not update the iterator highestEquivalentValue in order to optimize cpu usage. -func (i *iterator) nextCountAtIdx() bool { - if i.countToIdx >= i.h.totalCount { +func (i *iterator) nextCountAtIdx(limit int64) bool { + if i.countToIdx >= limit { return false } // increment bucket @@ -645,7 +639,7 @@ func (i *iterator) nextCountAtIdx() bool { // Returns the next element in the iteration. func (i *iterator) next() bool { - if !i.nextCountAtIdx() { + if !i.nextCountAtIdx(i.h.totalCount) { return false } i.highestEquivalentValue = i.h.highestEquivalentValue(i.valueFromIdx) From 716f741fbbea4954335ef3654e7d602135bc6529 Mon Sep 17 00:00:00 2001 From: filipecosta90 Date: Sun, 22 Aug 2021 01:41:01 +0100 Subject: [PATCH 2/7] [wip] iterative ValueAtPercentile --- hdr.go | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/hdr.go b/hdr.go index bd871e6..a035728 100644 --- a/hdr.go +++ b/hdr.go @@ -332,14 +332,29 @@ func (h *Histogram) ValueAtPercentile(percentile float64) int64 { } countAtPercentile := int64(((percentile / 100) * float64(h.totalCount)) + 0.5) + var countToIdx int64 = 0 + var valueFromIdx int64 = 0 + var subBucketIdx int32 = -1 + var bucketIdx int32 = 0 - i := h.iterator() - for i.nextCountAtIdx(countAtPercentile) { + for true { + if countToIdx >= countAtPercentile { + break + } + // increment bucket + subBucketIdx++ + if subBucketIdx >= h.subBucketCount { + subBucketIdx = h.subBucketHalfCount + bucketIdx++ + } + + countToIdx += h.getCountAtIndex(bucketIdx, subBucketIdx) + valueFromIdx = h.valueFromIndex(bucketIdx, subBucketIdx) } if percentile == 0.0 { - return h.lowestEquivalentValue(i.valueFromIdx) + return h.lowestEquivalentValue(valueFromIdx) } - return h.highestEquivalentValue(i.valueFromIdx) + return h.highestEquivalentValue(valueFromIdx) } // ValueAtPercentiles, given an slice of percentiles returns a map containing for each passed percentile, From cff480e6af3c5fce872a39cb5e2bfadbc6cbe1e1 Mon Sep 17 00:00:00 2001 From: filipecosta90 Date: Sun, 22 Aug 2021 01:59:55 +0100 Subject: [PATCH 3/7] [add] using getBucketBaseIdx to optimize further ValueAtPercentile --- hdr.go | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/hdr.go b/hdr.go index a035728..ffe1611 100644 --- a/hdr.go +++ b/hdr.go @@ -336,6 +336,7 @@ func (h *Histogram) ValueAtPercentile(percentile float64) int64 { var valueFromIdx int64 = 0 var subBucketIdx int32 = -1 var bucketIdx int32 = 0 + bucketBaseIdx := h.getBucketBaseIdx(bucketIdx) for true { if countToIdx >= countAtPercentile { @@ -346,10 +347,11 @@ func (h *Histogram) ValueAtPercentile(percentile float64) int64 { if subBucketIdx >= h.subBucketCount { subBucketIdx = h.subBucketHalfCount bucketIdx++ + bucketBaseIdx = h.getBucketBaseIdx(bucketIdx) } - countToIdx += h.getCountAtIndex(bucketIdx, subBucketIdx) - valueFromIdx = h.valueFromIndex(bucketIdx, subBucketIdx) + countToIdx += h.getCountAtIndexGivenBucketBaseIdx(bucketBaseIdx, subBucketIdx) + valueFromIdx = int64(subBucketIdx) << uint(int64(bucketIdx)+h.unitMagnitude) } if percentile == 0.0 { return h.lowestEquivalentValue(valueFromIdx) @@ -588,10 +590,20 @@ func (h *Histogram) getCountAtIndex(bucketIdx, subBucketIdx int32) int64 { return h.counts[h.countsIndex(bucketIdx, subBucketIdx)] } +func (h *Histogram) getCountAtIndexGivenBucketBaseIdx(bucketBaseIdx, subBucketIdx int32) int64 { + return h.counts[bucketBaseIdx+subBucketIdx-h.subBucketHalfCount] +} + func (h *Histogram) countsIndex(bucketIdx, subBucketIdx int32) int32 { - bucketBaseIdx := (bucketIdx + 1) << uint(h.subBucketHalfCountMagnitude) - offsetInBucket := subBucketIdx - h.subBucketHalfCount - return bucketBaseIdx + offsetInBucket + return h.getBucketBaseIdx(bucketIdx) + subBucketIdx - h.subBucketHalfCount +} + +func (h *Histogram) getBucketBaseIdx(bucketIdx int32) int32 { + return (bucketIdx + 1) << uint(h.subBucketHalfCountMagnitude) +} + +func (h *Histogram) countsIndexGivenBucketBaseIdx(bucketBaseIdx, subBucketIdx int32) int32 { + return bucketBaseIdx + subBucketIdx - h.subBucketHalfCount } // return the lowest (and therefore highest precision) bucket index that can represent the value From e0e392b1e231cfe075557671237fea18d8fbed85 Mon Sep 17 00:00:00 2001 From: filipecosta90 Date: Sun, 22 Aug 2021 23:04:38 +0100 Subject: [PATCH 4/7] [opt] moved optimization code for ValueAtPercentile() into getValueFromIdxUpToCount() --- hdr.go | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/hdr.go b/hdr.go index ffe1611..9c9834a 100644 --- a/hdr.go +++ b/hdr.go @@ -332,6 +332,14 @@ func (h *Histogram) ValueAtPercentile(percentile float64) int64 { } countAtPercentile := int64(((percentile / 100) * float64(h.totalCount)) + 0.5) + valueFromIdx := h.getValueFromIdxUpToCount(countAtPercentile) + if percentile == 0.0 { + return h.lowestEquivalentValue(valueFromIdx) + } + return h.highestEquivalentValue(valueFromIdx) +} + +func (h *Histogram) getValueFromIdxUpToCount(countAtPercentile int64) int64 { var countToIdx int64 = 0 var valueFromIdx int64 = 0 var subBucketIdx int32 = -1 @@ -353,10 +361,7 @@ func (h *Histogram) ValueAtPercentile(percentile float64) int64 { countToIdx += h.getCountAtIndexGivenBucketBaseIdx(bucketBaseIdx, subBucketIdx) valueFromIdx = int64(subBucketIdx) << uint(int64(bucketIdx)+h.unitMagnitude) } - if percentile == 0.0 { - return h.lowestEquivalentValue(valueFromIdx) - } - return h.highestEquivalentValue(valueFromIdx) + return valueFromIdx } // ValueAtPercentiles, given an slice of percentiles returns a map containing for each passed percentile, From 8e01ef158f91b0bcaa777c3a5d733f601cc7c7a3 Mon Sep 17 00:00:00 2001 From: filipecosta90 Date: Sun, 22 Aug 2021 23:26:14 +0100 Subject: [PATCH 5/7] [fix] Avoid extremelly large slices of results on BenchmarkHistogramValueAtPercentile --- hdr_benchmark_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hdr_benchmark_test.go b/hdr_benchmark_test.go index 0dba312..850a48c 100644 --- a/hdr_benchmark_test.go +++ b/hdr_benchmark_test.go @@ -40,14 +40,14 @@ func BenchmarkHistogramValueAtPercentile(b *testing.B) { var sigfigs = 3 var totalDatapoints = 1000000 h, data := populateHistogramLogNormalDist(b, lowestDiscernibleValue, highestTrackableValue, sigfigs, totalDatapoints) - quantiles := make([]float64, b.N) + quantiles := make([]float64, totalDatapoints) for i := range quantiles { data[i] = rand.Float64() * 100.0 } b.ResetTimer() b.ReportAllocs() for i := 0; i < b.N; i++ { - h.ValueAtPercentile(data[i]) + h.ValueAtPercentile(data[i%totalDatapoints]) } } From a0ace6b51c103cda8709b9776a4e96b8d806b088 Mon Sep 17 00:00:00 2001 From: filipecosta90 Date: Mon, 23 Aug 2021 00:18:19 +0100 Subject: [PATCH 6/7] [fix] Fixed linter errors --- hdr.go | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/hdr.go b/hdr.go index 9c9834a..8002262 100644 --- a/hdr.go +++ b/hdr.go @@ -346,7 +346,7 @@ func (h *Histogram) getValueFromIdxUpToCount(countAtPercentile int64) int64 { var bucketIdx int32 = 0 bucketBaseIdx := h.getBucketBaseIdx(bucketIdx) - for true { + for { if countToIdx >= countAtPercentile { break } @@ -607,10 +607,6 @@ func (h *Histogram) getBucketBaseIdx(bucketIdx int32) int32 { return (bucketIdx + 1) << uint(h.subBucketHalfCountMagnitude) } -func (h *Histogram) countsIndexGivenBucketBaseIdx(bucketBaseIdx, subBucketIdx int32) int32 { - return bucketBaseIdx + subBucketIdx - h.subBucketHalfCount -} - // return the lowest (and therefore highest precision) bucket index that can represent the value // Calculates the number of powers of two by which the value is greater than the biggest value that fits in // bucket 0. This is the bucket index since each successive bucket can hold a value 2x greater. From 8dc009253bad990ab770ef8994ac209d23878588 Mon Sep 17 00:00:00 2001 From: filipecosta90 Date: Tue, 24 Aug 2021 16:07:33 +0100 Subject: [PATCH 7/7] [fix] Fixes per PR review: avoid initializing vars to it's default on getValueFromIdxUpToCount() --- hdr.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hdr.go b/hdr.go index 8002262..b87a1f1 100644 --- a/hdr.go +++ b/hdr.go @@ -340,10 +340,10 @@ func (h *Histogram) ValueAtPercentile(percentile float64) int64 { } func (h *Histogram) getValueFromIdxUpToCount(countAtPercentile int64) int64 { - var countToIdx int64 = 0 - var valueFromIdx int64 = 0 + var countToIdx int64 + var valueFromIdx int64 var subBucketIdx int32 = -1 - var bucketIdx int32 = 0 + var bucketIdx int32 bucketBaseIdx := h.getBucketBaseIdx(bucketIdx) for {