Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DRAFT: Experiments with better resampling algorithms #210

Draft
wants to merge 4 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 107 additions & 0 deletions pkg/timedata/lttb.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
package timedata

import (
"math"
)

// Copied from https://github.com/haoel/downsampling-algorithm

// LTTB downsamples data with the Largest-Triangle-Three-Buckets (LTTB)
// algorithm.
//   - data: the original data. NOTE(review): presumably ordered by X; the
//     code does not check this.
//   - threshold: the number of data points to be returned.
//
// The first and last points are always kept. The points in between are split
// into threshold-2 buckets, and from each bucket the point forming the largest
// triangle with the previously selected point and the average of the
// following bucket is chosen.
//
// data is returned unchanged when threshold is not positive or is not smaller
// than len(data). A threshold of 1 or 2 leaves no room for interior buckets,
// so only the two end points are returned.
func LTTB(data []Point, threshold int) []Point {
	if threshold <= 0 || threshold >= len(data) {
		return data // Nothing to do
	}

	lastIdx := len(data) - 1
	if threshold < 3 {
		// The bucket size below divides by threshold-2, so this case has to be
		// handled before any bucket arithmetic takes place.
		return []Point{data[0], data[lastIdx]}
	}

	sampledData := make([]Point, 0, threshold)

	// Bucket size. Leave room for start and end data points.
	bucketSize := float64(len(data)-2) / float64(threshold-2)

	sampledData = append(sampledData, data[0]) // Always add the first point

	// Three positions describe the sliding window:
	//   bucketLow    - the current bucket's beginning location
	//   bucketMiddle - the current bucket's ending location, which is also the
	//                  beginning location of the next bucket
	//   bucketHigh   - the next bucket's ending location
	bucketLow := 1
	bucketMiddle := int(math.Floor(bucketSize)) + 1

	prevMaxAreaPoint := 0

	for i := 0; i < threshold-2; i++ {
		bucketHigh := int(math.Floor(float64(i+2)*bucketSize)) + 1
		if bucketHigh > lastIdx {
			// The window of the final bucket would otherwise reach past the
			// end of data; slicing there panics, or reads unrelated elements
			// when the slice has spare capacity.
			bucketHigh = lastIdx
		}

		// Average of the next bucket; it acts as the third triangle corner.
		avgPoint := calculateAverageDataPoint(data[bucketMiddle : bucketHigh+1])

		// Point a: the point selected from the previous bucket.
		pointA := data[prevMaxAreaPoint]

		maxArea := -1.0
		// Default to the bucket's first point so a bucket whose areas are all
		// NaN still contributes one of its own points.
		maxAreaPoint := bucketLow
		for candidate := bucketLow; candidate < bucketMiddle; candidate++ {
			area := calculateTriangleArea(pointA, avgPoint, data[candidate])
			if area > maxArea {
				maxArea = area
				maxAreaPoint = candidate
			}
		}

		sampledData = append(sampledData, data[maxAreaPoint]) // Pick this point from the bucket
		prevMaxAreaPoint = maxAreaPoint                       // It anchors the next bucket's triangles

		// Move to the next window.
		bucketLow = bucketMiddle
		bucketMiddle = bucketHigh
	}

	sampledData = append(sampledData, data[lastIdx]) // Always add the last point

	return sampledData
}

// LTTB2 downsamples data to threshold points by first splitting it into
// buckets with splitDataBucket and then selecting one point per bucket with
// LTTBForBuckets.
//
// It applies the same input rules as LTTB: data is returned unchanged when
// threshold is not positive or is not smaller than len(data), and a threshold
// of 1 or 2 yields only the first and last points. Without these guards the
// bucket split panics on empty data or a non-positive threshold.
func LTTB2(data []Point, threshold int) []Point {
	if threshold <= 0 || threshold >= len(data) {
		return data // Nothing to do
	}
	if threshold < 3 {
		// No interior buckets exist; len(data) > threshold >= 1 guarantees at
		// least two points here.
		return []Point{data[0], data[len(data)-1]}
	}

	return LTTBForBuckets(splitDataBucket(data, threshold))
}

// LTTBForBuckets picks one point per bucket using the largest-triangle rule.
//
// The first point of the first bucket and the first point of the last bucket
// are always emitted. For every bucket in between, the point that forms the
// largest triangle with the previously selected point and the average of the
// next bucket is emitted. When there is only one bucket it is both the first
// and the last bucket, so its first point appears twice.
//
// It returns nil when buckets is empty or when the first or last bucket holds
// no points. Empty interior buckets are skipped.
func LTTBForBuckets(buckets [][]Point) []Point {
	bucketCount := len(buckets)
	if bucketCount == 0 || len(buckets[0]) == 0 || len(buckets[bucketCount-1]) == 0 {
		return nil
	}

	sampledData := make([]Point, 0, bucketCount+1)

	lastSelectedDataPoint := buckets[0][0]
	sampledData = append(sampledData, lastSelectedDataPoint)

	for i := 1; i < bucketCount-1; i++ {
		bucket := buckets[i]
		if len(bucket) == 0 {
			continue // Nothing to choose from.
		}
		averagePoint := calculateAveragePoint(buckets[i+1])

		maxArea := -1.0
		// Start at 0 so a bucket whose areas are all NaN still yields a valid
		// index instead of -1.
		maxAreaIndex := 0
		for j, point := range bucket {
			area := calculateTriangleArea(lastSelectedDataPoint, point, averagePoint)
			if area > maxArea {
				maxArea = area
				maxAreaIndex = j
			}
		}

		// Plain assignment on purpose: `:=` here would declare a new variable
		// scoped to this iteration and leave the anchor stuck on the very
		// first point for every bucket.
		lastSelectedDataPoint = bucket[maxAreaIndex]
		sampledData = append(sampledData, lastSelectedDataPoint)
	}

	sampledData = append(sampledData, buckets[bucketCount-1][0])
	return sampledData
}
27 changes: 27 additions & 0 deletions pkg/timedata/timedata.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,36 @@ import (
"github.com/cointop-sh/cointop/pkg/humanize"
)

// Point is a point on a line, stored as an X/Y coordinate pair.
type Point struct {
	X, Y float64
}

// ResampleTimeSeriesData resamples the given [timestamp,value] data.
//
// When numSteps is larger than len(data) the series is upsampled by
// LinearInterpolateTimeSeriesData between start and end. Otherwise every row
// is converted to a Point (row[0] becomes X, row[1] becomes Y, so each row
// needs at least two values), the points are downsampled by LTTB with
// numSteps as the requested point count, and the result is converted back to
// rows. start and end are not used on the downsampling path.
//
// NOTE(review): the earlier comment promised numSteps+1 output points and
// possible NaN values when data does not extend past start/end; that describes
// the interpolation path, while LTTB is asked for numSteps points — confirm
// which count callers expect.
func ResampleTimeSeriesData(data [][]float64, start float64, end float64, numSteps int) [][]float64 {
	if numSteps > len(data) {
		// More steps than samples: interpolate.
		return LinearInterpolateTimeSeriesData(data, start, end, numSteps)
	}

	// Fewer (or equally many) steps than samples: downsample with LTTB.
	var points []Point
	if rows := len(data); rows > 0 {
		points = make([]Point, rows)
		for i, row := range data {
			points[i] = Point{X: row[0], Y: row[1]}
		}
	}

	sampled := LTTB(points, numSteps)
	if len(sampled) == 0 {
		return nil
	}

	resampled := make([][]float64, len(sampled))
	for i, pt := range sampled {
		resampled[i] = []float64{pt.X, pt.Y}
	}
	return resampled
}

func LinearInterpolateTimeSeriesData(data [][]float64, start float64, end float64, numSteps int) [][]float64 {
var newData [][]float64
l := len(data)
step := (end - start) / float64(numSteps)
Expand Down
84 changes: 84 additions & 0 deletions pkg/timedata/utils.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
package timedata

import (
"math"
)

// Copied from https://github.com/haoel/downsampling-algorithm

// calculateTriangleArea returns the non-negative area of the triangle whose
// corners are pa, pb and pc. It takes half of the cross-product style
// expression built from the coordinate differences relative to pa and drops
// the sign.
func calculateTriangleArea(pa, pb, pc Point) float64 {
	first := (pa.X - pc.X) * (pb.Y - pa.Y)
	second := (pa.X - pb.X) * (pc.Y - pa.Y)
	return math.Abs((first - second) * 0.5)
}

// calculateAverageDataPoint returns the point whose X and Y are the arithmetic
// means of the X and Y values in points. For an empty slice both sums are
// divided by zero, so both coordinates come out as NaN.
func calculateAverageDataPoint(points []Point) (avg Point) {
	var sumX, sumY float64
	for i := range points {
		sumX += points[i].X
		sumY += points[i].Y
	}

	count := float64(len(points))
	avg = Point{X: sumX / count, Y: sumY / count}
	return avg
}

// splitDataBucket distributes data over threshold buckets for LTTBForBuckets.
//
// The first bucket receives the first data point and the last bucket the last
// data point. The points in between are spread over the remaining threshold-2
// buckets in order. Each interior bucket except the final one also includes
// the first point of the following bucket, so neighbouring buckets overlap by
// one point.
//
// It returns nil when data is empty or threshold is not positive. With
// threshold 1 the single bucket holds both the first and the last point. When
// data has fewer than three points there is nothing to distribute and the
// interior buckets stay empty.
func splitDataBucket(data []Point, threshold int) [][]Point {
	if threshold <= 0 || len(data) == 0 {
		return nil
	}

	buckets := make([][]Point, threshold)

	// First and last bucket are formed by the first and the last data points.
	buckets[0] = append(buckets[0], data[0])
	buckets[threshold-1] = append(buckets[threshold-1], data[len(data)-1])

	if threshold < 3 || len(data) < 3 {
		// No interior buckets, or no interior points to put into them. This
		// also keeps the division by threshold-2 below well defined.
		return buckets
	}

	// Slice without the first and last point.
	d := data[1 : len(data)-1]

	// Only threshold-2 buckets are left to fill.
	bucketSize := float64(len(d)) / float64(threshold-2)

	for i := 0; i < threshold-2; i++ {
		bucketStartIdx := int(math.Floor(float64(i) * bucketSize))
		bucketEndIdx := int(math.Floor(float64(i+1)*bucketSize)) + 1
		if i == threshold-3 || bucketEndIdx > len(d) {
			// The final interior bucket runs to the end of d; the extra bound
			// check keeps every other bucket inside d as well.
			bucketEndIdx = len(d)
		}
		buckets[i+1] = append(buckets[i+1], d[bucketStartIdx:bucketEndIdx]...)
	}

	return buckets
}

// calculateAveragePoint returns the point located at the mean X and mean Y of
// points. An empty slice divides zero by zero and therefore produces NaN for
// both coordinates.
func calculateAveragePoint(points []Point) Point {
	var total Point
	for _, pt := range points {
		total.X += pt.X
		total.Y += pt.Y
	}

	n := float64(len(points))
	return Point{X: total.X / n, Y: total.Y / n}
}

// peakAndTroughPointIndex returns the index of the point with the largest Y
// (the peak) followed by the index of the point with the smallest Y (the
// trough). On ties the earliest index wins. Both indices are 0 for an empty
// slice.
func peakAndTroughPointIndex(points []Point) (int, int) {
	// Start from the infinities so that any finite Y replaces them. A seed of
	// -0.1 for the peak would never be beaten by a series lying entirely at or
	// below -0.1 and would wrongly report index 0.
	peak := math.Inf(-1)
	trough := math.Inf(1)
	peakIdx, troughIdx := 0, 0

	for i, pt := range points {
		if pt.Y > peak {
			peak = pt.Y
			peakIdx = i
		}
		if pt.Y < trough {
			trough = pt.Y
			troughIdx = i
		}
	}
	return peakIdx, troughIdx
}