From 47c0ca8ab35ba6cd6f7944e860c572330fdd8a6c Mon Sep 17 00:00:00 2001 From: kbkpbot Date: Mon, 23 Dec 2024 22:41:02 +0800 Subject: [PATCH] math.stats: support int/i64 arrays, fix tests (fix #23245) (#23249) --- vlib/math/stats/stats.v | 120 +++++-- vlib/math/stats/stats_test.v | 583 ++++++++++++++++++++++++++--------- 2 files changed, 525 insertions(+), 178 deletions(-) diff --git a/vlib/math/stats/stats.v b/vlib/math/stats/stats.v index 72834fc24d44cc..ed071d6bacb640 100644 --- a/vlib/math/stats/stats.v +++ b/vlib/math/stats/stats.v @@ -31,7 +31,7 @@ pub fn mean[T](data []T) T { for v in data { sum += v } - return sum / T(data.len) + return T(sum / data.len) } // geometric_mean calculates the central tendency @@ -42,11 +42,16 @@ pub fn geometric_mean[T](data []T) T { if data.len == 0 { return T(0) } - mut sum := 1.0 + mut sum := T(1) for v in data { sum *= v } - return math.pow(sum, 1.0 / T(data.len)) + $if T is f64 { + return math.pow(sum, f64(1.0) / data.len) + } $else { + // use f32 for f32/int/... + return T(math.powf(sum, f32(1.0) / data.len)) + } } // harmonic_mean calculates the reciprocal of the average of reciprocals @@ -57,11 +62,20 @@ pub fn harmonic_mean[T](data []T) T { if data.len == 0 { return T(0) } - mut sum := T(0) - for v in data { - sum += 1.0 / v + $if T is f64 { + mut sum := f64(0) + for v in data { + sum += f64(1.0) / v + } + return f64(data.len / sum) + } $else { + // use f32 for f32/int/... + mut sum := f32(0) + for v in data { + sum += f32(1.0) / f32(v) + } + return T(data.len / sum) } - return T(data.len) / sum } // median returns the middlemost value of the given input array ( input array is assumed to be sorted ) @@ -106,11 +120,21 @@ pub fn rms[T](data []T) T { if data.len == 0 { return T(0) } - mut sum := T(0) - for v in data { - sum += math.pow(v, 2) + + $if T is f64 { + mut sum := f64(0) + for v in data { + sum += math.pow(v, 2) + } + return math.sqrt(sum / data.len) + } $else { + // use f32 for f32/int/... + mut sum := f32(0) + for v in data { + sum += math.powf(v, 2) + } + return T(math.sqrtf(sum / data.len)) } - return math.sqrt(sum / T(data.len)) } // population_variance is the Measure of Dispersion / Spread @@ -134,11 +158,12 @@ pub fn population_variance_mean[T](data []T, mean T) T { if data.len == 0 { return T(0) } + mut sum := T(0) for v in data { - sum += (v - mean) * (v - mean) + sum += T((v - mean) * (v - mean)) } - return sum / T(data.len) + return T(sum / data.len) } // sample_variance calculates the spread of dataset around the mean @@ -162,9 +187,9 @@ pub fn sample_variance_mean[T](data []T, mean T) T { } mut sum := T(0) for v in data { - sum += (v - mean) * (v - mean) + sum += T((v - mean) * (v - mean)) } - return sum / T(data.len - 1) + return T(sum / (data.len - 1)) } // population_stddev calculates how spread out the dataset is @@ -175,7 +200,11 @@ pub fn population_stddev[T](data []T) T { if data.len == 0 { return T(0) } - return math.sqrt(population_variance[T](data)) + $if T is f64 { + return math.sqrt(population_variance[T](data)) + } $else { + return T(math.sqrtf(population_variance[T](data))) + } } // population_stddev_mean calculates how spread out the dataset is, with the provide mean @@ -186,7 +215,11 @@ pub fn population_stddev_mean[T](data []T, mean T) T { if data.len == 0 { return T(0) } - return T(math.sqrt(f64(population_variance_mean[T](data, mean)))) + $if T is f64 { + return math.sqrt(population_variance_mean[T](data, mean)) + } $else { + return T(math.sqrtf(population_variance_mean[T](data, mean))) + } } // Measure of Dispersion / Spread @@ -198,7 +231,11 @@ pub fn sample_stddev[T](data []T) T { if data.len == 0 { return T(0) } - return T(math.sqrt(f64(sample_variance[T](data)))) + $if T is f64 { + return math.sqrt(sample_variance[T](data)) + } $else { + return T(math.sqrtf(sample_variance[T](data))) + } } // Measure of Dispersion / Spread @@ -210,7 +247,11 @@ pub fn sample_stddev_mean[T](data []T, mean T) T { if data.len == 0 { return T(0) } - return T(math.sqrt(f64(sample_variance_mean[T](data, mean)))) + $if T is f64 { + return math.sqrt(sample_variance_mean[T](data, mean)) + } $else { + return T(math.sqrtf(sample_variance_mean[T](data, mean))) + } } // absdev calculates the average distance between each data point and the mean @@ -236,7 +277,7 @@ pub fn absdev_mean[T](data []T, mean T) T { for v in data { sum += math.abs(v - mean) } - return sum / T(data.len) + return T(sum / data.len) } // tts, Sum of squares, calculates the sum over all squared differences between values and overall mean @@ -256,7 +297,7 @@ pub fn tss_mean[T](data []T, mean T) T { } mut tss := T(0) for v in data { - tss += (v - mean) * (v - mean) + tss += T((v - mean) * (v - mean)) } return tss } @@ -393,7 +434,7 @@ pub fn covariance_mean[T](data1 []T, data2 []T, mean1 T, mean2 T) T { for i in 0 .. n { delta1 := data1[i] - mean1 delta2 := data2[i] - mean2 - covariance += (delta1 * delta2 - covariance) / (T(i) + 1.0) + covariance += T((delta1 * delta2 - covariance) / (i + T(1))) } return covariance } @@ -418,10 +459,10 @@ pub fn lag1_autocorrelation_mean[T](data []T, mean T) T { for i := 1; i < data.len; i++ { delta0 := data[i - 1] - mean delta1 := data[i] - mean - q += (delta0 * delta1 - q) / (T(i) + 1.0) - v += (delta1 * delta1 - v) / (T(i) + 1.0) + q += T((delta0 * delta1 - q) / (i + T(1))) + v += T((delta1 * delta1 - v) / (T(i) + T(1))) } - return q / v + return T(q / v) } // kurtosis calculates the measure of the 'tailedness' of the data by finding mean and standard of deviation @@ -435,6 +476,9 @@ pub fn kurtosis[T](data []T) T { // kurtosis_mean_stddev calculates the measure of the 'tailedness' of the data // using the fourth moment the deviations, normalized by the sd pub fn kurtosis_mean_stddev[T](data []T, mean T, sd T) T { + if data.len == 0 { + return T(0) + } mut avg := T(0) // find the fourth moment the deviations, normalized by the sd /* we use a recurrence relation to stably update a running value so @@ -442,9 +486,9 @@ pub fn kurtosis_mean_stddev[T](data []T, mean T, sd T) T { */ for i, v in data { x := (v - mean) / sd - avg += (x * x * x * x - avg) / (T(i) + 1.0) + avg += T((x * x * x * x - avg) / (i + T(1))) } - return avg - T(3.0) + return avg - T(3) } // skew calculates the mean and standard of deviation to find the skew from the data @@ -457,6 +501,9 @@ pub fn skew[T](data []T) T { // skew_mean_stddev calculates the skewness of data pub fn skew_mean_stddev[T](data []T, mean T, sd T) T { + if data.len == 0 { + return T(0) + } mut skew := T(0) // find the sum of the cubed deviations, normalized by the sd. /* we use a recurrence relation to stably update a running value so @@ -464,7 +511,7 @@ pub fn skew_mean_stddev[T](data []T, mean T, sd T) T { */ for i, v in data { x := (v - mean) / sd - skew += (x * x * x - skew) / (T(i) + 1.0) + skew += T((x * x * x - skew) / (i + T(1))) } return skew } @@ -472,16 +519,21 @@ pub fn skew_mean_stddev[T](data []T, mean T, sd T) T { // quantile calculates quantile points // for more reference // https://en.wikipedia.org/wiki/Quantile -pub fn quantile[T](sorted_data []T, f T) T { +pub fn quantile[T](sorted_data []T, f T) !T { if sorted_data.len == 0 { return T(0) } - index := f * (T(sorted_data.len) - 1.0) + index := f * (sorted_data.len - 1) lhs := int(index) - delta := index - T(lhs) - return if lhs == sorted_data.len - 1 { - sorted_data[lhs] + if lhs < 0 || lhs >= sorted_data.len { + return error('index out of range') + } else if lhs == sorted_data.len - 1 { + return sorted_data[lhs] } else { - (1.0 - delta) * sorted_data[lhs] + delta * sorted_data[(lhs + 1)] + if lhs >= sorted_data.len - 1 { + return error('index out of range') + } + delta := index - T(lhs) + return T((1 - delta) * sorted_data[lhs] + delta * sorted_data[(lhs + 1)]) } } diff --git a/vlib/math/stats/stats_test.v b/vlib/math/stats/stats_test.v index ef6fa9baf5a601..d77c773d0b106c 100644 --- a/vlib/math/stats/stats_test.v +++ b/vlib/math/stats/stats_test.v @@ -3,70 +3,75 @@ import math.stats fn test_freq() { // Tests were also verified on Wolfram Alpha - data := [f64(10.0), f64(10.0), f64(5.9), f64(2.7)] + data := [10.0, 10.0, 5.9, 2.7] mut o := stats.freq(data, 10.0) assert o == 2 o = stats.freq(data, 2.7) assert o == 1 o = stats.freq(data, 15) assert o == 0 -} -fn tst_res(str1 string, str2 string) bool { - if (math.abs(str1.f64() - str2.f64())) < 1e-5 { - return true - } - return false + // test for int, i64, f32 array + assert stats.freq[int]([1, 3, 5, 7], 5) == 1 + assert stats.freq[i64]([i64(1), 3, 5, 7], 5) == 1 + assert stats.freq[f32]([f32(1.0), 3, 5, 7], 3.0) == 1 } fn test_mean() { // Tests were also verified on Wolfram Alpha - mut data := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)] + mut data := [10.0, 4.45, 5.9, 2.7] mut o := stats.mean(data) - // Some issue with precision comparison in f64 using == operator hence serializing to string - assert tst_res(o.str(), '5.762500') - data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)] + assert math.alike(o, 5.7625) + data = [-3.0, 67.31, 4.4, 1.89] o = stats.mean(data) - // Some issue with precision comparison in f64 using == operator hence serializing to string - assert tst_res(o.str(), '17.650000') - data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)] + assert math.alike(o, 17.65) + data = [12.0, 7.88, 76.122, 54.83] o = stats.mean(data) - // Some issue with precision comparison in f64 using == operator hence serializing to string - assert tst_res(o.str(), '37.708000') + assert math.alike(o, 37.708) + + // test for int, i64, f32 array + assert stats.mean[int]([1, 2]) == 1 + assert stats.mean[i64]([i64(1), 2]) == 1 + o = stats.mean[f32]([f32(1.0), 3, 5, 7]) + assert math.alike(o, 4.0) } fn test_geometric_mean() { // Tests were also verified on Wolfram Alpha - mut data := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)] + mut data := [10.0, 4.45, 5.9, 2.7] mut o := stats.geometric_mean(data) - // Some issue with precision comparison in f64 using == operator hence serializing to string - assert tst_res(o.str(), '5.15993') - data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)] + assert math.alike(o, 5.159931624158176) + data = [-3.0, 67.31, 4.4, 1.89] o = stats.geometric_mean(data) - // Some issue with precision comparison in f64 using == operator hence serializing to string - ok := o.str() == 'nan' || o.str() == '-nan' || o.str() == '-1.#IND00' || o == f64(0) - || o.str() == '-nan(ind)' - assert ok // Because in math it yields a complex number - data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)] + assert math.is_nan(o) // Because in math it yields a complex number + data = [12.0, 7.88, 76.122, 54.83] o = stats.geometric_mean(data) - // Some issue with precision comparison in f64 using == operator hence serializing to string - assert tst_res(o.str(), '25.064496') + assert math.alike(o, 25.064495926603378) + + // test for int, i64, f32 array + assert stats.geometric_mean[int]([1, 3, 5, 7]) == 3 + assert stats.geometric_mean[i64]([i64(1), 3, 5, 7]) == 3 + o = stats.geometric_mean[f32]([f32(1.0), 3, 5, 7]) + assert math.alike(o, 3.2010858058929443) } fn test_harmonic_mean() { // Tests were also verified on Wolfram Alpha - mut data := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)] + mut data := [10.0, 4.45, 5.9, 2.7] mut o := stats.harmonic_mean(data) - // Some issue with precision comparison in f64 using == operator hence serializing to string - assert tst_res(o.str(), '4.626519') - data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)] + assert math.alike(o, 4.626518526616179) + data = [-3.0, 67.31, 4.4, 1.89] o = stats.harmonic_mean(data) - // Some issue with precision comparison in f64 using == operator hence serializing to string - assert tst_res(o.str(), '9.134577') - data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)] + assert math.alike(o, 9.134577425605814) + data = [12.0, 7.88, 76.122, 54.83] o = stats.harmonic_mean(data) - // Some issue with precision comparison in f64 using == operator hence serializing to string - assert tst_res(o.str(), '16.555477') + assert math.alike(o, 16.555477040152685) + + // test for int, i64, f32 array + assert stats.harmonic_mean[int]([1, 2]) == 1 + assert stats.harmonic_mean[i64]([i64(1), 2]) == 1 + o = stats.harmonic_mean[f32]([f32(1.0), 3, 5, 7]) + assert math.alike(o, 2.3863635063171387) } fn test_median() { @@ -74,196 +79,486 @@ fn test_median() { // Assumes sorted array // Even - mut data := [f64(2.7), f64(4.45), f64(5.9), f64(10.0)] + mut data := [2.7, 4.45, 5.9, 10.0] mut o := stats.median(data) - // Some issue with precision comparison in f64 using == operator hence serializing to string - assert tst_res(o.str(), '5.175000') - data = [f64(-3.0), f64(1.89), f64(4.4), f64(67.31)] + assert math.alike(o, 5.175000000000001) + data = [-3.0, 1.89, 4.4, 67.31] o = stats.median(data) - // Some issue with precision comparison in f64 using == operator hence serializing to string - assert tst_res(o.str(), '3.145000') - data = [f64(7.88), f64(12.0), f64(54.83), f64(76.122)] + assert math.alike(o, 3.145) + data = [7.88, 12.0, 54.83, 76.122] o = stats.median(data) - // Some issue with precision comparison in f64 using == operator hence serializing to string - assert tst_res(o.str(), '33.415000') + assert math.alike(o, 33.415) // Odd - data = [f64(2.7), f64(4.45), f64(5.9), f64(10.0), f64(22)] + data = [2.7, 4.45, 5.9, 10.0, 22] o = stats.median(data) - assert o == f64(5.9) - data = [f64(-3.0), f64(1.89), f64(4.4), f64(9), f64(67.31)] + assert math.alike(o, 5.9) + data = [-3.0, 1.89, 4.4, 9, 67.31] o = stats.median(data) - assert o == f64(4.4) - data = [f64(7.88), f64(3.3), f64(12.0), f64(54.83), f64(76.122)] + assert math.alike(o, 4.4) + data = [7.88, 3.3, 12.0, 54.83, 76.122] o = stats.median(data) - assert o == f64(12.0) + assert math.alike(o, 12.0) + + // test for int, i64, f32 array + assert stats.median[int]([1, 2, 3]) == 2 + assert stats.median[i64]([i64(1), 2, 3]) == 2 + o = stats.median[f32]([f32(1.0), 3, 5, 7]) + assert math.alike(o, 4) } fn test_mode() { // Tests were also verified on Wolfram Alpha - mut data := [f64(2.7), f64(2.7), f64(4.45), f64(5.9), f64(10.0)] + mut data := [2.7, 2.7, 4.45, 5.9, 10.0] mut o := stats.mode(data) - assert o == f64(2.7) - data = [f64(-3.0), f64(1.89), f64(1.89), f64(1.89), f64(9), f64(4.4), f64(4.4), f64(9), - f64(67.31)] + assert math.alike(o, 2.7) + data = [-3.0, 1.89, 1.89, 1.89, 9, 4.4, 4.4, 9, 67.31] o = stats.mode(data) - assert o == f64(1.89) + assert math.alike(o, 1.89) // Testing greedy nature - data = [f64(2.0), f64(4.0), f64(2.0), f64(4.0)] + data = [2.0, 4.0, 2.0, 4.0] o = stats.mode(data) - assert o == f64(2.0) + assert math.alike(o, 2.0) + + // test for int, i64, f32 array + assert stats.mode[int]([1, 2, 3, 1]) == 1 + assert stats.mode[i64]([i64(1), 2, 3, 1]) == 1 + o = stats.mode[f32]([f32(1.0), 3, 5, 7, 3]) + assert math.alike(o, 3) } fn test_rms() { // Tests were also verified on Wolfram Alpha - mut data := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)] + mut data := [10.0, 4.45, 5.9, 2.7] mut o := stats.rms(data) - // Some issue with precision comparison in f64 using == operator hence serializing to string - assert tst_res(o.str(), '6.362046') - data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)] + assert math.alike(o, 6.362045661577729) + data = [-3.0, 67.31, 4.4, 1.89] o = stats.rms(data) - // Some issue with precision comparison in f64 using == operator hence serializing to string - assert tst_res(o.str(), '33.773393') - data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)] + assert math.alike(o, 33.77339263384714) + data = [12.0, 7.88, 76.122, 54.83] o = stats.rms(data) - // Some issue with precision comparison in f64 using == operator hence serializing to string - assert tst_res(o.str(), '47.452561') + assert math.alike(o, 47.45256100570337) + + // test for int, i64, f32 array + assert stats.rms[int]([1, 2, 3, 1]) == 1 + assert stats.rms[i64]([i64(1), 2, 3, 1]) == 1 + o = stats.rms[f32]([f32(1.0), 3, 5, 7, 3]) + assert math.alike(o, 4.312771797180176) } fn test_population_variance() { // Tests were also verified on Wolfram Alpha - mut data := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)] + mut data := [10.0, 4.45, 5.9, 2.7] mut o := stats.population_variance(data) - // Some issue with precision comparison in f64 using == operator hence serializing to string - assert tst_res(o.str(), '7.269219') - data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)] + assert math.alike(o, 7.269218749999999) + data = [-3.0, 67.31, 4.4, 1.89] o = stats.population_variance(data) - // Some issue with precision comparison in f64 using == operator hence serializing to string - assert tst_res(o.str(), '829.119550') - data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)] + assert math.alike(o, 829.119550) + data = [12.0, 7.88, 76.122, 54.83] o = stats.population_variance(data) - // Some issue with precision comparison in f64 using == operator hence serializing to string - assert tst_res(o.str(), '829.852282') + assert math.alike(o, 829.852282) + + // test for int, i64, f32 array + assert stats.population_variance[int]([1, 2, 3, 1]) == 1 + assert stats.population_variance[i64]([i64(1), 2, 3, 1]) == 1 + o = stats.population_variance[f32]([f32(1.0), 3, 5, 7, 3]) + assert math.alike(o, 4.159999847412109) } fn test_sample_variance() { // Tests were also verified on Wolfram Alpha - mut data := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)] + mut data := [10.0, 4.45, 5.9, 2.7] mut o := stats.sample_variance(data) - // Some issue with precision comparison in f64 using == operator hence serializing to string - assert tst_res(o.str(), '9.692292') - data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)] + assert math.alike(o, 9.692291666666666) + data = [-3.0, 67.31, 4.4, 1.89] o = stats.sample_variance(data) - // Some issue with precision comparison in f64 using == operator hence serializing to string - assert tst_res(o.str(), '1105.492733') - data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)] + assert math.alike(o, 1105.4927333333333) + data = [12.0, 7.88, 76.122, 54.83] o = stats.sample_variance(data) - // Some issue with precision comparison in f64 using == operator hence serializing to string - assert tst_res(o.str(), '1106.469709') + assert math.alike(o, 1106.4697093333332) + + // test for int, i64, f32 array + assert stats.sample_variance[int]([1, 2, 3, 1]) == 1 + assert stats.sample_variance[i64]([i64(1), 2, 3, 1]) == 1 + o = stats.sample_variance[f32]([f32(1.0), 3, 5, 7, 3]) + assert math.alike(o, 5.199999809265137) } fn test_population_stddev() { // Tests were also verified on Wolfram Alpha - mut data := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)] + mut data := [10.0, 4.45, 5.9, 2.7] mut o := stats.population_stddev(data) - // Some issue with precision comparison in f64 using == operator hence serializing to string - assert tst_res(o.str(), '2.696149') - data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)] + assert math.alike(o, 2.6961488738569312) + data = [-3.0, 67.31, 4.4, 1.89] o = stats.population_stddev(data) - // Some issue with precision comparison in f64 using == operator hence serializing to string - assert tst_res(o.str(), '28.794436') - data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)] + assert math.alike(o, 28.794436094495754) + data = [12.0, 7.88, 76.122, 54.83] o = stats.population_stddev(data) - // Some issue with precision comparison in f64 using == operator hence serializing to string - assert tst_res(o.str(), '28.807157') + assert math.alike(o, 28.80715678438259) + + // test for int, i64, f32 array + assert stats.population_stddev[int]([1, 2, 3, 1]) == 1 + assert stats.population_stddev[i64]([i64(1), 2, 3, 1]) == 1 + o = stats.population_stddev[f32]([f32(1.0), 3, 5, 7, 3]) + assert math.alike(o, 2.0396077632904053) } fn test_sample_stddev() { // Tests were also verified on Wolfram Alpha - mut data := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)] + mut data := [10.0, 4.45, 5.9, 2.7] mut o := stats.sample_stddev(data) - // Some issue with precision comparison in f64 using == operator hence serializing to string - assert tst_res(o.str(), '3.113245') - data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)] + assert math.alike(o, 3.1132445561932114) + data = [-3.0, 67.31, 4.4, 1.89] o = stats.sample_stddev(data) - // Some issue with precision comparison in f64 using == operator hence serializing to string - assert tst_res(o.str(), '33.248951') - data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)] + assert math.alike(o, 33.2489508606412) + data = [12.0, 7.88, 76.122, 54.83] o = stats.sample_stddev(data) - // Some issue with precision comparison in f64 using == operator hence serializing to string - assert tst_res(o.str(), '33.263639') + assert math.alike(o, 33.26363944810208) + + // test for int, i64, f32 array + assert stats.sample_stddev[int]([1, 2, 3, 1]) == 1 + assert stats.sample_stddev[i64]([i64(1), 2, 3, 1]) == 1 + o = stats.sample_stddev[f32]([f32(1.0), 3, 5, 7, 3]) + assert math.alike(o, 2.280350923538208) } fn test_absdev() { // Tests were also verified on Wolfram Alpha - mut data := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)] + mut data := [10.0, 4.45, 5.9, 2.7] mut o := stats.absdev(data) - // Some issue with precision comparison in f64 using == operator hence serializing to string - assert tst_res(o.str(), '2.187500') - data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)] + assert o == 2.187500048428774 + assert math.alike(o, 2.187500048428774) + data = [-3.0, 67.31, 4.4, 1.89] o = stats.absdev(data) - // Some issue with precision comparison in f64 using == operator hence serializing to string - assert tst_res(o.str(), '24.830000') - data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)] + assert o == 24.829999923706055 + assert math.alike(o, 24.829999923706055) + data = [12.0, 7.88, 76.122, 54.83] o = stats.absdev(data) - // Some issue with precision comparison in f64 using == operator hence serializing to string - assert tst_res(o.str(), '27.768000') + assert o == 27.76800012588501 + assert math.alike(o, 27.76800012588501) + + // test for int, i64, f32 array + assert stats.absdev[int]([1, 2, 3, 1]) == 0 + assert stats.absdev[i64]([i64(1), 2, 3, 1]) == 0 + o = stats.absdev[f32]([f32(1.0), 3, 5, 7, 3]) + assert math.alike(o, 1.7599999904632568) +} + +fn test_tss() { + mut data := [10.0, 4.45, 5.9, 2.7] + mut o := stats.tss(data) + assert math.alike(o, 29.076874999999998) + data = [-3.0, 67.31, 4.4, 1.89] + o = stats.tss(data) + assert math.alike(o, 3316.4782) + data = [12.0, 7.88, 76.122, 54.83] + o = stats.tss(data) + assert math.alike(o, 3319.409128) + + // test for int, i64, f32 array + assert stats.tss[int]([1, 2, 3, 1]) == 5 + assert stats.tss[i64]([i64(1), 2, 3, 1]) == 5 + o = stats.tss[f32]([f32(1.0), 3, 5, 7, 3]) + assert math.alike(o, 20.799999237060547) } fn test_min() { // Tests were also verified on Wolfram Alpha - mut data := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)] + mut data := [10.0, 4.45, 5.9, 2.7] mut o := stats.min(data) - assert o == f64(2.7) - data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)] + assert math.alike(o, 2.7) + data = [-3.0, 67.31, 4.4, 1.89] o = stats.min(data) - assert o == f64(-3.0) - data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)] + assert math.alike(o, -3.0) + data = [12.0, 7.88, 76.122, 54.83] o = stats.min(data) - assert o == f64(7.88) + assert math.alike(o, 7.88) + + // test for int, i64, f32 array + assert stats.min[int]([1, 2, 3, 1]) == 1 + assert stats.min[i64]([i64(1), 2, 3, 1]) == 1 + o = stats.min[f32]([f32(1.0), 3, 5, 7, 3]) + assert math.alike(o, 1.0) } fn test_max() { // Tests were also verified on Wolfram Alpha - mut data := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)] + mut data := [10.0, 4.45, 5.9, 2.7] mut o := stats.max(data) - assert o == f64(10.0) - data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)] + assert math.alike(o, 10.0) + data = [-3.0, 67.31, 4.4, 1.89] o = stats.max(data) - assert o == f64(67.31) - data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)] + assert math.alike(o, 67.31) + data = [12.0, 7.88, 76.122, 54.83] o = stats.max(data) - assert o == f64(76.122) + assert math.alike(o, 76.122) + + // test for int, i64, f32 array + assert stats.max[int]([1, 2, 3, 1]) == 3 + assert stats.max[i64]([i64(1), 2, 3, 1]) == 3 + o = stats.max[f32]([f32(1.0), 3, 5, 7, 3]) + assert math.alike(o, 7.0) +} + +fn test_minmax() { + // Tests were also verified on Wolfram Alpha + mut data := [10.0, 4.45, 5.9, 2.7] + mut o_min, mut o_max := stats.minmax(data) + assert [o_min, o_max] == [2.7, 10.0] + data = [-3.0, 67.31, 4.4, 1.89] + o_min, o_max = stats.minmax(data) + assert [o_min, o_max] == [-3.0, 67.31] + data = [12.0, 7.88, 76.122, 54.83] + o_min, o_max = stats.minmax(data) + assert [o_min, o_max] == [7.88, 76.122] + + // test for int, i64, f32 array + o_min_int, o_max_int := stats.minmax[int]([1, 2, 3, 1]) + assert [o_min_int, o_max_int] == [1, 3] + o_min_i64, o_max_i64 := stats.minmax[i64]([i64(1), 2, 3, 1]) + assert [o_min_i64, o_max_i64] == [i64(1), 3] + o_min_f32, o_max_f32 := stats.minmax[f32]([f32(1.0), 3, 5, 7, 3]) + assert [o_min_f32, o_max_f32] == [f32(1.0), 7] +} + +fn test_min_index() { + // Tests were also verified on Wolfram Alpha + mut data := [10.0, 4.45, 5.9, 2.7] + mut o := stats.min_index(data) + assert o == 3 + data = [-3.0, 67.31, 4.4, 1.89] + o = stats.min_index(data) + assert o == 0 + data = [12.0, 7.88, 76.122, 54.83] + o = stats.min_index(data) + assert o == 1 + + // test for int, i64, f32 array + assert stats.min_index[int]([1, 2, 3, 1]) == 0 + assert stats.min_index[i64]([i64(1), 2, 3, 1]) == 0 + assert stats.min_index[f32]([f32(1.0), 3, 5, 7, 3]) == 0 +} + +fn test_max_index() { + // Tests were also verified on Wolfram Alpha + mut data := [10.0, 4.45, 5.9, 2.7] + mut o := stats.max_index(data) + assert o == 0 + data = [-3.0, 67.31, 4.4, 1.89] + o = stats.max_index(data) + assert o == 1 + data = [12.0, 7.88, 76.122, 54.83] + o = stats.max_index(data) + assert o == 2 + + // test for int, i64, f32 array + assert stats.max_index[int]([1, 2, 3, 1]) == 2 + assert stats.max_index[i64]([i64(1), 2, 3, 1]) == 2 + assert stats.max_index[f32]([f32(1.0), 3, 5, 7, 3]) == 3 +} + +fn test_minmax_index() { + // Tests were also verified on Wolfram Alpha + mut data := [10.0, 4.45, 5.9, 2.7] + mut o_min, mut o_max := stats.minmax_index(data) + assert [o_min, o_max] == [3, 0] + data = [-3.0, 67.31, 4.4, 1.89] + o_min, o_max = stats.minmax_index(data) + assert [o_min, o_max] == [0, 1] + data = [12.0, 7.88, 76.122, 54.83] + o_min, o_max = stats.minmax_index(data) + assert [o_min, o_max] == [1, 2] + + // test for int, i64, f32 array + o_min, o_max = stats.minmax_index[int]([1, 2, 3, 1]) + assert [o_min, o_max] == [0, 2] + o_min, o_max = stats.minmax_index[i64]([i64(1), 2, 3, 1]) + assert [o_min, o_max] == [0, 2] + o_min, o_max = stats.minmax_index[f32]([f32(1.0), 3, 5, 7, 3]) + assert [o_min, o_max] == [0, 3] } fn test_range() { // Tests were also verified on Wolfram Alpha - mut data := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)] + mut data := [10.0, 4.45, 5.9, 2.7] mut o := stats.range(data) - assert o == f64(7.3) - data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)] + assert math.alike(o, 7.3) + data = [-3.0, 67.31, 4.4, 1.89] o = stats.range(data) - assert o == f64(70.31) - data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)] + assert math.alike(o, 70.31) + data = [12.0, 7.88, 76.122, 54.83] o = stats.range(data) - assert o == f64(68.242) + assert math.alike(o, 68.242) + + // test for int, i64, f32 array + assert stats.range[int]([1, 2, 3, 1]) == 2 + assert stats.range[i64]([i64(1), 2, 3, 1]) == 2 + assert stats.range[f32]([f32(1.0), 3, 5, 7, 3]) == 6.0 +} + +fn test_covariance() { + mut data0 := [10.0, 4.45, 5.9, 2.7] + mut data1 := [5.0, 14.45, -15.9, 22.7] + mut o := stats.covariance(data0, data1) + assert math.alike(o, -17.37078207731247) + data0 = [-3.0, 67.31, 4.4, 1.89] + data1 = [5.0, 77.31, 44.4, 11.89] + o = stats.covariance(data0, data1) + assert math.alike(o, 740.0695419311523) + data0 = [12.0, 7.88, 76.122, 54.83] + data1 = [2.0, 5.88, 7.122, 5.83] + o = stats.covariance(data0, data1) + assert math.alike(o, 36.65028190612793) + + // test for int, i64, f32 array + data0_int := [1, 2, 3, 1] + data1_int := [11, 22, 33, 11] + o_int := stats.covariance[int](data0_int, data1_int) + assert o_int == 8 + data0_i64 := [i64(1), 2, 3, 1] + data1_i64 := [i64(11), 22, 33, 11] + o_i64 := stats.covariance[i64](data0_i64, data1_i64) + assert o_i64 == 8 + data0_f32 := [f32(1.0), 2, 3, 1] + data1_f32 := [f32(11.0), 22, 33, 11] + o_f32 := stats.covariance[f32](data0_f32, data1_f32) + assert math.alike(o_f32, 7.562500476837158) +} + +fn test_lag1_autocorrelation() { + mut data := [10.0, 4.45, 5.9, 2.7] + mut o := stats.lag1_autocorrelation(data) + assert math.alike(o, -0.554228566606572) + data = [-3.0, 67.31, 4.4, 1.89] + o = stats.lag1_autocorrelation(data) + assert math.alike(o, -0.5102510823460722) + data = [12.0, 7.88, 76.122, 54.83] + o = stats.lag1_autocorrelation(data) + assert math.alike(o, 0.10484451825170164) + + // test for int, i64, f32 array + assert stats.lag1_autocorrelation[int]([1, 2, 3, 1]) == 0 + assert stats.lag1_autocorrelation[i64]([i64(1), 2, 3, 1]) == 0 + o = stats.lag1_autocorrelation[f32]([f32(1.0), 3, 5, 7, 3]) + assert math.alike(o, 0.1975308507680893) +} + +fn test_kurtosis() { + mut data := [10.0, 4.45, 5.9, 2.7] + mut o := stats.kurtosis(data) + assert math.alike(o, -1.0443214689384779) + data = [-3.0, 67.31, 4.4, 1.89] + o = stats.kurtosis(data) + assert math.alike(o, -0.688495594786176) + data = [12.0, 7.88, 76.122, 54.83] + o = stats.kurtosis(data) + assert math.alike(o, -1.7323772574195067) + + // test for int, i64, f32 array + assert stats.kurtosis[int]([1, 2, 3, 1]) == 1 + assert stats.kurtosis[i64]([i64(1), 2, 3, 1]) == 1 + o = stats.kurtosis[f32]([f32(1.0), 3, 5, 7, 3]) + assert math.alike(o, -1.0443782806396484) +} + +fn test_skew() { + mut data := [10.0, 4.45, 5.9, 2.7] + mut o := stats.skew(data) + assert math.alike(o, 0.5754020379048158) + data = [-3.0, 67.31, 4.4, 1.89] + o = stats.skew(data) + assert math.alike(o, 1.1248732608899568) + data = [12.0, 7.88, 76.122, 54.83] + o = stats.skew(data) + assert math.alike(o, 0.19007917421924964) + + // test for int, i64, f32 array + assert stats.skew[int]([1, 2, 3, 1]) == 2 + assert stats.skew[i64]([i64(1), 2, 3, 1]) == 2 + o = stats.skew[f32]([f32(1.0), 3, 5, 7, 3]) + assert math.alike(o, 0.2715454697608948) +} + +fn test_quantile() { + // Assumes sorted array + + mut data := [2.7, 4.45, 5.9, 10.0] + mut o := stats.quantile(data, 0.1)! + assert math.alike(o, 3.225000020861626) + data = [-3.0, 1.89, 4.4, 67.31] + o = stats.quantile(data, 0.2)! + assert math.alike(o, -0.06599988341331486) + data = [7.88, 12.0, 54.83, 76.122] + o = stats.quantile(data, 0.3)! + assert math.alike(o, 11.587999901771546) + + stats.quantile(data, -0.3) or { assert err.msg() == 'index out of range' } + + stats.quantile(data, 2) or { assert err.msg() == 'index out of range' } + + // test for int, i64, f32 array + assert stats.quantile[int]([1, 2, 3], 1)! == 3 + assert stats.quantile[i64]([i64(1), 2, 3], 1)! == 3 + o = stats.quantile[f32]([f32(1.0), 3, 5, 7], 0.22)! + assert math.alike(o, 2.319999933242798) } fn test_passing_empty() { data := []f64{} assert stats.freq(data, 0) == 0 - assert stats.mean(data) == f64(0) - assert stats.geometric_mean(data) == f64(0) - assert stats.harmonic_mean(data) == f64(0) - assert stats.median(data) == f64(0) - assert stats.mode(data) == f64(0) - assert stats.rms(data) == f64(0) - assert stats.population_variance(data) == f64(0) - assert stats.sample_variance(data) == f64(0) - assert stats.population_stddev(data) == f64(0) - assert stats.sample_stddev(data) == f64(0) - assert stats.absdev(data) == f64(0) - assert stats.min(data) == f64(0) - assert stats.max(data) == f64(0) - assert stats.range(data) == f64(0) + assert stats.mean(data) == 0 + assert stats.geometric_mean(data) == 0 + assert stats.harmonic_mean(data) == 0 + assert stats.median(data) == 0 + assert stats.mode(data) == 0 + assert stats.rms(data) == 0 + assert stats.population_variance(data) == 0 + assert stats.sample_variance(data) == 0 + assert stats.population_stddev(data) == 0 + assert stats.sample_stddev(data) == 0 + assert stats.absdev(data) == 0 + assert stats.min(data) == 0 + assert stats.max(data) == 0 + o_min, o_max := stats.minmax(data) + assert [o_min, o_max] == [f64(0), 0] + assert stats.min_index(data) == 0 + assert stats.max_index(data) == 0 + o_min_index, o_max_index := stats.minmax_index(data) + assert [o_min_index, o_max_index] == [0, 0] + assert stats.range(data) == 0 + assert stats.covariance(data, data) == 0 + assert stats.lag1_autocorrelation(data) == 0 + assert stats.kurtosis(data) == 0 + assert stats.skew(data) == 0 + assert stats.quantile(data, 0)! == 0 +} + +fn test_passing_one() { + data := [100.0] + assert stats.freq(data, 100.0) == 1 + assert stats.mean(data) == 100.0 + assert stats.geometric_mean(data) == 100.0 + assert stats.harmonic_mean(data) == 100.0 + assert stats.median(data) == 100.0 + assert stats.mode(data) == 100.0 + assert stats.rms(data) == 100.0 + assert stats.population_variance(data) == 0.0 + assert math.is_nan(stats.sample_variance(data)) + assert stats.population_stddev(data) == 0.0 + assert math.is_nan(stats.sample_stddev(data)) + assert stats.absdev(data) == 0.0 + assert stats.min(data) == 100.0 + assert stats.max(data) == 100.0 + o_min, o_max := stats.minmax(data) + assert [o_min, o_max] == [f64(100), 100] + assert stats.min_index(data) == 0 + assert stats.max_index(data) == 0 + o_min_index, o_max_index := stats.minmax_index(data) + assert [o_min_index, o_max_index] == [0, 0] + assert stats.range(data) == 0 + assert stats.covariance(data, data) == 0 + assert math.is_nan(stats.lag1_autocorrelation(data)) + assert math.is_nan(stats.kurtosis(data)) + assert math.is_nan(stats.skew(data)) + assert stats.quantile(data, 0)! == 100 }