Skip to content

Commit a70b38e

Browse files
authored
Improve Levene & Bartlett tests (#218)
- Be polymorphic in input vectors - Improve tests - General code cleanup
1 parent ff92091 commit a70b38e

File tree

4 files changed

+303
-222
lines changed

4 files changed

+303
-222
lines changed

Statistics/Sample/Internal.hs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,10 @@ module Statistics.Sample.Internal
1414
(
1515
robustSumVar
1616
, sum
17+
, sumF
1718
) where
1819

19-
import Numeric.Sum (kbn, sumVector)
20+
import qualified Numeric.Sum as Sum
2021
import Prelude hiding (sum)
2122
import Statistics.Function (square)
2223
import qualified Data.Vector.Generic as G
@@ -26,5 +27,9 @@ robustSumVar m = sum . G.map (square . subtract m)
2627
{-# INLINE robustSumVar #-}
2728

2829
sum :: (G.Vector v Double) => v Double -> Double
29-
sum = sumVector kbn
30+
sum = Sum.sumVector Sum.kbn
3031
{-# INLINE sum #-}
32+
33+
sumF :: Foldable f => f Double -> Double
34+
sumF = Sum.sum Sum.kbn
35+
{-# INLINE sumF #-}

Statistics/Test/Bartlett.hs

Lines changed: 66 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -1,92 +1,99 @@
1+
{-# LANGUAGE CPP #-}
12
{-# LANGUAGE FlexibleContexts #-}
23
{-|
34
Module : Statistics.Test.Bartlett
45
Description : Bartlett's test for homogeneity of variances.
5-
Copyright : (c) Praneya Kumar, 2025
6+
Copyright : (c) Praneya Kumar, Alexey Khudyakov, 2025
67
License : BSD-3-Clause
78
8-
Implements Bartlett's test to check if multiple groups have equal variances.
9-
Assesses equality of variances assuming normal distribution, sensitive to non-normality.
9+
Bartlett's test is used to check that multiple groups of observations
10+
come from distributions with equal variances. This test assumes that
11+
samples come from normal distribution. If this is not the case it may
12+
simple test for non-normality and Levene's ("Statistics.Test.Levene")
13+
is preferred
14+
15+
>>> import qualified Data.Vector.Unboxed as VU
16+
>>> import Statistics.Test.Bartlett
17+
>>> :{
18+
let a = VU.fromList [8.88, 9.12, 9.04, 8.98, 9.00, 9.08, 9.01, 8.85, 9.06, 8.99]
19+
b = VU.fromList [8.88, 8.95, 9.29, 9.44, 9.15, 9.58, 8.36, 9.18, 8.67, 9.05]
20+
c = VU.fromList [8.95, 9.12, 8.95, 8.85, 9.03, 8.84, 9.07, 8.98, 8.86, 8.98]
21+
in bartlettTest [a,b,c]
22+
:}
23+
Right (Test {testSignificance = mkPValue 1.1254782518843598e-5, testStatistics = 22.789434813726768, testDistribution = chiSquared 2})
24+
1025
-}
1126
module Statistics.Test.Bartlett (
1227
bartlettTest,
1328
module Statistics.Distribution.ChiSquared
1429
) where
1530

16-
import qualified Data.Vector.Generic as G
17-
import qualified Data.Vector.Unboxed as U
18-
import Statistics.Distribution (cumulative)
31+
import qualified Data.Vector as V
32+
import qualified Data.Vector.Unboxed as VU
33+
import qualified Data.Vector.Generic as VG
34+
import qualified Data.Vector.Storable as VS
35+
import qualified Data.Vector.Primitive as VP
36+
#if MIN_VERSION_vector(0,13,2)
37+
import qualified Data.Vector.Strict as VV
38+
#endif
39+
40+
import Statistics.Distribution (complCumulative)
1941
import Statistics.Distribution.ChiSquared (chiSquared, ChiSquared(..))
2042
import Statistics.Sample (varianceUnbiased)
2143
import Statistics.Types (mkPValue)
2244
import Statistics.Test.Types (Test(..))
2345

24-
-- | Perform Bartlett's test for equal variances.
25-
-- The input is a list of vectors, where each vector represents a group of observations.
26-
-- Returns Either an error message or a Test ChiSquared containing the test statistic and p-value.
27-
bartlettTest :: [U.Vector Double] -> Either String (Test ChiSquared)
46+
-- | Perform Bartlett's test for equal variances. The input is a list
47+
-- of vectors, where each vector represents a group of observations.
48+
bartlettTest :: VG.Vector v Double => [v Double] -> Either String (Test ChiSquared)
2849
bartlettTest groups
29-
| length groups < 2 = Left "At least two groups are required for Bartlett's test."
30-
| any ((< 2) . G.length) groups = Left "Each group must have at least two observations."
31-
| any (<= 0) groupVariances = Left "All groups must have positive variance."
32-
| otherwise = Right $ Test
50+
| length groups < 2 = Left "At least two groups are required for Bartlett's test."
51+
| any ((< 2) . VG.length) groups = Left "Each group must have at least two observations."
52+
| any ((<= 0) . var) groupVariances = Left "All groups must have positive variance."
53+
| otherwise = Right Test
3354
{ testSignificance = pValue
3455
, testStatistics = tStatistic
3556
, testDistribution = chiDist
3657
}
3758
where
3859
-- Number of groups
3960
k = length groups
40-
4161
-- Sample sizes for each group
42-
ni = map G.length groups
43-
ni' = map fromIntegral ni
44-
62+
ni = map (fromIntegral . VG.length) groups
4563
-- Total number of observations across all groups
46-
nTotal = sum ni
47-
48-
-- Variance for each group (unbiased estimate)
49-
groupVariances = map varianceUnbiased groups
50-
51-
-- Pooled variance calculation
52-
sumWeightedVars = sum [ (n - 1) * v | (n, v) <- zip ni' groupVariances ]
53-
pooledVariance = sumWeightedVars / fromIntegral (nTotal - k)
54-
64+
n_tot = sum $ fromIntegral . VG.length <$> groups
65+
-- Variance estimates
66+
groupVariances = toVar <$> groups
67+
sumWeightedVars = sum [ (n - 1) * v | Var{sampleN=n, var=v} <- groupVariances ]
68+
pooledVariance = sumWeightedVars / fromIntegral (n_tot - k)
5569
-- Numerator of Bartlett's statistic
5670
numerator =
57-
fromIntegral (nTotal - k) * log pooledVariance -
58-
sum [ (n - 1) * log v | (n, v) <- zip ni' groupVariances ]
59-
71+
fromIntegral (n_tot - k) * log pooledVariance -
72+
sum [ (n - 1) * log v | Var{sampleN=n, var=v} <- groupVariances ]
6073
-- Denominator correction term
61-
sumReciprocals = sum [1 / (n - 1) | n <- ni']
74+
sumReciprocals = sum [1 / (n - 1) | n <- ni]
6275
denomCorrection =
63-
1 + (sumReciprocals - 1 / fromIntegral (nTotal - k)) / (3 * (fromIntegral k - 1))
76+
1 + (sumReciprocals - 1 / fromIntegral (n_tot - k)) / (3 * (fromIntegral k - 1))
6477

65-
-- Test statistic T
78+
-- Test statistic and test distrubution
6679
tStatistic = max 0 $ numerator / denomCorrection
67-
68-
-- Degrees of freedom and chi-squared distribution
69-
df = k - 1
70-
chiDist = chiSquared df
71-
pValue = mkPValue $ 1 - cumulative chiDist tStatistic
72-
73-
74-
-- Example usage:
75-
-- import qualified Data.Vector.Unboxed as U
76-
-- import Statistics.Test.Bartlett
77-
78-
-- main :: IO ()
79-
-- main = do
80-
-- let a = U.fromList [8.88, 9.12, 9.04, 8.98, 9.00, 9.08, 9.01, 8.85, 9.06, 8.99]
81-
-- b = U.fromList [8.88, 8.95, 9.29, 9.44, 9.15, 9.58, 8.36, 9.18, 8.67, 9.05]
82-
-- c = U.fromList [8.95, 9.12, 8.95, 8.85, 9.03, 8.84, 9.07, 8.98, 8.86, 8.98]
83-
84-
-- case bartlettTest [a,b,c] of
85-
-- Left err -> putStrLn $ "Error: " ++ err
86-
-- Right test -> do
87-
-- putStrLn $ "Bartlett's Test Statistic: " ++ show (testStatistics test)
88-
-- putStrLn $ "P-Value: " ++ show (testSignificance test)
89-
90-
-- Sample Output
91-
-- Bartlett's Test Statistic: ~32
92-
-- P-Value: ~1e-5
80+
chiDist = chiSquared (k - 1)
81+
pValue = mkPValue $ complCumulative chiDist tStatistic
82+
{-# SPECIALIZE bartlettTest :: [V.Vector Double] -> Either String (Test ChiSquared) #-}
83+
{-# SPECIALIZE bartlettTest :: [VU.Vector Double] -> Either String (Test ChiSquared) #-}
84+
{-# SPECIALIZE bartlettTest :: [VS.Vector Double] -> Either String (Test ChiSquared) #-}
85+
{-# SPECIALIZE bartlettTest :: [VP.Vector Double] -> Either String (Test ChiSquared) #-}
86+
#if MIN_VERSION_vector(0,13,2)
87+
{-# SPECIALIZE bartlettTest :: [VV.Vector Double] -> Either String (Test ChiSquared) #-}
88+
#endif
89+
90+
-- Estimate of variance
91+
data Var = Var
92+
{ sampleN :: !Double -- ^ N of elements
93+
, var :: !Double -- ^ Sample variance
94+
}
95+
96+
toVar :: VG.Vector v Double => v Double -> Var
97+
toVar xs = Var { sampleN = fromIntegral $ VG.length xs
98+
, var = varianceUnbiased xs
99+
}

0 commit comments

Comments
 (0)