|
| 1 | +{-# LANGUAGE CPP #-} |
1 | 2 | {-# LANGUAGE FlexibleContexts #-} |
2 | 3 | {-| |
3 | 4 | Module : Statistics.Test.Bartlett |
4 | 5 | Description : Bartlett's test for homogeneity of variances. |
5 | | -Copyright : (c) Praneya Kumar, 2025 |
| 6 | +Copyright : (c) Praneya Kumar, Alexey Khudyakov, 2025 |
6 | 7 | License : BSD-3-Clause |
7 | 8 |
|
8 | | -Implements Bartlett's test to check if multiple groups have equal variances. |
9 | | -Assesses equality of variances assuming normal distribution, sensitive to non-normality. |
| 9 | +Bartlett's test is used to check that multiple groups of observations |
| 10 | +come from distributions with equal variances. This test assumes that |
| 11 | +samples come from normal distribution. If this is not the case it may |
| 12 | +simple test for non-normality and Levene's ("Statistics.Test.Levene") |
| 13 | +is preferred |
| 14 | +
|
| 15 | +>>> import qualified Data.Vector.Unboxed as VU |
| 16 | +>>> import Statistics.Test.Bartlett |
| 17 | +>>> :{ |
| 18 | +let a = VU.fromList [8.88, 9.12, 9.04, 8.98, 9.00, 9.08, 9.01, 8.85, 9.06, 8.99] |
| 19 | + b = VU.fromList [8.88, 8.95, 9.29, 9.44, 9.15, 9.58, 8.36, 9.18, 8.67, 9.05] |
| 20 | + c = VU.fromList [8.95, 9.12, 8.95, 8.85, 9.03, 8.84, 9.07, 8.98, 8.86, 8.98] |
| 21 | +in bartlettTest [a,b,c] |
| 22 | +:} |
| 23 | +Right (Test {testSignificance = mkPValue 1.1254782518843598e-5, testStatistics = 22.789434813726768, testDistribution = chiSquared 2}) |
| 24 | +
|
10 | 25 | -} |
11 | 26 | module Statistics.Test.Bartlett ( |
12 | 27 | bartlettTest, |
13 | 28 | module Statistics.Distribution.ChiSquared |
14 | 29 | ) where |
15 | 30 |
|
16 | | -import qualified Data.Vector.Generic as G |
17 | | -import qualified Data.Vector.Unboxed as U |
18 | | -import Statistics.Distribution (cumulative) |
| 31 | +import qualified Data.Vector as V |
| 32 | +import qualified Data.Vector.Unboxed as VU |
| 33 | +import qualified Data.Vector.Generic as VG |
| 34 | +import qualified Data.Vector.Storable as VS |
| 35 | +import qualified Data.Vector.Primitive as VP |
| 36 | +#if MIN_VERSION_vector(0,13,2) |
| 37 | +import qualified Data.Vector.Strict as VV |
| 38 | +#endif |
| 39 | + |
| 40 | +import Statistics.Distribution (complCumulative) |
19 | 41 | import Statistics.Distribution.ChiSquared (chiSquared, ChiSquared(..)) |
20 | 42 | import Statistics.Sample (varianceUnbiased) |
21 | 43 | import Statistics.Types (mkPValue) |
22 | 44 | import Statistics.Test.Types (Test(..)) |
23 | 45 |
|
24 | | --- | Perform Bartlett's test for equal variances. |
25 | | --- The input is a list of vectors, where each vector represents a group of observations. |
26 | | --- Returns Either an error message or a Test ChiSquared containing the test statistic and p-value. |
27 | | -bartlettTest :: [U.Vector Double] -> Either String (Test ChiSquared) |
| 46 | +-- | Perform Bartlett's test for equal variances. The input is a list |
| 47 | +-- of vectors, where each vector represents a group of observations. |
| 48 | +bartlettTest :: VG.Vector v Double => [v Double] -> Either String (Test ChiSquared) |
28 | 49 | bartlettTest groups |
29 | | - | length groups < 2 = Left "At least two groups are required for Bartlett's test." |
30 | | - | any ((< 2) . G.length) groups = Left "Each group must have at least two observations." |
31 | | - | any (<= 0) groupVariances = Left "All groups must have positive variance." |
32 | | - | otherwise = Right $ Test |
| 50 | + | length groups < 2 = Left "At least two groups are required for Bartlett's test." |
| 51 | + | any ((< 2) . VG.length) groups = Left "Each group must have at least two observations." |
| 52 | + | any ((<= 0) . var) groupVariances = Left "All groups must have positive variance." |
| 53 | + | otherwise = Right Test |
33 | 54 | { testSignificance = pValue |
34 | 55 | , testStatistics = tStatistic |
35 | 56 | , testDistribution = chiDist |
36 | 57 | } |
37 | 58 | where |
38 | 59 | -- Number of groups |
39 | 60 | k = length groups |
40 | | - |
41 | 61 | -- Sample sizes for each group |
42 | | - ni = map G.length groups |
43 | | - ni' = map fromIntegral ni |
44 | | - |
| 62 | + ni = map (fromIntegral . VG.length) groups |
45 | 63 | -- Total number of observations across all groups |
46 | | - nTotal = sum ni |
47 | | - |
48 | | - -- Variance for each group (unbiased estimate) |
49 | | - groupVariances = map varianceUnbiased groups |
50 | | - |
51 | | - -- Pooled variance calculation |
52 | | - sumWeightedVars = sum [ (n - 1) * v | (n, v) <- zip ni' groupVariances ] |
53 | | - pooledVariance = sumWeightedVars / fromIntegral (nTotal - k) |
54 | | - |
| 64 | + n_tot = sum $ fromIntegral . VG.length <$> groups |
| 65 | + -- Variance estimates |
| 66 | + groupVariances = toVar <$> groups |
| 67 | + sumWeightedVars = sum [ (n - 1) * v | Var{sampleN=n, var=v} <- groupVariances ] |
| 68 | + pooledVariance = sumWeightedVars / fromIntegral (n_tot - k) |
55 | 69 | -- Numerator of Bartlett's statistic |
56 | 70 | numerator = |
57 | | - fromIntegral (nTotal - k) * log pooledVariance - |
58 | | - sum [ (n - 1) * log v | (n, v) <- zip ni' groupVariances ] |
59 | | - |
| 71 | + fromIntegral (n_tot - k) * log pooledVariance - |
| 72 | + sum [ (n - 1) * log v | Var{sampleN=n, var=v} <- groupVariances ] |
60 | 73 | -- Denominator correction term |
61 | | - sumReciprocals = sum [1 / (n - 1) | n <- ni'] |
| 74 | + sumReciprocals = sum [1 / (n - 1) | n <- ni] |
62 | 75 | denomCorrection = |
63 | | - 1 + (sumReciprocals - 1 / fromIntegral (nTotal - k)) / (3 * (fromIntegral k - 1)) |
| 76 | + 1 + (sumReciprocals - 1 / fromIntegral (n_tot - k)) / (3 * (fromIntegral k - 1)) |
64 | 77 |
|
65 | | - -- Test statistic T |
| 78 | + -- Test statistic and test distrubution |
66 | 79 | tStatistic = max 0 $ numerator / denomCorrection |
67 | | - |
68 | | - -- Degrees of freedom and chi-squared distribution |
69 | | - df = k - 1 |
70 | | - chiDist = chiSquared df |
71 | | - pValue = mkPValue $ 1 - cumulative chiDist tStatistic |
72 | | - |
73 | | - |
74 | | --- Example usage: |
75 | | --- import qualified Data.Vector.Unboxed as U |
76 | | --- import Statistics.Test.Bartlett |
77 | | - |
78 | | --- main :: IO () |
79 | | --- main = do |
80 | | --- let a = U.fromList [8.88, 9.12, 9.04, 8.98, 9.00, 9.08, 9.01, 8.85, 9.06, 8.99] |
81 | | --- b = U.fromList [8.88, 8.95, 9.29, 9.44, 9.15, 9.58, 8.36, 9.18, 8.67, 9.05] |
82 | | --- c = U.fromList [8.95, 9.12, 8.95, 8.85, 9.03, 8.84, 9.07, 8.98, 8.86, 8.98] |
83 | | - |
84 | | --- case bartlettTest [a,b,c] of |
85 | | --- Left err -> putStrLn $ "Error: " ++ err |
86 | | --- Right test -> do |
87 | | --- putStrLn $ "Bartlett's Test Statistic: " ++ show (testStatistics test) |
88 | | --- putStrLn $ "P-Value: " ++ show (testSignificance test) |
89 | | - |
90 | | --- Sample Output |
91 | | --- Bartlett's Test Statistic: ~32 |
92 | | --- P-Value: ~1e-5 |
| 80 | + chiDist = chiSquared (k - 1) |
| 81 | + pValue = mkPValue $ complCumulative chiDist tStatistic |
| 82 | +{-# SPECIALIZE bartlettTest :: [V.Vector Double] -> Either String (Test ChiSquared) #-} |
| 83 | +{-# SPECIALIZE bartlettTest :: [VU.Vector Double] -> Either String (Test ChiSquared) #-} |
| 84 | +{-# SPECIALIZE bartlettTest :: [VS.Vector Double] -> Either String (Test ChiSquared) #-} |
| 85 | +{-# SPECIALIZE bartlettTest :: [VP.Vector Double] -> Either String (Test ChiSquared) #-} |
| 86 | +#if MIN_VERSION_vector(0,13,2) |
| 87 | +{-# SPECIALIZE bartlettTest :: [VV.Vector Double] -> Either String (Test ChiSquared) #-} |
| 88 | +#endif |
| 89 | + |
| 90 | +-- Estimate of variance |
| 91 | +data Var = Var |
| 92 | + { sampleN :: !Double -- ^ N of elements |
| 93 | + , var :: !Double -- ^ Sample variance |
| 94 | + } |
| 95 | + |
| 96 | +toVar :: VG.Vector v Double => v Double -> Var |
| 97 | +toVar xs = Var { sampleN = fromIntegral $ VG.length xs |
| 98 | + , var = varianceUnbiased xs |
| 99 | + } |
0 commit comments