Skip to content

Commit

Permalink
Merge pull request #67 from ShouvikGhosh2048/adding_numerical_tests
Browse files Browse the repository at this point in the history
Adding frequentist numerical tests + Integrate StatsAPI
  • Loading branch information
sourish-cmi authored Dec 29, 2022
2 parents 39ff832 + 77db266 commit c51b875
Show file tree
Hide file tree
Showing 11 changed files with 127 additions and 14 deletions.
1 change: 1 addition & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
NLSolversBase = "d41bc354-129a-5804-8e4c-c37616107c6c"
Optim = "429524aa-4258-5aef-a3af-852621145aeb"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
StatsAPI = "82ae8749-77ed-4fe6-ae5f-f523153014b0"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d"
Turing = "fce5fe82-541a-59a6-adf8-730c64b5f9a0"
Expand Down
1 change: 1 addition & 0 deletions src/CRRao.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ module CRRao
using DataFrames, GLM, Turing, StatsModels, StatsBase
using StatsBase, Distributions, LinearAlgebra
using Optim, NLSolversBase, Random
import StatsBase: coef, coeftable, r2, adjr2, loglikelihood, aic, bic, predict, residuals, cooksdistance, fit

"""
```julia
Expand Down
4 changes: 2 additions & 2 deletions src/fitmodel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ FrequentistRegression{RegressionType}
Type to represent frequentist regression models returned by `fit` functions. This type is used internally by the package to represent all frequentist regression models. `RegressionType` is a `Symbol` representing the model class.
"""
struct FrequentistRegression{RegressionType}
struct FrequentistRegression{RegressionType} <: RegressionModel
model
formula::FormulaTerm
link
Expand All @@ -27,7 +27,7 @@ BayesianRegression{RegressionType}
Type to represent bayesian regression models returned by `fit` functions. This type is used internally by the package to represent all bayesian regression models. `RegressionType` is a `Symbol` representing the model class.
"""
struct BayesianRegression{RegressionType}
struct BayesianRegression{RegressionType} <: RegressionModel
chain
formula::FormulaTerm
link
Expand Down
2 changes: 1 addition & 1 deletion src/frequentist/getter.jl
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ function predict(container::FrequentistRegression{:LogisticRegression}, newdata:
return Probit_Link.(z)
elseif (container.link == GLM.CauchitLink)
return Cauchit_Link.(z)
elseif (container.link == GLM.Cloglog)
elseif (container.link == GLM.CloglogLink)
return Cloglog_Link.(z)
end
end
Expand Down
1 change: 1 addition & 0 deletions test/Project.toml
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
[deps]
GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a"
Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
RDatasets = "ce6b1742-4840-55fa-b093-852dadbb1d8b"
StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
Expand Down
16 changes: 16 additions & 0 deletions test/numerical/frequentist/LinearRegression.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
mtcars = dataset("datasets", "mtcars")

tests = [
(@formula(MPG ~ HP + WT + Gear), 157.052778719219475, 164.381458233218098),
(@formula(MPG ~ 0 + HP + WT + Gear), 186.905400588726366, 192.768344199925281),
(@formula(MPG ~ HP + HP^2 + WT + WT * HP), 145.922971789355415, 154.717387206153774),
(@formula(log(MPG) ~ log(HP) + log(WT)), -48.353276320549490, -42.490332709350582)
]

for (test_formula, test_aic, test_bic) in tests
crrao_model = fit(test_formula, mtcars, LinearRegression())
glm_model = lm(test_formula, mtcars)
compare_models(crrao_model, glm_model, mtcars)
@test isapprox(aic(crrao_model), test_aic)
@test isapprox(bic(crrao_model), test_bic)
end
37 changes: 37 additions & 0 deletions test/numerical/frequentist/LogisticRegression.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
turnout = dataset("Zelig", "turnout")

tests = [
(Logit(), LogitLink(), [
(@formula(Vote ~ Age + Race + Income + Educate), 2033.981263703107970, 2061.985776000818532),
(@formula(Vote ~ 0 + Age + Race + Income + Educate), 2033.981263703107970, 2061.985776000818532),
(@formula(Vote ~ Age + Age^2 + Race + Income * Educate), 2023.429129040224325, 2062.635446257018884),
(@formula(Vote ~ log(Age) + Educate), 2069.570124645007581, 2086.372832023633691)
]),
(Probit(), ProbitLink(), [
(@formula(Vote ~ Age + Race + Income + Educate), 2034.196513858159960, 2062.201026155870295),
(@formula(Vote ~ 0 + Age + Race + Income + Educate), 2034.196513858159960, 2062.201026155870295),
(@formula(Vote ~ Age + Age^2 + Race + Income * Educate), 2023.024253890667069, 2062.230571107461856),
(@formula(Vote ~ log(Age) + Educate), 2068.082640703962625, 2084.885348082588735)
]),
(Cloglog(), CloglogLink(), [
(@formula(Vote ~ Age + Race + Income + Educate), 2036.690120606579512, 2064.694632904289847),
(@formula(Vote ~ 0 + Age + Race + Income + Educate), 2036.690120606579512, 2064.694632904289847),
(@formula(Vote ~ log(Age) + Educate), 2067.047234348701750, 2083.849941727327860)
]),
(Cauchit(), CauchitLink(), [
(@formula(Vote ~ Age + Race + Income + Educate), 2050.941937867712113, 2078.946450165422448),
(@formula(Vote ~ 0 + Age + Race + Income + Educate), 2050.941937867712113, 2078.946450165422448),
(@formula(Vote ~ Age + Age^2 + Race + Income * Educate), 2036.876992804688371, 2076.083310021483157),
(@formula(Vote ~ log(Age) + Educate), 2085.365660144347203, 2102.168367522973313)
])
]

for (crrao_link, glm_link, formulae_and_values) in tests
for (test_formula, test_aic, test_bic) in formulae_and_values
crrao_model = fit(test_formula, turnout, LogisticRegression(), crrao_link)
glm_model = glm(test_formula, turnout, Binomial(), glm_link)
compare_models(crrao_model, glm_model, turnout)
@test isapprox(aic(crrao_model), test_aic)
@test isapprox(bic(crrao_model), test_bic)
end
end
16 changes: 16 additions & 0 deletions test/numerical/frequentist/NegBinomialRegression.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
sanction = dataset("Zelig", "sanction")

tests = [
(@formula(Num ~ Target + Coop + NCost), 344.8795500751855, 361.37651186201265),
(@formula(Num ~ 0 + Target + Coop + NCost), 344.87955007518536, 361.37651186201253),
(@formula(Num ~ Target + Target^2 + Coop + Coop * Target), 362.262357097934, 376.40261005807156),
(@formula(Num ~ log(Target) + log(Coop)), 367.1949690199781, 376.6218043267365)
]

for (test_formula, test_aic, test_bic) in tests
crrao_model = fit(test_formula, sanction, NegBinomRegression())
glm_model = negbin(test_formula, sanction, LogLink())
compare_models(crrao_model, glm_model, sanction)
@test isapprox(aic(crrao_model), test_aic)
@test isapprox(bic(crrao_model), test_bic)
end
16 changes: 16 additions & 0 deletions test/numerical/frequentist/PoissonRegression.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
sanction = dataset("Zelig", "sanction")

tests = [
(@formula(Num ~ Target + Coop + NCost), 580.673868966946884, 594.814121927084443),
(@formula(Num ~ 0 + Target + Coop + NCost), 580.673868966946770, 594.814121927084329),
(@formula(Num ~ Target + Target^2 + Coop + Coop * Target), 871.418230125844502, 883.201774259292506),
(@formula(Num ~ log(Target) + log(Coop)), 944.326386272138961, 951.396512752207741)
]

for (test_formula, test_aic, test_bic) in tests
crrao_model = fit(test_formula, sanction, PoissonRegression())
glm_model = glm(test_formula, sanction, Poisson(), LogLink())
compare_models(crrao_model, glm_model, sanction)
@test isapprox(aic(crrao_model), test_aic)
@test isapprox(bic(crrao_model), test_bic)
end
20 changes: 20 additions & 0 deletions test/numerical/frequentist/tests.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
function compare_models(crrao_model, glm_model, df)
@test isapprox(coeftable(crrao_model).cols, coeftable(glm_model).cols)
@test isapprox(predict(crrao_model, df), predict(glm_model, df))
end

@testset "Linear Regression" begin
include("LinearRegression.jl")
end

@testset "Logistic Regression" begin
include("LogisticRegression.jl")
end

@testset "Poisson Regression" begin
include("PoissonRegression.jl")
end

@testset "Negative Binomial Regression" begin
include("NegBinomialRegression.jl")
end
27 changes: 16 additions & 11 deletions test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
using CRRao, Test, StableRNGs, Logging, RDatasets, StatsModels, Statistics
using CRRao, Test, StableRNGs, Logging, RDatasets, StatsModels, GLM, Statistics

Logging.disable_logging(Logging.Warn)

Expand All @@ -25,20 +25,25 @@ CRRao.set_rng(StableRNG(123))
end

@testset "Numerical Tests" begin
@testset "Linear Regression" begin
include("numerical/bayesian/LinearRegression.jl")
@testset "Frequentist" begin
include("numerical/frequentist/tests.jl")
end
@testset "Bayesian" begin
@testset "Linear Regression" begin
include("numerical/bayesian/LinearRegression.jl")
end

@testset "Logistic Regression" begin
include("numerical/bayesian/LogisticRegression.jl")
end
@testset "Logistic Regression" begin
include("numerical/bayesian/LogisticRegression.jl")
end

@testset "Poisson Regression" begin
include("numerical/bayesian/PoissonRegression.jl")
end
@testset "Poisson Regression" begin
include("numerical/bayesian/PoissonRegression.jl")
end

@testset "Negative Binomial Regression" begin
include("numerical/bayesian/NegBinomialRegression.jl")
@testset "Negative Binomial Regression" begin
include("numerical/bayesian/NegBinomialRegression.jl")
end
end
end
end

0 comments on commit c51b875

Please sign in to comment.