Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add logistic regression <-> Variational Inference #156

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions docs/src/api/bayesian_regression.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,24 +46,24 @@ fit(formula::FormulaTerm,data::DataFrame,modelClass::LinearRegression,prior::Pri

### Logistic Regression with Ridge Prior
```@docs
fit(formula::FormulaTerm, data::DataFrame, modelClass::LogisticRegression, Link::CRRaoLink, prior::Prior_Ridge, h::Float64 = 0.1, level::Float64 = 0.95, sim_size::Int64 = 1000)
fit(formula::FormulaTerm, data::DataFrame, modelClass::LogisticRegression, Link::CRRaoLink, prior::Prior_Ridge, algorithm::BayesianAlgorithm = MCMC(), h::Float64 = 0.1, level::Float64 = 0.95)
```
### Logistic Regression with Laplace Prior
```@docs
fit(formula::FormulaTerm, data::DataFrame, modelClass::LogisticRegression, Link::CRRaoLink, prior::Prior_Laplace, h::Float64 = 0.1, level::Float64 = 0.95, sim_size::Int64 = 1000)
fit(formula::FormulaTerm, data::DataFrame, modelClass::LogisticRegression, Link::CRRaoLink, prior::Prior_Laplace, algorithm::BayesianAlgorithm = MCMC(), h::Float64 = 0.1, level::Float64 = 0.95)
```
### Logistic Regression with Cauchy Prior
```@docs
fit(formula::FormulaTerm, data::DataFrame, modelClass::LogisticRegression, Link::CRRaoLink, prior::Prior_Cauchy, h::Float64 = 0.1, level::Float64 = 0.95, sim_size::Int64 = 1000)
fit(formula::FormulaTerm, data::DataFrame, modelClass::LogisticRegression, Link::CRRaoLink, prior::Prior_Cauchy, algorithm::BayesianAlgorithm = MCMC(), h::Float64 = 0.1, level::Float64 = 0.95)
```
### Logistic Regression with T-Distributed Prior
```@docs
fit(formula::FormulaTerm, data::DataFrame, modelClass::LogisticRegression, Link::CRRaoLink, prior::Prior_TDist, h::Float64 = 1.0, level::Float64 = 0.95, sim_size::Int64 = 1000)
fit(formula::FormulaTerm, data::DataFrame, modelClass::LogisticRegression, Link::CRRaoLink, prior::Prior_TDist, algorithm::BayesianAlgorithm = MCMC(), h::Float64 = 1.0, level::Float64 = 0.95)
```

### Logistic Regression with Horse Shoe Prior
```@docs
fit(formula::FormulaTerm,data::DataFrame,modelClass::LogisticRegression,Link::CRRaoLink,prior::Prior_HorseShoe,level::Float64 = 0.95,sim_size::Int64 = 1000)
fit(formula::FormulaTerm,data::DataFrame, modelClass::LogisticRegression, Link::CRRaoLink, prior::Prior_HorseShoe, algorithm::BayesianAlgorithm = MCMC(), level::Float64 = 0.95)
```

## Negative Binomial Regression
Expand Down
178 changes: 116 additions & 62 deletions src/bayesian/logistic_regression.jl
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
function logistic_reg(formula::FormulaTerm, data::DataFrame, Link::CRRaoLink, turingModel::Function, sim_size::Int64)
function logistic_reg(formula::FormulaTerm, data::DataFrame, Link::CRRaoLink, turingModel::Function, algorithm::MCMC)
formula = apply_schema(formula, schema(formula, data),RegressionModel)
y, X = modelcols(formula, data)

if sim_size < 500
if algorithm.sim_size < 500
@warn "Simulation size should generally be atleast 500."
end
chain = sample(CRRao_rng, turingModel(X, y), NUTS(), sim_size)
chain = sample(CRRao_rng, turingModel(X, y), NUTS(), algorithm.sim_size)
params = get_params(chain[:,:,:])
samples = params.β
if isa(samples, Tuple)
Expand All @@ -16,9 +16,32 @@ function logistic_reg(formula::FormulaTerm, data::DataFrame, Link::CRRaoLink, tu
return BayesianRegression(:LogisticRegression, samples, formula, Link)
end

function logistic_reg(formula::FormulaTerm, data::DataFrame, Link::CRRaoLink, turingModel::Function, algorithm::VI)
formula = apply_schema(formula, schema(formula, data),RegressionModel)
y, X = modelcols(formula, data)

if algorithm.vi_max_iters < 500
@warn "Max iterations should generally be atleast 500."
end
model = turingModel(X, y)
dist = vi(model, ADVI(algorithm.vi_samples_per_step, algorithm.vi_max_iters))
_, symbol_to_range = bijector(model, Val(true))
samples = samples[union(symbol_to_range[:β]...), :]
return BayesianRegression(:LogisticRegression, samples, formula, Link)
end

"""
```julia
fit(formula::FormulaTerm, data::DataFrame, modelClass::LogisticRegression, Link::CRRaoLink, prior::Prior_Ridge, h::Float64 = 0.1, level::Float64 = 0.95, sim_size::Int64 = 1000)
fit(
formula::FormulaTerm,
data::DataFrame,
modelClass::LogisticRegression,
Link::CRRaoLink,
prior::Prior_Ridge,
algorithm::BayesianAlgorithm = MCMC(),
h::Float64 = 0.1,
level::Float64 = 0.95,
)
```

Fit a Bayesian Logistic Regression model on the input data with a Ridge prior with the provided `Link` function.
Expand Down Expand Up @@ -192,9 +215,9 @@ function fit(
modelClass::LogisticRegression,
Link::CRRaoLink,
prior::Prior_Ridge,
algorithm::BayesianAlgorithm = MCMC(),
h::Float64 = 0.1,
level::Float64 = 0.95,
sim_size::Int64 = 1000
)
@model LogisticRegression(X, y) = begin
p = size(X, 2)
Expand All @@ -216,13 +239,21 @@ function fit(
y[i] ~ Bernoulli(prob[i])
end
end

return logistic_reg(formula, data, Link, LogisticRegression, sim_size)
return logistic_reg(formula, data, Link, LogisticRegression, algorithm)
end

"""
```julia
fit(formula::FormulaTerm, data::DataFrame, modelClass::LogisticRegression, Link::CRRaoLink, prior::Prior_Laplace, h::Float64 = 0.1, level::Float64 = 0.95, sim_size::Int64 = 1000)
fit(
formula::FormulaTerm,
data::DataFrame,
modelClass::LogisticRegression,
Link::CRRaoLink,
prior::Prior_Laplace,
algorithm::BayesianAlgorithm = MCMC(),
h::Float64 = 0.1,
level::Float64 = 0.95,
)
```

Fit a Bayesian Logistic Regression model on the input data with a Laplace prior with the provided `Link` function.
Expand Down Expand Up @@ -391,9 +422,9 @@ function fit(
modelClass::LogisticRegression,
Link::CRRaoLink,
prior::Prior_Laplace,
algorithm::BayesianAlgorithm = MCMC(),
h::Float64 = 0.1,
level::Float64 = 0.95,
sim_size::Int64 = 1000
level::Float64 = 0.95
)
@model LogisticRegression(X, y) = begin
p = size(X, 2)
Expand All @@ -416,12 +447,21 @@ function fit(
end
end

return logistic_reg(formula, data, Link, LogisticRegression, sim_size)
return logistic_reg(formula, data, Link, LogisticRegression, algorithm)
end

"""
```julia
fit(formula::FormulaTerm, data::DataFrame, modelClass::LogisticRegression, Link::CRRaoLink, prior::Prior_Cauchy, h::Float64 = 0.1, level::Float64 = 0.95, sim_size::Int64 = 1000)
fit(
formula::FormulaTerm,
data::DataFrame,
modelClass::LogisticRegression,
Link::CRRaoLink,
prior::Prior_Cauchy,
algorithm::BayesianAlgorithm = MCMC(),
h::Float64 = 0.1,
level::Float64 = 0.95,
)
```

Fit a Bayesian Logistic Regression model on the input data with a Cauchy prior with the provided `Link` function.
Expand Down Expand Up @@ -585,9 +625,9 @@ function fit(
modelClass::LogisticRegression,
Link::CRRaoLink,
prior::Prior_Cauchy,
algorithm::BayesianAlgorithm = MCMC(),
h::Float64 = 0.1,
level::Float64 = 0.95,
sim_size::Int64 = 1000
level::Float64 = 0.95
)
@model LogisticRegression(X, y) = begin
p = size(X, 2)
Expand All @@ -609,13 +649,21 @@ function fit(
y[i] ~ Bernoulli(prob[i])
end
end

return logistic_reg(formula, data, Link, LogisticRegression, sim_size)
return logistic_reg(formula, data, Link, LogisticRegression, algorithm)
end

"""
```julia
fit(formula::FormulaTerm, data::DataFrame, modelClass::LogisticRegression, Link::CRRaoLink, prior::Prior_TDist, h::Float64 = 1.0, level::Float64 = 0.95, sim_size::Int64 = 1000)
fit(
formula::FormulaTerm,
data::DataFrame,
modelClass::LogisticRegression,
Link::CRRaoLink,
prior::Prior_TDist,
algorithm::BayesianAlgorithm = MCMC(),
h::Float64 = 3.0,
level::Float64 = 0.95,
)
```

Fit a Bayesian Logistic Regression model on the input data with a T-Dist prior with the provided `Link` function.
Expand Down Expand Up @@ -804,9 +852,9 @@ function fit(
modelClass::LogisticRegression,
Link::CRRaoLink,
prior::Prior_TDist,
algorithm::BayesianAlgorithm = MCMC(),
h::Float64 = 3.0,
level::Float64 = 0.95,
sim_size::Int64 = 1000
level::Float64 = 0.95
)
@model LogisticRegression(X, y) = begin
p = size(X, 2)
Expand All @@ -829,15 +877,22 @@ function fit(
y[i] ~ Bernoulli(prob[i])
end
end

return logistic_reg(formula, data, Link, LogisticRegression, sim_size)
return logistic_reg(formula, data, Link, LogisticRegression, algorithm)
end



"""
```julia
fit(formula::FormulaTerm,data::DataFrame,modelClass::LogisticRegression,Link::CRRaoLink,prior::Prior_HorseShoe,level::Float64 = 0.95,sim_size::Int64 = 1000)
fit(
formula::FormulaTerm,
data::DataFrame,
modelClass::LogisticRegression,
Link::CRRaoLink,
prior::Prior_HorseShoe,
algorithm::BayesianAlgorithm = MCMC(),
level::Float64 = 0.95,
)
```

Fit a Bayesian Logistic Regression model on the input data with a HorseShoe prior with the provided `Link` function.
Expand Down Expand Up @@ -1036,45 +1091,44 @@ Quantiles
```
"""
function fit(
formula::FormulaTerm,
data::DataFrame,
modelClass::LogisticRegression,
Link::CRRaoLink,
prior::Prior_HorseShoe,
level::Float64 = 0.95,
sim_size::Int64 = 1000
formula::FormulaTerm,
data::DataFrame,
modelClass::LogisticRegression,
Link::CRRaoLink,
prior::Prior_HorseShoe,
algorithm::BayesianAlgorithm = MCMC(),
level::Float64 = 0.95
)
@model LogisticRegression(X, y) = begin
p = size(X, 2)
n = size(X, 1)
#priors
#v ~ InverseGamma(h, h)
#α ~ TDist(1)
#β ~ filldist(Uniform(-v, v), p)

halfcauchy = Truncated(TDist(1), 0, Inf)

τ ~ halfcauchy ## Global Shrinkage
λ ~ filldist(halfcauchy, p) ## Local Shrinkage
σ ~ halfcauchy
#α ~ Normal(0, τ * σ)
β0 = repeat([0], p) ## prior mean
β ~ MvNormal(β0, λ * τ *σ)


#z = α .+ X * β
z = X * β

## Link Function

#prob = Link.link.(z)
prob = Link.link_function.(z)

#likelihood
for i = 1:n
y[i] ~ Bernoulli(prob[i])
end
end

return logistic_reg(formula, data, Link, LogisticRegression, sim_size)
@model LogisticRegression(X, y) = begin
p = size(X, 2)
n = size(X, 1)
#priors
#v ~ InverseGamma(h, h)
#α ~ TDist(1)
#β ~ filldist(Uniform(-v, v), p)

halfcauchy = Truncated(TDist(1), 0, Inf)

τ ~ halfcauchy ## Global Shrinkage
λ ~ filldist(halfcauchy, p) ## Local Shrinkage
σ ~ halfcauchy
#α ~ Normal(0, τ * σ)
β0 = repeat([0], p) ## prior mean
β ~ MvNormal(β0, λ * τ *σ)


#z = α .+ X * β
z = X * β

## Link Function

#prob = Link.link.(z)
prob = Link.link_function.(z)

#likelihood
for i = 1:n
y[i] ~ Bernoulli(prob[i])
end
end
return logistic_reg(formula, data, Link, LogisticRegression, algorithm)
end
13 changes: 11 additions & 2 deletions test/numerical/bayesian/LogisticRegression.jl
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,19 @@ tests = [
]

for (prior, prior_testcases) in tests
# MCMC
for (link, test_mean) in prior_testcases
CRRao.set_rng(StableRNG(123))
model = fit(@formula(Vote ~ Age + Race + Income + Educate), turnout, LogisticRegression(), link, prior)
prediction = predict(model, turnout)
@test mean(prediction) - 2 * std(prediction) <= test_mean && test_mean <= mean(prediction) + 2 * std(prediction)
mcmc_prediction = predict(model, turnout)
@test mean(mcmc_prediction) - 2 * std(mcmc_prediction) <= test_mean && test_mean <= mean(mcmc_prediction) + 2 * std(mcmc_prediction)
end

# VI
for (link, test_mean) in prior_testcases
CRRao.set_rng(StableRNG(123))
model = fit(@formula(Vote ~ Age + Race + Income + Educate), turnout, LogisticRegression(), link, prior, VI())
vi_prediction = predict(model, turnout)
@test mean(vi_prediction) - 2 * std(vi_prediction) <= test_mean && test_mean <= mean(vi_prediction) + 2 * std(vi_prediction)
end
end
Loading