Skip to content

Commit

Permalink
Update to StatsModels 0.7 (#51)
Browse files Browse the repository at this point in the history
* Update Project.toml

* Update formula.jl

* up

* Update Project.toml

* update

* Update ci.yml

* Update .gitignore

* up
  • Loading branch information
matthieugomez authored Mar 15, 2023
1 parent 80b4926 commit 58a8f70
Show file tree
Hide file tree
Showing 8 changed files with 95 additions and 110 deletions.
Binary file removed .DS_Store
Binary file not shown.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
fail-fast: false
matrix:
version:
- '1.3' # Replace this with the minimum Julia version that your package supports. E.g. if your package requires Julia 1.5 or higher, change this to '1.5'.
- '1.6' # Replace this with the minimum Julia version that your package supports. E.g. if your package requires Julia 1.5 or higher, change this to '1.5'.
- '1' # Leave this line unchanged. '1' will automatically expand to the latest stable 1.x release of Julia.
os:
- ubuntu-latest
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,5 @@ src/other/*
*.synctex.gz
examples/runbenchmark.jl
.DS_Store
.DS_Store
.DS_Store
8 changes: 5 additions & 3 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name = "InteractiveFixedEffectModels"
uuid = "80307280-efb2-5c5d-af8b-a9c15821677b"
version = "1.1.8"
version = "1.2.0"

[deps]
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Expand All @@ -13,6 +13,7 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
StatsAPI = "82ae8749-77ed-4fe6-ae5f-f523153014b0"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c"
StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d"
Expand All @@ -27,12 +28,13 @@ FixedEffects = "2"
GroupedArrays = "0.3"
LeastSquaresOptim = "0.7, 0.8"
Reexport = "0.2, 1"
StatsAPI = "1"
StatsBase = "0.33"
StatsModels = "0.6"
StatsModels = "0.7"
StatsFuns = "0.9, 1"
Tables = "1"
Vcov = "0.5, 0.6, 0.7"
julia = "1.3"
julia = "1.6"

[extras]
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
Expand Down
1 change: 1 addition & 0 deletions src/InteractiveFixedEffectModels.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ using LeastSquaresOptim
using LinearAlgebra
using Printf
using Statistics
using StatsAPI
using StatsBase
using StatsFuns
using StatsModels
Expand Down
17 changes: 9 additions & 8 deletions src/fit.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

function regife(
@nospecialize(df),
@nospecialize(f::FormulaTerm),
@nospecialize(formula::FormulaTerm),
@nospecialize(vcov::CovarianceEstimator = Vcov.simple());
@nospecialize(weights::Union{Symbol, Nothing} = nothing),
@nospecialize(subset::Union{AbstractVector, Nothing} = nothing),
Expand All @@ -18,13 +18,15 @@ function regife(
## Transform DataFrame -> Matrix
##
##############################################################################
formula_origin = formula

df = DataFrame(df; copycols = false)
if (ConstantTerm(0) ∉ eachterm(f.rhs)) & (ConstantTerm(1) ∉ eachterm(f.rhs))
formula = FormulaTerm(f.lhs, tuple(ConstantTerm(1), eachterm(f.rhs)...))
if (ConstantTerm(0) ∉ eachterm(formula.rhs)) & (ConstantTerm(1) ∉ eachterm(formula.rhs))
formula = FormulaTerm(formula.lhs, tuple(ConstantTerm(1), eachterm(formula.rhs)...))
end


m, formula = parse_interactivefixedeffect(df, f)
m, formula = parse_interactivefixedeffect(df, formula)
has_weights = (weights != nothing)


Expand Down Expand Up @@ -112,11 +114,11 @@ function regife(


# Compute demeaned X
yname, coef_names = coefnames(formula_schema)
responsename, coef_names = coefnames(formula_schema)
if !isa(coef_names, Vector)
coef_names = [coef_names]
end
yname = Symbol(yname)
responsename = Symbol(responsename)
coef_names = Symbol.(coef_names)


Expand Down Expand Up @@ -278,7 +280,6 @@ function regife(
return FactorResult(esample, augmentdf, rss, iterations, converged)
else
return InteractiveFixedEffectModel(fs.b, matrix_vcov, vcov, esample, augmentdf,
coef_names, yname, f, nobs, dof_residual, r2, r2_a, r2_within,
rss, sum(iterations), all(converged))
coef_names, responsename, formula_origin, formula, nobs, dof_residual, rss, tss_total, r2, r2_a, r2_within, sum(iterations), all(converged))
end
end
172 changes: 76 additions & 96 deletions src/types.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ function parse_interactivefixedeffect(df::AbstractDataFrame, formula::FormulaTer
m = nothing
for term in FixedEffectModels.eachterm(formula.rhs)
if term isa FunctionTerm{typeof(ife)}
m = InteractiveFixedEffectTerm(term.args_parsed[1].sym, term.args_parsed[2].sym, term.args_parsed[3].n)
m = InteractiveFixedEffectTerm(term.args[1].sym, term.args[2].sym, term.args[3].n)
elseif term isa InteractiveFixedEffectTerm
m = term
end
Expand Down Expand Up @@ -255,30 +255,38 @@ struct InteractiveFixedEffectModel <: RegressionModel
augmentdf::DataFrame

coefnames::Vector # Name of coefficients
yname::Symbol # Name of dependent variable
formula::FormulaTerm # Original formula
responsename::Symbol # Name of dependent variable
formula::FormulaTerm # Original formula
formula_schema::FormulaTerm # Schema for predict

nobs::Int64 # Number of observations
dof_residual::Int64 # degree of freedoms

rss::Float64
tss::Float64
r2::Float64 # R squared
adjr2::Float64 # R squared adjusted
r2_within::Float64 # R within

rss::Float64
iterations::Int # Number of iterations
converged::Bool # Has the demeaning algorithm converged?

end
StatsBase.coef(x::InteractiveFixedEffectModel) = x.coef
StatsBase.coefnames(x::InteractiveFixedEffectModel) = x.coefnames
StatsBase.vcov(x::InteractiveFixedEffectModel) = x.vcov
StatsBase.nobs(x::InteractiveFixedEffectModel) = x.nobs
StatsBase.dof_residual(x::InteractiveFixedEffectModel) = x.dof_residual
StatsBase.r2(x::InteractiveFixedEffectModel) = x.r2
StatsBase.adjr2(x::InteractiveFixedEffectModel) = x.adjr2
StatsBase.islinear(x::InteractiveFixedEffectModel) = false
StatsBase.rss(x::InteractiveFixedEffectModel) = x.rss
StatsAPI.coef(x::InteractiveFixedEffectModel) = x.coef
StatsAPI.coefnames(x::InteractiveFixedEffectModel) = x.coefnames
StatsAPI.responsename(m::InteractiveFixedEffectModel) = m.responsename
StatsAPI.vcov(x::InteractiveFixedEffectModel) = x.vcov
StatsAPI.nobs(x::InteractiveFixedEffectModel) = x.nobs
StatsAPI.dof_residual(x::InteractiveFixedEffectModel) = x.dof_residual
StatsAPI.r2(x::InteractiveFixedEffectModel) = x.r2
StatsAPI.adjr2(x::InteractiveFixedEffectModel) = x.adjr2
StatsAPI.islinear(x::InteractiveFixedEffectModel) = false
StatsAPI.deviance(x::InteractiveFixedEffectModel) = x.tss
StatsAPI.rss(x::InteractiveFixedEffectModel) = x.rss
StatsAPI.mss(m::InteractiveFixedEffectModel) = deviance(m) - rss(m)
StatsModels.formula(m::InteractiveFixedEffectModel) = m.formula_schema


StatsBase.predict(::InteractiveFixedEffectModel, ::AbstractDataFrame) = error("predict is not defined for linear factor models. Use the option save = true")
StatsBase.residuals(::InteractiveFixedEffectModel, ::AbstractDataFrame) = error("residuals is not defined for linear factor models. Use the option save = true")
function StatsBase.confint(x::InteractiveFixedEffectModel; level::Real = 0.95)
Expand Down Expand Up @@ -316,97 +324,69 @@ title(::InteractiveFixedEffectModel) = "Interactive Fixed Effect Model"
top(x::InteractiveFixedEffectModel) = [
"Number of obs" sprint(show, nobs(x); context=:compact => true);
"Degree of freedom" sprint(show, nobs(x) - dof_residual(x); context=:compact => true);
"R2" @sprintf("%.3f", x.r2);
"R2 within" @sprintf("%.3f", x.r2_within);
"R²" @sprintf("%.3f", x.r2);
"R² within" @sprintf("%.3f", x.r2_within);
"Iterations" sprint(show, x.iterations; context=:compact => true);
"Converged" sprint(show, x.converged; context=:compact => true)
]

format_scientific(x) = @sprintf("%.3f", x)

function Base.show(io::IO, x::InteractiveFixedEffectModel)
ctitle = title(x)
ctop = top(x)
cc = coef(x)
se = stderror(x)
coefnms = coefnames(x)
conf_int = confint(x)
# put (intercept) last
if !isempty(coefnms) && ((coefnms[1] == Symbol("(Intercept)")) || (coefnms[1] == "(Intercept)"))
newindex = vcat(2:length(cc), 1)
cc = cc[newindex]
se = se[newindex]
conf_int = conf_int[newindex, :]
coefnms = coefnms[newindex]
end
tt = cc ./ se
mat = hcat(cc, se, tt, fdistccdf.(Ref(1), Ref(dof_residual(x)), abs2.(tt)), conf_int[:, 1:2])
nr, nc = size(mat)
colnms = ["Estimate","Std.Error","t value", "Pr(>|t|)", "Lower 95%", "Upper 95%"]
rownms = ["$(coefnms[i])" for i = 1:length(cc)]
pvc = 4


# print
import StatsBase: NoQuote, PValue
function Base.show(io::IO, m::InteractiveFixedEffectModel)
ct = coeftable(m)
#copied from show(iio,cf::Coeftable)
cols = ct.cols; rownms = ct.rownms; colnms = ct.colnms;
nc = length(cols)
nr = length(cols[1])
if length(rownms) == 0
rownms = AbstractString[lpad("[$i]",floor(Integer, log10(nr))+3) for i in 1:nr]
end
if length(rownms) > 0
rnwidth = max(4,maximum([length(nm) for nm in rownms]) + 1)
else
# if only intercept, rownms is empty collection, so previous would return error
rnwidth = 4
rownms = [lpad("[$i]",floor(Integer, log10(nr))+3) for i in 1:nr]
end
rownms = [rpad(nm,rnwidth) for nm in rownms]
widths = [length(cn)::Int for cn in colnms]
str = [sprint(show, mat[i,j]; context=:compact => true) for i in 1:nr, j in 1:nc]
if pvc != 0 # format the p-values column
for i in 1:nr
str[i, pvc] = format_scientific(mat[i, pvc])
end
mat = [j == 1 ? NoQuote(rownms[i]) :
j-1 == ct.pvalcol ? NoQuote(sprint(show, PValue(cols[j-1][i]))) :
j-1 in ct.teststatcol ? TestStat(cols[j-1][i]) :
cols[j-1][i] isa AbstractString ? NoQuote(cols[j-1][i]) : cols[j-1][i]
for i in 1:nr, j in 1:nc+1]
io = IOContext(io, :compact=>true, :limit=>false)
A = Base.alignment(io, mat, 1:size(mat, 1), 1:size(mat, 2),
typemax(Int), typemax(Int), 3)
nmswidths = pushfirst!(length.(colnms), 0)
A = [nmswidths[i] > sum(A[i]) ? (A[i][1]+nmswidths[i]-sum(A[i]), A[i][2]) : A[i]
for i in 1:length(A)]
totwidth = sum(sum.(A)) + 2 * (length(A) - 1)


#intert my stuff which requires totwidth
ctitle = string(typeof(m))
halfwidth = div(totwidth - length(ctitle), 2)
print(io, " " ^ halfwidth * ctitle * " " ^ halfwidth)
ctop = top(m)
for i in 1:size(ctop, 1)
ctop[i, 1] = ctop[i, 1] * ":"
end
for j in 1:nc
for i in 1:nr
lij = length(str[i, j])
if lij > widths[j]
widths[j] = lij
end
println(io, '\n', repeat('=', totwidth))
halfwidth = div(totwidth, 2) - 1
interwidth = 2 + mod(totwidth, 2)
for i in 1:(div(size(ctop, 1) - 1, 2)+1)
print(io, ctop[2*i-1, 1])
print(io, lpad(ctop[2*i-1, 2], halfwidth - length(ctop[2*i-1, 1])))
print(io, " " ^interwidth)
if size(ctop, 1) >= 2*i
print(io, ctop[2*i, 1])
print(io, lpad(ctop[2*i, 2], halfwidth - length(ctop[2*i, 1])))
end
println(io)
end
widths .+= 1
totalwidth = sum(widths) + rnwidth
if length(ctitle) > 0
halfwidth = div(totalwidth - length(ctitle), 2)
println(io, " " ^ halfwidth * string(ctitle) * " " ^ halfwidth)

# rest of coeftable code
println(io, repeat('=', totwidth))
print(io, repeat(' ', sum(A[1])))
for j in 1:length(colnms)
print(io, " ", lpad(colnms[j], sum(A[j+1])))
end
if length(ctop) > 0
for i in 1:size(ctop, 1)
ctop[i, 1] = ctop[i, 1] * ":"
end
println(io, "=" ^totalwidth)
halfwidth = div(totalwidth, 2) - 1
interwidth = 2 + mod(totalwidth, 2)
for i in 1:(div(size(ctop, 1) - 1, 2)+1)
print(io, ctop[2*i-1, 1])
print(io, lpad(ctop[2*i-1, 2], halfwidth - length(ctop[2*i-1, 1])))
print(io, " " ^interwidth)
if size(ctop, 1) >= 2*i
print(io, ctop[2*i, 1])
print(io, lpad(ctop[2*i, 2], halfwidth - length(ctop[2*i, 1])))
end
println(io)
end
println(io, '\n', repeat('─', totwidth))
for i in 1:size(mat, 1)
Base.print_matrix_row(io, mat, A, i, 1:size(mat, 2), " ")
i != size(mat, 1) && println(io)
end
println(io,"=" ^totalwidth)
println(io," " ^ rnwidth *
join([lpad(string(colnms[i]), widths[i]) for i = 1:nc], ""))
println(io,"-" ^totalwidth)
for i in 1:nr
print(io, rownms[i])
for j in 1:nc
print(io, lpad(str[i,j],widths[j]))
end
println(io)
end
println(io,"=" ^totalwidth)
end
println(io, '\n', repeat('=', totwidth))
nothing
end
3 changes: 1 addition & 2 deletions src/utils/formula.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,11 @@

eachterm(@nospecialize(x::AbstractTerm)) = (x,)
eachterm(@nospecialize(x::NTuple{N, AbstractTerm})) where {N} = x
TermOrTerms = Union{AbstractTerm, NTuple{N, AbstractTerm} where N}

##############################################################################
##
## Parse FixedEffect
##
##############################################################################
fesymbol(t::FixedEffectModels.FixedEffectTerm) = t.x
fesymbol(t::FunctionTerm{typeof(fe)}) = Symbol(t.args_parsed[1])
fesymbol(t::FunctionTerm{typeof(fe)}) = Symbol(t.args[1])

2 comments on commit 58a8f70

@matthieugomez
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator register()

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/79672

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v1.2.0 -m "<description of version>" 58a8f702ad59f0cb37e0c38e1a4b85bcd42fc6b5
git push origin v1.2.0

Please sign in to comment.