Skip to content

Commit

Permalink
Add the option to ignore ConvergenceExceptions (JuliaStats#233)
Browse files Browse the repository at this point in the history
The result of a non-converged model might still be interesting. However, since
`ConvergenceExceptions` are thrown, these partially fitted models are discarded.

This commit adds an `omit_convergence_exception` argument to all relevant
functions that allows the user to omit such exceptions and return the partially
trained model. The exception is still thrown in the default case to preserve
backwards compatibility.
  • Loading branch information
joeldierkes committed Sep 18, 2024
1 parent b7dc36e commit 9b9bf6d
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 14 deletions.
14 changes: 11 additions & 3 deletions src/ica.jl
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ Invoke the Fast ICA algorithm[^1].
- `fun`: The approximate neg-entropy functor of type [`ICAGDeriv`](@ref).
- `maxiter`: Maximum number of iterations.
- `tol`: Tolerable change of `W` at convergence.
- `omit_convergence_exception`: Whether to omit an exception if the function did not converge.
Returns the updated `W`.
Expand All @@ -116,7 +117,8 @@ function fastica!(W::DenseMatrix{T}, # initialized component matrix, siz
X::DenseMatrix{T}, # (whitened) observation sample matrix, size(m, n)
fun::ICAGDeriv, # approximate neg-entropy functor
maxiter::Int, # maximum number of iterations
tol::Real) where {T<:Real} # convergence tolerance
tol::Real, # convergence tolerance
omit_convergence_exception::Bool) where {T<:Real}

# argument checking
m = size(W, 1)
Expand Down Expand Up @@ -173,7 +175,11 @@ function fastica!(W::DenseMatrix{T}, # initialized component matrix, siz

@debug "Iteration $t" change=chg tolerance=tol
end
converged || throw(ConvergenceException(maxiter, chg, oftype(chg, tol)))

if !omit_convergence_exception && !converged
throw(ConvergenceException(maxiter, chg, oftype(chg, tol)))
end

return W
end

Expand All @@ -194,6 +200,7 @@ while each column corresponds to an observation (*e.g* all signal value at a par
- `do_whiten`: Whether to perform pre-whitening (*default* `true`)
- `maxiter`: Maximum number of iterations (*default* `100`)
- `tol`: Tolerable change of ``W`` at convergence (*default* `1.0e-6`)
- `omit_convergence_exception`: Whether to omit an exception if the function did not converge (*default* `false`).
- `mean`: The mean vector, which can be either of:
- `0`: the input data has already been centralized
- `nothing`: this function will compute the mean (*default*)
Expand All @@ -216,6 +223,7 @@ function fit(::Type{ICA}, X::AbstractMatrix{T},# sample matrix, size (m, n)
do_whiten::Bool=true, # whether to perform pre-whitening
maxiter::Integer=100, # maximum number of iterations
tol::Real=1.0e-6, # convergence tolerance
omit_convergence_exception::Bool=false,
mean=nothing, # pre-computed mean
winit::Matrix{T}=zeros(T,0,0) # init guess of W, size (m, k)
) where {T<:Real}
Expand Down Expand Up @@ -247,7 +255,7 @@ function fit(::Type{ICA}, X::AbstractMatrix{T},# sample matrix, size (m, n)
W = (isempty(winit) ? randn(T, size(Z,1), k) : copy(winit))

# invoke core algorithm
fastica!(W, Z, fun, maxiter, tol)
fastica!(W, Z, fun, maxiter, tol, omit_convergence_exception)

# construct model
if do_whiten
Expand Down
9 changes: 7 additions & 2 deletions src/mmds.jl
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ Let `(d, n) = size(X)` be respectively the input dimension and the number of obs
- any two parameter disparity transformation function, where the first parameter is a vector of proximities (i.e. dissimilarities) and the second parameter is a vector of distances, e.g. `(p,d)->b*p` for some `b` is a transformation function for *ratio* MDS.
- `tol`: Convergence tolerance (*default* `1.0e-3`)
- `maxiter`: Maximum number of iterations (*default* `300`)
- `omit_convergence_exception`: Whether to omit an exception if the function did not converge (*default* `false`).
- `initial`: an initial reduced space point configuration
- `nothing`: then an initial configuration is randomly generated (*default*)
- pre-defined matrix
Expand All @@ -129,7 +130,8 @@ function fit(::Type{MetricMDS}, X::AbstractMatrix{T};
maxiter::Int = 300,
initial::Union{Nothing,AbstractMatrix{<:Real}} = nothing,
weights::Union{Nothing,AbstractMatrix{<:Real}} = nothing,
distances::Bool) where {T<:Real}
distances::Bool,
omit_convergence_exception::Bool = false) where {T<:Real}

# get distance matrix and space dimension
Δ, d = if !distances
Expand Down Expand Up @@ -204,7 +206,10 @@ function fit(::Type{MetricMDS}, X::AbstractMatrix{T};
σ′ = σ
i += 1
end
converged || throw(ConvergenceException(maxiter, chg, oftype(chg, tol)))

if !omit_convergence_exception && !converged
throw(ConvergenceException(maxiter, chg, oftype(chg, tol)))
end

MetricMDS(d, Z, σ′)
end
Expand Down
46 changes: 37 additions & 9 deletions src/ppca.jl
Original file line number Diff line number Diff line change
Expand Up @@ -153,12 +153,14 @@ or an empty vector indicating a zero mean.
Returns the resultant [`PPCA`](@ref) model.
**Note:** This function accepts three keyword arguments: `maxoutdim`, `tol`, and `maxiter`.
**Note:** This function accepts four keyword arguments: `maxoutdim`, `tol`,
`maxiter` and `omit_convergence_exception`.
"""
function ppcaem(S::AbstractMatrix{T}, mean::Vector{T}, n::Int;
maxoutdim::Int=size(S,1)-1,
tol::Real=1.0e-6, # convergence tolerance
maxiter::Integer=1000) where {T<:Real}
maxiter::Integer=1000,
omit_convergence_exception::Bool = false) where {T<:Real}

check_pcaparams(size(S,1), mean, maxoutdim, 1.)

Expand Down Expand Up @@ -197,7 +199,10 @@ function ppcaem(S::AbstractMatrix{T}, mean::Vector{T}, n::Int;
L_old = L
i += 1
end
converged || throw(ConvergenceException(maxiter, chg, oftype(chg, tol)))

if !omit_convergence_exception && !converged
throw(ConvergenceException(maxiter, chg, oftype(chg, tol)))
end

return PPCA(mean, W, σ²)
end
Expand All @@ -217,14 +222,16 @@ or an empty vector indicating a zero mean.
Returns the resultant [`PPCA`](@ref) model.
**Notes:**
- This function accepts three keyword arguments: `maxoutdim`, `tol`, and `maxiter`.
- This function accepts four keyword arguments: `maxoutdim`, `tol`,
`maxiter` and `omit_convergence_exception`.
- Function uses the `maxoutdim` parameter as an upper boundary when it automatically
determines the latent space dimensionality.
"""
function bayespca(S::AbstractMatrix{T}, mean::Vector{T}, n::Int;
maxoutdim::Int=size(S,1)-1,
tol::Real=1.0e-6, # convergence tolerance
maxiter::Integer=1000) where {T<:Real}
maxiter::Integer=1000,
omit_convergence_exception::Bool = false) where {T<:Real}

check_pcaparams(size(S,1), mean, maxoutdim, 1.)

Expand Down Expand Up @@ -271,7 +278,10 @@ function bayespca(S::AbstractMatrix{T}, mean::Vector{T}, n::Int;
L_old = L
i += 1
end
converged || throw(ConvergenceException(maxiter, chg, oftype(chg, tol)))

if !omit_convergence_exception && !converged
throw(ConvergenceException(maxiter, chg, oftype(chg, tol)))
end

return PPCA(mean, W[:,wnorm .> 0.], σ²)
end
Expand Down Expand Up @@ -299,6 +309,7 @@ Let `(d, n) = size(X)` be respectively the input dimension and the number of obs
- a pre-computed mean vector
- `tol`: Convergence tolerance (*default* `1.0e-6`)
- `maxiter`: Maximum number of iterations (*default* `1000`)
- `omit_convergence_exception`: Whether to omit an exception if the function did not converge (*default* `false`).
**Notes:** This function calls [`ppcaml`](@ref), [`ppcaem`](@ref) or
[`bayespca`](@ref) internally, depending on the choice of method.
Expand All @@ -308,7 +319,8 @@ function fit(::Type{PPCA}, X::AbstractMatrix{T};
maxoutdim::Int=size(X,1)-1,
mean=nothing,
tol::Real=1.0e-6, # convergence tolerance
maxiter::Integer=1000) where {T<:Real}
maxiter::Integer=1000,
omit_convergence_exception::Bool = false) where {T<:Real}

@assert !SparseArrays.issparse(X) "Use Kernel PCA for sparse arrays"

Expand All @@ -326,9 +338,25 @@ function fit(::Type{PPCA}, X::AbstractMatrix{T};
elseif method == :em || method == :bayes
S = covm(X, isempty(mv) ? 0 : mv, 2)
if method == :em
M = ppcaem(S, mv, n, maxoutdim=maxoutdim, tol=tol, maxiter=maxiter)
M = ppcaem(
S,
mv,
n,
maxoutdim=maxoutdim,
tol=tol,
maxiter=maxiter,
omit_convergence_exception=omit_convergence_exception
)
elseif method == :bayes
M = bayespca(S, mv, n, maxoutdim=maxoutdim, tol=tol, maxiter=maxiter)
M = bayespca(
S,
mv,
n,
maxoutdim=maxoutdim,
tol=tol,
maxiter=maxiter,
omit_convergence_exception=omit_convergence_exception
)
end
else
throw(ArgumentError("Invalid method name $(method)"))
Expand Down
1 change: 1 addition & 0 deletions test/ica.jl
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ using StatsBase: ConvergenceException
@test W'C * W ≈ Matrix(I, k, k)

@test_throws ConvergenceException fit(ICA, X, k; do_whiten=true, tol=1e-8, maxiter=2)
_ = fit(ICA, X, k; do_whiten=true, tol=1e-8, maxiter=2, omit_convergence_exception=true)

# Use data of different type
XX = convert(Matrix{Float32}, X)
Expand Down
2 changes: 2 additions & 0 deletions test/ppca.jl
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ import StatsBase
@test P'P ≈ Matrix(I, 3, 3)

@test_throws StatsBase.ConvergenceException fit(PPCA, X; method=:em, maxiter=1)
_ = fit(PPCA, X; method=:em, maxiter=1, omit_convergence_exception=true)

# bayespca
M0 = fit(PCA, X; mean=mval, maxoutdim = 3)
Expand All @@ -139,6 +140,7 @@ import StatsBase
@test P'P ≈ Matrix(I, 2, 2)

@test_throws StatsBase.ConvergenceException fit(PPCA, X; method=:bayes, maxiter=1)
_ = fit(PPCA, X; method=:bayes, maxiter=1, omit_convergence_exception=true)

# Different data types
# --------------------
Expand Down

0 comments on commit 9b9bf6d

Please sign in to comment.