Add the option to ignore ConvergenceExceptions (JuliaStats#233)

The result of a non-converged model might still be interesting. However, since `ConvergenceException`s are thrown, these partially fitted models are discarded. This commit adds an `omit_convergence_exception` argument to all relevant functions that allows the user to omit such exceptions and return the partially trained model. The exception is still thrown in the default case to preserve backwards compatibility.
joeldierkes · Sep 18, 2024 · 9b9bf6d · 9b9bf6d
1 parent b7dc36e
commit 9b9bf6d
Show file tree

Hide file tree

Showing 5 changed files with 58 additions and 14 deletions.
diff --git a/src/ica.jl b/src/ica.jl
@@ -107,6 +107,7 @@ Invoke the Fast ICA algorithm[^1].
 - `fun`: The approximate neg-entropy functor of type [`ICAGDeriv`](@ref).
 - `maxiter`: Maximum number of iterations.
 - `tol`: Tolerable change of `W` at convergence.
+- `omit_convergence_exception`: Whether to omit an exception if the function did not converge.
 
 Returns the updated `W`.
 
@@ -116,7 +117,8 @@ function fastica!(W::DenseMatrix{T}, # initialized component matrix, siz
  X::DenseMatrix{T}, # (whitened) observation sample matrix, size(m, n)
  fun::ICAGDeriv, # approximate neg-entropy functor
  maxiter::Int, # maximum number of iterations
- tol::Real) where {T<:Real} # convergence tolerance
+ tol::Real, # convergence tolerance
+ omit_convergence_exception::Bool) where {T<:Real}
 
  # argument checking
  m = size(W, 1)
@@ -173,7 +175,11 @@ function fastica!(W::DenseMatrix{T}, # initialized component matrix, siz
 
  @debug "Iteration $t" change=chg tolerance=tol
  end
- converged || throw(ConvergenceException(maxiter, chg, oftype(chg, tol)))
+
+ if !omit_convergence_exception && !converged
+ throw(ConvergenceException(maxiter, chg, oftype(chg, tol)))
+ end
+
  return W
 end
 
@@ -194,6 +200,7 @@ while each column corresponds to an observation (*e.g* all signal value at a par
 - `do_whiten`: Whether to perform pre-whitening (*default* `true`)
 - `maxiter`: Maximum number of iterations (*default* `100`)
 - `tol`: Tolerable change of ``W`` at convergence (*default* `1.0e-6`)
+- `omit_convergence_exception`: Whether to omit an exception if the function did not converge (*default* `false`).
 - `mean`: The mean vector, which can be either of:
  - `0`: the input data has already been centralized
  - `nothing`: this function will compute the mean (*default*)
@@ -216,6 +223,7 @@ function fit(::Type{ICA}, X::AbstractMatrix{T},# sample matrix, size (m, n)
  do_whiten::Bool=true, # whether to perform pre-whitening
  maxiter::Integer=100, # maximum number of iterations
  tol::Real=1.0e-6, # convergence tolerance
+ omit_convergence_exception::Bool=false,
  mean=nothing, # pre-computed mean
  winit::Matrix{T}=zeros(T,0,0) # init guess of W, size (m, k)
  ) where {T<:Real}
@@ -247,7 +255,7 @@ function fit(::Type{ICA}, X::AbstractMatrix{T},# sample matrix, size (m, n)
  W = (isempty(winit) ? randn(T, size(Z,1), k) : copy(winit))
 
  # invoke core algorithm
- fastica!(W, Z, fun, maxiter, tol)
+ fastica!(W, Z, fun, maxiter, tol, omit_convergence_exception)
 
  # construct model
  if do_whiten

diff --git a/src/mmds.jl b/src/mmds.jl
@@ -113,6 +113,7 @@ Let `(d, n) = size(X)` be respectively the input dimension and the number of obs
  - any two parameter disparity transformation function, where the first parameter is a vector of proximities (i.e. dissimilarities) and the second parameter is a vector of distances, e.g. `(p,d)->b*p` for some `b` is a transformation function for *ratio* MDS.
 - `tol`: Convergence tolerance (*default* `1.0e-3`)
 - `maxiter`: Maximum number of iterations (*default* `300`)
+- `omit_convergence_exception`: Whether to omit an exception if the function did not converge (*default* `false`).
 - `initial`: an initial reduced space point configuration
  - `nothing`: then an initial configuration is randomly generated (*default*)
  - pre-defined matrix
@@ -129,7 +130,8 @@ function fit(::Type{MetricMDS}, X::AbstractMatrix{T};
  maxiter::Int = 300,
  initial::Union{Nothing,AbstractMatrix{<:Real}} = nothing,
  weights::Union{Nothing,AbstractMatrix{<:Real}} = nothing,
- distances::Bool) where {T<:Real}
+ distances::Bool,
+ omit_convergence_exception::Bool = false) where {T<:Real}
 
  # get distance matrix and space dimension
  Δ, d = if !distances
@@ -204,7 +206,10 @@ function fit(::Type{MetricMDS}, X::AbstractMatrix{T};
  σ′ = σ
  i += 1
  end
- converged || throw(ConvergenceException(maxiter, chg, oftype(chg, tol)))
+
+ if !omit_convergence_exception && !converged
+ throw(ConvergenceException(maxiter, chg, oftype(chg, tol)))
+ end
 
  MetricMDS(d, Z, σ′)
 end

diff --git a/src/ppca.jl b/src/ppca.jl
@@ -153,12 +153,14 @@ or an empty vector indicating a zero mean.
 
 Returns the resultant [`PPCA`](@ref) model.
 
-**Note:** This function accepts three keyword arguments: `maxoutdim`, `tol`, and `maxiter`.
+**Note:** This function accepts four keyword arguments: `maxoutdim`, `tol`,
+`maxiter` and `omit_convergence_exception`.
 """
 function ppcaem(S::AbstractMatrix{T}, mean::Vector{T}, n::Int;
  maxoutdim::Int=size(S,1)-1,
  tol::Real=1.0e-6, # convergence tolerance
- maxiter::Integer=1000) where {T<:Real}
+ maxiter::Integer=1000,
+ omit_convergence_exception::Bool = false) where {T<:Real}
 
  check_pcaparams(size(S,1), mean, maxoutdim, 1.)
 
@@ -197,7 +199,10 @@ function ppcaem(S::AbstractMatrix{T}, mean::Vector{T}, n::Int;
  L_old = L
  i += 1
  end
- converged || throw(ConvergenceException(maxiter, chg, oftype(chg, tol)))
+
+ if !omit_convergence_exception && !converged
+ throw(ConvergenceException(maxiter, chg, oftype(chg, tol)))
+ end
 
  return PPCA(mean, W, σ²)
 end
@@ -217,14 +222,16 @@ or an empty vector indicating a zero mean.
 Returns the resultant [`PPCA`](@ref) model.
 
 **Notes:**
-- This function accepts three keyword arguments: `maxoutdim`, `tol`, and `maxiter`.
+- This function accepts four keyword arguments: `maxoutdim`, `tol`,
+`maxiter` and `omit_convergence_exception`.
 - Function uses the `maxoutdim` parameter as an upper boundary when it automatically
 determines the latent space dimensionality.
 """
 function bayespca(S::AbstractMatrix{T}, mean::Vector{T}, n::Int;
  maxoutdim::Int=size(S,1)-1,
  tol::Real=1.0e-6, # convergence tolerance
- maxiter::Integer=1000) where {T<:Real}
+ maxiter::Integer=1000,
+ omit_convergence_exception::Bool = false) where {T<:Real}
 
  check_pcaparams(size(S,1), mean, maxoutdim, 1.)
 
@@ -271,7 +278,10 @@ function bayespca(S::AbstractMatrix{T}, mean::Vector{T}, n::Int;
  L_old = L
  i += 1
  end
- converged || throw(ConvergenceException(maxiter, chg, oftype(chg, tol)))
+
+ if !omit_convergence_exception && !converged
+ throw(ConvergenceException(maxiter, chg, oftype(chg, tol)))
+ end
 
  return PPCA(mean, W[:,wnorm .> 0.], σ²)
 end
@@ -299,6 +309,7 @@ Let `(d, n) = size(X)` be respectively the input dimension and the number of obs
  - a pre-computed mean vector
 - `tol`: Convergence tolerance (*default* `1.0e-6`)
 - `maxiter`: Maximum number of iterations (*default* `1000`)
+- `omit_convergence_exception`: Whether to omit an exception if the function did not converge (*default* `false`).
 
 **Notes:** This function calls [`ppcaml`](@ref), [`ppcaem`](@ref) or
 [`bayespca`](@ref) internally, depending on the choice of method.
@@ -308,7 +319,8 @@ function fit(::Type{PPCA}, X::AbstractMatrix{T};
  maxoutdim::Int=size(X,1)-1,
  mean=nothing,
  tol::Real=1.0e-6, # convergence tolerance
- maxiter::Integer=1000) where {T<:Real}
+ maxiter::Integer=1000,
+ omit_convergence_exception::Bool = false) where {T<:Real}
 
  @assert !SparseArrays.issparse(X) "Use Kernel PCA for sparse arrays"
 
@@ -326,9 +338,25 @@ function fit(::Type{PPCA}, X::AbstractMatrix{T};
  elseif method == :em || method == :bayes
  S = covm(X, isempty(mv) ? 0 : mv, 2)
  if method == :em
- M = ppcaem(S, mv, n, maxoutdim=maxoutdim, tol=tol, maxiter=maxiter)
+ M = ppcaem(
+ S,
+ mv,
+ n,
+ maxoutdim=maxoutdim,
+ tol=tol,
+ maxiter=maxiter,
+ omit_convergence_exception=omit_convergence_exception
+ )
  elseif method == :bayes
- M = bayespca(S, mv, n, maxoutdim=maxoutdim, tol=tol, maxiter=maxiter)
+ M = bayespca(
+ S,
+ mv,
+ n,
+ maxoutdim=maxoutdim,
+ tol=tol,
+ maxiter=maxiter,
+ omit_convergence_exception=omit_convergence_exception
+ )
  end
  else
  throw(ArgumentError("Invalid method name $(method)"))

diff --git a/test/ica.jl b/test/ica.jl
@@ -85,6 +85,7 @@ using StatsBase: ConvergenceException
  @test W'C * W ≈ Matrix(I, k, k)
 
  @test_throws ConvergenceException fit(ICA, X, k; do_whiten=true, tol=1e-8, maxiter=2)
+ _ = fit(ICA, X, k; do_whiten=true, tol=1e-8, maxiter=2, omit_convergence_exception=true)
 
  # Use data of different type
  XX = convert(Matrix{Float32}, X)

diff --git a/test/ppca.jl b/test/ppca.jl
@@ -113,6 +113,7 @@ import StatsBase
  @test P'P ≈ Matrix(I, 3, 3)
 
  @test_throws StatsBase.ConvergenceException fit(PPCA, X; method=:em, maxiter=1)
+ _ = fit(PPCA, X; method=:em, maxiter=1, omit_convergence_exception=true)
 
  # bayespca
  M0 = fit(PCA, X; mean=mval, maxoutdim = 3)
@@ -139,6 +140,7 @@ import StatsBase
  @test P'P ≈ Matrix(I, 2, 2)
 
  @test_throws StatsBase.ConvergenceException fit(PPCA, X; method=:bayes, maxiter=1)
+ _ = fit(PPCA, X; method=:bayes, maxiter=1, omit_convergence_exception=true)
 
  # Different data types
  # --------------------