diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 5b67a64..f5c5a80 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -20,7 +20,7 @@ jobs: matrix: version: - '1' - - '1.10' + - 'lts' os: - ubuntu-latest - macOS-latest @@ -29,7 +29,7 @@ jobs: - x64 steps: - uses: actions/checkout@v4 - - uses: julia-actions/setup-julia@v1 + - uses: julia-actions/setup-julia@v2 with: version: ${{ matrix.version }} arch: ${{ matrix.arch }} diff --git a/docs/src/gibbs_polar.md b/docs/src/gibbs_polar.md index 56e48ca..d87a4d1 100644 --- a/docs/src/gibbs_polar.md +++ b/docs/src/gibbs_polar.md @@ -8,7 +8,6 @@ Unlike other slice sampling algorithms, it operates a Gibbs sampler over polar c Due to the involvement of polar coordinates, GPSS only works reliably on more than one dimension. However, unlike ESS, GPSS is applicable to any target distribution. - ## Description For a $$d$$-dimensional target distribution $$\pi$$, GPSS utilizes the following augmented target distribution: ```math @@ -34,26 +33,19 @@ The Gibbs steps on $$\theta$$ and $$r$$ are implemented through specialized shri The only tunable parameter of the algorithm is the size of the search interval (window) of the shrinkage sampler for the radius variable $$r$$. +!!! warning + A limitation of the current implementation of GPSS is that the acceptance rate exhibits a heavy tail. That is, occasionally, a single transition might take an excessive amount of time. + !!! info The kernel corresponding to this sampler is defined on an **augmented state space** and cannot directly perform a transition on $$x$$. This also means that the corresponding kernel is not reversible with respect to $$x$$. ## Interface -!!! info - By the nature of polar coordinates, GPSS only works reliably for targets with dimension at least $$d \geq 2$$. - ```@docs GibbsPolarSlice ``` -!!! warning - When initializing the chain (*e.g.* the `initial_params` keyword arguments in `AbstractMCMC.sample`), it is necessary to inialize from a point $$x_0$$ that has a sensible norm $$\lVert x_0 \rVert > 0$$, otherwise, the chain will start from a pathologic point in polar coordinates. This might even result in the sampler getting stuck in an infinite loop. (This can be prevented by setting `max_proposals`.) If $$\lVert x_0 \rVert \leq 10^{-5}$$, the current implementation will display a warning. - -!!! info - For Turing users: `Turing` might change `initial_params` to match the support of the posterior. This might lead to $$\lVert x_0 \rVert$$ being small, even though the vector you passed to`initial_params` has a sufficiently large norm. If this is suspected, simply try a different initialization value. - - ## Demonstration As illustrated in the original paper, GPSS shows good performance on heavy-tailed targets despite being a multivariate slice sampler. Consider a 10-dimensional Student-$$t$$ target with 1-degree of freedom (this corresponds to a multivariate Cauchy): diff --git a/src/SliceSampling.jl b/src/SliceSampling.jl index ad9f768..cb1c82e 100644 --- a/src/SliceSampling.jl +++ b/src/SliceSampling.jl @@ -20,6 +20,8 @@ export sample, MCMCThreads, MCMCDistributed, MCMCSerial # Interfaces abstract type AbstractSliceSampling <: AbstractMCMC.AbstractSampler end +const DEFAULT_MAX_PROPOSALS = 10_000 + """ struct Transition @@ -52,10 +54,9 @@ Return the initial sample for the `model` using the random number generator `rng """ function initial_sample(::Random.AbstractRNG, ::Any) error( - "`initial_sample` is not implemented but an initialization wasn't provided. " * + "`initial_sample` is not implemented but an initialization wasn't provided. ", "Consider supplying an initialization to `initial_params`." ) - println("fuck!!!") end # If target is from `LogDensityProblemsAD`, unwrap target before calling `initial_sample`. @@ -66,10 +67,16 @@ initial_sample( ) = initial_sample(rng, parent(wrap)) function exceeded_max_prop(max_prop::Int) - error("Exceeded maximum number of proposal $(max_prop).\n", - "Here are possible causes:\n", - "- The model might be broken or pathologic.\n", - "- There might be a bug in the sampler.") + error("Exceeded maximum number of proposal $(max_prop), ", + "which indicates an acceptance rate less than $(1/max_prop*100)%. ", + "A quick fix is to increase `max_prop`, ", + "but an acceptance rate that is too low often indicates that there is a problem. ", + "Here are some possible causes:\n", + "- The model might be broken or degenerate (most likely cause).\n", + "- The tunable parameters of the sampler are suboptimal.\n", + "- The initialization is pathologic. (try supplying a (different) `initial_params`)\n", + "- There might be a bug in the sampler. (if this is suspected, file an issue to `SliceSampling`)\n" + ) end ## Univariate Slice Sampling Algorithms diff --git a/src/multivariate/gibbspolar.jl b/src/multivariate/gibbspolar.jl index 857e517..9600b03 100644 --- a/src/multivariate/gibbspolar.jl +++ b/src/multivariate/gibbspolar.jl @@ -9,14 +9,26 @@ Gibbsian polar slice sampling algorithm by P. Schär, M. Habeck, and D. Rudolf [ # Keyword Arguments - `w::Real`: Initial window size for the radius shrinkage procedure -- `max_proposals::Int`: Maximum number of proposals allowed until throwing an error (default: `typemax(Int)`). +- `max_proposals::Int`: Maximum number of proposals allowed until throwing an error (default: `$(DEFAULT_MAX_PROPOSALS)`). + +!!! info + By the nature of polar coordinates, GPSS only works reliably for targets with dimension at least \$\$d \\geq 2\$\$. + +!!! info + The initial window size `w` must be set at least an order of magnitude larger than what is sensible for other slice samplers. Otherwise, a large number of rejections might be experienced. + +!!! warning + When initializing the chain (*e.g.* the `initial_params` keyword arguments in `AbstractMCMC.sample`), it is necessary to inialize from a point \$\$x_0\$\$ that has a sensible norm \$\$\\lVert x_0 \\rVert > 0\$\$, otherwise, the chain will start from a pathologic point in polar coordinates. This might even result in the sampler getting stuck in an infinite loop. (This can be prevented by setting `max_proposals`.) If \$\$\\lVert x_0 \\rVert \\leq 10^{-5}\$\$, the current implementation will display a warning. + +!!! info + For Turing users: `Turing` might change `initial_params` to match the support of the posterior. This might lead to \$\$\\lVert x_0 \\rVert\$\$ being small, even though the vector you passed to`initial_params` has a sufficiently large norm. If this is suspected, simply try a different initialization value. """ struct GibbsPolarSlice{W <: Real} <: AbstractMultivariateSliceSampling w::W max_proposals::Int end -GibbsPolarSlice(w::Real; max_proposals::Int = typemax(Int)) = GibbsPolarSlice(w, max_proposals) +GibbsPolarSlice(w::Real; max_proposals::Int = DEFAULT_MAX_PROPOSALS) = GibbsPolarSlice(w, max_proposals) struct GibbsPolarSliceState{T <: Transition, R <: Real, D <: AbstractVector} "Current [`Transition`](@ref)." diff --git a/src/multivariate/latent.jl b/src/multivariate/latent.jl index 0e93622..d3f7051 100644 --- a/src/multivariate/latent.jl +++ b/src/multivariate/latent.jl @@ -8,14 +8,14 @@ Latent slice sampling algorithm by Li and Walker[^LW2023]. - `beta::Real`: Beta parameter of the Gamma distribution of the auxiliary variables. # Keyword Arguments -- `max_proposals::Int`: Maximum number of proposals allowed until throwing an error (default: `typemax(Int)`). +- `max_proposals::Int`: Maximum number of proposals allowed until throwing an error (default: `$(DEFAULT_MAX_PROPOSALS)`). """ struct LatentSlice{B <: Real} <: AbstractMultivariateSliceSampling beta ::B max_proposals::Int end -function LatentSlice(beta::Real; max_proposals::Int = typemax(Int)) +function LatentSlice(beta::Real; max_proposals::Int = DEFAULT_MAX_PROPOSALS) @assert beta > 0 "Beta must be strictly positive" LatentSlice(beta, max_proposals) end diff --git a/src/univariate/doublingout.jl b/src/univariate/doublingout.jl index 1e6a8fc..3a9155e 100644 --- a/src/univariate/doublingout.jl +++ b/src/univariate/doublingout.jl @@ -9,7 +9,7 @@ Univariate slice sampling by automatically adapting the initial interval through # Keyword Arguments - `max_doubling_out`: Maximum number of "doubling outs" (default: 8). -- `max_proposals::Int`: Maximum number of proposals allowed until throwing an error (default: `typemax(Int)`). +- `max_proposals::Int`: Maximum number of proposals allowed until throwing an error (default: `$(DEFAULT_MAX_PROPOSALS)`). """ struct SliceDoublingOut{W <: Real} <: AbstractUnivariateSliceSampling window ::W @@ -20,7 +20,7 @@ end function SliceDoublingOut( window ::Real; max_doubling_out::Int = 8, - max_proposals ::Int = typemax(Int), + max_proposals ::Int = DEFAULT_MAX_PROPOSALS, ) @assert window > 0 SliceDoublingOut(window, max_doubling_out, max_proposals) diff --git a/src/univariate/fixedinterval.jl b/src/univariate/fixedinterval.jl index 4c075f6..b679ec3 100644 --- a/src/univariate/fixedinterval.jl +++ b/src/univariate/fixedinterval.jl @@ -8,7 +8,7 @@ Univariate slice sampling with a fixed initial interval (Scheme 2 by Neal[^N2003 - `window::Real`: Proposal window. # Keyword Arguments -- `max_proposals::Int`: Maximum number of proposals allowed until throwing an error (default: `typemax(Int)`). +- `max_proposals::Int`: Maximum number of proposals allowed until throwing an error (default: `$(DEFAULT_MAX_PROPOSALS)`). """ struct Slice{W <: Real} <: AbstractUnivariateSliceSampling window ::W @@ -17,7 +17,7 @@ end function Slice( window ::Real; - max_proposals::Int = typemax(Int), + max_proposals::Int = DEFAULT_MAX_PROPOSALS, ) @assert window > 0 Slice(window, max_proposals) diff --git a/src/univariate/steppingout.jl b/src/univariate/steppingout.jl index 6ab9867..88d578e 100644 --- a/src/univariate/steppingout.jl +++ b/src/univariate/steppingout.jl @@ -9,7 +9,7 @@ Univariate slice sampling by automatically adapting the initial interval through # Keyword Arguments - `max_stepping_out::Int`: Maximum number of "stepping outs" (default: 32). -- `max_proposals::Int`: Maximum number of proposals allowed until throwing an error (default: `typemax(Int)`). +- `max_proposals::Int`: Maximum number of proposals allowed until throwing an error (default: `$(DEFAULT_MAX_PROPOSALS)`). """ struct SliceSteppingOut{W <: Real} <: AbstractUnivariateSliceSampling window ::W @@ -20,7 +20,7 @@ end function SliceSteppingOut( window ::Real; max_stepping_out::Int = 32, - max_proposals ::Int = typemax(Int), + max_proposals ::Int = DEFAULT_MAX_PROPOSALS, ) @assert window > 0 SliceSteppingOut(window, max_stepping_out, max_proposals) diff --git a/test/multivariate.jl b/test/multivariate.jl index d91233b..910fe63 100644 --- a/test/multivariate.jl +++ b/test/multivariate.jl @@ -78,7 +78,7 @@ end LatentSlice(5), # Gibbsian polar slice sampling - GibbsPolarSlice(10), + GibbsPolarSlice(100), ] @testset "initial_params" begin model = MultiModel(1.0, 1.0, [0.0]) @@ -87,7 +87,7 @@ end θ0 = [1.0, 0.1] chain = sample( - model, + model′, sampler, 10; initial_params=θ0,