Skip to content

Commit

Permalink
Merge pull request #133 from TuringLang/tor/improvements
Browse files Browse the repository at this point in the history
General improvements and fixes
  • Loading branch information
yebai authored Dec 4, 2022
2 parents cb07737 + 8a6b47e commit 1fdea5f
Show file tree
Hide file tree
Showing 21 changed files with 1,126 additions and 581 deletions.
5 changes: 2 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
/Manifest.toml
Manifest.toml
*.DS_Store
*.png
deprecated
working_code
17 changes: 10 additions & 7 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,19 @@ version = "0.1.1"

[deps]
AbstractMCMC = "80f14c24-f653-4e6a-9b94-39d6b0f70001"
ConcreteStructs = "2569d6c7-a4a2-43d3-a901-331e8e4be471"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
InverseFunctions = "3587e190-3f89-42d0-90ee-14403ec27112"
ProgressLogging = "33c8b6b6-d38a-422a-b730-caa89a2f386c"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Setfield = "efcf1570-3423-57d1-acb7-fd33fddbac46"

[compat]
AbstractMCMC = "3.2"
AbstractMCMC = "3.2, 4"
ConcreteStructs = "0.2"
Distributions = "0.24, 0.25"
DocStringExtensions = "0.8, 0.9"
InverseFunctions = "0.1"
Setfield = "0.7, 0.8, 1"
julia = "1"

[extras]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Test"]
28 changes: 23 additions & 5 deletions src/MCMCTempering.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,30 @@ import AbstractMCMC
import Distributions
import Random

using ProgressLogging: ProgressLogging
using ConcreteStructs: @concrete
using Setfield: @set, @set!

using InverseFunctions

using DocStringExtensions

include("adaptation.jl")
include("tempered.jl")
include("swapping.jl")
include("state.jl")
include("sampler.jl")
include("sampling.jl")
include("ladders.jl")
include("stepping.jl")
include("model.jl")
include("swapping.jl")
include("plotting.jl")

export tempered, TemperedSampler, plot_swaps, plot_ladders, make_tempered_model, get_tempered_loglikelihoods_and_params, make_tempered_loglikelihood, get_params
export tempered,
tempered_sample,
TemperedSampler,
make_tempered_model,
StandardSwap,
RandomPermutationSwap,
NonReversibleSwap

function AbstractMCMC.bundle_samples(
ts::Vector,
Expand All @@ -22,7 +37,10 @@ function AbstractMCMC.bundle_samples(
chain_type::Type;
kwargs...
)
AbstractMCMC.bundle_samples(ts, model, sampler.internal_sampler, state, chain_type; kwargs...)
AbstractMCMC.bundle_samples(
ts, model, sampler_for_chain(sampler, state, 1), state_for_chain(state, 1), chain_type;
kwargs...
)
end

end
242 changes: 214 additions & 28 deletions src/adaptation.jl
Original file line number Diff line number Diff line change
@@ -1,54 +1,240 @@
using Distributions: StatsFuns

struct PolynomialStep
η :: Real
c :: Real
@concrete struct PolynomialStep
η
c
end
function get(step::PolynomialStep, k::Real)
step.c * (k + 1.) ^ (-step.η)
return step.c * (k + 1) ^ (-step.η)
end

"""
Geometric
struct AdaptiveState
swap_target_ar :: Real
scale :: Base.RefValue{<:Real}
step :: PolynomialStep
Specifies a geometric schedule for the inverse temperatures.
See also: [`AdaptiveState`](@ref), [`update_inverse_temperatures`](@ref), and
[`weight`](@ref).
"""
struct Geometric end

defaultscale(::Geometric, Δ) = eltype(Δ)(0.9)

"""
InverselyAdditive
Specifies an additive schedule for the temperatures (not _inverse_ temperatures).
See also: [`AdaptiveState`](@ref), [`update_inverse_temperatures`](@ref), and
[`weight`](@ref).
"""
struct InverselyAdditive end

defaultscale(::InverselyAdditive, Δ) = eltype(Δ)(0.9)

struct AdaptiveState{S,T1<:Real,T2<:Real,P<:PolynomialStep}
schedule_type::S
swap_target_ar::T1
scale_unconstrained::T2
step::P
n::Int
end
function AdaptiveState(swap_target::Real, scale::Real, step::PolynomialStep)
AdaptiveState(swap_target, Ref(log(scale)), step)

function AdaptiveState(swap_target_ar, scale_unconstrained, step)
return AdaptiveState(InverselyAdditive(), swap_target_ar, scale_unconstrained, step)
end

function AdaptiveState(schedule_type, swap_target_ar, scale_unconstrained, step)
return AdaptiveState(schedule_type, swap_target_ar, scale_unconstrained, step, 1)
end

"""
weight(ρ::AdaptiveState{<:Geometric})
Return the weight/scale to be used in the mapping `β[ℓ] ↦ β[ℓ + 1]`.
# Notes
In Eq. (13) in [^MIAS12] they use the relation
β[ℓ + 1] = β[ℓ] * w(ρ)
with
w(ρ) = exp(-exp(ρ))
because we want `w(ρ) ∈ (0, 1)` while `ρ ∈ ℝ`. As an alternative, we use
`StatsFuns.logistic(ρ)` which is numerically more stable than `exp(-exp(ρ))` and
leads to less extreme values, i.e. 0 or 1.
This the same approach as mentioned in [^ATCH11].
# References
[^MIAS12]: Miasojedow, B., Moulines, E., & Vihola, M., Adaptive Parallel Tempering Algorithm, (2012).
[^ATCH11]: Atchade, Yves F, Roberts, G. O., & Rosenthal, J. S., Towards optimal scaling of metropolis-coupled markov chain monte carlo, Statistics and Computing, 21(4), 555–568 (2011).
"""
weight::AdaptiveState{<:Geometric}) = geometric_weight_constrain.scale_unconstrained)
geometric_weight_constrain(x) = StatsFuns.logistic(x)
geometric_weight_unconstrain(y) = inverse(StatsFuns.logistic)(y)

"""
weight(ρ::AdaptiveState{<:InverselyAdditive})
Return the weight/scale to be used in the mapping `β[ℓ] ↦ β[ℓ + 1]`.
"""
weight::AdaptiveState{<:InverselyAdditive}) = inversely_additive_weight_constrain.scale_unconstrained)
inversely_additive_weight_constrain(x) = exp(-x)
inversely_additive_weight_unconstrain(y) = -log(y)

function init_adaptation(
schedule::InverselyAdditive,
Δ::Vector{<:Real},
swap_target::Real,
scale::Real,
η::Real,
stepsize::Real
)
N_it = length(Δ)
step = PolynomialStep(η, stepsize)
# TODO: One common state or one per temperature?
# ρs = [
# AdaptiveState(schedule, swap_target, inversely_additive_weight_unconstrain(scale), step)
# for _ in 1:(N_it - 1)
# ]
ρs = AdaptiveState(schedule, swap_target, log(scale), step)
return ρs
end

function init_adaptation(
schedule::Geometric,
Δ::Vector{<:Real},
swap_target::Real,
scale::Real,
γ::Real
η::Real,
stepsize::Real
)
Nt = length(Δ)
step = PolynomialStep(γ, Nt - 1)
Ρ = [AdaptiveState(swap_target, scale, step) for _ in 1:(Nt - 1)]
return Ρ
N_it = length(Δ)
step = PolynomialStep(η, stepsize)
# TODO: One common state or one per temperature?
# ρs = [
# AdaptiveState(schedule, swap_target, geometric_weight_unconstrain(scale), step)
# for _ in 1:(N_it - 1)
# ]
ρs = AdaptiveState(schedule, swap_target, geometric_weight_unconstrain(scale), step)
return ρs
end


function rhos_to_ladder(Ρ, Δ)
β′ = Δ[1]
for i in 1:length(Ρ)
β′ += exp(Ρ[i].scale[])
Δ[i + 1] = Δ[1] / β′
"""
adapt!!(ρ::AdaptiveState, swap_ar)
Return updated `ρ` based on swap acceptance ratio `swap_ar`.
See [`update_inverse_temperatures`](@ref) to see how we compute the resulting
inverse temperatures from the adapted state `ρ`.
"""
function adapt!!::AdaptiveState, swap_ar)
swap_diff = ρ.swap_target_ar - swap_ar
γ = get.step, ρ.n)
ρ_new = @set ρ.scale_unconstrained = ρ.scale_unconstrained + γ * swap_diff
return @set ρ_new.n += 1
end

"""
adapt!!(ρ::AdaptiveState, Δ, k, swap_ar)
adapt!!(ρ::AbstractVector{<:AdaptiveState}, Δ, k, swap_ar)
Return adapted state(s) given that we just proposed a swap of the `k`-th
and `(k + 1)`-th temperatures with acceptance ratio `swap_ar`.
"""
adapt!!::AdaptiveState, Δ, k, swap_ar) = adapt!!(ρ, swap_ar)
function adapt!!(ρs::AbstractVector{<:AdaptiveState}, Δ, k, swap_ar)
ρs[k] = adapt!!(ρs[k], swap_ar)
return ρs
end

"""
update_inverse_temperatures(ρ::AdaptiveState{<:Geometric}, Δ_current)
update_inverse_temperatures(ρ::AbstractVector{<:AdaptiveState{<:Geometric}}, Δ_current)
Return updated inverse temperatures computed from adaptation state(s) and `Δ_current`.
This update is similar to Eq. (13) in [^MIAS12], with the only possible deviation
being how we compute the scaling factor from `ρ`: see [`weight`](@ref) for information.
If `ρ` is a `AbstractVector`, then it should be of length `length(Δ_current) - 1`,
with `ρ[k]` corresponding to the adaptation state for the `k`-th inverse temperature.
# References
[^MIAS12]: Miasojedow, B., Moulines, E., & Vihola, M., Adaptive Parallel Tempering Algorithm, (2012).
"""
function update_inverse_temperatures::AdaptiveState{<:Geometric}, Δ_current)
Δ = similar(Δ_current)
β₀ = Δ_current[1]
Δ[1] = β₀

β = inv(β₀)
forin 1:length(Δ) - 1
# TODO: Is it worth it to do this on log-scale instead?
β *= weight(ρ)
@inbounds Δ[ℓ + 1] = β
end
return Δ
end

function update_inverse_temperatures(ρs::AbstractVector{<:AdaptiveState{<:Geometric}}, Δ_current)
Δ = similar(Δ_current)
N = length(Δ)
@assert length(ρs) N - 1 "number of adaptive states < number of temperatures"

β₀ = Δ_current[1]
Δ[1] = β₀

function adapt_rho::AdaptiveState, swap_ar, n)
swap_diff = swap_ar - ρ.swap_target_ar
γ = get.step, n)
return γ * swap_diff
β = β₀
forin 1:N - 1
# TODO: Is it worth it to do this on log-scale instead?
β *= weight(ρs[ℓ])
@inbounds Δ[ℓ + 1] = β
end
return Δ
end

"""
update_inverse_temperatures(ρ::AdaptiveState{<:InverselyAdditive}, Δ_current)
update_inverse_temperatures(ρ::AbstractVector{<:AdaptiveState{<:InverselyAdditive}}, Δ_current)
Return updated inverse temperatures computed from adaptation state(s) and `Δ_current`.
This update increments the temperature (not _inverse_ temperature) by a positive constant,
which is adapted by `ρ`.
If `ρ` is a `AbstractVector`, then it should be of length `length(Δ_current) - 1`,
with `ρ[k]` corresponding to the adaptation state for the `k`-th inverse temperature.
"""
function update_inverse_temperatures::AdaptiveState{<:InverselyAdditive}, Δ_current)
Δ = similar(Δ_current)
β₀ = Δ_current[1]
Δ[1] = β₀

function adapt_ladder(Ρ, Δ, k, swap_ar, n)
Ρ[k].scale[] += adapt_rho(Ρ[k], swap_ar, n)
return Ρ, rhos_to_ladder(Ρ, Δ)
end
T = inv(β₀)
forin 1:length(Δ) - 1
T += weight(ρ)
@inbounds Δ[ℓ + 1] = inv(T)
end
return Δ
end

function update_inverse_temperatures(ρs::AbstractVector{<:AdaptiveState{<:InverselyAdditive}}, Δ_current)
Δ = similar(Δ_current)
N = length(Δ)
@assert length(ρs) N - 1 "number of adaptive states < number of temperatures"

β₀ = Δ_current[1]
Δ[1] = β₀

T = inv(β₀)
forin 1:N - 1
T += weight(ρs[ℓ])
@inbounds Δ[ℓ + 1] = inv(T)
end
return Δ
end
Loading

0 comments on commit 1fdea5f

Please sign in to comment.