TuringLang · yebai · Dec 4, 2022 · Sep 11, 2021 · Oct 4, 2021 · Oct 4, 2021
diff --git a/Project.toml b/Project.toml
@@ -5,16 +5,25 @@ version = "0.1.1"
 
 [deps]
 AbstractMCMC = "80f14c24-f653-4e6a-9b94-39d6b0f70001"
+ConcreteStructs = "2569d6c7-a4a2-43d3-a901-331e8e4be471"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+Setfield = "efcf1570-3423-57d1-acb7-fd33fddbac46"
 
 [compat]
 AbstractMCMC = "3.2"
+ConcreteStructs = "0.2"
 Distributions = "0.24, 0.25"
+Setfield = "0.7, 0.8, 1"
 julia = "1"
 
 [extras]
+AdvancedMH = "5b7e9947-ddc0-4b3f-9b55-0d8042f74170"
+Bijectors = "76274a88-744f-5084-9051-94815aaf08c4"
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d"
+StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Test"]
+test = ["Test", "AdvancedMH", "MCMCChains", "Bijectors", "StatsPlots", "LinearAlgebra"]
diff --git a/src/MCMCTempering.jl b/src/MCMCTempering.jl
@@ -4,12 +4,15 @@ import AbstractMCMC
 import Distributions
 import Random
 
+using ConcreteStructs: @concrete
+using Setfield: @set, @set!
+
 include("adaptation.jl")
+include("swapping.jl")
 include("tempered.jl")
 include("ladders.jl")
 include("stepping.jl")
 include("model.jl")
-include("swapping.jl")
 include("plotting.jl")
 
 export tempered, TemperedSampler, plot_swaps, plot_ladders, make_tempered_model, get_tempered_loglikelihoods_and_params, make_tempered_loglikelihood, get_params

diff --git a/src/stepping.jl b/src/stepping.jl
@@ -41,19 +41,16 @@ Chains:        chain_index:     Δ_index:
 | | | |        2  3  1  4       3  1  2  4
 | | | |  
 """
-mutable struct TemperedState
-    states          :: Array{Any}
-    Δ               :: Vector{<:Real}
-    Δ_index         :: Vector{<:Integer}
-    chain_index     :: Vector{<:Integer}
-    step_counter    :: Integer
-    total_steps     :: Integer
-    Δ_history       :: Array{<:Real, 2}
-    Δ_index_history :: Array{<:Integer, 2}
-    Ρ               :: Vector{AdaptiveState}
+@concrete struct TemperedState
+    states        # per-chain `(transition, state)` results of `AbstractMCMC.step`
+    Δ             # ladder of inverse temperatures `β`
+    Δ_index       # `Δ_index[i]` is the index into `Δ` used to temper chain `i`
+    chain_index   # `chain_index[k]` is the chain whose entry in `states` is read for the `k`th entry of `Δ`
+    step_counter  # compared against `spl.N_swap` to trigger a swap step; reset to 0 after one
+    total_steps   # incremented on every call to `AbstractMCMC.step` that receives this state
+    Ρ             # adaptation state passed to, and replaced by the output of, `adapt_ladder`
 end
 
-
 """
 For each `β` in `Δ`, carry out a step with a tempered model at the corresponding `β` inverse temperature,
 resulting in a list of transitions and states, the transition associated with `β₀ = 1` is then returned with the
@@ -63,21 +60,23 @@ function AbstractMCMC.step(
     rng::Random.AbstractRNG,
     model,
     spl::TemperedSampler;
+    init_params=nothing,  # optional collection indexed per chain, see `init_params[i]` below
     kwargs...
 )
     states = [
         AbstractMCMC.step(
             rng,
             make_tempered_model(model, spl.Δ[spl.Δ_init[i]]),
             spl.internal_sampler;
+            init_params=init_params !== nothing ? init_params[i] : nothing,
             kwargs...
         )
         for i in 1:length(spl.Δ)
     ]
     return (
-        states[sortperm(spl.Δ_init)[1]][1],
+        first(states[argmin(spl.Δ_init)]),  # chain tempered with `Δ[1]`, i.e. `sortperm(spl.Δ_init)[1]`
         TemperedState(
-            states,spl.Δ, spl.Δ_init, sortperm(spl.Δ_init), 1, 1, Array{Real, 2}(spl.Δ'), Array{Integer, 2}(spl.Δ_init'), spl.Ρ
+            states, spl.Δ, spl.Δ_init, sortperm(spl.Δ_init), 1, 1, spl.Ρ
         )
     )
 end
@@ -90,30 +89,29 @@ function AbstractMCMC.step(
 )
     if ts.step_counter == spl.N_swap
         ts = swap_step(rng, model, spl, ts)
-        ts.step_counter = 0
+        @set! ts.step_counter = 0
     else
-        ts.states = [
+        @set! ts.states = [  # chain `i` continues from its own state under its current `Δ[Δ_index[i]]`
             AbstractMCMC.step(
                 rng,
                 make_tempered_model(model, ts.Δ[ts.Δ_index[i]]),
                 spl.internal_sampler,
                 ts.states[i][2];
                 kwargs...
             )
             for i in 1:length(ts.Δ)
         ]
-        ts.step_counter += 1
+        @set! ts.step_counter += 1
     end
 
-    ts.Δ_history = vcat(ts.Δ_history, Array{Real, 2}(ts.Δ'))
-    ts.Δ_index_history = vcat(ts.Δ_index_history, Array{Integer, 2}(ts.Δ_index'))
-    ts.total_steps += 1
-    return ts.states[ts.chain_index[1]][1], ts  # Use chain_index[1] to ensure the sample from the target is always returned for the step
+    @set! ts.total_steps += 1
+    # Use `chain_index[1]` to ensure the sample from the target is always returned for the step.
+    return ts.states[ts.chain_index[1]][1], ts
 end
 
 
 """
-    swap_step(rng, model, spl, ts)
+    swap_step([strategy::AbstractSwapStrategy, ]rng, model, spl, ts)
 
 Uses the internals of the passed `TemperedSampler` - `spl` - and `TemperedState` -
 `ts` - to perform a "swap step" between temperatures, in accordance with the relevant
@@ -122,35 +120,60 @@ swap strategy.
 function swap_step(
     rng::Random.AbstractRNG,
     model,
-    spl::TemperedSampler,
+    sampler::TemperedSampler,
+    ts::TemperedState
+)
+    return swap_step(swapstrategy(sampler), rng, model, sampler, ts)  # dispatch on the strategy's type
+end
+
+function swap_step(
+    strategy::StandardSwap,
+    rng::Random.AbstractRNG,
+    model,
+    sampler::TemperedSampler,
     ts::TemperedState
 )
     L = length(ts.Δ) - 1
-    sampler = spl.internal_sampler
+    k = rand(rng, 1:L)  # uniformly pick the lower level of the adjacent pair `(k, k + 1)`
+    return swap_attempt(rng, model, sampler.internal_sampler, ts, k, sampler.adapt, ts.total_steps / L)
+end
 
-    if spl.swap_strategy == :standard
+function swap_step(
+    strategy::RandomPermutationSwap,
+    rng::Random.AbstractRNG,
+    model,
+    sampler::TemperedSampler,
+    ts::TemperedState
+)
+    L = length(ts.Δ) - 1
+    levels = Vector{Int}(undef, L)
+    Random.randperm!(rng, levels)  # fill `levels` with a random permutation of `1:L`
 
-        k = rand(rng, Distributions.Categorical(L))  # Pick randomly from 1, 2, ..., k - 1
-        ts = swap_attempt(model, sampler, ts, k, spl.adapt, ts.total_steps / L)
+    # Attempt a swap between levels `k` and `k + 1` for every `k`, in the shuffled order.
+    for k in levels
+        ts = swap_attempt(rng, model, sampler.internal_sampler, ts, k, sampler.adapt, ts.total_steps)
+    end
+    return ts
+end
 
+function swap_step(
+    strategy::NonReversibleSwap,
+    rng::Random.AbstractRNG,
+    model,
+    sampler::TemperedSampler,
+    ts::TemperedState
+)
+    L = length(ts.Δ) - 1
+    # `step` swaps once per `N_swap + 1` calls, so the `cld` is the swap number; odd ones take odd levels.
+    levels = if isodd(cld(ts.total_steps, sampler.N_swap + 1))
+        1:2:L
     else
+        2:2:L
+    end
 
-        # Define a vector to populate with levels at which to propose swaps according to swap_strategy
-        levels = Vector{Int}(undef, L)
-        if spl.swap_strategy == :nonrev
-            if ts.step_counter % (2 * spl.N_swap) == 0
-                levels = 1:2:L
-            else
-                levels = 2:2:L
-            end
-        elseif spl.swap_strategy == :randperm
-            randperm!(rng, levels)
-        end
-
-        for k in levels
-            ts = swap_attempt(model, sampler, ts, k, spl.adapt, ts.total_steps)
-        end
-
+    # Iterate through all levels and attempt swaps.
+    for k in levels
+        ts = swap_attempt(rng, model, sampler.internal_sampler, ts, k, sampler.adapt, ts.total_steps)
     end
     return ts
 end
diff --git a/src/swapping.jl b/src/swapping.jl
@@ -1,3 +1,50 @@
+"""
+    AbstractSwapStrategy
+
+Represents a strategy for swapping between parallel chains.
+
+A concrete subtype is expected to implement [`swap_step`](@ref) with the strategy as its first argument, i.e. `swap_step(strategy, rng, model, sampler, ts)`.
+"""
+abstract type AbstractSwapStrategy end
+
+"""
+    StandardSwap <: AbstractSwapStrategy
+
+At every swap step taken, this strategy uniformly samples a single index `i` and proposes
+a swap between the chains currently at entries `i` and `i + 1` of the temperature ladder.
+
+This approach goes under a number of names, e.g. Parallel Tempering (PT) MCMC and Replica-Exchange MCMC.[^PTPH05]
+
+The sampling of the chain index ensures reversibility/detailed balance is satisfied.
+
+# References
+[^PTPH05]: Earl, D. J., & Deem, M. W., Parallel tempering: theory, applications, and new perspectives, Physical Chemistry Chemical Physics, 7(23), 3910–3916 (2005).
+"""
+struct StandardSwap <: AbstractSwapStrategy end
+
+"""
+    RandomPermutationSwap <: AbstractSwapStrategy
+
+At every swap step taken, this strategy randomly shuffles the ladder indices (all but the last)
+and then iterates through them, proposing for each index `i` a swap with its neighbor `i + 1`.
+
+The shuffling of chain indices ensures reversibility/detailed balance is satisfied.
+"""
+struct RandomPermutationSwap <: AbstractSwapStrategy end
+
+
+"""
+    NonReversibleSwap <: AbstractSwapStrategy
+
+At every swap step taken, this strategy _deterministically_ traverses first the
+odd chain indices, proposing swaps between neighbors, and then in the _next_ swap step
+taken traverses even chain indices, proposing swaps between neighbors.
+
+Note that this method is _not_ reversible, i.e. it does not satisfy detailed balance.
+Every sweep still only applies accept/reject swap moves, so this by itself does not imply bias.
+"""
+struct NonReversibleSwap <: AbstractSwapStrategy end
+
 """
     swap_betas(chain_index, k)
 
@@ -9,49 +56,24 @@ function swap_betas(chain_index, k)
     return sortperm(chain_index), chain_index
 end
 
-function make_tempered_loglikelihood end
-function get_params end
-
 
 """
-    get_tempered_loglikelihoods_and_params(model, sampler, states, k, Δ, chain_index)
+    compute_tempered_logdensities(model, sampler, transition, transition_other, β)
 
-Temper the `model`'s density using the `k`th and `k + 1`th temperatures 
-selected via `Δ` and `chain_index`. Then retrieve the parameters using the chains'
-current transitions extracted from the collection of `states`.
+Return `(logπ(transition, β), logπ(transition_other, β))` where `logπ(x, β)` denotes the
+log-density for `model` with inverse-temperature `β`.
 """
-function get_tempered_loglikelihoods_and_params(
-    model,
-    sampler::AbstractMCMC.AbstractSampler,
-    states,
-    k::Integer,
-    Δ::Vector{Real},
-    chain_index::Vector{<:Integer}
-)
-
-    logπk = make_tempered_loglikelihood(model, Δ[k])
-    logπkp1 = make_tempered_loglikelihood(model, Δ[k + 1])
-
-    θk = get_params(states[chain_index[k]][1])
-    θkp1 = get_params(states[chain_index[k + 1]][1])
-
-    return logπk, logπkp1, θk, θkp1
-end
-
+function compute_tempered_logdensities end  # declares the generic function only; `swap_attempt` calls it with the internal sampler
 
 """
-    swap_acceptance_pt(logπk, logπkp1, θk, θkp1)
+    swap_acceptance_pt(logπk_θk, logπk_θkp1, logπkp1_θk, logπkp1_θkp1)
 
-Calculates and returns the swap acceptance ratio for swapping the temperature
-of two chains. Using tempered likelihoods `logπk` and `logπkp1` at the chains'
-current state parameters `θk` and `θkp1`.
+Calculates and returns the logarithm of the swap acceptance ratio for swapping the temperature
+of two chains (not clamped, so the result may be positive). Each `logπa_θb` is the log-density
+tempered at level `a`, evaluated at the current state parameters of the chain at level `b`.
 """
-function swap_acceptance_pt(logπk, logπkp1, θk, θkp1)
-    return min(
-        1,
-        exp(logπkp1(θk) + logπk(θkp1)) / exp(logπk(θk) + logπkp1(θkp1))
-        # exp(abs(βk - βkp1) * abs(AdvancedMH.logdensity(model, samplek) - AdvancedMH.logdensity(model, samplekp1)))
-    )
+function swap_acceptance_pt(logπk_θk, logπk_θkp1, logπkp1_θk, logπkp1_θkp1)
+    return (logπkp1_θk + logπk_θkp1) - (logπk_θk + logπkp1_θkp1)
 end
 
 
@@ -61,21 +83,32 @@ end
 Attempt to swap the temperatures of two chains by tempering the densities and
 calculating the swap acceptance ratio; then swapping if it is accepted.
 """
-function swap_attempt(model, sampler, ts, k, adapt, n)
-
-    logπk, logπkp1, θk, θkp1 = get_tempered_loglikelihoods_and_params(model, sampler, ts.states, k, ts.Δ, ts.chain_index)
+function swap_attempt(rng, model, sampler, ts, k, adapt, n)
+    # Extract the transitions of the chains currently at levels `k` and `k + 1`.
+    transitionk = first(ts.states[ts.chain_index[k]])
+    transitionkp1 = first(ts.states[ts.chain_index[k + 1]])
+    # Evaluate both transitions under each tempered density; the second call swaps the argument order.
+    logπk_θk, logπk_θkp1 = compute_tempered_logdensities(
+        model, sampler, transitionk, transitionkp1, ts.Δ[k]
+    )
+    logπkp1_θkp1, logπkp1_θk = compute_tempered_logdensities(
+        model, sampler, transitionkp1, transitionk, ts.Δ[k + 1]
+    )
 
-    swap_ar = swap_acceptance_pt(logπk, logπkp1, θk, θkp1)
-    U = rand(Distributions.Uniform(0, 1))
-
-    # If the proposed temperature swap is accepted according to swap_ar and U, swap the temperatures for future steps
-    if U ≤ swap_ar
-        ts.Δ_index, ts.chain_index = swap_betas(ts.chain_index, k)
+    # If the proposed temperature swap is accepted according to `logα`,
+    # swap the temperatures for future steps.
+    logα = swap_acceptance_pt(logπk_θk, logπk_θkp1, logπkp1_θk, logπkp1_θkp1)
+    if -Random.randexp(rng) ≤ logα  # `-randexp(rng)` is distributed as `log(U)` with `U ~ Uniform(0, 1)`
+        Δ_index, chain_index = swap_betas(ts.chain_index, k)
+        @set! ts.Δ_index = Δ_index
+        @set! ts.chain_index = chain_index
     end
 
     # Adaptation steps affects Ρ and Δ, as the Ρ is adapted before a new Δ is generated and returned
     if adapt
-        ts.Ρ, ts.Δ = adapt_ladder(ts.Ρ, ts.Δ, k, swap_ar, n)
+        P, Δ = adapt_ladder(ts.Ρ, ts.Δ, k, min(one(logα), exp(logα)), n)  # pass the clamped probability, not its log
+        @set! ts.Ρ = P
+        @set! ts.Δ = Δ
     end
     return ts
-end
+end