diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c7a374d2..3be3162c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -9,8 +9,6 @@ on: env: PYTHON: "Conda" # use Julia's packaged Conda build for installing packages - CMDSTAN_VERSION: "2.31.0" - CMDSTAN_PATH: "${{ GITHUB.WORKSPACE }}/.cmdstan/" jobs: test: @@ -41,23 +39,11 @@ jobs: ${{ runner.os }}-test-${{ env.cache-name }}- ${{ runner.os }}-test- ${{ runner.os }}- - - name: Cache CmdStan - id: cache-cmdstan - uses: actions/cache@v2 - with: - path: ${{ env.CMDSTAN_PATH }} - key: cmdstan-${{ env.CMDSTAN_VERSION }}-${{ runner.os }} - name: Install wget for windows if: matrix.os == 'windows-latest' uses: crazy-max/ghaction-chocolatey@v2 with: args: install wget - - name: Download and build CmdStan - if: steps.cache-cmdstan.outputs.cache-hit != 'true' - run: | - wget -P ${{ env.CMDSTAN_PATH }} https://github.com/stan-dev/cmdstan/releases/download/v${{ env.CMDSTAN_VERSION }}/cmdstan-${{ env.CMDSTAN_VERSION }}.tar.gz - tar -xzpf ${{ env.CMDSTAN_PATH }}/cmdstan-${{ env.CMDSTAN_VERSION }}.tar.gz -C ${{ env.CMDSTAN_PATH }} - make -C ${{ env.CMDSTAN_PATH }}/cmdstan-${{ env.CMDSTAN_VERSION }}/ build - uses: julia-actions/julia-buildpkg@latest - name: Install ArviZ dependencies run: | @@ -68,8 +54,6 @@ jobs: ArviZ.initialize_pandas() shell: julia --color=yes --project {0} - uses: julia-actions/julia-runtest@latest - env: - JULIA_CMDSTAN_HOME: ${{ env.CMDSTAN_PATH }}/cmdstan-${{ env.CMDSTAN_VERSION }}/ # required by CmdStan.jl - uses: julia-actions/julia-processcoverage@v1 - uses: codecov/codecov-action@v1 with: diff --git a/.github/workflows/futures.yml b/.github/workflows/futures.yml index 841f38a2..d8f438a9 100644 --- a/.github/workflows/futures.yml +++ b/.github/workflows/futures.yml @@ -9,8 +9,6 @@ on: env: PYTHON: "Conda" # use Julia's packaged Conda build for installing packages - CMDSTAN_VERSION: "2.25.0" - CMDSTAN_PATH: "${{ GITHUB.WORKSPACE }}/.cmdstan/" jobs: test: @@ -43,19 +41,6 @@ jobs: ${{ runner.os }}-test-${{ env.cache-name }}- ${{ runner.os }}-test- ${{ runner.os }}- - - name: Cache CmdStan - id: cache-cmdstan - uses: actions/cache@v2 - with: - path: ${{ env.CMDSTAN_PATH }} - key: cmdstan-${{ env.CMDSTAN_VERSION }}-${{ runner.os }} - - name: Download and build CmdStan - if: steps.cache-cmdstan.outputs.cache-hit != 'true' - run: | - wget -P ${{ env.CMDSTAN_PATH }} https://github.com/stan-dev/cmdstan/releases/download/v${{ env.CMDSTAN_VERSION }}/cmdstan-${{ env.CMDSTAN_VERSION }}.tar.gz - tar -xzpf ${{ env.CMDSTAN_PATH }}/cmdstan-${{ env.CMDSTAN_VERSION }}.tar.gz -C ${{ env.CMDSTAN_PATH }} - make -C ${{ env.CMDSTAN_PATH }}/cmdstan-${{ env.CMDSTAN_VERSION }}/ build - shell: bash - uses: julia-actions/julia-buildpkg@latest if: matrix.arviz_version != 'main' || github.event_name == 'create' && startsWith(github.ref, 'refs/tags/v') - name: "Install arviz#main" @@ -79,5 +64,3 @@ jobs: ArviZ.initialize_pandas() shell: julia --color=yes --project {0} - uses: julia-actions/julia-runtest@latest - env: - JULIA_CMDSTAN_HOME: ${{ env.CMDSTAN_PATH }}/cmdstan-${{ env.CMDSTAN_VERSION }}/ # required by CmdStan.jl diff --git a/docs/make.jl b/docs/make.jl index df322f95..d20b65a7 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -40,15 +40,24 @@ doctestfilters = [ r"\s+\"created_at\" => .*", # ignore timestamps in doctests ] +modules = [ + ArviZ, + ArviZExampleData, + InferenceObjects, + InferenceObjectsNetCDF, + MCMCDiagnosticTools, + PSIS, +] +if isdefined(Base, :get_extension) + # using Requires, these docstrings are automatically loaded, but as an extension we need + # to manually specify the module + push!( + modules, Base.get_extension(InferenceObjects, :InferenceObjectsMCMCDiagnosticToolsExt) + ) +end + makedocs(; - modules=[ - ArviZ, - ArviZExampleData, - InferenceObjects, - InferenceObjectsNetCDF, - MCMCDiagnosticTools, - PSIS, - ], + modules, sitename="ArviZ.jl", pages=[ "Home" => "index.md", @@ -71,7 +80,7 @@ makedocs(; sidebar_sitename=false, canonical="stable", ), - doctestfilters=doctestfilters, + doctestfilters, linkcheck=true, analytics="G-W1G68W77YV", ) diff --git a/docs/src/api/data.md b/docs/src/api/data.md index b4e7cfee..7e75d124 100644 --- a/docs/src/api/data.md +++ b/docs/src/api/data.md @@ -7,7 +7,6 @@ Pages = ["data.md"] ## Inference library converters ```@docs -from_cmdstan from_mcmcchains from_samplechains ``` @@ -15,18 +14,10 @@ from_samplechains ## IO / Conversion ```@docs -from_json from_netcdf to_netcdf ``` -## General functions - -```@docs -concat -extract -``` - ## Example data ```@docs diff --git a/docs/src/api/diagnostics.md b/docs/src/api/diagnostics.md index 14b2bd2d..a7dbec71 100644 --- a/docs/src/api/diagnostics.md +++ b/docs/src/api/diagnostics.md @@ -4,15 +4,18 @@ Pages = ["diagnostics.md"] ``` -## Reference +## [Bayesian fraction of missing information](@id bfmi) ```@docs MCMCDiagnosticTools.bfmi +``` + +## [Effective sample size and $\widehat{R}$ diagnostic](@id ess_rhat) + +```@docs MCMCDiagnosticTools.ess -MCMCDiagnosticTools.ess_rhat -MCMCDiagnosticTools.mcse MCMCDiagnosticTools.rhat -MCMCDiagnosticTools.rstar +MCMCDiagnosticTools.ess_rhat ``` The following autocovariance methods are supported: @@ -22,3 +25,15 @@ MCMCDiagnosticTools.AutocovMethod MCMCDiagnosticTools.FFTAutocovMethod MCMCDiagnosticTools.BDAAutocovMethod ``` + +## [Monte Carlo standard error](@id mcse) + +```@docs +MCMCDiagnosticTools.mcse +``` + +## [$R^*$ diagnostic](@id rstar) + +```@docs +MCMCDiagnosticTools.rstar +``` diff --git a/docs/src/working_with_inference_data.md b/docs/src/working_with_inference_data.md index 9b08143a..054b431c 100644 --- a/docs/src/working_with_inference_data.md +++ b/docs/src/working_with_inference_data.md @@ -221,7 +221,7 @@ school_bis_idx = ["Deerfield", "Choate", "Lawrenceville"] theta_school_diff[school=At(school_idx), school_bis=At(school_bis_idx)] ``` -## Add new chains using `concat` +## Add new chains using `cat` Suppose after checking the `mcse` and realizing you need more samples, you rerun the model with two chains and obtain an `idata_rerun` object. @@ -229,8 +229,8 @@ Suppose after checking the `mcse` and realizing you need more samples, you rerun idata_rerun = InferenceData(; posterior=set(post[chain=At([0, 1])]; chain=[4, 5])) ``` -You can combine the two using [`concat`](@ref). +You can combine the two using [`cat`](@ref). ```@example wwid -concat(idata[[:posterior]], idata_rerun; dim=:chain) +cat(idata[[:posterior]], idata_rerun; dims=:chain) ``` diff --git a/src/ArviZ.jl b/src/ArviZ.jl index 81eccefd..eeb03c60 100644 --- a/src/ArviZ.jl +++ b/src/ArviZ.jl @@ -93,9 +93,6 @@ export summarystats, compare, hdi, loo, loo_pit, r2_score, waic export MCMCDiagnosticTools, AutocovMethod, FFTAutocovMethod, BDAAutocovMethod export bfmi, ess, ess_rhat, mcse, rhat, rstar -## Stats utils -export autocov, autocorr, make_ufunc, wrap_xarray_ufunc - ## InferenceObjects export InferenceObjects, Dataset, @@ -113,7 +110,7 @@ export InferenceObjectsNetCDF, from_netcdf, to_netcdf export ArviZExampleData, describe_example_data, load_example_data ## Data -export extract, from_json, from_cmdstan, from_mcmcchains, from_samplechains, concat +export from_mcmcchains, from_samplechains ## rcParams export rcParams, with_rc_context @@ -149,9 +146,7 @@ end include("utils.jl") include("rcparams.jl") include("xarray.jl") -include("data.jl") include("plots.jl") include("stats.jl") -include("stats_utils.jl") end # module diff --git a/src/data.jl b/src/data.jl deleted file mode 100644 index e52c78fb..00000000 --- a/src/data.jl +++ /dev/null @@ -1,23 +0,0 @@ -@forwardfun extract -convert_result(::typeof(extract), result, args...) = convert(Dataset, result) - -function convert_to_inference_data(filename::AbstractString; kwargs...) - return from_netcdf(filename) -end - -@forwardfun from_json -@forwardfun from_beanmachine -@forwardfun from_cmdstan -@forwardfun from_cmdstanpy -@forwardfun from_emcee -@forwardfun from_pyro -@forwardfun from_numpyro -@forwardfun from_pystan - -@doc forwarddoc(:concat) concat - -function concat(data::InferenceData...; kwargs...) - return arviz.concat(data...; inplace=false, kwargs...) -end - -Docs.getdoc(::typeof(concat)) = forwardgetdoc(:concat) diff --git a/src/mcmcchains.jl b/src/mcmcchains.jl index 828b7f38..231fd098 100644 --- a/src/mcmcchains.jl +++ b/src/mcmcchains.jl @@ -269,12 +269,3 @@ function from_mcmcchains( return all_idata end - -""" - from_cmdstan(posterior::Chains; kwargs...) -> InferenceData - -Call [`from_mcmcchains`](@ref) on output of `CmdStan`. -""" -function from_cmdstan(posterior::Chains; kwargs...) - return from_mcmcchains(posterior; library="CmdStan", kwargs...) -end diff --git a/src/stats_utils.jl b/src/stats_utils.jl deleted file mode 100644 index 7c665df0..00000000 --- a/src/stats_utils.jl +++ /dev/null @@ -1,4 +0,0 @@ -@forwardfun autocov -@forwardfun autocorr -@forwardfun make_ufunc -@forwardfun wrap_xarray_ufunc diff --git a/test/Project.toml b/test/Project.toml index cfe7d683..c7478f50 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -9,7 +9,6 @@ PyPlot = "d330b81b-6aea-500a-939a-2ce795aea3ee" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SampleChains = "754583d1-7fc4-4dab-93b5-5eaca5c9622e" SampleChainsDynamicHMC = "6d9fd711-e8b2-4778-9c70-c1dfb499d4c4" -StanSample = "c1514b29-d3a0-5178-b312-660c88baa699" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" @@ -23,4 +22,3 @@ PyCall = "1.91.2" PyPlot = "2.8.2" SampleChains = "0.5" SampleChainsDynamicHMC = "0.3" -StanSample = "6, 7" diff --git a/test/runtests.jl b/test/runtests.jl index 49eac165..d3378cdd 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -5,7 +5,6 @@ using Test include("helpers.jl") include("test_rcparams.jl") include("test_utils.jl") - include("test_data.jl") include("test_stats.jl") include("test_plots.jl") include("test_samplechains.jl") diff --git a/test/test_data.jl b/test/test_data.jl deleted file mode 100644 index b4506149..00000000 --- a/test/test_data.jl +++ /dev/null @@ -1,32 +0,0 @@ -using ArviZ, DimensionalData, Test - -@testset "extract" begin - idata = random_data() - post = extract(idata, :posterior; combined=false) - for k in keys(idata.posterior) - @test haskey(post, k) - @test post[k] ≈ idata.posterior[k] - dims = DimensionalData.dims(post) - dims_exp = DimensionalData.dims(idata.posterior) - @test DimensionalData.name(dims) === DimensionalData.name(dims_exp) - @test DimensionalData.index(dims) == DimensionalData.index(dims_exp) - end - prior = extract(idata, :prior; combined=false) - for k in keys(idata.prior) - @test haskey(prior, k) - @test prior[k] ≈ idata.prior[k] - dims = DimensionalData.dims(prior) - dims_exp = DimensionalData.dims(idata.prior) - @test DimensionalData.name(dims) === DimensionalData.name(dims_exp) - @test DimensionalData.index(dims) == DimensionalData.index(dims_exp) - end -end - -@testset "concat" begin - data = random_data() - idata1 = InferenceData(; posterior=data.posterior) - idata2 = InferenceData(; prior=data.prior) - new_idata1 = concat(idata1, idata2) - new_idata2 = InferenceData(; posterior=data.posterior, prior=data.prior) - test_idata_approx_equal(new_idata1, new_idata2) -end diff --git a/test/test_mcmcchains.jl b/test/test_mcmcchains.jl index 030469b3..17579992 100644 --- a/test/test_mcmcchains.jl +++ b/test/test_mcmcchains.jl @@ -2,39 +2,7 @@ using Test using ArviZ using DimensionalData using MCMCChains: MCMCChains -using OrderedCollections, StanSample - -const noncentered_schools_stan_model = """ - data { - int J; - real y[J]; - real sigma[J]; - } - parameters { - real mu; - real tau; - real eta[J]; - } - transformed parameters { - real theta[J]; - for (j in 1:J) - theta[j] = mu + tau * eta[j]; - } - model { - mu ~ normal(0, 5); - tau ~ cauchy(0, 5); - eta ~ normal(0, 1); - y ~ normal(theta, sigma); - } - generated quantities { - vector[J] log_lik; - vector[J] y_hat; - for (j in 1:J) { - log_lik[j] = normal_lpdf(y[j] | theta[j], sigma[j]); - y_hat[j] = normal_rng(theta[j], sigma[j]); - } - } -""" +using OrderedCollections function makechains( names, ndraws, nchains, domains=[Float64 for _ in names]; seed=42, internal_names=[] @@ -52,17 +20,6 @@ function makechains(nvars::Int, args...; kwargs...) return makechains(names, args...; kwargs...) end -function stan_noncentered_schools(data, draws, chains; tmpdir=mktempdir()) - model_name = "school8" - stan_model = SampleModel(model_name, noncentered_schools_stan_model, tmpdir) - _ = stan_sample( - stan_model; data=data, num_chains=chains, num_samples=draws, summary=false - ) - chns = read_samples(stan_model, :mcmcchains; include_internals=true) - outfiles = ["$(stan_model.output_base)_chain_$(i).csv" for i in 1:chains] - return (model=stan_model, files=outfiles, chains=chns) -end - function test_chains_data(chns, idata, group, names=names(chns); coords=(;), dims=(;)) ndraws, nvars, nchains = size(chns) @test idata isa InferenceData @@ -336,48 +293,3 @@ end @test ArviZ.summary(chn) !== nothing end - -Sys.iswindows() || VERSION < v"1.8" || @testset "from_cmdstan" begin - data = noncentered_schools_data() - mktempdir() do path - output = stan_noncentered_schools(data, 500, 4; tmpdir=path) - posterior_predictive = prior_predictive = [:y_hat] - log_likelihood = :log_lik - coords = (school=1:8,) - dims = ( - theta=[:school], y=[:school], log_lik=[:school], y_hat=[:school], eta=[:school] - ) - idata1 = from_cmdstan( - output.chains; - posterior_predictive, - log_likelihood, - prior=output.chains, - prior_predictive, - coords, - dims, - ) - idata2 = from_cmdstan( - output.files; - posterior_predictive, - log_likelihood, - prior=output.files, - prior_predictive, - coords=Dict(pairs(coords)), - dims=Dict(pairs(dims)), - ) - @testset "idata.$(group)" for group in ArviZ.groupnames(idata2) - ds1 = idata1[group] - ds2 = idata2[group] - - for var_name in keys(ds1) - da1 = ds1[var_name] - da2 = ds2[var_name] - if ndims(da1) == 3 - @test da1 ≈ permutedims(da2, (:draw, :chain, :school)) - else - @test da1 ≈ permutedims(da2, (:draw, :chain)) - end - end - end - end -end