This repository has been archived by the owner on Nov 1, 2024. It is now read-only.

Commit

changed all sampling statements NUTS() (#31)
* changed all sampling statements NUTS()

* Update turing/02-linear_regression-kidiq.jl
storopoli authored Sep 9, 2022
1 parent d873a48 commit 44e5bec
Showing 20 changed files with 289 additions and 315 deletions.
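Throughout the diff below, calls of the form sample(model, NUTS(), MCMCThreads(), 2_000, 4) become sample(model, NUTS(1_000, 0.8), MCMCThreads(), 1_000, 4). In Turing.jl's NUTS(n_adapts, target_accept) constructor the first positional argument is the number of adaptation (warmup) steps and the second is the target acceptance rate; NUTS() falls back to the defaults. A minimal runnable sketch with a placeholder model (not taken from the repository):

# sketch only: placeholder model to show the two sampler constructors
using Turing

@model function demo(x)
    μ ~ Normal(0, 1)
    x ~ Normal(μ, 1)
end

model = demo(1.0)

# before this commit: default adaptation settings, 2_000 kept draws per chain
chn_old = sample(model, NUTS(), MCMCThreads(), 2_000, 4)

# after this commit: 1_000 warmup (adaptation) steps, target acceptance 0.8, 1_000 kept draws per chain
chn_new = sample(model, NUTS(1_000, 0.8), MCMCThreads(), 1_000, 4)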
2 changes: 1 addition & 1 deletion slides/11-MCMC.tex
@@ -1039,7 +1039,7 @@ \subsubsection{What To Do If the Markov Chains Do Not Converge?}
% y ~ Normal(0, 3)
% x ~ Normal(0, exp(y/2))
% end
%chn = sample(funnel_cp(), NUTS(), MCMCThreads(), 2_000, 4)
%chn = sample(funnel_cp(), NUTS(1_000, 0.8), MCMCThreads(), 1_000, 4)
\begin{lstlisting}[basicstyle=\footnotesize]
Summary Statistics
parameters mean std naive_se mcse ess rhat ess_per_sec
Binary file modified slides/slides.pdf
50 changes: 24 additions & 26 deletions turing/02-linear_regression-kidiq.jl
@@ -29,24 +29,23 @@ y = standardize(ZScoreTransform, y; dims=1)

# likelihood
y ~ MvNormal(α .+ X * β, σ^2 * I)
return(; y, α, β, σ)
return (; y, α, β, σ)
end

# instantiate the model
model = linear_regression(X, y)

# sample with NUTS, 4 multi-threaded parallel chains, and 2k iters
chn = sample(model, NUTS(), MCMCThreads(), 2_000, 4)
# sample with NUTS, 4 multi-threaded parallel chains, and 2k iters with 1k warmup
chn = sample(model, NUTS(1_000, 0.8), MCMCThreads(), 1_000, 4)

# results:
# parameters mean std naive_se mcse ess rhat ess_per_sec
# Symbol Float64 Float64 Float64 Float64 Float64 Float64 Float64
#
# α 0.0003 0.0431 0.0005 0.0004 13310.0844 0.9997 392.4195
# β[1] 0.1138 0.0463 0.0005 0.0005 9252.6086 0.9997 272.7935
# β[2] 0.4138 0.0447 0.0005 0.0005 9399.6572 0.9999 277.1289
# β[3] 0.0298 0.0430 0.0005 0.0004 11093.6757 1.0001 327.0734
# σ 0.8913 0.0304 0.0003 0.0003 12746.3192 0.9997 375.7981
# parameters mean std naive_se mcse ess rhat ess_per_sec
# Symbol Float64 Float64 Float64 Float64 Float64 Float64 Float64
# α 0.0012 0.0426 0.0007 0.0006 4569.6175 0.9996 134.4955
# β[1] 0.1154 0.0446 0.0007 0.0005 4613.5210 0.9993 135.7876
# β[2] 0.4129 0.0452 0.0007 0.0005 4378.9035 0.9995 128.8823
# β[3] 0.0302 0.0447 0.0007 0.0006 5270.6517 0.9995 155.1287
# σ 0.8914 0.0316 0.0005 0.0004 4606.7431 1.0005 135.5882

# QR Decomposition
Q, R = qr(X)
@@ -57,23 +56,22 @@ R_ast = R / sqrt(size(X, 1) - 1)
# instantiate the model
model_qr = linear_regression(Q_ast, y)

# sample with NUTS, 4 multi-threaded parallel chains, and 2k iters
chn_qr = sample(model_qr, NUTS(), MCMCThreads(), 2_000, 4)
# sample with NUTS, 4 multi-threaded parallel chains, and 2k iters with 1k warmup
chn_qr = sample(model_qr, NUTS(1_000, 0.8), MCMCThreads(), 1_000, 4)

# reconstruct β back from the Q_ast scale into X scale
betas = mapslices(x -> R_ast^-1 * x, chn_qr[:, namesingroup(chn_qr, :β),:].value.data, dims=[2])
chain_beta = setrange(Chains(betas, ["real_β[$i]" for i in 1:size(Q_ast, 2)]), 1_001:1:3_000)
betas = mapslices(x -> R_ast^-1 * x, chn_qr[:, namesingroup(chn_qr, :β), :].value.data, dims=[2])
chain_beta = setrange(Chains(betas, ["real_β[$i]" for i in 1:size(Q_ast, 2)]), 1_001:1:2_000)
chn_qr_reconstructed = hcat(chain_beta, chn_qr)

# results:
# parameters mean std naive_se mcse ess rhat
# Symbol Float64 Float64 Float64 Float64 Float64 Float64
#
# real_β[1] 0.1136 0.0455 0.0005 0.0004 13883.9215 0.9999
# real_β[2] 0.4134 0.0446 0.0005 0.0004 12017.9704 1.0000
# real_β[3] 0.0304 0.0439 0.0005 0.0003 14301.8467 0.9996
# α 0.0001 0.0432 0.0005 0.0004 12634.6117 0.9997
# β[1] -0.2369 0.0425 0.0005 0.0004 13464.0249 0.9999
# β[2] 0.3975 0.0428 0.0005 0.0004 12020.5203 1.0000
# β[3] -0.0297 0.0428 0.0005 0.0003 14301.8467 0.9996
# σ 0.8905 0.0304 0.0003 0.0002 14061.4641 0.9998
# parameters mean std naive_se mcse ess rhat
# Symbol Float64 Float64 Float64 Float64 Float64 Float64
# real_β[1] 0.1134 0.0448 0.0007 0.0006 4200.1480 0.9995
# real_β[2] 0.4145 0.0443 0.0007 0.0006 4418.2596 0.9995
# real_β[3] 0.0304 0.0445 0.0007 0.0006 4541.7050 0.9996
# α 0.0000 0.0434 0.0007 0.0006 4875.4944 1.0004
# β[1] -0.2371 0.0423 0.0007 0.0006 4203.4585 0.9994
# β[2] 0.3985 0.0424 0.0007 0.0006 4408.2610 0.9995
# β[3] -0.0297 0.0434 0.0007 0.0006 4541.7050 0.9996
# σ 0.8915 0.0309 0.0005 0.0005 4251.5506 0.9993
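A note on the QR block above: with the thin QR decomposition X = Q R, the scaled factors still satisfy X = Q_ast * R_ast, so coefficients sampled on the Q_ast scale map back to the X scale through R_ast^-1, which is exactly what the mapslices call applies to every posterior draw. A small self-contained check, assuming the usual scaling Q_ast = Matrix(Q) * sqrt(n - 1) to match the R_ast shown in the hunk:

# sketch only: why R_ast^-1 recovers the X-scale coefficients
using LinearAlgebra

X = randn(100, 3)                        # hypothetical design matrix
Q, R = qr(X)
Q_ast = Matrix(Q) * sqrt(size(X, 1) - 1) # assumed scaling, matching R_ast below
R_ast = R / sqrt(size(X, 1) - 1)

@assert Q_ast * R_ast ≈ X                # X is recovered, so X * β == Q_ast * (R_ast * β)

β = [0.1, 0.4, 0.0]                      # hypothetical coefficients on the X scale
β_qr = R_ast * β                         # what the sampler sees on the Q_ast scale
@assert R_ast^-1 * β_qr ≈ β              # the mapslices line undoes this, draw by draw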
23 changes: 11 additions & 12 deletions turing/03-logistic_regression-wells.jl
@@ -21,29 +21,28 @@ X = standardize(ZScoreTransform, X; dims=1)
y = wells[:, :switch]

# define the model
@model function logistic_regression(X, y; predictors=size(X, 2))
@model function logistic_regression(X, y; predictors=size(X, 2))
# priors
α ~ TDist(3) * 2.5
β ~ filldist(TDist(3) * 2.5, predictors)

# likelihood
y ~ arraydist(LazyArray(@~ BernoulliLogit.(α .+ X * β)))
# you could also do BinomialLogit(n, logitp) if you can group the successes
return(; y, α, β)
return (; y, α, β)
end

# instantiate the model
model = logistic_regression(X, y)

# sample with NUTS, 4 multi-threaded parallel chains, and 2k iters
chn = sample(model, NUTS(), MCMCThreads(), 2_000, 4)
# sample with NUTS, 4 multi-threaded parallel chains, and 2k iters with 1k warmup
chn = sample(model, NUTS(1_000, 0.8), MCMCThreads(), 1_000, 4)

# results:
# parameters mean std naive_se mcse ess rhat ess_per_sec
# Symbol Float64 Float64 Float64 Float64 Float64 Float64 Float64
#
# α 0.3370 0.0386 0.0004 0.0003 14018.6701 0.9999 218.7341
# β[1] 0.5176 0.0468 0.0005 0.0004 12269.1734 0.9998 191.4366
# β[2] -0.3453 0.0407 0.0005 0.0003 13928.3031 0.9997 217.3241
# β[3] -0.0615 0.0377 0.0004 0.0003 14014.8302 0.9999 218.6742
# β[4] 0.1703 0.0383 0.0004 0.0003 13054.2746 0.9999 203.6866
# parameters mean std naive_se mcse ess rhat ess_per_sec
# Symbol Float64 Float64 Float64 Float64 Float64 Float64 Float64
# α 0.3363 0.0382 0.0006 0.0006 4832.5931 1.0005 225.3903
# β[1] 0.5178 0.0460 0.0007 0.0007 3946.2992 0.9999 184.0539
# β[2] -0.3458 0.0406 0.0006 0.0006 4342.0380 0.9993 202.5110
# β[3] -0.0607 0.0381 0.0006 0.0005 5132.8763 0.9993 239.3954
# β[4] 0.1693 0.0384 0.0006 0.0005 4088.2092 0.9992 190.6725
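The comment in the hunk above notes that BinomialLogit(n, logitp) works when the successes can be grouped. A minimal sketch of that variant, keeping the same priors and LazyArray pattern, with hypothetical grouped data (n[i] trials and y[i] successes per covariate pattern):

# sketch only: grouped-successes variant of the model above (hypothetical data layout)
using Turing, LazyArrays

@model function grouped_logistic_regression(X, n, y; predictors=size(X, 2))
    # priors
    α ~ TDist(3) * 2.5
    β ~ filldist(TDist(3) * 2.5, predictors)

    # likelihood: y[i] successes out of n[i] trials
    y ~ arraydist(LazyArray(@~ BinomialLogit.(n, α .+ X * β)))
    return (; y, α, β)
end

# model = grouped_logistic_regression(X_grouped, trials, successes)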
21 changes: 10 additions & 11 deletions turing/04-ordinal_regression-esoph.jl
@@ -24,7 +24,7 @@ transform!(
x -> categorical(x; levels=["0-39g/day", "40-79", "80-119", "120+"], ordered=true),
:tobgp =>
x -> categorical(x; levels=["0-9g/day", "10-19", "20-29", "30+"], ordered=true);
renamecols=false,
renamecols=false
)
transform!(esoph, [:agegp, :alcgp, :tobgp] .=> ByRow(levelcode); renamecols=false)

@@ -47,15 +47,14 @@ end
# instantiate the model
model = ordered_regression(X, y)

# sample with NUTS, 4 multi-threaded parallel chains, and 2k iters
chn = sample(model, NUTS(), MCMCThreads(), 2_000, 4)
# sample with NUTS, 4 multi-threaded parallel chains, and 2k iters with 1k warmup
chn = sample(model, NUTS(1_000, 0.8), MCMCThreads(), 1_000, 4)

# results:
# parameters mean std naive_se mcse ess rhat ess_per_sec
# Symbol Float64 Float64 Float64 Float64 Float64 Float64 Float64
#
# cutpoints[1] -1.4140 0.6359 0.0071 0.0092 4165.4455 1.0002 440.5548
# cutpoints[2] -0.2446 0.6182 0.0069 0.0089 4330.7124 1.0001 458.0341
# cutpoints[3] 0.8074 0.6286 0.0070 0.0087 4580.5094 1.0002 484.4537
# β[1] -0.0740 0.1184 0.0013 0.0017 5576.9867 1.0000 589.8452
# β[2] -0.0727 0.1704 0.0019 0.0022 5732.0269 1.0005 606.2429
# parameters mean std naive_se mcse ess rhat ess_per_sec
# Symbol Float64 Float64 Float64 Float64 Float64 Float64 Float64
# cutpoints[1] -1.4154 0.6353 0.0100 0.0135 1799.8496 1.0021 78.8820
# cutpoints[2] -0.2437 0.6054 0.0096 0.0127 1949.7758 1.0025 85.4528
# cutpoints[3] 0.8066 0.6168 0.0098 0.0122 2152.8488 1.0018 94.3528
# β[1] -0.0733 0.1151 0.0018 0.0022 2636.8129 1.0012 115.5635
# β[2] -0.0735 0.1719 0.0027 0.0029 2440.2544 1.0007 106.9490
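To read the cutpoints in the table above: under a standard cumulative ordered-logit likelihood (assumed here, since the model body is not shown in this hunk), the probability of each of the four categories is a difference of logistic CDFs evaluated at adjacent cutpoints. A small sketch using the posterior means from the new results:

# sketch only: turning cutpoints into category probabilities (ordered-logit assumption)
using StatsFuns: logistic

cutpoints = [-1.4154, -0.2437, 0.8066]   # posterior means from the table above
η = 0.0                                  # hypothetical linear predictor

# P(y = k) = logistic(c_k - η) - logistic(c_{k-1} - η), with c_0 = -Inf and c_K = +Inf
function category_probs(η, cutpoints)
    c = [-Inf; cutpoints; Inf]
    return [logistic(c[k + 1] - η) - logistic(c[k] - η) for k in 1:(length(c) - 1)]
end

category_probs(η, cutpoints)             # ≈ [0.195, 0.244, 0.252, 0.309]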
23 changes: 11 additions & 12 deletions turing/05-poisson_regression-roaches.jl
@@ -21,28 +21,27 @@ X = standardize(ZScoreTransform, X; dims=1)
y = roaches[:, :y]

# define the model
@model function poisson_regression(X, y; predictors=size(X, 2))
@model function poisson_regression(X, y; predictors=size(X, 2))
# priors
α ~ TDist(3) * 2.5
β ~ filldist(TDist(3) * 2.5, predictors)

# likelihood
y ~ arraydist(LazyArray(@~ LogPoisson.(α .+ X * β)))
return(; y, α, β)
return (; y, α, β)
end

# instantiate the model
model = poisson_regression(X, y)

# sample with NUTS, 4 multi-threaded parallel chains, and 2k iters
chn = sample(model, NUTS(), MCMCThreads(), 2_000, 4)
# sample with NUTS, 4 multi-threaded parallel chains, and 2k iters with 1k warmup
chn = sample(model, NUTS(1_000, 0.8), MCMCThreads(), 1_000, 4)

# results:
# parameters mean std naive_se mcse ess rhat ess_per_sec
# Symbol Float64 Float64 Float64 Float64 Float64 Float64 Float64
#
# α 2.9808 0.0145 0.0002 0.0002 7320.0230 1.0003 108.0095
# β[1] 0.4910 0.0067 0.0001 0.0001 7695.6080 1.0002 113.5514
# β[2] -0.2523 0.0126 0.0001 0.0001 8870.7340 1.0002 130.8908
# β[3] -0.1752 0.0156 0.0002 0.0002 7876.9316 0.9998 116.2269
# β[4] 0.0517 0.0116 0.0001 0.0001 7634.0212 1.0002 112.6427
# parameters mean std naive_se mcse ess rhat ess_per_sec
# Symbol Float64 Float64 Float64 Float64 Float64 Float64 Float64
# α 2.9810 0.0146 0.0002 0.0002 3951.1294 1.0016 585.2658
# β[1] 0.4910 0.0066 0.0001 0.0001 4108.0121 1.0006 608.5042
# β[2] -0.2521 0.0123 0.0002 0.0002 5078.9666 0.9996 752.3280
# β[3] -0.1748 0.0156 0.0002 0.0002 4236.2675 0.9994 627.5022
# β[4] 0.0516 0.0116 0.0002 0.0002 4171.0393 0.9994 617.8402
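LogPoisson(η) is a Poisson whose rate is exp(η), so the coefficients above live on the log scale and exponentiate into multiplicative rate effects. A short interpretation sketch using the posterior means from the new table (not part of the script):

# sketch only: back-transforming the log-scale posterior means above
α  = 2.9810    # intercept, log scale
β1 = 0.4910    # β[1], log scale, standardized predictor

exp(α)         # ≈ 19.7 expected roaches with all standardized predictors at zero
exp(β1)        # ≈ 1.63: a +1 SD change in the first predictor multiplies the rate by about 1.63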
21 changes: 10 additions & 11 deletions turing/06-robust_linear_regression-duncan.jl
@@ -30,21 +30,20 @@ y = standardize(ZScoreTransform, y; dims=1)

# likelihood
y ~ arraydist(TDist.(ν) .* σ .+ α .+ (X * β))
return(; y, α, β, σ, ν)
return (; y, α, β, σ, ν)
end

# instantiate the model
model = student_t_regression(X, y)

# sample with NUTS, 4 multi-threaded parallel chains, and 2k iters
chn = sample(model, NUTS(), MCMCThreads(), 2_000, 4)
# sample with NUTS, 4 multi-threaded parallel chains, and 2k iters with 1k warmup
chn = sample(model, NUTS(1_000, 0.8), MCMCThreads(), 1_000, 4)

# results:
#parameters mean std naive_se mcse ess rhat ess_per_sec
# Symbol Float64 Float64 Float64 Float64 Float64 Float64 Float64
#
# α 0.0028 0.0607 0.0007 0.0007 6302.5129 0.9999 106.9764
# β[1] 0.5397 0.1033 0.0012 0.0016 4150.2182 1.0008 70.4442
# β[2] 0.4606 0.0997 0.0011 0.0015 4274.3720 1.0012 72.5515
# σ 0.3554 0.0684 0.0008 0.0011 3640.0110 1.0001 61.7841
# ν 10.4426 13.3598 0.1494 0.1887 3917.3390 1.0001 66.4914
# parameters mean std naive_se mcse ess rhat ess_per_sec
# Symbol Float64 Float64 Float64 Float64 Float64 Float64 Float64
# α 0.0016 0.0630 0.0010 0.0013 2557.3011 1.0009 141.8831
# β[1] 0.5338 0.1049 0.0017 0.0024 1966.9407 1.0025 109.1290
# β[2] 0.4667 0.1017 0.0016 0.0023 2145.9728 1.0015 119.0620
# σ 0.3596 0.0677 0.0011 0.0015 1819.3829 1.0021 100.9422
# ν 10.6302 12.1233 0.1917 0.3741 1495.5097 1.0056 82.9732
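The likelihood in this hunk builds each observation's distribution as an affine transform of a TDist: TDist(ν) * σ + μ is a Student-t with location μ, scale σ, and ν degrees of freedom, and its heavier tails are what make the regression robust to outliers. A tiny sketch of that construction, with values close to the posterior means above:

# sketch only: the location-scale Student-t behind the likelihood above
using Distributions

ν, σ, μ = 10.6, 0.36, 0.0    # roughly the posterior means from the table above
d = TDist(ν) * σ + μ         # affine transform, i.e. a location-scale t distribution

mean(d)                      # == μ for ν > 1
std(d)                       # == σ * sqrt(ν / (ν - 2)) ≈ 0.40 for ν > 2, heavier-tailed than Normal(μ, σ)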
28 changes: 13 additions & 15 deletions turing/07-robust_beta_binomial_regression-wells.jl
@@ -24,10 +24,10 @@ X = standardize(ZScoreTransform, X; dims=1)
y = wells[:, :switch]

# define alternate parameterizations
BetaBinomial2(n, μ, ϕ) = BetaBinomial(n, μ*ϕ, (1-μ)*ϕ)
BetaBinomial2(n, μ, ϕ) = BetaBinomial(n, μ * ϕ, (1 - μ) * ϕ)

# define the model
@model function beta_binomial_regression(X, y; predictors=size(X, 2))
@model function beta_binomial_regression(X, y; predictors=size(X, 2))
# priors
α ~ TDist(3) * 2.5
β ~ filldist(TDist(3) * 2.5, predictors)
@@ -37,23 +37,21 @@ BetaBinomial2(n, μ, ϕ) = BetaBinomial(n, μ*ϕ, (1-μ)*ϕ)
p̂ = logistic.(α .+ X * β)
y ~ arraydist(LazyArray(@~ BetaBinomial2.(1, p̂, ϕ)))
# you could also do BetaBinomial2.(n, p̂, θ) if you can group the successes
return(; y, α, β, p̂, ϕ)
return (; y, α, β, p̂, ϕ)
end

# instantiate the model
model = beta_binomial_regression(X, y)

# sample with NUTS, 4 multi-threaded parallel chains, and 2k iters
chn = sample(model, NUTS(), MCMCThreads(), 2_000, 4)
# sample with NUTS, 4 multi-threaded parallel chains, and 2k iters with 1k warmup
chn = sample(model, NUTS(1_000, 0.8), MCMCThreads(), 1_000, 4)

# results:
# parameters mean std naive_se mcse ess rhat ess_per_sec
# Symbol Float64 Float64 Float64 Float64 Float64 Float64 Float64
#
# α 0.3366 0.0383 0.0004 0.0004 8272.7360 1.0005 73.3035
# β[1] 0.5187 0.0461 0.0005 0.0006 7365.2488 1.0003 65.2624
# β[2] -0.3456 0.0400 0.0004 0.0004 7604.7011 1.0002 67.3841
# β[3] -0.0615 0.0376 0.0004 0.0004 8252.4562 1.0001 73.1238
# β[4] 0.1705 0.0377 0.0004 0.0005 7669.6174 1.0002 67.9593
# ϕ 0.9964 1.0148 0.0113 0.0105 6972.6583 1.0001 61.7837
#
# parameters mean std naive_se mcse ess rhat ess_per_sec
# Symbol Float64 Float64 Float64 Float64 Float64 Float64 Float64
# α 0.3373 0.0394 0.0006 0.0006 5610.8988 0.9992 75.4224
# β[1] 0.5203 0.0458 0.0007 0.0006 5412.8138 0.9994 72.7597
# β[2] -0.3456 0.0398 0.0006 0.0005 5275.5706 0.9997 70.9149
# β[3] -0.0614 0.0388 0.0006 0.0005 4602.2511 0.9993 61.8640
# β[4] 0.1709 0.0386 0.0006 0.0006 5515.4004 1.0010 74.1387
# ϕ 0.9983 1.0268 0.0162 0.0143 4096.5605 0.9998 55.0665
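BetaBinomial2(n, μ, ϕ) above is the mean/precision parameterization of the beta-binomial: with shape parameters μϕ and (1 - μ)ϕ, the mean is n * μ and smaller ϕ means more overdispersion. A quick standalone check:

# sketch only: sanity check of the mean/precision parameterization defined above
using Distributions

BetaBinomial2(n, μ, ϕ) = BetaBinomial(n, μ * ϕ, (1 - μ) * ϕ)

d = BetaBinomial2(10, 0.3, 2.0)
mean(d)    # == 10 * 0.3 = 3.0, since n * μϕ / (μϕ + (1 - μ)ϕ) = n * μ
var(d)     # ≈ 8.4, well above the 2.1 of Binomial(10, 0.3): small ϕ means strong overdispersion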
8 changes: 4 additions & 4 deletions turing/08-robust_robit_regression-wells.jl
@@ -23,19 +23,19 @@ y = wells[:, :switch]
# define the model
# setting ν to 4 since we have a lot of parameters
# and Turing v0.21.9 samples slower than Stan
@model function robit_regression(X, y; predictors=size(X, 2), N=size(X, 1), ν=4)
@model function robit_regression(X, y; predictors=size(X, 2), N=size(X, 1), ν=4)
# priors
α ~ TDist(3) * 2.5
β ~ filldist(TDist(3) * 2.5, predictors)

# likelihood
ϵ ~ filldist(TDist(ν), N)
y ~ arraydist(LazyArray(@~ BernoulliLogit.(α .+ X * β .+ ϵ)))
return(; y, α, β)
return (; y, α, β)
end

# instantiate the model
model = robit_regression(X, y)

# sample with NUTS, 4 multi-threaded parallel chains, and 2k iters
chn = sample(model, NUTS(), MCMCThreads(), 2_000, 4)
# sample with NUTS, 4 multi-threaded parallel chains, and 2k iters with 1k warmup
chn = sample(model, NUTS(1_000, 0.8), MCMCThreads(), 1_000, 4)
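The robustness in this model comes from the latent errors ϵ ~ TDist(ν) added inside the logit, so each observation's marginal success probability averages the logistic inverse link over heavy-tailed noise, which pulls extreme probabilities toward 0.5. A rough Monte Carlo sketch of that marginal, with a hypothetical linear predictor rather than values from the model:

# sketch only: marginal success probability implied by the latent-t construction above
using Distributions
using StatsFuns: logistic

ν = 4      # as fixed in the model above
η = 3.0    # hypothetical linear predictor α + x'β

p_logit = logistic(η)                                    # ≈ 0.95 with no latent noise
p_robit = mean(logistic.(η .+ rand(TDist(ν), 100_000)))  # lower: heavy-tailed noise flattens the extremes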
25 changes: 12 additions & 13 deletions turing/09-robust_negative_binomial_regression-roaches.jl
@@ -29,7 +29,7 @@ function NegativeBinomial2(μ, ϕ)
end

# define the model
@model function negative_binomial_regression(X, y; predictors=size(X, 2))
@model function negative_binomial_regression(X, y; predictors=size(X, 2))
# priors
α ~ TDist(3) * 2.5
β ~ filldist(TDist(3) * 2.5, predictors)
@@ -38,22 +38,21 @@

# likelihood
y ~ arraydist(LazyArray(@~ NegativeBinomial2.(exp.(α .+ X * β), ϕ)))
return(; y, α, β, ϕ)
return (; y, α, β, ϕ)
end

# instantiate the model
model = negative_binomial_regression(X, y)

# sample with NUTS, 4 multi-threaded parallel chains, and 2k iters
chn = sample(model, NUTS(), MCMCThreads(), 2_000, 4)
# sample with NUTS, 4 multi-threaded parallel chains, and 2k iters with 1k warmup
chn = sample(model, NUTS(1_000, 0.8), MCMCThreads(), 1_000, 4)

# results:
# parameters mean std naive_se mcse ess rhat ess_per_sec
# Symbol Float64 Float64 Float64 Float64 Float64 Float64 Float64
#
# α 2.8260 0.0759 0.0008 0.0008 9864.8871 1.0000 261.3561
# β[1] 0.9536 0.1148 0.0013 0.0012 8856.1048 1.0006 234.6299
# β[2] -0.3712 0.0773 0.0009 0.0007 9403.3178 0.9999 249.1275
# β[3] -0.1538 0.0760 0.0008 0.0007 11360.4621 0.9999 300.9793
# β[4] 0.1459 0.1168 0.0013 0.0011 9233.2377 1.0000 244.6215
# ϕ⁻ 1.4091 0.0787 0.0009 0.0009 9940.3546 1.0001 263.3555
# parameters mean std naive_se mcse ess rhat ess_per_sec
# Symbol Float64 Float64 Float64 Float64 Float64 Float64 Float64
# α 2.8263 0.0776 0.0012 0.0011 5151.7518 1.0000 263.2205
# β[1] 0.9501 0.1142 0.0018 0.0015 5775.2010 0.9994 295.0746
# β[2] -0.3690 0.0754 0.0012 0.0011 5572.7346 1.0007 284.7300
# β[3] -0.1568 0.0764 0.0012 0.0009 5676.4081 0.9994 290.0270
# β[4] 0.1454 0.1212 0.0019 0.0020 4262.6961 0.9996 217.7956
# ϕ⁻ 1.4097 0.0791 0.0013 0.0011 5411.2014 0.9999 276.4767
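The body of NegativeBinomial2(μ, ϕ) is elided in this hunk. A common way to write this mean/precision helper (an assumption here, the full script may differ) maps it to Distributions.jl's NegativeBinomial(r, p) with r = ϕ and p = ϕ / (ϕ + μ), so that the mean is μ and ϕ controls the overdispersion:

# sketch only: one possible body for the helper whose definition is elided above
using Distributions

function NegativeBinomial2(μ, ϕ)
    p = ϕ / (ϕ + μ)              # success probability in Distributions' (r, p) parameterization
    return NegativeBinomial(ϕ, p)
end

d = NegativeBinomial2(20.0, 1.4)
mean(d)    # == μ = 20.0
var(d)     # == μ + μ^2 / ϕ ≈ 305.7, overdispersed relative to Poisson(μ)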
