13 - Modelos com Memória¶
Imports, loadings and functions¶
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import pandas as pd
import networkx as nx
# from causalgraphicalmodels import CausalGraphicalModel
import arviz as az
# ArviZ ships with style sheets!
# https://python.arviz.org/en/stable/examples/styles.html#example-styles
az.style.use("arviz-darkgrid")
import xarray as xr
import stan
import nest_asyncio
plt.style.use('default')
plt.rcParams['axes.facecolor'] = 'lightgray'
# To DAG's
import daft
# from causalgraphicalmodels import CausalGraphicalModel # Just work in < python3.9
# Add fonts to matplotlib to run xkcd
from matplotlib import font_manager
font_dirs = ["fonts/"] # The path to the custom font file.
font_files = font_manager.findSystemFonts(fontpaths=font_dirs)
for font_file in font_files:
font_manager.fontManager.addfont(font_file)
# To make plots like drawing
# plt.xkcd()
# To running the stan in jupyter notebook
nest_asyncio.apply()
# Utils functions
def logit(p):
return np.log(p) - np.log(1 - p)
def inv_logit(p):
return np.exp(p) / (1 + np.exp(p))
13.1 Example: Multilevel tadpoles¶
R Code 13.1¶
df = pd.read_csv('./data/reedfrogs.csv', sep=";")
df['tank'] = df.index.to_list()
df['tank'] += 1 # index start from 1 like Stan works
df
density | pred | size | surv | propsurv | tank | |
---|---|---|---|---|---|---|
0 | 10 | no | big | 9 | 0.900000 | 1 |
1 | 10 | no | big | 10 | 1.000000 | 2 |
2 | 10 | no | big | 7 | 0.700000 | 3 |
3 | 10 | no | big | 10 | 1.000000 | 4 |
4 | 10 | no | small | 9 | 0.900000 | 5 |
5 | 10 | no | small | 9 | 0.900000 | 6 |
6 | 10 | no | small | 10 | 1.000000 | 7 |
7 | 10 | no | small | 9 | 0.900000 | 8 |
8 | 10 | pred | big | 4 | 0.400000 | 9 |
9 | 10 | pred | big | 9 | 0.900000 | 10 |
10 | 10 | pred | big | 7 | 0.700000 | 11 |
11 | 10 | pred | big | 6 | 0.600000 | 12 |
12 | 10 | pred | small | 7 | 0.700000 | 13 |
13 | 10 | pred | small | 5 | 0.500000 | 14 |
14 | 10 | pred | small | 9 | 0.900000 | 15 |
15 | 10 | pred | small | 9 | 0.900000 | 16 |
16 | 25 | no | big | 24 | 0.960000 | 17 |
17 | 25 | no | big | 23 | 0.920000 | 18 |
18 | 25 | no | big | 22 | 0.880000 | 19 |
19 | 25 | no | big | 25 | 1.000000 | 20 |
20 | 25 | no | small | 23 | 0.920000 | 21 |
21 | 25 | no | small | 23 | 0.920000 | 22 |
22 | 25 | no | small | 23 | 0.920000 | 23 |
23 | 25 | no | small | 21 | 0.840000 | 24 |
24 | 25 | pred | big | 6 | 0.240000 | 25 |
25 | 25 | pred | big | 13 | 0.520000 | 26 |
26 | 25 | pred | big | 4 | 0.160000 | 27 |
27 | 25 | pred | big | 9 | 0.360000 | 28 |
28 | 25 | pred | small | 13 | 0.520000 | 29 |
29 | 25 | pred | small | 20 | 0.800000 | 30 |
30 | 25 | pred | small | 8 | 0.320000 | 31 |
31 | 25 | pred | small | 10 | 0.400000 | 32 |
32 | 35 | no | big | 34 | 0.971429 | 33 |
33 | 35 | no | big | 33 | 0.942857 | 34 |
34 | 35 | no | big | 33 | 0.942857 | 35 |
35 | 35 | no | big | 31 | 0.885714 | 36 |
36 | 35 | no | small | 31 | 0.885714 | 37 |
37 | 35 | no | small | 35 | 1.000000 | 38 |
38 | 35 | no | small | 33 | 0.942857 | 39 |
39 | 35 | no | small | 32 | 0.914286 | 40 |
40 | 35 | pred | big | 4 | 0.114286 | 41 |
41 | 35 | pred | big | 12 | 0.342857 | 42 |
42 | 35 | pred | big | 13 | 0.371429 | 43 |
43 | 35 | pred | big | 14 | 0.400000 | 44 |
44 | 35 | pred | small | 22 | 0.628571 | 45 |
45 | 35 | pred | small | 12 | 0.342857 | 46 |
46 | 35 | pred | small | 31 | 0.885714 | 47 |
47 | 35 | pred | small | 17 | 0.485714 | 48 |
df.describe()
density | surv | propsurv | tank | |
---|---|---|---|---|
count | 48.000000 | 48.000000 | 48.000000 | 48.00 |
mean | 23.333333 | 16.312500 | 0.721607 | 24.50 |
std | 10.382746 | 9.884775 | 0.266416 | 14.00 |
min | 10.000000 | 4.000000 | 0.114286 | 1.00 |
25% | 10.000000 | 9.000000 | 0.496429 | 12.75 |
50% | 25.000000 | 12.500000 | 0.885714 | 24.50 |
75% | 35.000000 | 23.000000 | 0.920000 | 36.25 |
max | 35.000000 | 35.000000 | 1.000000 | 48.00 |
R Code 13.2¶
model = """
data {
int qty;
array[qty] int N; // Total quantities that have tadpoles in tank
array[qty] int survival; // How many tadpoles survival
array[qty] int tank; // Tank index
}
parameters {
vector[qty] alpha;
}
model {
vector[qty] p;
alpha ~ normal(0, 1.5);
for (i in 1:qty){
p[i] = alpha[ tank[i] ];
p[i] = inv_logit(alpha[i]);
}
survival ~ binomial(N, p);
}
"""
dat_list = {
'qty': len(df),
'tank': df['tank'].to_list(),
'survival': df['surv'].to_list(),
'N': df['density'].to_list()
}
posteriori = stan.build(model, data=dat_list)
samples = posteriori.sample(num_chains=4, num_samples=1000)
model_13_1 = az.from_pystan(
posterior=samples,
posterior_model=posteriori,
observed_data=dat_list.keys()
)
az.summary(model_13_1, hdi_prob=0.89)
mean | sd | hdi_5.5% | hdi_94.5% | mcse_mean | mcse_sd | ess_bulk | ess_tail | r_hat | |
---|---|---|---|---|---|---|---|---|---|
alpha[0] | 1.729 | 0.782 | 0.473 | 2.954 | 0.011 | 0.009 | 5033.0 | 2899.0 | 1.00 |
alpha[1] | 2.397 | 0.896 | 0.960 | 3.810 | 0.012 | 0.010 | 5933.0 | 2874.0 | 1.00 |
alpha[2] | 0.761 | 0.627 | -0.247 | 1.749 | 0.008 | 0.008 | 5851.0 | 2858.0 | 1.00 |
alpha[3] | 2.397 | 0.873 | 0.989 | 3.729 | 0.010 | 0.009 | 7616.0 | 2956.0 | 1.00 |
alpha[4] | 1.721 | 0.768 | 0.518 | 2.939 | 0.010 | 0.009 | 6444.0 | 2264.0 | 1.00 |
alpha[5] | 1.697 | 0.751 | 0.465 | 2.871 | 0.009 | 0.008 | 6611.0 | 2922.0 | 1.00 |
alpha[6] | 2.417 | 0.891 | 1.038 | 3.823 | 0.012 | 0.010 | 6037.0 | 3012.0 | 1.00 |
alpha[7] | 1.724 | 0.804 | 0.382 | 2.897 | 0.010 | 0.009 | 6642.0 | 2708.0 | 1.00 |
alpha[8] | -0.369 | 0.616 | -1.348 | 0.580 | 0.009 | 0.009 | 5099.0 | 3129.0 | 1.00 |
alpha[9] | 1.729 | 0.774 | 0.468 | 2.932 | 0.011 | 0.009 | 5226.0 | 2840.0 | 1.00 |
alpha[10] | 0.760 | 0.649 | -0.294 | 1.767 | 0.008 | 0.008 | 6808.0 | 2797.0 | 1.00 |
alpha[11] | 0.374 | 0.618 | -0.615 | 1.336 | 0.008 | 0.009 | 6522.0 | 2646.0 | 1.00 |
alpha[12] | 0.750 | 0.624 | -0.232 | 1.733 | 0.008 | 0.007 | 6833.0 | 2804.0 | 1.00 |
alpha[13] | 0.004 | 0.593 | -0.923 | 0.953 | 0.008 | 0.010 | 5845.0 | 2809.0 | 1.00 |
alpha[14] | 1.721 | 0.780 | 0.457 | 2.893 | 0.011 | 0.009 | 5877.0 | 2668.0 | 1.00 |
alpha[15] | 1.731 | 0.773 | 0.549 | 2.968 | 0.011 | 0.009 | 5508.0 | 2657.0 | 1.00 |
alpha[16] | 2.547 | 0.675 | 1.465 | 3.591 | 0.009 | 0.007 | 6075.0 | 2900.0 | 1.00 |
alpha[17] | 2.141 | 0.606 | 1.210 | 3.112 | 0.009 | 0.007 | 4941.0 | 2607.0 | 1.00 |
alpha[18] | 1.815 | 0.533 | 0.942 | 2.630 | 0.007 | 0.006 | 5376.0 | 2942.0 | 1.00 |
alpha[19] | 3.100 | 0.823 | 1.861 | 4.392 | 0.011 | 0.009 | 5902.0 | 2957.0 | 1.00 |
alpha[20] | 2.149 | 0.619 | 1.147 | 3.097 | 0.010 | 0.008 | 4751.0 | 2430.0 | 1.00 |
alpha[21] | 2.124 | 0.586 | 1.215 | 3.052 | 0.008 | 0.007 | 5953.0 | 2490.0 | 1.00 |
alpha[22] | 2.140 | 0.613 | 1.192 | 3.098 | 0.008 | 0.007 | 5610.0 | 2987.0 | 1.00 |
alpha[23] | 1.551 | 0.523 | 0.695 | 2.324 | 0.007 | 0.006 | 6228.0 | 2590.0 | 1.00 |
alpha[24] | -1.089 | 0.436 | -1.818 | -0.408 | 0.005 | 0.004 | 7119.0 | 2925.0 | 1.00 |
alpha[25] | 0.076 | 0.397 | -0.556 | 0.701 | 0.005 | 0.007 | 6264.0 | 2929.0 | 1.00 |
alpha[26] | -1.548 | 0.505 | -2.323 | -0.709 | 0.007 | 0.005 | 6025.0 | 2769.0 | 1.00 |
alpha[27] | -0.554 | 0.402 | -1.177 | 0.100 | 0.005 | 0.004 | 6636.0 | 2947.0 | 1.00 |
alpha[28] | 0.081 | 0.407 | -0.593 | 0.689 | 0.005 | 0.007 | 5490.0 | 2935.0 | 1.00 |
alpha[29] | 1.310 | 0.480 | 0.532 | 2.039 | 0.006 | 0.005 | 7013.0 | 2854.0 | 1.00 |
alpha[30] | -0.732 | 0.407 | -1.422 | -0.124 | 0.006 | 0.004 | 5134.0 | 2874.0 | 1.00 |
alpha[31] | -0.398 | 0.387 | -1.026 | 0.213 | 0.005 | 0.005 | 5524.0 | 2954.0 | 1.00 |
alpha[32] | 2.831 | 0.651 | 1.827 | 3.820 | 0.009 | 0.007 | 5118.0 | 3261.0 | 1.00 |
alpha[33] | 2.475 | 0.600 | 1.542 | 3.414 | 0.009 | 0.007 | 5083.0 | 2667.0 | 1.00 |
alpha[34] | 2.448 | 0.581 | 1.467 | 3.332 | 0.007 | 0.006 | 6680.0 | 2426.0 | 1.00 |
alpha[35] | 1.905 | 0.481 | 1.161 | 2.714 | 0.006 | 0.005 | 6105.0 | 2837.0 | 1.00 |
alpha[36] | 1.906 | 0.464 | 1.139 | 2.600 | 0.006 | 0.005 | 6146.0 | 3053.0 | 1.00 |
alpha[37] | 3.352 | 0.778 | 2.097 | 4.528 | 0.011 | 0.008 | 5970.0 | 2790.0 | 1.00 |
alpha[38] | 2.445 | 0.583 | 1.524 | 3.334 | 0.008 | 0.006 | 6204.0 | 3449.0 | 1.00 |
alpha[39] | 2.164 | 0.524 | 1.383 | 3.051 | 0.007 | 0.006 | 6301.0 | 2722.0 | 1.00 |
alpha[40] | -1.904 | 0.479 | -2.635 | -1.122 | 0.006 | 0.004 | 7453.0 | 2977.0 | 1.00 |
alpha[41] | -0.637 | 0.355 | -1.205 | -0.078 | 0.004 | 0.004 | 6393.0 | 3005.0 | 1.00 |
alpha[42] | -0.511 | 0.339 | -1.006 | 0.074 | 0.004 | 0.004 | 5884.0 | 2943.0 | 1.01 |
alpha[43] | -0.395 | 0.336 | -0.930 | 0.138 | 0.004 | 0.004 | 5973.0 | 2821.0 | 1.00 |
alpha[44] | 0.507 | 0.347 | -0.033 | 1.068 | 0.004 | 0.004 | 6663.0 | 2982.0 | 1.00 |
alpha[45] | -0.631 | 0.346 | -1.185 | -0.074 | 0.005 | 0.004 | 5811.0 | 2715.0 | 1.00 |
alpha[46] | 1.905 | 0.480 | 1.152 | 2.637 | 0.006 | 0.005 | 6090.0 | 2761.0 | 1.00 |
alpha[47] | -0.053 | 0.334 | -0.546 | 0.523 | 0.004 | 0.006 | 6006.0 | 2621.0 | 1.00 |
az.plot_forest(model_13_1, hdi_prob=0.89, combined=True, figsize=(17, 20))
plt.grid(axis='y', c='white', alpha=0.3)
plt.show()

R Code 13.3¶
Multilevel model Tadpole¶
model = """
data {
int qty;
array[qty] int N;
array[qty] int survival;
array[qty] int tank;
}
parameters {
vector[qty] alpha;
real bar_alpha;
real<lower=0> sigma;
}
model {
vector[qty] p;
alpha ~ normal(bar_alpha, sigma);
bar_alpha ~ normal(0, 1.5);
sigma ~ exponential(1);
for (i in 1:qty){
p[i] = alpha[ tank[i] ];
p[i] = inv_logit(p[i]);
}
survival ~ binomial(N, p);
}
"""
dat_list = {
'qty': len(df),
'tank': df['tank'].to_list(),
'survival': df['surv'].to_list(),
'N': df['density'].to_list()
}
posteriori = stan.build(model, data=dat_list)
samples = posteriori.sample(num_chains=4, num_samples=1000)
model_13_2 = az.from_pystan(
posterior=samples,
posterior_model=posteriori,
observed_data=dat_list.keys(),
)
az.summary(model_13_2, hdi_prob=0.89)
mean | sd | hdi_5.5% | hdi_94.5% | mcse_mean | mcse_sd | ess_bulk | ess_tail | r_hat | |
---|---|---|---|---|---|---|---|---|---|
alpha[0] | 2.120 | 0.878 | 0.638 | 3.364 | 0.013 | 0.011 | 5103.0 | 2774.0 | 1.0 |
alpha[1] | 3.042 | 1.090 | 1.234 | 4.649 | 0.017 | 0.013 | 4432.0 | 3020.0 | 1.0 |
alpha[2] | 1.012 | 0.678 | -0.009 | 2.145 | 0.009 | 0.008 | 5420.0 | 2781.0 | 1.0 |
alpha[3] | 3.067 | 1.134 | 1.212 | 4.680 | 0.020 | 0.017 | 3701.0 | 2066.0 | 1.0 |
alpha[4] | 2.143 | 0.887 | 0.787 | 3.555 | 0.013 | 0.011 | 5244.0 | 2510.0 | 1.0 |
alpha[5] | 2.122 | 0.864 | 0.782 | 3.457 | 0.012 | 0.010 | 5285.0 | 2885.0 | 1.0 |
alpha[6] | 3.076 | 1.122 | 1.159 | 4.666 | 0.017 | 0.014 | 5095.0 | 2384.0 | 1.0 |
alpha[7] | 2.134 | 0.871 | 0.681 | 3.370 | 0.013 | 0.010 | 4979.0 | 2943.0 | 1.0 |
alpha[8] | -0.176 | 0.624 | -1.097 | 0.884 | 0.009 | 0.010 | 4791.0 | 2630.0 | 1.0 |
alpha[9] | 2.131 | 0.856 | 0.870 | 3.516 | 0.012 | 0.010 | 5257.0 | 2688.0 | 1.0 |
alpha[10] | 1.009 | 0.657 | -0.059 | 2.031 | 0.009 | 0.008 | 5011.0 | 2379.0 | 1.0 |
alpha[11] | 0.578 | 0.643 | -0.410 | 1.625 | 0.009 | 0.008 | 5160.0 | 3014.0 | 1.0 |
alpha[12] | 1.002 | 0.643 | 0.018 | 2.031 | 0.009 | 0.007 | 5615.0 | 3058.0 | 1.0 |
alpha[13] | 0.205 | 0.619 | -0.818 | 1.132 | 0.008 | 0.010 | 5383.0 | 2795.0 | 1.0 |
alpha[14] | 2.144 | 0.850 | 0.811 | 3.456 | 0.013 | 0.011 | 4588.0 | 2692.0 | 1.0 |
alpha[15] | 2.130 | 0.888 | 0.742 | 3.503 | 0.013 | 0.011 | 5481.0 | 2815.0 | 1.0 |
alpha[16] | 2.930 | 0.808 | 1.772 | 4.282 | 0.012 | 0.009 | 5486.0 | 2837.0 | 1.0 |
alpha[17] | 2.414 | 0.673 | 1.351 | 3.467 | 0.010 | 0.008 | 4980.0 | 2624.0 | 1.0 |
alpha[18] | 2.029 | 0.596 | 1.078 | 2.965 | 0.010 | 0.008 | 3769.0 | 2496.0 | 1.0 |
alpha[19] | 3.668 | 0.998 | 2.060 | 5.129 | 0.015 | 0.012 | 4742.0 | 2956.0 | 1.0 |
alpha[20] | 2.401 | 0.696 | 1.294 | 3.475 | 0.010 | 0.008 | 5393.0 | 2721.0 | 1.0 |
alpha[21] | 2.400 | 0.652 | 1.354 | 3.378 | 0.010 | 0.008 | 4742.0 | 2220.0 | 1.0 |
alpha[22] | 2.402 | 0.657 | 1.379 | 3.436 | 0.011 | 0.008 | 4170.0 | 2524.0 | 1.0 |
alpha[23] | 1.705 | 0.540 | 0.862 | 2.557 | 0.008 | 0.006 | 4555.0 | 2817.0 | 1.0 |
alpha[24] | -0.999 | 0.449 | -1.655 | -0.244 | 0.006 | 0.005 | 5423.0 | 2412.0 | 1.0 |
alpha[25] | 0.163 | 0.400 | -0.491 | 0.800 | 0.005 | 0.006 | 5549.0 | 2698.0 | 1.0 |
alpha[26] | -1.429 | 0.503 | -2.269 | -0.687 | 0.008 | 0.006 | 4286.0 | 2894.0 | 1.0 |
alpha[27] | -0.483 | 0.410 | -1.148 | 0.160 | 0.006 | 0.005 | 5325.0 | 3363.0 | 1.0 |
alpha[28] | 0.157 | 0.388 | -0.489 | 0.743 | 0.006 | 0.005 | 4797.0 | 2966.0 | 1.0 |
alpha[29] | 1.438 | 0.479 | 0.657 | 2.166 | 0.006 | 0.005 | 5631.0 | 3250.0 | 1.0 |
alpha[30] | -0.637 | 0.412 | -1.259 | 0.053 | 0.006 | 0.005 | 5600.0 | 2739.0 | 1.0 |
alpha[31] | -0.317 | 0.395 | -1.002 | 0.256 | 0.006 | 0.006 | 5105.0 | 2842.0 | 1.0 |
alpha[32] | 3.188 | 0.771 | 1.963 | 4.334 | 0.012 | 0.009 | 4968.0 | 2644.0 | 1.0 |
alpha[33] | 2.714 | 0.639 | 1.685 | 3.693 | 0.009 | 0.007 | 4885.0 | 3002.0 | 1.0 |
alpha[34] | 2.715 | 0.657 | 1.705 | 3.773 | 0.010 | 0.008 | 4591.0 | 2027.0 | 1.0 |
alpha[35] | 2.072 | 0.519 | 1.255 | 2.887 | 0.008 | 0.006 | 4743.0 | 2557.0 | 1.0 |
alpha[36] | 2.061 | 0.514 | 1.291 | 2.909 | 0.008 | 0.006 | 4889.0 | 2645.0 | 1.0 |
alpha[37] | 3.893 | 0.955 | 2.436 | 5.387 | 0.015 | 0.012 | 4467.0 | 2872.0 | 1.0 |
alpha[38] | 2.702 | 0.641 | 1.767 | 3.752 | 0.010 | 0.008 | 4560.0 | 2247.0 | 1.0 |
alpha[39] | 2.354 | 0.562 | 1.433 | 3.187 | 0.009 | 0.007 | 4042.0 | 2801.0 | 1.0 |
alpha[40] | -1.799 | 0.474 | -2.528 | -1.032 | 0.007 | 0.005 | 4750.0 | 2938.0 | 1.0 |
alpha[41] | -0.573 | 0.355 | -1.133 | -0.006 | 0.005 | 0.004 | 4697.0 | 2564.0 | 1.0 |
alpha[42] | -0.456 | 0.345 | -1.001 | 0.096 | 0.005 | 0.004 | 5231.0 | 3157.0 | 1.0 |
alpha[43] | -0.334 | 0.343 | -0.872 | 0.226 | 0.005 | 0.004 | 4927.0 | 3032.0 | 1.0 |
alpha[44] | 0.582 | 0.345 | 0.041 | 1.143 | 0.005 | 0.004 | 4770.0 | 3146.0 | 1.0 |
alpha[45] | -0.573 | 0.366 | -1.163 | 0.008 | 0.005 | 0.004 | 5334.0 | 3158.0 | 1.0 |
alpha[46] | 2.069 | 0.522 | 1.285 | 2.919 | 0.008 | 0.006 | 4822.0 | 2614.0 | 1.0 |
alpha[47] | -0.000 | 0.340 | -0.524 | 0.552 | 0.005 | 0.006 | 5063.0 | 2720.0 | 1.0 |
bar_alpha | 1.346 | 0.260 | 0.940 | 1.758 | 0.005 | 0.003 | 3131.0 | 2923.0 | 1.0 |
sigma | 1.618 | 0.216 | 1.263 | 1.940 | 0.004 | 0.003 | 2491.0 | 3083.0 | 1.0 |
az.plot_forest(model_13_2, hdi_prob=0.89, combined=True, figsize=(17, 20))
plt.grid(axis='y', color='white', alpha=0.3)
plt.show()

R Code 13.4¶
# az.compare(model_13_1, model_13_2)
R Code 13.5¶
means = [ model_13_2.posterior.alpha.sel(alpha_dim_0=(i-1)).values.flatten().mean() for i in df.tank ]
means = inv_logit(means)
# My test, this is not originaly in book
means_13_1 = [ model_13_1.posterior.alpha.sel(alpha_dim_0=(i-1)).values.flatten().mean() for i in df.tank ]
means_13_1 = inv_logit(means_13_1)
bar_alpha_log = model_13_2.posterior.bar_alpha.values.flatten()
bar_alpha = inv_logit(bar_alpha_log)
bar_alpha_mean = bar_alpha.mean()
sigma_log = model_13_2.posterior.sigma.values.flatten()
sigma = inv_logit(sigma_log)
sigma_mean = sigma.mean()
plt.figure(figsize=(17, 6))
plt.ylim = ([0, 1])
plt.scatter(df.tank, means, edgecolors='black', c='lightgray', s=100)
plt.scatter(df.tank, means_13_1, edgecolors='yellow', c='lightgray', s=100, alpha=0.4)
plt.scatter(df.tank, df.propsurv, c='blue')
plt.axvline(x=15.5, ls='--', color='white', alpha=0.3)
plt.axvline(x=31.5, ls='--', color='white', alpha=0.3)
plt.axhline(y=bar_alpha_mean, ls='--', c='black', alpha=0.7)
plt.text(4, 0.05, 'Small Tank', size=12)
plt.text(22, 0.05, 'Medium Tank', size=12)
plt.text(40, 0.05, 'Large Tank', size=12)
plt.gca().set_ylim(0.0, 1.05)
plt.title('Tadpole survival Tanks')
plt.xlabel('Tank Index')
plt.ylabel('Porportion Survival')
plt.show()

Blue dot: Proportion survival s_i/N_i
Black circle: Multilevel model estimative
Light Yellow: No-pooling estimative
R Code 13.6¶
fig = plt.figure(figsize=(17, 6))
gs = GridSpec(1, 2)
x = np.linspace(-3, 4)
s_sampled = 500
ax1 = fig.add_subplot(gs[0, 0])
log_odds_survival = []
log_odds_sampled_index = np.random.choice(len(bar_alpha_log) ,size=s_sampled, replace=False)
for i in log_odds_sampled_index:
log_odds_survival.append(stats.norm.pdf(x, bar_alpha_log[i], sigma_log[i]))
for i in range(s_sampled):
ax1.plot(x, log_odds_survival[i], c='darkblue', linewidth=0.05)
ax1.set_title('Survival across Tank')
ax1.set_xlabel('log_odds survival')
ax1.set_ylabel('Density')
ax2 = fig.add_subplot(gs[0, 1])
samples_log = np.random.normal(bar_alpha_log, sigma_log)
ax2.hist(inv_logit(samples_log), rwidth=0.9, color='darkblue', density=True)
ax2.axvline(x=np.mean(inv_logit(samples_log)), c='black', ls='--')
ax2.text(np.mean(inv_logit(samples_log))+0.01, 2.5, 'Mean')
ax2.set_title('Survival probabilities simulations')
ax2.set_xlabel('Probability survival')
ax2.set_ylabel('Density')
plt.show()

13.2 Varing effects and underfitting/overfitting trade-off¶
The model
R Code 13.7¶
a_bar = 1.5
sigma = 1.5
nponds = 60
repeats = 15
Ni = np.repeat([5, 10, 25, 35], repeats=repeats)
R Code 13.8¶
a_pond = np.random.normal(loc=a_bar, scale=sigma, size=nponds)
R Code 13.9¶
d = {
'pond': np.arange(nponds) + 1,
'Ni':Ni,
'true_a': a_pond,
}
dsim = pd.DataFrame(data=d)
dsim.head()
pond | Ni | true_a | |
---|---|---|---|
0 | 1 | 5 | 1.812547 |
1 | 2 | 5 | 0.484266 |
2 | 3 | 5 | 2.273773 |
3 | 4 | 5 | 1.495332 |
4 | 5 | 5 | 1.674099 |
R Code 13.10¶
# Code in R -> integer vs numeric
R Code 13.11¶
dsim['Si'] = np.random.binomial(n=dsim['Ni'], p=inv_logit(dsim['true_a']))
dsim.head()
pond | Ni | true_a | Si | |
---|---|---|---|---|
0 | 1 | 5 | 1.812547 | 5 |
1 | 2 | 5 | 0.484266 | 4 |
2 | 3 | 5 | 2.273773 | 3 |
3 | 4 | 5 | 1.495332 | 3 |
4 | 5 | 5 | 1.674099 | 5 |
R Code 13.12¶
13.2.4 Compute the no-pooling estimates¶
dsim['p_nopool'] = dsim['Si'] / dsim['Ni']
dsim.head()
pond | Ni | true_a | Si | p_nopool | |
---|---|---|---|---|---|
0 | 1 | 5 | 1.812547 | 5 | 1.0 |
1 | 2 | 5 | 0.484266 | 4 | 0.8 |
2 | 3 | 5 | 2.273773 | 3 | 0.6 |
3 | 4 | 5 | 1.495332 | 3 | 0.6 |
4 | 5 | 5 | 1.674099 | 5 | 1.0 |
R Code 13.13¶
13.2.5 Compute the partial-pooling estimates¶
model = """
data {
int N;
array[N] int pond; // Pond index
array[N] int Ni; // Population in pond[i]
array[N] int Si; // Survivals from Ni pond
}
parameters {
vector[N] alpha;
real bar_alpha;
real<lower=0> sigma;
}
model {
vector[N] pi;
// Link
for (i in 1:N) {
pi[i] = alpha[ pond[i] ];
pi[i] = inv_logit(pi[i]);
}
// Prior
alpha ~ normal(bar_alpha, sigma);
// Hyper Prior
bar_alpha ~ normal(0, 1.5);
sigma ~ exponential(1);
// Likelihood
Si ~ binomial(Ni, pi);
}
"""
dat_list = {
'N': len(dsim),
'Ni': dsim['Ni'].to_list(),
'pond': dsim['pond'].to_list(),
'Si': dsim['Si'].to_list(),
}
posteriori = stan.build(model, data=dat_list)
samples = posteriori.sample(num_chains=4, num_samples=1000)
R Code 13.14¶
model_13_3 = az.from_pystan(
posterior=samples,
posterior_model=posteriori,
observed_data=dat_list
)
az.summary(model_13_3, hdi_prob=0.89)
mean | sd | hdi_5.5% | hdi_94.5% | mcse_mean | mcse_sd | ess_bulk | ess_tail | r_hat | |
---|---|---|---|---|---|---|---|---|---|
alpha[0] | 2.787 | 1.241 | 0.919 | 4.799 | 0.019 | 0.016 | 4867.0 | 2442.0 | 1.0 |
alpha[1] | 1.562 | 0.969 | -0.034 | 3.021 | 0.014 | 0.012 | 5222.0 | 2609.0 | 1.0 |
alpha[2] | 0.721 | 0.865 | -0.728 | 2.038 | 0.012 | 0.012 | 5554.0 | 2651.0 | 1.0 |
alpha[3] | 0.728 | 0.854 | -0.587 | 2.118 | 0.013 | 0.012 | 4619.0 | 2732.0 | 1.0 |
alpha[4] | 2.759 | 1.252 | 0.761 | 4.686 | 0.019 | 0.015 | 4595.0 | 2736.0 | 1.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
alpha[57] | 4.017 | 1.033 | 2.446 | 5.569 | 0.017 | 0.014 | 4304.0 | 2100.0 | 1.0 |
alpha[58] | 1.430 | 0.410 | 0.782 | 2.105 | 0.006 | 0.004 | 5454.0 | 3090.0 | 1.0 |
alpha[59] | 1.115 | 0.384 | 0.451 | 1.690 | 0.005 | 0.004 | 5327.0 | 3270.0 | 1.0 |
bar_alpha | 1.442 | 0.254 | 1.026 | 1.835 | 0.004 | 0.003 | 3741.0 | 2961.0 | 1.0 |
sigma | 1.700 | 0.236 | 1.320 | 2.046 | 0.006 | 0.004 | 1522.0 | 2025.0 | 1.0 |
62 rows × 9 columns
R Code 13.15¶
dsim['p_partpool'] = [inv_logit(model_13_3.posterior.alpha.sel(alpha_dim_0=i).values.mean()) for i in range(len(dsim))]
dsim.head()
pond | Ni | true_a | Si | p_nopool | p_partpool | |
---|---|---|---|---|---|---|
0 | 1 | 5 | 1.812547 | 5 | 1.0 | 0.941987 |
1 | 2 | 5 | 0.484266 | 4 | 0.8 | 0.826685 |
2 | 3 | 5 | 2.273773 | 3 | 0.6 | 0.672823 |
3 | 4 | 5 | 1.495332 | 3 | 0.6 | 0.674364 |
4 | 5 | 5 | 1.674099 | 5 | 1.0 | 0.940440 |
R Code 13.16¶
dsim['p_true'] = inv_logit(dsim['true_a'])
dsim.head()
pond | Ni | true_a | Si | p_nopool | p_partpool | p_true | |
---|---|---|---|---|---|---|---|
0 | 1 | 5 | 1.812547 | 5 | 1.0 | 0.941987 | 0.859669 |
1 | 2 | 5 | 0.484266 | 4 | 0.8 | 0.826685 | 0.618755 |
2 | 3 | 5 | 2.273773 | 3 | 0.6 | 0.672823 | 0.906681 |
3 | 4 | 5 | 1.495332 | 3 | 0.6 | 0.674364 | 0.816877 |
4 | 5 | 5 | 1.674099 | 5 | 1.0 | 0.940440 | 0.842122 |
R Code 13.17¶
no_pool_error = np.abs(dsim['p_nopool'] - dsim['p_true'])
partpool_error = np.abs(dsim['p_partpool'] - dsim['p_true'])
R Code 13.18¶
plt.figure(figsize=(17, 8))
plt.ylim = ([0, 1])
max_lim_graph = 0.3
plt.scatter(dsim.pond, partpool_error, edgecolors='black', c='lightgray', s=100)
plt.scatter(dsim.pond, no_pool_error, c='blue')
qty_unique_ponds = len(dsim['Ni'].unique())
qty_each_ponds = repeats # The number of repetitions for each element in each pond.
# Vertical lines
for i in range(qty_unique_ponds):
plt.axvline(x=qty_each_ponds*(i+1) + 0.5, ls='--', color='white', alpha=0.3)
partpool_error_mean = np.mean(partpool_error[(qty_each_ponds*i):(qty_each_ponds*(i+1))])
no_pool_error_mean = np.mean(no_pool_error[(qty_each_ponds*i):(qty_each_ponds*(i+1))])
plt.hlines(y=partpool_error_mean, xmin=1+(qty_each_ponds*i), xmax=qty_each_ponds+(qty_each_ponds*i), ls='--', colors='black', alpha=0.7)
plt.hlines(y=no_pool_error_mean, xmin=1+(qty_each_ponds*i), xmax=qty_each_ponds+(qty_each_ponds*i), ls='-', colors='blue', alpha=0.7)
score_no_pooling = 0
score_partial_pooling = 0
for i in dsim.pond:
if no_pool_error[i-1] >= partpool_error[i-1]: # partial polling is better
plt.vlines(x=i, ymin=no_pool_error[i-1], ymax=partpool_error[i-1], ls='--', colors='green', alpha=0.3)
score_partial_pooling += no_pool_error[i-1] - partpool_error[i-1] # How partial pooling is better
else: # no pooling is better
plt.vlines(x=i, ymin=no_pool_error[i-1], ymax=partpool_error[i-1], ls='--', colors='red', alpha=0.3)
score_no_pooling += partpool_error[i-1] - no_pool_error[i-1] # How no pooling is better
plt.text(7, max_lim_graph, 'Tiny Ponds ($5$)', size=12)
plt.text(21, max_lim_graph, 'Small Ponds ($10$)', size=12)
plt.text(35, max_lim_graph, 'Medium Ponds ($25$)', size=12)
plt.text(50, max_lim_graph, 'Large Ponds ($35$)', size=12)
plt.text(47, 0.25, f'Partial pooling is better by = {round(score_partial_pooling, 2)}')
plt.text(47, 0.24, f'No pooling is better by = {round(score_no_pooling, 2)}')
plt.text(47, 0.23, f'Partial Polling/No polling = {round((score_partial_pooling/score_no_pooling)*100, 2)}%')
plt.gca().set_ylim(-0.01, max_lim_graph + 0.05)
plt.title('Pond survival error absolute \n\n Black dash line = partial pooling \n Blue line = no pooling')
plt.xlabel('Pond Index')
plt.ylabel('Absolute Error')
plt.show()

R Code 13.20¶
# Reuse code in using Rethinking packages in R, here is automatically reuse!
# Just re-run from R Code 13.7
13.3 More than one type of cluster¶
R Code 13.21¶
# Previous chimpanzees models is in chapter 11
df = pd.read_csv('./data/chimpanzees.csv', sep=';')
df.head()
actor | recipient | condition | block | trial | prosoc_left | chose_prosoc | pulled_left | |
---|---|---|---|---|---|---|---|---|
0 | 1 | NaN | 0 | 1 | 2 | 0 | 1 | 0 |
1 | 1 | NaN | 0 | 1 | 4 | 0 | 0 | 1 |
2 | 1 | NaN | 0 | 1 | 6 | 1 | 0 | 0 |
3 | 1 | NaN | 0 | 1 | 8 | 0 | 1 | 0 |
4 | 1 | NaN | 0 | 1 | 10 | 1 | 1 | 1 |
df['treatment'] = 1 + df['prosoc_left'] + 2 * df['condition']
df.head()
actor | recipient | condition | block | trial | prosoc_left | chose_prosoc | pulled_left | treatment | |
---|---|---|---|---|---|---|---|---|---|
0 | 1 | NaN | 0 | 1 | 2 | 0 | 1 | 0 | 1 |
1 | 1 | NaN | 0 | 1 | 4 | 0 | 0 | 1 | 1 |
2 | 1 | NaN | 0 | 1 | 6 | 1 | 0 | 0 | 2 |
3 | 1 | NaN | 0 | 1 | 8 | 0 | 1 | 0 | 1 |
4 | 1 | NaN | 0 | 1 | 10 | 1 | 1 | 1 | 2 |
model = """
data {
int N;
int qty_chimpanzees;
int qty_blocks;
int qty_treatments;
array[N] int pulled_left;
array[N] int actor;
array[N] int block;
array[N] int treatment;
}
parameters {
vector[qty_treatments] beta;
vector[qty_chimpanzees] alpha;
real bar_alpha;
real<lower=0> sigma_alpha;
vector[qty_blocks] gamma;
real<lower=0> sigma_gamma;
}
model {
vector[N] p;
// priors
beta ~ normal(0, 0.5);
alpha ~ normal(bar_alpha, sigma_alpha);
bar_alpha ~ normal(0, 1.5);
sigma_alpha ~ exponential(1);
gamma ~ normal(0, sigma_gamma);
sigma_gamma ~ exponential(1);
// link
for (i in 1:N){
p[i] = alpha[ actor[i] ] + gamma[ block[i] ] + beta[ treatment[i] ];
p[i] = inv_logit(p[i]);
}
// linkelihood
pulled_left ~ binomial(1, p);
}
"""
dat_list = df[['pulled_left', 'actor', 'block', 'treatment']].to_dict('list')
dat_list['N'] = len(df)
dat_list['qty_chimpanzees'] = len(df['actor'].unique())
dat_list['qty_blocks'] = len(df['block'].unique())
dat_list['qty_treatments'] = len(df['treatment'].unique())
posteriori = stan.build(model, data=dat_list)
samples = posteriori.sample(num_chains=4, num_samples=1000)
R Code 13.22¶
model_13_4 = az.from_pystan(
posterior=samples,
posterior_model=posteriori,
observed_data=dat_list
)
az.summary(model_13_4, hdi_prob=0.89)
mean | sd | hdi_5.5% | hdi_94.5% | mcse_mean | mcse_sd | ess_bulk | ess_tail | r_hat | |
---|---|---|---|---|---|---|---|---|---|
beta[0] | -0.178 | 0.296 | -0.569 | 0.352 | 0.042 | 0.030 | 54.0 | 1029.0 | 1.06 |
beta[1] | 0.332 | 0.321 | -0.083 | 0.828 | 0.077 | 0.056 | 20.0 | 1608.0 | 1.14 |
beta[2] | -0.537 | 0.308 | -0.994 | -0.069 | 0.059 | 0.043 | 31.0 | 1329.0 | 1.09 |
beta[3] | 0.232 | 0.297 | -0.221 | 0.702 | 0.046 | 0.033 | 45.0 | 1159.0 | 1.06 |
alpha[0] | -0.263 | 0.405 | -0.829 | 0.320 | 0.106 | 0.077 | 17.0 | 1605.0 | 1.17 |
alpha[1] | 4.763 | 1.249 | 2.811 | 6.460 | 0.035 | 0.025 | 1182.0 | 1293.0 | 1.03 |
alpha[2] | -0.624 | 0.347 | -1.205 | -0.104 | 0.021 | 0.024 | 400.0 | 1396.0 | 1.03 |
alpha[3] | -0.588 | 0.393 | -1.237 | -0.112 | 0.085 | 0.061 | 24.0 | 1595.0 | 1.12 |
alpha[4] | -0.275 | 0.387 | -0.899 | 0.220 | 0.092 | 0.066 | 20.0 | 860.0 | 1.14 |
alpha[5] | 0.620 | 0.353 | 0.033 | 1.161 | 0.016 | 0.011 | 507.0 | 1421.0 | 1.03 |
alpha[6] | 2.121 | 0.437 | 1.383 | 2.782 | 0.013 | 0.009 | 1137.0 | 1182.0 | 1.26 |
bar_alpha | 0.678 | 0.714 | -0.443 | 1.795 | 0.089 | 0.063 | 65.0 | 1892.0 | 1.04 |
sigma_alpha | 2.069 | 0.624 | 1.058 | 2.831 | 0.036 | 0.026 | 135.0 | 1582.0 | 1.04 |
gamma[0] | -0.154 | 0.208 | -0.471 | 0.101 | 0.025 | 0.018 | 76.0 | 1477.0 | 1.04 |
gamma[1] | 0.038 | 0.167 | -0.227 | 0.299 | 0.004 | 0.006 | 2537.0 | 1366.0 | 1.27 |
gamma[2] | 0.043 | 0.173 | -0.214 | 0.319 | 0.004 | 0.006 | 1370.0 | 1513.0 | 1.05 |
gamma[3] | 0.005 | 0.164 | -0.272 | 0.248 | 0.003 | 0.005 | 2437.0 | 1654.0 | 1.29 |
gamma[4] | -0.030 | 0.172 | -0.286 | 0.246 | 0.003 | 0.005 | 2766.0 | 1883.0 | 1.28 |
gamma[5] | 0.103 | 0.187 | -0.160 | 0.402 | 0.014 | 0.010 | 395.0 | 1375.0 | 1.04 |
sigma_gamma | 0.196 | 0.173 | 0.017 | 0.405 | 0.034 | 0.025 | 11.0 | 4.0 | 1.29 |
az.plot_forest(model_13_4, hdi_prob=0.89, combined=True, figsize=(15, 8))
plt.grid('--', color='white', alpha=0.2)
plt.axvline(x=0, color='red', alpha=0.5, ls='--')
plt.show()

plt.figure(figsize=(17, 6))
az.plot_dist(
[model_13_4.posterior.sigma_gamma], color='blue', quantiles=[.05, .89]
)
az.plot_dist(
[model_13_4.posterior.bar_alpha + model_13_4.posterior.sigma_alpha],
color='black', quantiles=[.05, .89]
)
plt.legend(['Block', 'Actor'])
plt.title('Posteioris')
plt.ylabel('Density')
plt.xlabel('Standard Deviation')
plt.show()

R Code 13.23¶
model = """
data {
int N;
int qty_chimpanzees;
int qty_blocks;
int qty_treatments;
array[N] int pulled_left;
array[N] int actor;
array[N] int block;
array[N] int treatment;
}
parameters {
vector[qty_treatments] beta;
vector[qty_chimpanzees] alpha;
real bar_alpha;
real<lower=0> sigma_alpha;
}
model {
vector[N] p;
// priors
beta ~ normal(0, 0.5);
alpha ~ normal(bar_alpha, sigma_alpha);
bar_alpha ~ normal(0, 1.5);
sigma_alpha ~ exponential(1);
// link
for (i in 1:N){
p[i] = alpha[ actor[i] ] + beta[ treatment[i] ];
p[i] = inv_logit(p[i]);
}
// linkelihood
pulled_left ~ binomial(1, p);
}
"""
dat_list = df[['pulled_left', 'actor', 'block', 'treatment']].to_dict('list')
dat_list['N'] = len(df)
dat_list['qty_chimpanzees'] = len(df['actor'].unique())
dat_list['qty_blocks'] = len(df['block'].unique())
dat_list['qty_treatments'] = len(df['treatment'].unique())
posteriori = stan.build(model, data=dat_list)
samples = posteriori.sample(num_chains=4, num_samples=1000)
model_13_5 = az.from_pystan(
posterior=samples,
posterior_model=posteriori,
observed_data=dat_list.keys()
)
R Code 13.24¶
# az.compare(model_13_4, model_13_5)
R Code 13.25¶
model = """
data {
int N;
int qty_chimpanzees;
int qty_blocks;
int qty_treatments;
array[N] int pulled_left;
array[N] int actor;
array[N] int block;
array[N] int treatment;
}
parameters {
vector[qty_treatments] beta;
real<lower=0> sigma_beta;
vector[qty_chimpanzees] alpha;
real bar_alpha;
real<lower=0> sigma_alpha;
vector[qty_blocks] gamma;
real<lower=0> sigma_gamma;
}
model {
vector[N] p;
// priors
beta ~ normal(0, sigma_beta);
sigma_beta ~ exponential(1);
alpha ~ normal(bar_alpha, sigma_alpha);
bar_alpha ~ normal(0, 1.5);
sigma_alpha ~ exponential(1);
gamma ~ normal(0, sigma_gamma);
sigma_gamma ~ exponential(1);
// link
for (i in 1:N){
p[i] = alpha[ actor[i] ] + gamma[ block[i] ] + beta[ treatment[i] ];
p[i] = inv_logit(p[i]);
}
// linkelihood
pulled_left ~ binomial(1, p);
}
"""
dat_list = df[['pulled_left', 'actor', 'block', 'treatment']].to_dict('list')
dat_list['N'] = len(df)
dat_list['qty_chimpanzees'] = len(df['actor'].unique())
dat_list['qty_blocks'] = len(df['block'].unique())
dat_list['qty_treatments'] = len(df['treatment'].unique())
posteriori = stan.build(model, data=dat_list)
samples = posteriori.sample(num_chains=4, num_samples=1000)
model_13_6 = az.from_pystan(
posterior=samples,
posterior_model=posteriori,
observed_data=dat_list.keys()
)
az.plot_forest([model_13_4, model_13_6], combined=True, figsize=(17,12), hdi_prob=0.89,
model_names = ["model_13_4", "model_13_6"])
plt.show()

13.4 Divergent Transitions and non-centered priors¶
R Code 13.26¶
model = """
parameters {
real v;
real x;
}
model {
v ~ normal(0, 3);
x ~ normal(0, exp(v));
}
"""
posteriori = stan.build(model)
samples = posteriori.sample(num_chains=4, num_samples=1000)
model_13_5 = az.from_pystan(
posterior=samples,
posterior_model=posteriori,
)
az.summary(model_13_5, hdi_prob=0.89)
mean | sd | hdi_5.5% | hdi_94.5% | mcse_mean | mcse_sd | ess_bulk | ess_tail | r_hat | |
---|---|---|---|---|---|---|---|---|---|
v | 3.515 | 1.508 | 1.243 | 5.486 | 0.208 | 0.148 | 37.0 | 26.0 | 1.1 |
x | -34.371 | 286.623 | -142.353 | 209.821 | 20.340 | 14.404 | 290.0 | 232.0 | 1.1 |
R Code 13.27¶
model = """
parameters {
real v;
real z;
}
model {
v ~ normal(0, 3);
z ~ normal(0, 1);
}
generated quantities {
real x;
x = z*exp(v);
}
"""
posteriori = stan.build(model)
samples = posteriori.sample(num_chains=4, num_samples=1000)
model_13_6 = az.from_pystan(
posterior=samples,
posterior_model=posteriori,
)
az.summary(model_13_6, hdi_prob=0.89)
mean | sd | hdi_5.5% | hdi_94.5% | mcse_mean | mcse_sd | ess_bulk | ess_tail | r_hat | |
---|---|---|---|---|---|---|---|---|---|
v | 0.055 | 2.991 | -4.401 | 5.018 | 0.050 | 0.048 | 3573.0 | 2181.0 | 1.0 |
z | 0.018 | 0.998 | -1.515 | 1.663 | 0.018 | 0.015 | 3015.0 | 2616.0 | 1.0 |
x | 4.028 | 3223.155 | -22.220 | 30.356 | 50.982 | 36.052 | 2943.0 | 2668.0 | 1.0 |
13.4.2 Non-centered chimpanzees¶
R Code 13.28¶
# Don't have apapt_delta in pystan3, until today.
R Code 13.29¶
To Actor:
$
To Block:
To Treatment:
Where, each actor is defined by:
and, each block is defined by:
model = """
data {
int N;
int qty_chimpanzees;
int qty_blocks;
int qty_treatments;
array[N] int pulled_left;
array[N] int actor;
array[N] int block;
array[N] int treatment;
}
parameters {
// To treatments
vector[qty_treatments] beta;
// To actors
real bar_alpha;
vector[qty_chimpanzees] z;
real<lower=0> sigma_alpha;
// To block
vector[qty_blocks] x;
real<lower=0> sigma_gamma;
}
model {
vector[N] p;
// priors
beta ~ normal(0, 0.5); // treatment
z ~ normal(0, 1); // actor
x ~ normal(0, 1); // block
bar_alpha ~ normal(0, 1.5); // Intercept to alpha (actor)
sigma_alpha ~ exponential(1);
sigma_gamma ~ exponential(1);
// Link
for (i in 1:N){
p[i] = bar_alpha + z[ actor[i] ]*sigma_alpha + x[ block[i] ]*sigma_gamma + beta[ treatment[i] ];
p[i] = inv_logit(p[i]);
}
// Linkelihood
pulled_left ~ binomial(1, p);
}
generated quantities {
vector[qty_chimpanzees] alpha;
vector[qty_blocks] gamma;
alpha = bar_alpha + z*sigma_alpha;
gamma = x*sigma_gamma;
}
"""
dat_list = df[['pulled_left', 'actor', 'block', 'treatment']].to_dict('list')
dat_list['N'] = len(df)
dat_list['qty_chimpanzees'] = len(df['actor'].unique())
dat_list['qty_blocks'] = len(df['block'].unique())
dat_list['qty_treatments'] = len(df['treatment'].unique())
posteriori = stan.build(model, data=dat_list)
samples = posteriori.sample(num_chains=4, num_samples=1000)
model_13_4_nc = az.from_pystan(
posterior=samples,
posterior_model=posteriori,
observed_data=dat_list.keys()
)
az.plot_forest(model_13_4_nc, hdi_prob=0.89, combined=True, figsize=(17, 13))
plt.axvline(x=0, ls='--', color='red')
plt.grid(axis='y', color='white', ls='--', alpha=0.5)
plt.show()

R Code 13.30¶
# Extract features from ess
# non-centered
ess_nc = np.array(az.ess(model_13_4_nc, var_names=['alpha']).alpha.values)
ess_nc = np.append(ess_nc, az.ess(model_13_4_nc, var_names=['beta']).beta.values)
ess_nc = np.append(ess_nc, az.ess(model_13_4_nc, var_names=['gamma']).gamma.values)
ess_nc = np.append(ess_nc, az.ess(model_13_4_nc, var_names=['bar_alpha']).bar_alpha.values)
ess_nc = np.append(ess_nc, az.ess(model_13_4_nc, var_names=['sigma_alpha']).sigma_alpha.values)
ess_nc = np.append(ess_nc, az.ess(model_13_4_nc, var_names=['sigma_gamma']).sigma_gamma.values)
# centered
ess_c = np.array(az.ess(model_13_4, var_names=['alpha']).alpha.values)
ess_c = np.append(ess_c, az.ess(model_13_4, var_names=['beta']).beta.values)
ess_c = np.append(ess_c, az.ess(model_13_4, var_names=['gamma']).gamma.values)
ess_c = np.append(ess_c, az.ess(model_13_4, var_names=['bar_alpha']).bar_alpha.values)
ess_c = np.append(ess_c, az.ess(model_13_4, var_names=['sigma_alpha']).sigma_alpha.values)
ess_c = np.append(ess_c, az.ess(model_13_4, var_names=['sigma_gamma']).sigma_gamma.values)
plt.figure(figsize=(17, 8))
plt.scatter(ess_c, ess_nc)
plt.plot([0, 2500], [0, 2500], ls='--', c='k', alpha=0.4)
plt.text(650, 500, 'Identity line ($x=y$)')
plt.title('Effective number samples')
plt.xlabel('n_eff(centered)')
plt.ylabel('n_eff(non-centered)')
plt.show()
