8 - Os peixes-bois condicionais

import numpy as np

from scipy import stats

import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec

import pandas as pd

import networkx as nx
# from causalgraphicalmodels import CausalGraphicalModel

import arviz as az
# ArviZ ships with style sheets!
# https://python.arviz.org/en/stable/examples/styles.html#example-styles
az.style.use("arviz-darkgrid")

import xarray as xr

import stan
import nest_asyncio

plt.style.use('default')
plt.rcParams['axes.facecolor'] = 'lightgray'

# To DAG's
import daft
from causalgraphicalmodels import CausalGraphicalModel
# Add fonts to matplotlib to run xkcd

from matplotlib import font_manager

font_dirs = ["fonts/"]  # The path to the custom font file.
font_files = font_manager.findSystemFonts(fontpaths=font_dirs)

for font_file in font_files:
    font_manager.fontManager.addfont(font_file)
# plt.xkcd()
# To running the stan in jupyter notebook
nest_asyncio.apply()

R Code 8.1

df = pd.read_csv('./data/rugged.csv', sep=';')
df.head()
isocode isonum country rugged rugged_popw rugged_slope rugged_lsd rugged_pc land_area lat ... africa_region_w africa_region_e africa_region_c slave_exports dist_slavemkt_atlantic dist_slavemkt_indian dist_slavemkt_saharan dist_slavemkt_redsea pop_1400 european_descent
0 ABW 533 Aruba 0.462 0.380 1.226 0.144 0.000 18.0 12.508 ... 0 0 0 0.0 NaN NaN NaN NaN 614.0 NaN
1 AFG 4 Afghanistan 2.518 1.469 7.414 0.720 39.004 65209.0 33.833 ... 0 0 0 0.0 NaN NaN NaN NaN 1870829.0 0.0
2 AGO 24 Angola 0.858 0.714 2.274 0.228 4.906 124670.0 -12.299 ... 0 0 1 3610000.0 5.669 6.981 4.926 3.872 1223208.0 2.0
3 AIA 660 Anguilla 0.013 0.010 0.026 0.006 0.000 9.0 18.231 ... 0 0 0 0.0 NaN NaN NaN NaN NaN NaN
4 ALB 8 Albania 3.427 1.597 10.451 1.006 62.133 2740.0 41.143 ... 0 0 0 0.0 NaN NaN NaN NaN 200000.0 100.0

5 rows × 51 columns

df['log_gdp'] = np.log(df['rgdppc_2000'])  # Log version of outcome
df[['rgdppc_2000', 'log_gdp']].head()
rgdppc_2000 log_gdp
0 NaN NaN
1 NaN NaN
2 1794.729 7.492609
3 NaN NaN
4 3703.113 8.216929
ddf = df[~np.isnan(df['log_gdp'].values)].copy()

ddf['log_gdp_std'] = ddf['log_gdp'] / np.mean(ddf['log_gdp'])
ddf['rugged_std'] = ddf['rugged'] / np.max(ddf['rugged'])

ddf[['log_gdp_std', 'rugged_std']]
log_gdp_std rugged_std
2 0.879712 0.138342
4 0.964755 0.552564
7 1.166270 0.123992
8 1.104485 0.124960
9 0.914904 0.433409
... ... ...
229 0.996681 0.270397
230 0.783032 0.374557
231 1.074365 0.283941
232 0.780967 0.085940
233 0.918589 0.192519

170 rows × 2 columns

plt.figure(figsize=(17, 8))

plt.hist(ddf['log_gdp_std'], density=True, rwidth=0.9)
plt.title('log_gdp \n 1: mean; \n 0.8: 80% of the average \n 1.1: 10% more than average')
plt.axvline(x=1, c='r', ls='--')
plt.show()

plt.figure(figsize=(17, 8))
plt.hist(ddf['rugged_std'], density=True, rwidth=0.9)
plt.title('Rugged \n min:0 and max:1')
plt.axvline(x=np.mean(ddf['rugged_std']), c='r', ls='--')
plt.show()
_images/parte_8_9_0.png _images/parte_8_9_1.png

R Code 8.2

model = """
    data {
        int N;
        vector[N] log_gdp_std;
        vector[N] rugged_std;
        real rugged_std_average;
    }
    
    parameters {
        real alpha;
        real beta;
        real<lower=0> sigma;
    }
    
    transformed parameters {
        vector[N] mu;
        
        mu = alpha + beta * (rugged_std - rugged_std_average);
    }
    
    model {
        // Prioris
        
        alpha ~ normal(1, 1);
        beta ~ normal(0, 1);
        sigma ~ exponential(1);
        
        // Likelihood
        log_gdp_std ~ normal(mu, sigma);
    }
    
    generated quantities {
        vector[N] log_lik;  // By default, if a variable log_lik is present in the Stan model, it will be retrieved as pointwise log likelihood values.
        vector[N] log_gdp_std_hat;
        
        for(i in 1:N){
            log_lik[i] = normal_lpdf(log_gdp_std[i] | mu[i], sigma);
            log_gdp_std_hat[i] = normal_rng(mu[i], sigma);
        }
    }
"""

data = {
    'N': len(ddf),
    'log_gdp_std': ddf['log_gdp_std'].values,
    'rugged_std': ddf['rugged_std'].values,
    'rugged_std_average': ddf['rugged_std'].mean(),
}

posteriori = stan.build(model, data=data)
samples = posteriori.sample(num_chains=4, num_samples=1000)
# Transform to dataframe pandas
df_samples = samples.to_frame()
df_samples.head()
parameters lp__ accept_stat__ stepsize__ treedepth__ n_leapfrog__ divergent__ energy__ alpha beta sigma ... log_gdp_std_hat.161 log_gdp_std_hat.162 log_gdp_std_hat.163 log_gdp_std_hat.164 log_gdp_std_hat.165 log_gdp_std_hat.166 log_gdp_std_hat.167 log_gdp_std_hat.168 log_gdp_std_hat.169 log_gdp_std_hat.170
draws
0 251.095956 0.934916 0.756421 2.0 3.0 0.0 -249.133644 1.003160 0.012665 0.132500 ... 1.034012 0.906565 0.989983 0.945965 1.046566 1.012428 0.871437 0.955593 1.041951 0.856201
1 247.262579 0.989367 0.751819 2.0 3.0 0.0 -246.744846 1.005781 0.135747 0.151454 ... 1.044460 1.144566 0.875533 0.860964 1.094581 1.177379 1.083208 1.013122 0.992572 1.167171
2 250.380368 0.998516 0.943934 2.0 7.0 0.0 -249.945732 1.002515 0.032714 0.128198 ... 1.047150 1.007480 1.019530 1.052204 1.112828 1.149602 1.175402 1.279579 1.000648 0.870344
3 250.516786 0.913792 0.862509 2.0 3.0 0.0 -249.646704 1.002620 0.045010 0.130002 ... 1.144794 0.994287 1.006064 1.133213 0.878218 1.246487 0.956800 0.669329 1.084075 0.973772
4 247.927975 0.574694 0.756421 2.0 3.0 0.0 -246.256142 1.018086 -0.077998 0.129151 ... 0.921305 0.814742 0.918643 0.982626 0.949444 0.886278 1.159055 0.902815 0.837451 1.115775

5 rows × 520 columns

# stan_fit to arviz_stan

stan_data = az.from_pystan(
    posterior=samples,
    posterior_predictive="log_gdp_std_hat",
    observed_data=['log_gdp_std'],
    prior=samples,
    prior_model=posteriori,
    posterior_model=posteriori,
)
stan_data
arviz.InferenceData
    • <xarray.Dataset>
      Dimensions:   (chain: 4, draw: 1000, mu_dim_0: 170)
      Coordinates:
        * chain     (chain) int64 0 1 2 3
        * draw      (draw) int64 0 1 2 3 4 5 6 7 8 ... 992 993 994 995 996 997 998 999
        * mu_dim_0  (mu_dim_0) int64 0 1 2 3 4 5 6 7 ... 163 164 165 166 167 168 169
      Data variables:
          alpha     (chain, draw) float64 1.003 1.018 1.01 ... 1.0 0.9874 0.9897
          beta      (chain, draw) float64 0.01267 -0.078 -0.09028 ... -0.06006 -0.0592
          sigma     (chain, draw) float64 0.1325 0.1292 0.1447 ... 0.1439 0.1427
          mu        (chain, draw, mu_dim_0) float64 1.002 1.007 1.002 ... 0.9973 0.991
      Attributes:
          created_at:                 2023-08-11T19:31:12.686511
          arviz_version:              0.15.1
          inference_library:          stan
          inference_library_version:  3.7.0
          num_chains:                 4
          num_samples:                1000
          num_thin:                   1
          num_warmup:                 1000
          save_warmup:                0
          model_name:                 models/tsiivt35
          program_code:               \n    data {\n        int N;\n        vector[...
          random_seed:                None

    • <xarray.Dataset>
      Dimensions:                (chain: 4, draw: 1000, log_gdp_std_hat_dim_0: 170)
      Coordinates:
        * chain                  (chain) int64 0 1 2 3
        * draw                   (draw) int64 0 1 2 3 4 5 ... 994 995 996 997 998 999
        * log_gdp_std_hat_dim_0  (log_gdp_std_hat_dim_0) int64 0 1 2 3 ... 167 168 169
      Data variables:
          log_gdp_std_hat        (chain, draw, log_gdp_std_hat_dim_0) float64 1.119...
      Attributes:
          created_at:                 2023-08-11T19:31:12.851711
          arviz_version:              0.15.1
          inference_library:          stan
          inference_library_version:  3.7.0

    • <xarray.Dataset>
      Dimensions:        (chain: 4, draw: 1000, log_lik_dim_0: 170)
      Coordinates:
        * chain          (chain) int64 0 1 2 3
        * draw           (draw) int64 0 1 2 3 4 5 6 7 ... 993 994 995 996 997 998 999
        * log_lik_dim_0  (log_lik_dim_0) int64 0 1 2 3 4 5 ... 164 165 166 167 168 169
      Data variables:
          log_lik        (chain, draw, log_lik_dim_0) float64 0.675 1.05 ... 0.899
      Attributes:
          created_at:                 2023-08-11T19:31:12.800781
          arviz_version:              0.15.1
          inference_library:          stan
          inference_library_version:  3.7.0

    • <xarray.Dataset>
      Dimensions:          (chain: 4, draw: 1000)
      Coordinates:
        * chain            (chain) int64 0 1 2 3
        * draw             (draw) int64 0 1 2 3 4 5 6 ... 993 994 995 996 997 998 999
      Data variables:
          acceptance_rate  (chain, draw) float64 0.9349 0.5747 0.946 ... 0.9162 1.0
          step_size        (chain, draw) float64 0.7564 0.7564 ... 0.8625 0.8625
          tree_depth       (chain, draw) int64 2 2 2 2 2 2 2 3 2 ... 2 3 2 3 3 2 2 2 1
          n_steps          (chain, draw) int64 3 3 3 3 3 3 3 7 3 ... 3 7 3 7 7 3 3 7 1
          diverging        (chain, draw) bool False False False ... False False False
          energy           (chain, draw) float64 -249.1 -246.3 ... -247.2 -249.7
          lp               (chain, draw) float64 251.1 247.9 249.2 ... 249.7 250.1
      Attributes:
          created_at:                 2023-08-11T19:31:12.739805
          arviz_version:              0.15.1
          inference_library:          stan
          inference_library_version:  3.7.0
          num_chains:                 4
          num_samples:                1000
          num_thin:                   1
          num_warmup:                 1000
          save_warmup:                0
          model_name:                 models/tsiivt35
          program_code:               \n    data {\n        int N;\n        vector[...
          random_seed:                None

    • <xarray.Dataset>
      Dimensions:                (chain: 4, draw: 1000, mu_dim_0: 170,
                                  log_lik_dim_0: 170, log_gdp_std_hat_dim_0: 170)
      Coordinates:
        * chain                  (chain) int64 0 1 2 3
        * draw                   (draw) int64 0 1 2 3 4 5 ... 994 995 996 997 998 999
        * mu_dim_0               (mu_dim_0) int64 0 1 2 3 4 5 ... 165 166 167 168 169
        * log_lik_dim_0          (log_lik_dim_0) int64 0 1 2 3 4 ... 166 167 168 169
        * log_gdp_std_hat_dim_0  (log_gdp_std_hat_dim_0) int64 0 1 2 3 ... 167 168 169
      Data variables:
          alpha                  (chain, draw) float64 1.003 1.018 ... 0.9874 0.9897
          beta                   (chain, draw) float64 0.01267 -0.078 ... -0.0592
          sigma                  (chain, draw) float64 0.1325 0.1292 ... 0.1439 0.1427
          mu                     (chain, draw, mu_dim_0) float64 1.002 1.007 ... 0.991
          log_lik                (chain, draw, log_lik_dim_0) float64 0.675 ... 0.899
          log_gdp_std_hat        (chain, draw, log_gdp_std_hat_dim_0) float64 1.119...
      Attributes:
          created_at:                 2023-08-11T19:31:12.909230
          arviz_version:              0.15.1
          inference_library:          stan
          inference_library_version:  3.7.0
          num_chains:                 4
          num_samples:                1000
          num_thin:                   1
          num_warmup:                 1000
          save_warmup:                0
          model_name:                 models/tsiivt35
          program_code:               \n    data {\n        int N;\n        vector[...
          random_seed:                None

    • <xarray.Dataset>
      Dimensions:          (chain: 4, draw: 1000)
      Coordinates:
        * chain            (chain) int64 0 1 2 3
        * draw             (draw) int64 0 1 2 3 4 5 6 ... 993 994 995 996 997 998 999
      Data variables:
          lp               (chain, draw) float64 251.1 247.9 249.2 ... 249.7 250.1
          acceptance_rate  (chain, draw) float64 0.9349 0.5747 0.946 ... 0.9162 1.0
          step_size        (chain, draw) float64 0.7564 0.7564 ... 0.8625 0.8625
          tree_depth       (chain, draw) int64 2 2 2 2 2 2 2 3 2 ... 2 3 2 3 3 2 2 2 1
          n_steps          (chain, draw) int64 3 3 3 3 3 3 3 7 3 ... 3 7 3 7 7 3 3 7 1
          diverging        (chain, draw) bool False False False ... False False False
          energy           (chain, draw) float64 -249.1 -246.3 ... -247.2 -249.7
      Attributes:
          created_at:                 2023-08-11T19:31:12.966971
          arviz_version:              0.15.1
          inference_library:          stan
          inference_library_version:  3.7.0
          num_chains:                 4
          num_samples:                1000
          num_thin:                   1
          num_warmup:                 1000
          save_warmup:                0
          model_name:                 models/tsiivt35
          program_code:               \n    data {\n        int N;\n        vector[...
          random_seed:                None

    • <xarray.Dataset>
      Dimensions:            (log_gdp_std_dim_0: 170)
      Coordinates:
        * log_gdp_std_dim_0  (log_gdp_std_dim_0) int64 0 1 2 3 4 ... 166 167 168 169
      Data variables:
          log_gdp_std        (log_gdp_std_dim_0) float64 0.8797 0.9648 ... 0.9186
      Attributes:
          created_at:                 2023-08-11T19:31:12.653606
          arviz_version:              0.15.1
          inference_library:          stan
          inference_library_version:  3.7.0

az.summary(stan_data, var_names=['alpha', 'beta', 'sigma'])
mean sd hdi_3% hdi_97% mcse_mean mcse_sd ess_bulk ess_tail r_hat
alpha 1.000 0.010 0.980 1.019 0.000 0.000 4227.0 2654.0 1.0
beta 0.002 0.056 -0.097 0.113 0.001 0.001 3467.0 2810.0 1.0
sigma 0.138 0.008 0.125 0.153 0.000 0.000 3838.0 2808.0 1.0

R Code 8.3

plt.figure(figsize=(17, 8))

alpha = np.random.normal(1, 1, 1000)
beta = np.random.normal(0, 1, 1000)

rugged_seq = np.linspace(0, 1, 100)

for i in range(100):
    plt.plot(rugged_seq, alpha[i] + beta[i] * rugged_seq, c='blue')
    
plt.axhline(y=1.3, c='r', ls='--')    
plt.axhline(y=0.7, c='r', ls='--')    
    
plt.ylim((0.5, 1.5))
plt.title('Using vague priors')
plt.xlabel('Ruggedness')
plt.ylabel('Log GDP')

plt.show()
_images/parte_8_17_0.png

R Code 8.4

plt.figure(figsize=(17, 8))

plt.hist(beta, bins=30, rwidth=0.9)
plt.axvline(x=0.6, c='r', ls='--')
plt.axvline(x=-0.6, c='r', ls='--')
plt.show()
_images/parte_8_19_0.png
np.sum(np.sum(np.abs(beta) > 0.6) / len(beta))
0.55

R Code 8.5

plt.figure(figsize=(17, 8))

alpha = np.random.normal(1, 0.1, 1000)
beta = np.random.normal(0, 0.3, 1000)

rugged_seq = np.linspace(0, 1, 100)

for i in range(100):
    plt.plot(rugged_seq, alpha[i] + beta[i] * rugged_seq, c='blue')
    
plt.axhline(y=1.3, c='r', ls='--')    
plt.axhline(y=0.7, c='r', ls='--')    

plt.ylim((0.5, 1.5))
plt.title('Using a informative prior')
plt.xlabel('Ruggedness')
plt.ylabel('Log GDP')

plt.show()
_images/parte_8_22_0.png
model2 = """
    data {
        int N;
        vector[N] log_gdp_std;
        vector[N] rugged_std;
        real rugged_std_average;
    }
    
    parameters {
        real alpha;
        real beta;
        real<lower=0> sigma;
    }
    
    transformed parameters {
        vector[N] mu;
        
        mu = alpha + beta * (rugged_std - rugged_std_average);
    
    }
    
    model {
        // Prioris
        
        alpha ~ normal(1, 0.1);
        beta ~ normal(0, 0.3);
        sigma ~ exponential(1);
        
        // Likelihood
        log_gdp_std ~ normal(mu, sigma);
    }
    
    generated quantities {
        vector[N] log_lik;  // By default, if a variable log_lik is present in the Stan model, it will be retrieved as pointwise log likelihood values.
        vector[N] log_gdp_std_hat;
        
        for(i in 1:N){
            log_lik[i] = normal_lpdf(log_gdp_std[i] | mu[i], sigma);
            log_gdp_std_hat[i] = normal_rng(mu[i], sigma);
        }
    }
"""

data = {
    'N': len(ddf),
    'log_gdp_std': ddf['log_gdp_std'].values,
    'rugged_std': ddf['rugged_std'].values,
    'rugged_std_average': ddf['rugged_std'].mean(),
}

posteriori = stan.build(model2, data=data)
samples = posteriori.sample(num_chains=4, num_samples=1000)
stan_data2 = az.from_pystan(
    posterior=samples,
    posterior_predictive="log_gdp_std_hat",
    observed_data=['log_gdp_std'],
    prior=samples,
    prior_model=posteriori,
    posterior_model=posteriori,
)
az.summary(stan_data, var_names=['alpha', 'beta', 'sigma'])
mean sd hdi_3% hdi_97% mcse_mean mcse_sd ess_bulk ess_tail r_hat
alpha 1.000 0.010 0.980 1.019 0.000 0.000 4227.0 2654.0 1.0
beta 0.002 0.056 -0.097 0.113 0.001 0.001 3467.0 2810.0 1.0
sigma 0.138 0.008 0.125 0.153 0.000 0.000 3838.0 2808.0 1.0

R Code 8.7

ddf['cid'] = [1 if cont_africa == 1 else 2 for cont_africa in ddf['cont_africa']]
ddf[['cont_africa', 'cid']].head()
cont_africa cid
2 1 1
4 0 2
7 0 2
8 0 2
9 0 2

R Code 8.8

model3 = """
    data {
        int N;
        vector[N] log_gdp_std;
        vector[N] rugged_std;
        array[N] int cid;  // Must be integer because this is index to alpha.
        real rugged_std_average;
    }
    
    parameters {
        real alpha[2];  //Can be used to real alpha[2] or array[2] int alpha;
        real beta;
        real<lower=0> sigma;
    }
    
    transformed parameters {
        vector[N] mu;
        
        for (i in 1:N){
            mu[i] = alpha[ cid[i] ] + beta * (rugged_std[i] - rugged_std_average);
        }
    }
    
    model {
        // Prioris
        
        alpha ~ normal(1, 0.1);
        beta ~ normal(0, 0.3);
        sigma ~ exponential(1);
        
        // Likelihood
        log_gdp_std ~ normal(mu, sigma);
    }
    
    generated quantities {
        vector[N] log_lik;
        vector[N] log_gdp_std_hat;
        
        for(i in 1:N){
            log_lik[i] = normal_lpdf(log_gdp_std[i] | mu[i], sigma);
            log_gdp_std_hat[i] = normal_rng(mu[i], sigma);
        }
    }
"""

data = {
    'N': len(ddf),
    'log_gdp_std': ddf['log_gdp_std'].values,
    'rugged_std': ddf['rugged_std'].values,
    'rugged_std_average': ddf['rugged_std'].mean(),
    'cid': ddf['cid'].values,
}

posteriori = stan.build(model3, data=data)
samples = posteriori.sample(num_chains=4, num_samples=1000)
stan_data3 = az.from_pystan(
    posterior=samples,
    posterior_predictive="log_gdp_std_hat",
    observed_data=['log_gdp_std'],
    prior=samples,
    prior_model=posteriori,
    posterior_model=posteriori,
    dims={
        "alpha": ["africa"],
    },
)

R Code 8.9

models_8 = { 'm8.1': stan_data2, 'm8.2': stan_data3 }

az.compare(models_8, ic='waic')
/home/rodolpho/Projects/bayesian/BAYES/lib/python3.8/site-packages/arviz/stats/stats.py:1645: UserWarning: For one or more samples the posterior variance of the log predictive densities exceeds 0.4. This could be indication of WAIC starting to fail. 
See http://arxiv.org/abs/1507.04544 for details
  warnings.warn(
rank elpd_waic p_waic elpd_diff weight se dse warning scale
m8.2 0 126.166124 4.117261 0.000000 0.969256 7.396644 0.000000 True log
m8.1 1 94.499399 2.491680 31.666725 0.030744 6.463956 7.317875 False log

R Code 8.10

az.summary(stan_data3, var_names=['alpha', 'beta', 'sigma'])
mean sd hdi_3% hdi_97% mcse_mean mcse_sd ess_bulk ess_tail r_hat
alpha[0] 0.880 0.016 0.851 0.911 0.000 0.000 4512.0 3483.0 1.0
alpha[1] 1.049 0.010 1.031 1.069 0.000 0.000 4305.0 2642.0 1.0
beta -0.046 0.047 -0.130 0.048 0.001 0.001 3508.0 2517.0 1.0
sigma 0.114 0.006 0.103 0.126 0.000 0.000 3742.0 3076.0 1.0

R Code 8.11

stan_data3
arviz.InferenceData
    • <xarray.Dataset>
      Dimensions:   (chain: 4, draw: 1000, africa: 2, mu_dim_0: 170)
      Coordinates:
        * chain     (chain) int64 0 1 2 3
        * draw      (draw) int64 0 1 2 3 4 5 6 7 8 ... 992 993 994 995 996 997 998 999
        * africa    (africa) int64 0 1
        * mu_dim_0  (mu_dim_0) int64 0 1 2 3 4 5 6 7 ... 163 164 165 166 167 168 169
      Data variables:
          alpha     (chain, draw, africa) float64 0.8731 1.052 0.8843 ... 0.8777 1.072
          beta      (chain, draw) float64 -0.04755 -0.02791 ... -0.07995 0.03227
          sigma     (chain, draw) float64 0.1152 0.1109 0.11 ... 0.1056 0.1162 0.1162
          mu        (chain, draw, mu_dim_0) float64 0.8768 1.036 ... 0.8736 0.877
      Attributes:
          created_at:                 2023-08-11T18:56:27.122181
          arviz_version:              0.15.1
          inference_library:          stan
          inference_library_version:  3.7.0
          num_chains:                 4
          num_samples:                1000
          num_thin:                   1
          num_warmup:                 1000
          save_warmup:                0
          model_name:                 models/22jq2cue
          program_code:               \n    data {\n        int N;\n        vector[...
          random_seed:                None

    • <xarray.Dataset>
      Dimensions:                (chain: 4, draw: 1000, log_gdp_std_hat_dim_0: 170)
      Coordinates:
        * chain                  (chain) int64 0 1 2 3
        * draw                   (draw) int64 0 1 2 3 4 5 ... 994 995 996 997 998 999
        * log_gdp_std_hat_dim_0  (log_gdp_std_hat_dim_0) int64 0 1 2 3 ... 167 168 169
      Data variables:
          log_gdp_std_hat        (chain, draw, log_gdp_std_hat_dim_0) float64 0.746...
      Attributes:
          created_at:                 2023-08-11T18:56:27.276036
          arviz_version:              0.15.1
          inference_library:          stan
          inference_library_version:  3.7.0

    • <xarray.Dataset>
      Dimensions:        (chain: 4, draw: 1000, log_lik_dim_0: 170)
      Coordinates:
        * chain          (chain) int64 0 1 2 3
        * draw           (draw) int64 0 1 2 3 4 5 6 7 ... 993 994 995 996 997 998 999
        * log_lik_dim_0  (log_lik_dim_0) int64 0 1 2 3 4 5 ... 164 165 166 167 168 169
      Data variables:
          log_lik        (chain, draw, log_lik_dim_0) float64 1.242 1.051 ... 1.169
      Attributes:
          created_at:                 2023-08-11T18:56:27.225035
          arviz_version:              0.15.1
          inference_library:          stan
          inference_library_version:  3.7.0

    • <xarray.Dataset>
      Dimensions:          (chain: 4, draw: 1000)
      Coordinates:
        * chain            (chain) int64 0 1 2 3
        * draw             (draw) int64 0 1 2 3 4 5 6 ... 993 994 995 996 997 998 999
      Data variables:
          acceptance_rate  (chain, draw) float64 0.9829 0.9857 0.9661 ... 0.7619 1.0
          step_size        (chain, draw) float64 0.7249 0.7249 ... 0.7518 0.7518
          tree_depth       (chain, draw) int64 3 3 2 2 3 2 3 2 3 ... 3 2 3 2 3 3 3 3 3
          n_steps          (chain, draw) int64 7 7 3 7 7 3 7 3 7 ... 7 3 7 3 7 7 7 7 7
          diverging        (chain, draw) bool False False False ... False False False
          energy           (chain, draw) float64 -281.1 -282.8 ... -274.9 -275.5
          lp               (chain, draw) float64 283.2 283.2 282.8 ... 278.0 279.2
      Attributes:
          created_at:                 2023-08-11T18:56:27.172412
          arviz_version:              0.15.1
          inference_library:          stan
          inference_library_version:  3.7.0
          num_chains:                 4
          num_samples:                1000
          num_thin:                   1
          num_warmup:                 1000
          save_warmup:                0
          model_name:                 models/22jq2cue
          program_code:               \n    data {\n        int N;\n        vector[...
          random_seed:                None

    • <xarray.Dataset>
      Dimensions:                (chain: 4, draw: 1000, africa: 2, mu_dim_0: 170,
                                  log_lik_dim_0: 170, log_gdp_std_hat_dim_0: 170)
      Coordinates:
        * chain                  (chain) int64 0 1 2 3
        * draw                   (draw) int64 0 1 2 3 4 5 ... 994 995 996 997 998 999
        * africa                 (africa) int64 0 1
        * mu_dim_0               (mu_dim_0) int64 0 1 2 3 4 5 ... 165 166 167 168 169
        * log_lik_dim_0          (log_lik_dim_0) int64 0 1 2 3 4 ... 166 167 168 169
        * log_gdp_std_hat_dim_0  (log_gdp_std_hat_dim_0) int64 0 1 2 3 ... 167 168 169
      Data variables:
          alpha                  (chain, draw, africa) float64 0.8731 1.052 ... 1.072
          beta                   (chain, draw) float64 -0.04755 -0.02791 ... 0.03227
          sigma                  (chain, draw) float64 0.1152 0.1109 ... 0.1162 0.1162
          mu                     (chain, draw, mu_dim_0) float64 0.8768 ... 0.877
          log_lik                (chain, draw, log_lik_dim_0) float64 1.242 ... 1.169
          log_gdp_std_hat        (chain, draw, log_gdp_std_hat_dim_0) float64 0.746...
      Attributes:
          created_at:                 2023-08-11T18:56:27.342979
          arviz_version:              0.15.1
          inference_library:          stan
          inference_library_version:  3.7.0
          num_chains:                 4
          num_samples:                1000
          num_thin:                   1
          num_warmup:                 1000
          save_warmup:                0
          model_name:                 models/22jq2cue
          program_code:               \n    data {\n        int N;\n        vector[...
          random_seed:                None

    • <xarray.Dataset>
      Dimensions:          (chain: 4, draw: 1000)
      Coordinates:
        * chain            (chain) int64 0 1 2 3
        * draw             (draw) int64 0 1 2 3 4 5 6 ... 993 994 995 996 997 998 999
      Data variables:
          lp               (chain, draw) float64 283.2 283.2 282.8 ... 278.0 279.2
          acceptance_rate  (chain, draw) float64 0.9829 0.9857 0.9661 ... 0.7619 1.0
          step_size        (chain, draw) float64 0.7249 0.7249 ... 0.7518 0.7518
          tree_depth       (chain, draw) int64 3 3 2 2 3 2 3 2 3 ... 3 2 3 2 3 3 3 3 3
          n_steps          (chain, draw) int64 7 7 3 7 7 3 7 3 7 ... 7 3 7 3 7 7 7 7 7
          diverging        (chain, draw) bool False False False ... False False False
          energy           (chain, draw) float64 -281.1 -282.8 ... -274.9 -275.5
      Attributes:
          created_at:                 2023-08-11T18:56:27.392039
          arviz_version:              0.15.1
          inference_library:          stan
          inference_library_version:  3.7.0
          num_chains:                 4
          num_samples:                1000
          num_thin:                   1
          num_warmup:                 1000
          save_warmup:                0
          model_name:                 models/22jq2cue
          program_code:               \n    data {\n        int N;\n        vector[...
          random_seed:                None

    • <xarray.Dataset>
      Dimensions:            (log_gdp_std_dim_0: 170)
      Coordinates:
        * log_gdp_std_dim_0  (log_gdp_std_dim_0) int64 0 1 2 3 4 ... 166 167 168 169
      Data variables:
          log_gdp_std        (log_gdp_std_dim_0) float64 0.8797 0.9648 ... 0.9186
      Attributes:
          created_at:                 2023-08-11T18:56:27.082048
          arviz_version:              0.15.1
          inference_library:          stan
          inference_library_version:  3.7.0

alpha_a1 = stan_data3.posterior.alpha.sel(africa=0)
alpha_a2 = stan_data3.posterior.alpha.sel(africa=1)

diff_alpha_a1_a2 = az.extract(alpha_a1 - alpha_a2).alpha.values

az.hdi(diff_alpha_a1_a2, hdi_prob=0.89)
array([-0.19925646, -0.13838364])

R Code 8.12

# Extract 200 samples from arviz-fit to numpy
params_post = az.extract(stan_data3.posterior, num_samples=200)
rugged_seq = np.linspace(0, 1, 30)

log_gdp_mean_africa = []
log_gdp_hdi_africa = []

log_gdp_mean_not_africa = []
log_gdp_hdi_not_africa = []

# Calculation posterior mean and interval HDI
for i in range(len(rugged_seq)):
        log_gdp_africa = params_post.alpha.sel(africa=0) + params_post.beta.values * rugged_seq[i]
        log_gdp_mean_africa.append(np.mean(log_gdp_africa.values))
        log_gdp_hdi_africa.append(az.hdi(log_gdp_africa.values, hdi_prob=0.89))
        
        log_gdp_not_africa = params_post.alpha.sel(africa=1) + params_post.beta.values * rugged_seq[i]
        log_gdp_mean_not_africa.append(np.mean(log_gdp_not_africa.values))
        log_gdp_hdi_not_africa.append(az.hdi(log_gdp_not_africa.values, hdi_prob=0.89))
        
log_gdp_hdi_africa = np.array(log_gdp_hdi_africa)
log_gdp_hdi_not_africa = np.array(log_gdp_hdi_not_africa) 
plt.figure(figsize=(17, 8))

plt.plot(rugged_seq, log_gdp_mean_africa, c='blue')
plt.fill_between(rugged_seq, log_gdp_hdi_africa[:,0], log_gdp_hdi_africa[:,1], color='blue', alpha=0.3)

plt.plot(rugged_seq, log_gdp_mean_not_africa, c='black')
plt.fill_between(rugged_seq, log_gdp_hdi_not_africa[:,0], log_gdp_hdi_not_africa[:,1], color='darkgray', alpha=0.3)

plt.plot(ddf.loc[ddf.cid == 1,'rugged_std'], ddf.loc[ddf.cid==1, 'log_gdp_std'], 'o', markerfacecolor='blue', color='gray')
plt.plot(ddf.loc[ddf.cid == 2,'rugged_std'], ddf.loc[ddf.cid==2, 'log_gdp_std'], 'o', markerfacecolor='none', color='black')

plt.title('m8.4')
plt.xlabel('ruggedness (standardized)')
plt.ylabel('log GDP (as proportion of mean)')

plt.text(0.8, 1.04, 'Not Africa', fontsize='x-large', color='black')
plt.text(0.8, 0.86, 'Africa',  fontsize='x-large', color='darkblue')

plt.show()
_images/parte_8_41_0.png

R Code 8.13

model4 = """
    data {
        int N;
        vector[N] log_gdp_std;
        vector[N] rugged_std;
        array[N] int cid;  // Must be integer because this is index to alpha.
        real rugged_std_average;
    }
    
    parameters {
        real alpha[2];  //Can be used to real alpha[2] or array[2] int alpha;
        real beta[2];
        real<lower=0> sigma;
    }
    
    transformed parameters {
        vector[N] mu;
        
        for (i in 1:N){
            mu[i] = alpha[ cid[i] ] + beta[ cid[i] ] * (rugged_std[i] - rugged_std_average);
        }
    }
    
    model {
        // Prioris
        
        alpha ~ normal(1, 0.1);
        beta ~ normal(0, 0.3);
        sigma ~ exponential(1);
        
        // Likelihood
        log_gdp_std ~ normal(mu, sigma);
    }
    
    generated quantities {
        vector[N] log_lik;
        vector[N] log_gdp_std_hat;
        
        for(i in 1:N){
            log_lik[i] = normal_lpdf(log_gdp_std[i] | mu[i], sigma);
            log_gdp_std_hat[i] = normal_rng(mu[i], sigma);
        }
    }
"""

data = {
    'N': len(ddf),
    'log_gdp_std': ddf['log_gdp_std'].values,
    'rugged_std': ddf['rugged_std'].values,
    'rugged_std_average': ddf['rugged_std'].mean(),
    'cid': ddf['cid'].values,
}

posteriori = stan.build(model4, data=data)
samples = posteriori.sample(num_chains=4, num_samples=1000)
stan_data4 = az.from_pystan(
    posterior=samples,
    posterior_predictive="log_gdp_std_hat",
    observed_data=['log_gdp_std'],
    prior=samples,
    prior_model=posteriori,
    posterior_model=posteriori,
    dims={
        "alpha": ["africa"],
        "beta": ["africa"],
    },
)

R Code 8.14

az.summary(stan_data4, var_names=['alpha', 'beta', 'sigma'])
mean sd hdi_3% hdi_97% mcse_mean mcse_sd ess_bulk ess_tail r_hat
alpha[0] 0.887 0.016 0.858 0.916 0.000 0.000 5156.0 2914.0 1.0
alpha[1] 1.051 0.010 1.033 1.071 0.000 0.000 5063.0 3095.0 1.0
beta[0] 0.131 0.075 -0.006 0.270 0.001 0.001 4710.0 3386.0 1.0
beta[1] -0.143 0.056 -0.247 -0.036 0.001 0.001 4727.0 3117.0 1.0
sigma 0.112 0.006 0.100 0.123 0.000 0.000 5069.0 3262.0 1.0

R Code 8.15

models_8 = { 'm8.1': stan_data2, 'm8.2': stan_data3, 'm8.3': stan_data4 }

# https://python.arviz.org/en/stable/api/generated/arviz.compare.html
# https://rss.onlinelibrary.wiley.com/doi/10.1111/1467-9868.00353
az.compare(models_8, ic='loo')  # loo is same that PSIS
rank elpd_loo p_loo elpd_diff weight se dse warning scale
m8.3 0 129.589277 4.989499 0.000000 0.870793 7.309693 0.000000 False log
m8.2 1 126.137495 4.145890 3.451783 0.129207 7.405339 3.225278 False log
m8.1 2 94.492463 2.498616 35.096814 0.000000 6.464447 7.448111 False log

R Code 8.16

az.loo(stan_data4, pointwise=True)
Computed from 4000 posterior samples and 170 observations log-likelihood matrix.

         Estimate       SE
elpd_loo   129.59     7.31
p_loo        4.99        -
------

Pareto k diagnostic values:
                         Count   Pct.
(-Inf, 0.5]   (good)      169   99.4%
 (0.5, 0.7]   (ok)          1    0.6%
   (0.7, 1]   (bad)         0    0.0%
   (1, Inf)   (very bad)    0    0.0%
stan_data4
arviz.InferenceData
    • <xarray.Dataset>
      Dimensions:   (chain: 4, draw: 1000, africa: 2, mu_dim_0: 170)
      Coordinates:
        * chain     (chain) int64 0 1 2 3
        * draw      (draw) int64 0 1 2 3 4 5 6 7 8 ... 992 993 994 995 996 997 998 999
        * africa    (africa) int64 0 1
        * mu_dim_0  (mu_dim_0) int64 0 1 2 3 4 5 6 7 ... 163 164 165 166 167 168 169
      Data variables:
          alpha     (chain, draw, africa) float64 0.88 1.051 0.8771 ... 0.876 1.022
          beta      (chain, draw, africa) float64 0.2193 -0.1387 ... 0.1464 -0.1622
          sigma     (chain, draw) float64 0.1212 0.131 0.1102 ... 0.1118 0.1209 0.1094
          mu        (chain, draw, mu_dim_0) float64 0.8632 1.004 ... 0.8572 0.8728
      Attributes:
          created_at:                 2023-08-11T18:56:30.413832
          arviz_version:              0.15.1
          inference_library:          stan
          inference_library_version:  3.7.0
          num_chains:                 4
          num_samples:                1000
          num_thin:                   1
          num_warmup:                 1000
          save_warmup:                0
          model_name:                 models/5jmion4o
          program_code:               \n    data {\n        int N;\n        vector[...
          random_seed:                None

    • <xarray.Dataset>
      Dimensions:                (chain: 4, draw: 1000, log_gdp_std_hat_dim_0: 170)
      Coordinates:
        * chain                  (chain) int64 0 1 2 3
        * draw                   (draw) int64 0 1 2 3 4 5 ... 994 995 996 997 998 999
        * log_gdp_std_hat_dim_0  (log_gdp_std_hat_dim_0) int64 0 1 2 3 ... 167 168 169
      Data variables:
          log_gdp_std_hat        (chain, draw, log_gdp_std_hat_dim_0) float64 0.913...
      Attributes:
          created_at:                 2023-08-11T18:56:30.571354
          arviz_version:              0.15.1
          inference_library:          stan
          inference_library_version:  3.7.0

    • <xarray.Dataset>
      Dimensions:        (chain: 4, draw: 1000, log_lik_dim_0: 170)
      Coordinates:
        * chain          (chain) int64 0 1 2 3
        * draw           (draw) int64 0 1 2 3 4 5 6 7 ... 993 994 995 996 997 998 999
        * log_lik_dim_0  (log_lik_dim_0) int64 0 1 2 3 4 5 ... 164 165 166 167 168 169
      Data variables:
          log_lik        (chain, draw, log_lik_dim_0) float64 1.182 1.139 ... 1.206
      Attributes:
          created_at:                 2023-08-11T18:56:30.520437
          arviz_version:              0.15.1
          inference_library:          stan
          inference_library_version:  3.7.0

    • <xarray.Dataset>
      Dimensions:          (chain: 4, draw: 1000)
      Coordinates:
        * chain            (chain) int64 0 1 2 3
        * draw             (draw) int64 0 1 2 3 4 5 6 ... 993 994 995 996 997 998 999
      Data variables:
          acceptance_rate  (chain, draw) float64 0.9562 0.7774 ... 0.9232 0.8468
          step_size        (chain, draw) float64 0.7482 0.7482 ... 0.6294 0.6294
          tree_depth       (chain, draw) int64 2 3 3 3 2 3 3 3 2 ... 3 3 2 3 3 3 3 2 3
          n_steps          (chain, draw) int64 3 7 7 7 3 7 7 7 3 ... 7 7 7 7 7 7 7 7 7
          diverging        (chain, draw) bool False False False ... False False False
          energy           (chain, draw) float64 -280.2 -279.4 ... -284.5 -279.4
          lp               (chain, draw) float64 285.4 280.7 282.5 ... 285.7 283.1
      Attributes:
          created_at:                 2023-08-11T18:56:30.463167
          arviz_version:              0.15.1
          inference_library:          stan
          inference_library_version:  3.7.0
          num_chains:                 4
          num_samples:                1000
          num_thin:                   1
          num_warmup:                 1000
          save_warmup:                0
          model_name:                 models/5jmion4o
          program_code:               \n    data {\n        int N;\n        vector[...
          random_seed:                None

    • <xarray.Dataset>
      Dimensions:                (chain: 4, draw: 1000, africa: 2, mu_dim_0: 170,
                                  log_lik_dim_0: 170, log_gdp_std_hat_dim_0: 170)
      Coordinates:
        * chain                  (chain) int64 0 1 2 3
        * draw                   (draw) int64 0 1 2 3 4 5 ... 994 995 996 997 998 999
        * africa                 (africa) int64 0 1
        * mu_dim_0               (mu_dim_0) int64 0 1 2 3 4 5 ... 165 166 167 168 169
        * log_lik_dim_0          (log_lik_dim_0) int64 0 1 2 3 4 ... 166 167 168 169
        * log_gdp_std_hat_dim_0  (log_gdp_std_hat_dim_0) int64 0 1 2 3 ... 167 168 169
      Data variables:
          alpha                  (chain, draw, africa) float64 0.88 1.051 ... 1.022
          beta                   (chain, draw, africa) float64 0.2193 ... -0.1622
          sigma                  (chain, draw) float64 0.1212 0.131 ... 0.1209 0.1094
          mu                     (chain, draw, mu_dim_0) float64 0.8632 ... 0.8728
          log_lik                (chain, draw, log_lik_dim_0) float64 1.182 ... 1.206
          log_gdp_std_hat        (chain, draw, log_gdp_std_hat_dim_0) float64 0.913...
      Attributes:
          created_at:                 2023-08-11T18:56:30.647322
          arviz_version:              0.15.1
          inference_library:          stan
          inference_library_version:  3.7.0
          num_chains:                 4
          num_samples:                1000
          num_thin:                   1
          num_warmup:                 1000
          save_warmup:                0
          model_name:                 models/5jmion4o
          program_code:               \n    data {\n        int N;\n        vector[...
          random_seed:                None

    • <xarray.Dataset>
      Dimensions:          (chain: 4, draw: 1000)
      Coordinates:
        * chain            (chain) int64 0 1 2 3
        * draw             (draw) int64 0 1 2 3 4 5 6 ... 993 994 995 996 997 998 999
      Data variables:
          lp               (chain, draw) float64 285.4 280.7 282.5 ... 285.7 283.1
          acceptance_rate  (chain, draw) float64 0.9562 0.7774 ... 0.9232 0.8468
          step_size        (chain, draw) float64 0.7482 0.7482 ... 0.6294 0.6294
          tree_depth       (chain, draw) int64 2 3 3 3 2 3 3 3 2 ... 3 3 2 3 3 3 3 2 3
          n_steps          (chain, draw) int64 3 7 7 7 3 7 7 7 3 ... 7 7 7 7 7 7 7 7 7
          diverging        (chain, draw) bool False False False ... False False False
          energy           (chain, draw) float64 -280.2 -279.4 ... -284.5 -279.4
      Attributes:
          created_at:                 2023-08-11T18:56:30.696630
          arviz_version:              0.15.1
          inference_library:          stan
          inference_library_version:  3.7.0
          num_chains:                 4
          num_samples:                1000
          num_thin:                   1
          num_warmup:                 1000
          save_warmup:                0
          model_name:                 models/5jmion4o
          program_code:               \n    data {\n        int N;\n        vector[...
          random_seed:                None

    • <xarray.Dataset>
      Dimensions:            (log_gdp_std_dim_0: 170)
      Coordinates:
        * log_gdp_std_dim_0  (log_gdp_std_dim_0) int64 0 1 2 3 4 ... 166 167 168 169
      Data variables:
          log_gdp_std        (log_gdp_std_dim_0) float64 0.8797 0.9648 ... 0.9186
      Attributes:
          created_at:                 2023-08-11T18:56:30.365835
          arviz_version:              0.15.1
          inference_library:          stan
          inference_library_version:  3.7.0

R Code 8.16 - XXX

# az.plot_loo_pit(stan_data4, y='log_gdp_std')

R Code 8.17

# Extract 200 samples from arviz-fit to numpy
params_post = az.extract(stan_data4.posterior, num_samples=200)
rugged_seq = np.linspace(0, 1, 30)

log_gdp_mean_africa = []
log_gdp_hdi_africa = []

log_gdp_mean_not_africa = []
log_gdp_hdi_not_africa = []

# Calculation posterior mean and interval HDI
for i in range(len(rugged_seq)):
        log_gdp_africa = params_post.alpha.sel(africa=0) + params_post.beta.sel(africa=0).values * rugged_seq[i]
        log_gdp_mean_africa.append(np.mean(log_gdp_africa.values))
        log_gdp_hdi_africa.append(az.hdi(log_gdp_africa.values, hdi_prob=0.89))
        
        log_gdp_not_africa = params_post.alpha.sel(africa=1) + params_post.beta.sel(africa=1).values * rugged_seq[i]
        log_gdp_mean_not_africa.append(np.mean(log_gdp_not_africa.values))
        log_gdp_hdi_not_africa.append(az.hdi(log_gdp_not_africa.values, hdi_prob=0.89))
        
log_gdp_hdi_africa = np.array(log_gdp_hdi_africa)
log_gdp_hdi_not_africa = np.array(log_gdp_hdi_not_africa)
fig = plt.figure(figsize=(17, 8))

gs = GridSpec(1, 2)

# Africa plots
ax_africa = fig.add_subplot(gs[0])

ax_africa.plot(rugged_seq, log_gdp_mean_africa, c='blue')
ax_africa.fill_between(rugged_seq, log_gdp_hdi_africa[:,0], log_gdp_hdi_africa[:,1], color='blue', alpha=0.3)
ax_africa.plot(ddf.loc[ddf.cid == 1,'rugged_std'], ddf.loc[ddf.cid==1, 'log_gdp_std'], 'o', markerfacecolor='blue', color='gray')

ax_africa.set_title('African Nations')
ax_africa.set_xlabel('ruggedness (standardized)')
ax_africa.set_ylabel('log GDP (as proportion of mean)')
ax_africa.text(0.7, 0.8, 'Africa',  fontsize='xx-large', color='darkblue')


# Non africa plots
ax_not_africa = fig.add_subplot(gs[1])

ax_not_africa.plot(rugged_seq, log_gdp_mean_not_africa, c='black')
ax_not_africa.fill_between(rugged_seq, log_gdp_hdi_not_africa[:,0], log_gdp_hdi_not_africa[:,1], color='darkgray', alpha=0.3)
ax_not_africa.plot(ddf.loc[ddf.cid == 2,'rugged_std'], ddf.loc[ddf.cid==2, 'log_gdp_std'], 'o', markerfacecolor='none', color='black')

ax_not_africa.set_title('Non-African Nations')
ax_not_africa.set_xlabel('ruggedness (standardized)')
ax_not_africa.set_ylabel('log GDP (as proportion of mean)')
ax_not_africa.text(0.7, 1.1, 'Not Africa', fontsize='xx-large', color='black')


plt.show()
_images/parte_8_57_0.png

R Code 8.18

log_gdp_delta_mean = []
log_gdp_delta_hdi = []

# Calculation posterior mean and interval HDI
for i in range(len(rugged_seq)):
        log_gdp_africa = params_post.alpha.sel(africa=0) + params_post.beta.sel(africa=0).values * rugged_seq[i]
        log_gdp_not_africa = params_post.alpha.sel(africa=1) + params_post.beta.sel(africa=1).values * rugged_seq[i]
        
        log_gdp_delta = log_gdp_africa - log_gdp_not_africa

        log_gdp_delta_mean.append(np.mean(log_gdp_delta.values))
        log_gdp_delta_hdi.append(az.hdi(log_gdp_delta.values, hdi_prob=0.89))
        
log_gdp_delta_hdi = np.array(log_gdp_delta_hdi)
fig = plt.figure(figsize=(17, 8))

gs = GridSpec(1, 1)

# Africa delta log GDP plots
ax_delta = fig.add_subplot(gs[0])

ax_delta.plot(rugged_seq, log_gdp_delta_mean, c='black')
ax_delta.fill_between(rugged_seq, log_gdp_delta_hdi[:,0], log_gdp_delta_hdi[:,1], color='blue', alpha=0.3)

ax_delta.set_title('Delta log GDP')
ax_delta.set_xlabel('ruggedness (standardized)')
ax_delta.set_ylabel('expected difference log GDP')

ax_delta.text(0.0, 0.02, 'Africa higher GDP',  fontsize='xx-large', color='darkblue')
ax_delta.text(0.0, -0.03, 'Africa lower GDP',  fontsize='xx-large', color='darkblue')
ax_delta.axhline(y=0, ls='--', color='black')

plt.show()
_images/parte_8_60_0.png

R Code 8.19

df = pd.read_csv('./data/tulips.csv', sep=';',
                 dtype={
                     'bed': 'category',  # cluster of plants from same section of the greenhouse
                     'water': 'float',  # Predictor: Soil moisture - (1) low and (3) high 
                     'shade': 'float',  # Predictor: Light exposure - (1) high and (3) low
                     'blooms': 'float',  # What we wish to predict
                 })
df.tail()
bed water shade blooms
22 c 2.0 2.0 135.92
23 c 2.0 3.0 90.66
24 c 3.0 1.0 304.52
25 c 3.0 2.0 249.33
26 c 3.0 3.0 134.59
df.describe(include='category')
bed
count 27
unique 3
top a
freq 9
df.describe().T
count mean std min 25% 50% 75% max
water 27.0 2.000000 0.832050 1.0 1.000 2.00 3.0 3.00
shade 27.0 2.000000 0.832050 1.0 1.000 2.00 3.0 3.00
blooms 27.0 128.993704 92.683923 0.0 71.115 111.04 190.3 361.66

R Code 8.20

df['blooms_std'] = df['blooms'] / df['blooms'].max()
df['water_cent'] = df['water'] - df['water'].mean()
df['shade_cent'] = df['shade'] - df['shade'].mean()

R Code 8.21

\[ \alpha \sim Normal(0.5, 1) \]
alpha = np.random.normal(0.5, 1, 1000)

alphas_true = np.any([alpha < 0, alpha > 1], axis=0)  # If (alpha < 0) or (alpha > 1) then True else False

np.sum(alphas_true) / len(alpha)
0.628

R Code 8.22

\[ \alpha \sim Normal(0.5, 0.25) \]
alpha = np.random.normal(0.5, 0.25, 1000)

alphas_true = np.any([alpha < 0, alpha > 1], axis=0)  # If (alpha < 0) or (alpha > 1) then True else False

np.sum(alphas_true) / len(alpha)
0.053

R Code 8.23

model = """
    data {
        int N;
        vector[N] blooms_std;
        vector[N] water_cent;
        vector[N] shade_cent;
    }
    
    parameters {
        real alpha;
        real beta_w;
        real beta_s;
        real<lower=0> sigma;
    }
    
    transformed parameters {
        vector[N] mu;
        
        mu = alpha + beta_w * water_cent + beta_s * shade_cent;
    }
    
    model {
        // Priori
        
        alpha ~ normal(0.5, 0.25);
        beta_w ~ normal(0, 0.25);
        beta_s ~ normal(0, 0.25);
        sigma ~ exponential(1);
        
        blooms_std ~ normal(mu, sigma);
    }
    
    generated quantities {
        vector[N] log_lik;
        vector[N] blooms_std_hat;
        
        for(i in 1:N){
            log_lik[i] = normal_lpdf(blooms_std[i] | mu[i], sigma);
            blooms_std_hat[i] = normal_rng(mu[i], sigma);
        }
    }
"""

data = {
    'N': len(df),
    'blooms_std': df.blooms_std.values,   
    'water_cent': df.water_cent.values,
    'shade_cent': df.shade_cent.values,
}

posteriori = stan.build(model, data=data)
samples = posteriori.sample(num_chains=4, num_samples=1000)
stan_blooms = az.from_pystan(
    prior_model=posteriori,
    prior=samples,
    posterior_model=posteriori,
    posterior=samples,
    posterior_predictive="blooms_std_hat",
    observed_data=['blooms_std', 'water_cent', 'shade_cent'],
)

stan_blooms
arviz.InferenceData
    • <xarray.Dataset>
      Dimensions:   (chain: 4, draw: 1000, mu_dim_0: 27)
      Coordinates:
        * chain     (chain) int64 0 1 2 3
        * draw      (draw) int64 0 1 2 3 4 5 6 7 8 ... 992 993 994 995 996 997 998 999
        * mu_dim_0  (mu_dim_0) int64 0 1 2 3 4 5 6 7 8 ... 18 19 20 21 22 23 24 25 26
      Data variables:
          alpha     (chain, draw) float64 0.3615 0.3282 0.3282 ... 0.3661 0.3922 0.34
          beta_w    (chain, draw) float64 0.2235 0.2231 0.2231 ... 0.2044 0.2055
          beta_s    (chain, draw) float64 -0.08126 -0.07431 ... -0.1841 -0.04058
          sigma     (chain, draw) float64 0.1818 0.1567 0.1567 ... 0.1719 0.1567
          mu        (chain, draw, mu_dim_0) float64 0.2193 0.1381 ... 0.5455 0.505
      Attributes:
          created_at:                 2023-08-11T18:56:33.510826
          arviz_version:              0.15.1
          inference_library:          stan
          inference_library_version:  3.7.0
          num_chains:                 4
          num_samples:                1000
          num_thin:                   1
          num_warmup:                 1000
          save_warmup:                0
          model_name:                 models/elkgjo5c
          program_code:               \n    data {\n        int N;\n        vector[...
          random_seed:                None

    • <xarray.Dataset>
      Dimensions:               (chain: 4, draw: 1000, blooms_std_hat_dim_0: 27)
      Coordinates:
        * chain                 (chain) int64 0 1 2 3
        * draw                  (draw) int64 0 1 2 3 4 5 6 ... 994 995 996 997 998 999
        * blooms_std_hat_dim_0  (blooms_std_hat_dim_0) int64 0 1 2 3 4 ... 23 24 25 26
      Data variables:
          blooms_std_hat        (chain, draw, blooms_std_hat_dim_0) float64 0.1343 ...
      Attributes:
          created_at:                 2023-08-11T18:56:33.654050
          arviz_version:              0.15.1
          inference_library:          stan
          inference_library_version:  3.7.0

    • <xarray.Dataset>
      Dimensions:        (chain: 4, draw: 1000, log_lik_dim_0: 27)
      Coordinates:
        * chain          (chain) int64 0 1 2 3
        * draw           (draw) int64 0 1 2 3 4 5 6 7 ... 993 994 995 996 997 998 999
        * log_lik_dim_0  (log_lik_dim_0) int64 0 1 2 3 4 5 6 ... 20 21 22 23 24 25 26
      Data variables:
          log_lik        (chain, draw, log_lik_dim_0) float64 0.05831 ... 0.5753
      Attributes:
          created_at:                 2023-08-11T18:56:33.609401
          arviz_version:              0.15.1
          inference_library:          stan
          inference_library_version:  3.7.0

    • <xarray.Dataset>
      Dimensions:          (chain: 4, draw: 1000)
      Coordinates:
        * chain            (chain) int64 0 1 2 3
        * draw             (draw) int64 0 1 2 3 4 5 6 ... 993 994 995 996 997 998 999
      Data variables:
          acceptance_rate  (chain, draw) float64 0.9282 0.9287 ... 0.8313 0.9974
          step_size        (chain, draw) float64 0.804 0.804 0.804 ... 0.6157 0.6157
          tree_depth       (chain, draw) int64 2 2 2 3 2 1 3 3 2 ... 2 2 3 3 2 2 3 3 3
          n_steps          (chain, draw) int64 3 3 3 7 3 3 7 7 3 ... 7 7 7 7 3 7 7 7 7
          diverging        (chain, draw) bool False False False ... False False False
          energy           (chain, draw) float64 -32.69 -31.76 -25.78 ... -28.81 -30.5
          lp               (chain, draw) float64 32.9 32.41 32.41 ... 31.39 31.47
      Attributes:
          created_at:                 2023-08-11T18:56:33.561609
          arviz_version:              0.15.1
          inference_library:          stan
          inference_library_version:  3.7.0
          num_chains:                 4
          num_samples:                1000
          num_thin:                   1
          num_warmup:                 1000
          save_warmup:                0
          model_name:                 models/elkgjo5c
          program_code:               \n    data {\n        int N;\n        vector[...
          random_seed:                None

    • <xarray.Dataset>
      Dimensions:               (chain: 4, draw: 1000, mu_dim_0: 27,
                                 log_lik_dim_0: 27, blooms_std_hat_dim_0: 27)
      Coordinates:
        * chain                 (chain) int64 0 1 2 3
        * draw                  (draw) int64 0 1 2 3 4 5 6 ... 994 995 996 997 998 999
        * mu_dim_0              (mu_dim_0) int64 0 1 2 3 4 5 6 ... 21 22 23 24 25 26
        * log_lik_dim_0         (log_lik_dim_0) int64 0 1 2 3 4 5 ... 22 23 24 25 26
        * blooms_std_hat_dim_0  (blooms_std_hat_dim_0) int64 0 1 2 3 4 ... 23 24 25 26
      Data variables:
          alpha                 (chain, draw) float64 0.3615 0.3282 ... 0.3922 0.34
          beta_w                (chain, draw) float64 0.2235 0.2231 ... 0.2044 0.2055
          beta_s                (chain, draw) float64 -0.08126 -0.07431 ... -0.04058
          sigma                 (chain, draw) float64 0.1818 0.1567 ... 0.1719 0.1567
          mu                    (chain, draw, mu_dim_0) float64 0.2193 ... 0.505
          log_lik               (chain, draw, log_lik_dim_0) float64 0.05831 ... 0....
          blooms_std_hat        (chain, draw, blooms_std_hat_dim_0) float64 0.1343 ...
      Attributes:
          created_at:                 2023-08-11T18:56:33.701908
          arviz_version:              0.15.1
          inference_library:          stan
          inference_library_version:  3.7.0
          num_chains:                 4
          num_samples:                1000
          num_thin:                   1
          num_warmup:                 1000
          save_warmup:                0
          model_name:                 models/elkgjo5c
          program_code:               \n    data {\n        int N;\n        vector[...
          random_seed:                None

    • <xarray.Dataset>
      Dimensions:          (chain: 4, draw: 1000)
      Coordinates:
        * chain            (chain) int64 0 1 2 3
        * draw             (draw) int64 0 1 2 3 4 5 6 ... 993 994 995 996 997 998 999
      Data variables:
          lp               (chain, draw) float64 32.9 32.41 32.41 ... 31.39 31.47
          acceptance_rate  (chain, draw) float64 0.9282 0.9287 ... 0.8313 0.9974
          step_size        (chain, draw) float64 0.804 0.804 0.804 ... 0.6157 0.6157
          tree_depth       (chain, draw) int64 2 2 2 3 2 1 3 3 2 ... 2 2 3 3 2 2 3 3 3
          n_steps          (chain, draw) int64 3 3 3 7 3 3 7 7 3 ... 7 7 7 7 3 7 7 7 7
          diverging        (chain, draw) bool False False False ... False False False
          energy           (chain, draw) float64 -32.69 -31.76 -25.78 ... -28.81 -30.5
      Attributes:
          created_at:                 2023-08-11T18:56:33.750637
          arviz_version:              0.15.1
          inference_library:          stan
          inference_library_version:  3.7.0
          num_chains:                 4
          num_samples:                1000
          num_thin:                   1
          num_warmup:                 1000
          save_warmup:                0
          model_name:                 models/elkgjo5c
          program_code:               \n    data {\n        int N;\n        vector[...
          random_seed:                None

    • <xarray.Dataset>
      Dimensions:           (blooms_std_dim_0: 27, water_cent_dim_0: 27,
                             shade_cent_dim_0: 27)
      Coordinates:
        * blooms_std_dim_0  (blooms_std_dim_0) int64 0 1 2 3 4 5 ... 21 22 23 24 25 26
        * water_cent_dim_0  (water_cent_dim_0) int64 0 1 2 3 4 5 ... 21 22 23 24 25 26
        * shade_cent_dim_0  (shade_cent_dim_0) int64 0 1 2 3 4 5 ... 21 22 23 24 25 26
      Data variables:
          blooms_std        (blooms_std_dim_0) float64 0.0 0.0 0.307 ... 0.6894 0.3721
          water_cent        (water_cent_dim_0) float64 -1.0 -1.0 -1.0 ... 1.0 1.0 1.0
          shade_cent        (shade_cent_dim_0) float64 -1.0 0.0 1.0 ... -1.0 0.0 1.0
      Attributes:
          created_at:                 2023-08-11T18:56:33.478728
          arviz_version:              0.15.1
          inference_library:          stan
          inference_library_version:  3.7.0

az.summary(stan_blooms, var_names=['alpha', 'beta_w', 'beta_s'])
mean sd hdi_3% hdi_97% mcse_mean mcse_sd ess_bulk ess_tail r_hat
alpha 0.359 0.034 0.298 0.429 0.001 0.0 3857.0 2579.0 1.0
beta_w 0.203 0.041 0.126 0.279 0.001 0.0 4353.0 3032.0 1.0
beta_s -0.112 0.041 -0.188 -0.036 0.001 0.0 4151.0 2524.0 1.0

R Code 8.24

\[ B_i \sim Normal(\mu_i, \sigma) \]
\[ \mu_i = \alpha + \beta_W W_i + \beta_S S_i + \beta_{WS} S_i W_i \]
model = """
    data {
        int N;
        vector[N] blooms_std;
        vector[N] water_cent;
        vector[N] shade_cent;
    }
    
    parameters {
        real alpha;
        real beta_w;
        real beta_s;
        real beta_ws;
        real<lower=0> sigma;
    }
    
    transformed parameters {
        vector[N] mu;
        
        mu = alpha + beta_w * water_cent + beta_s * shade_cent + beta_ws * water_cent .* shade_cent;
    }
    
    model {
        // Priori
        
        alpha ~ normal(0.5, 0.25);
        beta_w ~ normal(0, 0.25);
        beta_s ~ normal(0, 0.25);
        beta_ws ~ normal(0, 0.25);
        sigma ~ exponential(1);
        
        blooms_std ~ normal(mu, sigma);
    }
    
    generated quantities {
        vector[N] log_lik;
        vector[N] blooms_std_hat;
        
        for(i in 1:N){
            log_lik[i] = normal_lpdf(blooms_std[i] | mu[i], sigma);
            blooms_std_hat[i] = normal_rng(mu[i], sigma);
        }
    }
"""

data = {
    'N': len(df),
    'blooms_std': df.blooms_std.values,   
    'water_cent': df.water_cent.values,
    'shade_cent': df.shade_cent.values,
}

posteriori = stan.build(model, data=data)
samples = posteriori.sample(num_chains=4, num_samples=1000)
stan_blooms_interaction = az.from_pystan(
    prior_model=posteriori,
    prior=samples,
    posterior_model=posteriori,
    posterior=samples,
    posterior_predictive="blooms_std_hat",
    observed_data=['blooms_std', 'water_cent', 'shade_cent'],
)

stan_blooms_interaction
arviz.InferenceData
    • <xarray.Dataset>
      Dimensions:   (chain: 4, draw: 1000, mu_dim_0: 27)
      Coordinates:
        * chain     (chain) int64 0 1 2 3
        * draw      (draw) int64 0 1 2 3 4 5 6 7 8 ... 992 993 994 995 996 997 998 999
        * mu_dim_0  (mu_dim_0) int64 0 1 2 3 4 5 6 7 8 ... 18 19 20 21 22 23 24 25 26
      Data variables:
          alpha     (chain, draw) float64 0.3674 0.3462 0.354 ... 0.3529 0.3582 0.3258
          beta_w    (chain, draw) float64 0.2106 0.2189 0.1748 ... 0.2551 0.2051
          beta_s    (chain, draw) float64 -0.1585 -0.1735 ... -0.1014 -0.06658
          beta_ws   (chain, draw) float64 -0.2024 -0.1429 -0.1649 ... -0.08582 -0.1958
          sigma     (chain, draw) float64 0.1466 0.1574 0.1383 ... 0.1805 0.1231
          mu        (chain, draw, mu_dim_0) float64 0.1129 0.1568 ... 0.5309 0.2686
      Attributes:
          created_at:                 2023-08-11T18:56:34.758903
          arviz_version:              0.15.1
          inference_library:          stan
          inference_library_version:  3.7.0
          num_chains:                 4
          num_samples:                1000
          num_thin:                   1
          num_warmup:                 1000
          save_warmup:                0
          model_name:                 models/2akx274x
          program_code:               \n    data {\n        int N;\n        vector[...
          random_seed:                None

    • <xarray.Dataset>
      Dimensions:               (chain: 4, draw: 1000, blooms_std_hat_dim_0: 27)
      Coordinates:
        * chain                 (chain) int64 0 1 2 3
        * draw                  (draw) int64 0 1 2 3 4 5 6 ... 994 995 996 997 998 999
        * blooms_std_hat_dim_0  (blooms_std_hat_dim_0) int64 0 1 2 3 4 ... 23 24 25 26
      Data variables:
          blooms_std_hat        (chain, draw, blooms_std_hat_dim_0) float64 0.04633...
      Attributes:
          created_at:                 2023-08-11T18:56:34.902871
          arviz_version:              0.15.1
          inference_library:          stan
          inference_library_version:  3.7.0

    • <xarray.Dataset>
      Dimensions:        (chain: 4, draw: 1000, log_lik_dim_0: 27)
      Coordinates:
        * chain          (chain) int64 0 1 2 3
        * draw           (draw) int64 0 1 2 3 4 5 6 7 ... 993 994 995 996 997 998 999
        * log_lik_dim_0  (log_lik_dim_0) int64 0 1 2 3 4 5 6 ... 20 21 22 23 24 25 26
      Data variables:
          log_lik        (chain, draw, log_lik_dim_0) float64 0.7045 0.4295 ... 0.8218
      Attributes:
          created_at:                 2023-08-11T18:56:34.858158
          arviz_version:              0.15.1
          inference_library:          stan
          inference_library_version:  3.7.0

    • <xarray.Dataset>
      Dimensions:          (chain: 4, draw: 1000)
      Coordinates:
        * chain            (chain) int64 0 1 2 3
        * draw             (draw) int64 0 1 2 3 4 5 6 ... 993 994 995 996 997 998 999
      Data variables:
          acceptance_rate  (chain, draw) float64 0.9824 0.9818 ... 0.9986 0.9993
          step_size        (chain, draw) float64 0.7264 0.7264 ... 0.6313 0.6313
          tree_depth       (chain, draw) int64 2 2 3 3 2 2 3 3 2 ... 3 3 2 3 3 2 3 3 3
          n_steps          (chain, draw) int64 7 3 7 7 3 3 7 7 3 ... 7 7 3 7 7 3 7 7 7
          diverging        (chain, draw) bool False False False ... False False False
          energy           (chain, draw) float64 -36.43 -35.68 ... -32.65 -33.83
          lp               (chain, draw) float64 37.17 37.08 37.86 ... 35.73 36.25
      Attributes:
          created_at:                 2023-08-11T18:56:34.810343
          arviz_version:              0.15.1
          inference_library:          stan
          inference_library_version:  3.7.0
          num_chains:                 4
          num_samples:                1000
          num_thin:                   1
          num_warmup:                 1000
          save_warmup:                0
          model_name:                 models/2akx274x
          program_code:               \n    data {\n        int N;\n        vector[...
          random_seed:                None

    • <xarray.Dataset>
      Dimensions:               (chain: 4, draw: 1000, mu_dim_0: 27,
                                 log_lik_dim_0: 27, blooms_std_hat_dim_0: 27)
      Coordinates:
        * chain                 (chain) int64 0 1 2 3
        * draw                  (draw) int64 0 1 2 3 4 5 6 ... 994 995 996 997 998 999
        * mu_dim_0              (mu_dim_0) int64 0 1 2 3 4 5 6 ... 21 22 23 24 25 26
        * log_lik_dim_0         (log_lik_dim_0) int64 0 1 2 3 4 5 ... 22 23 24 25 26
        * blooms_std_hat_dim_0  (blooms_std_hat_dim_0) int64 0 1 2 3 4 ... 23 24 25 26
      Data variables:
          alpha                 (chain, draw) float64 0.3674 0.3462 ... 0.3582 0.3258
          beta_w                (chain, draw) float64 0.2106 0.2189 ... 0.2551 0.2051
          beta_s                (chain, draw) float64 -0.1585 -0.1735 ... -0.06658
          beta_ws               (chain, draw) float64 -0.2024 -0.1429 ... -0.1958
          sigma                 (chain, draw) float64 0.1466 0.1574 ... 0.1805 0.1231
          mu                    (chain, draw, mu_dim_0) float64 0.1129 ... 0.2686
          log_lik               (chain, draw, log_lik_dim_0) float64 0.7045 ... 0.8218
          blooms_std_hat        (chain, draw, blooms_std_hat_dim_0) float64 0.04633...
      Attributes:
          created_at:                 2023-08-11T18:56:34.950864
          arviz_version:              0.15.1
          inference_library:          stan
          inference_library_version:  3.7.0
          num_chains:                 4
          num_samples:                1000
          num_thin:                   1
          num_warmup:                 1000
          save_warmup:                0
          model_name:                 models/2akx274x
          program_code:               \n    data {\n        int N;\n        vector[...
          random_seed:                None

    • <xarray.Dataset>
      Dimensions:          (chain: 4, draw: 1000)
      Coordinates:
        * chain            (chain) int64 0 1 2 3
        * draw             (draw) int64 0 1 2 3 4 5 6 ... 993 994 995 996 997 998 999
      Data variables:
          lp               (chain, draw) float64 37.17 37.08 37.86 ... 35.73 36.25
          acceptance_rate  (chain, draw) float64 0.9824 0.9818 ... 0.9986 0.9993
          step_size        (chain, draw) float64 0.7264 0.7264 ... 0.6313 0.6313
          tree_depth       (chain, draw) int64 2 2 3 3 2 2 3 3 2 ... 3 3 2 3 3 2 3 3 3
          n_steps          (chain, draw) int64 7 3 7 7 3 3 7 7 3 ... 7 7 3 7 7 3 7 7 7
          diverging        (chain, draw) bool False False False ... False False False
          energy           (chain, draw) float64 -36.43 -35.68 ... -32.65 -33.83
      Attributes:
          created_at:                 2023-08-11T18:56:35.000044
          arviz_version:              0.15.1
          inference_library:          stan
          inference_library_version:  3.7.0
          num_chains:                 4
          num_samples:                1000
          num_thin:                   1
          num_warmup:                 1000
          save_warmup:                0
          model_name:                 models/2akx274x
          program_code:               \n    data {\n        int N;\n        vector[...
          random_seed:                None

    • <xarray.Dataset>
      Dimensions:           (blooms_std_dim_0: 27, water_cent_dim_0: 27,
                             shade_cent_dim_0: 27)
      Coordinates:
        * blooms_std_dim_0  (blooms_std_dim_0) int64 0 1 2 3 4 5 ... 21 22 23 24 25 26
        * water_cent_dim_0  (water_cent_dim_0) int64 0 1 2 3 4 5 ... 21 22 23 24 25 26
        * shade_cent_dim_0  (shade_cent_dim_0) int64 0 1 2 3 4 5 ... 21 22 23 24 25 26
      Data variables:
          blooms_std        (blooms_std_dim_0) float64 0.0 0.0 0.307 ... 0.6894 0.3721
          water_cent        (water_cent_dim_0) float64 -1.0 -1.0 -1.0 ... 1.0 1.0 1.0
          shade_cent        (shade_cent_dim_0) float64 -1.0 0.0 1.0 ... -1.0 0.0 1.0
      Attributes:
          created_at:                 2023-08-11T18:56:34.723805
          arviz_version:              0.15.1
          inference_library:          stan
          inference_library_version:  3.7.0

az.summary(stan_blooms_interaction, var_names=['alpha', 'beta_w', 'beta_s', 'beta_ws'])
mean sd hdi_3% hdi_97% mcse_mean mcse_sd ess_bulk ess_tail r_hat
alpha 0.358 0.029 0.301 0.411 0.000 0.0 5629.0 2730.0 1.0
beta_w 0.206 0.034 0.145 0.275 0.000 0.0 4954.0 2852.0 1.0
beta_s -0.113 0.034 -0.177 -0.051 0.000 0.0 5101.0 3033.0 1.0
beta_ws -0.143 0.042 -0.220 -0.059 0.001 0.0 5011.0 3012.0 1.0

R Code 8.25

blooms_post = az.extract(stan_blooms.posterior, num_samples=20)  # get 20 lines
blooms_post_int = az.extract(stan_blooms_interaction.posterior, num_samples=20)  # get 20 lines
# ================
# Plot without interactions

water_seq = np.linspace(-1, 1, 30)

fig = plt.figure(figsize=(17, 6))

gs = GridSpec(1, 3)

shade_cent_values = [-1, 0, 1]

ax = [None] * len(shade_cent_values)

for j, shade_plot_aux in enumerate(shade_cent_values):
    ax[j] = fig.add_subplot(gs[j])

    for i in range(len(blooms_post.alpha)):
        mu = blooms_post.alpha[i].values + \
             blooms_post.beta_w[i].values * water_seq + \
             blooms_post.beta_s[i].values * (shade_plot_aux)

        ax[j].plot(water_seq, mu, c='gray')
        ax[j].set_ylim(0, 1)
        ax[j].set_title(f'8.4 post: Shade = ${shade_plot_aux}$')
        ax[j].set_xlabel('water')
        ax[j].set_ylabel('blooms')
        ax[j].set_xticks(shade_cent_values, shade_cent_values)
        ax[j].set_yticks([0, 0.5, 1.0], [0, 0.5, 1])

    ax[j].plot(
            df.loc[df['shade_cent'] == shade_plot_aux,'water_cent'].values, 
            df.loc[df['shade_cent'] == shade_plot_aux,'blooms_std'].values, 
            'o', c='blue')
    
plt.show()

# ================
# Plot with interactions

water_seq = np.linspace(-1, 1, 30)

fig = plt.figure(figsize=(17, 6))

gs = GridSpec(1, 3)

shade_cent_values = [-1, 0, 1]

ax = [None] * len(shade_cent_values)

for j, shade_plot_aux in enumerate(shade_cent_values):
    ax[j] = fig.add_subplot(gs[j])

    for i in range(len(blooms_post_int.alpha)):
        mu = blooms_post_int.alpha[i].values + \
             blooms_post_int.beta_w[i].values * water_seq + \
             blooms_post_int.beta_s[i].values * (shade_plot_aux) + \
             blooms_post_int.beta_ws[i].values * water_seq * (shade_plot_aux)

        ax[j].plot(water_seq, mu, c='gray')
        ax[j].set_ylim(0, 1)
        ax[j].set_title(f'8.5 post: Shade = ${shade_plot_aux}$')
        ax[j].set_xlabel('water')
        ax[j].set_ylabel('blooms')
        ax[j].set_xticks(shade_cent_values, shade_cent_values)
        ax[j].set_yticks([0, 0.5, 1.0], [0, 0.5, 1])

    ax[j].plot(
            df.loc[df['shade_cent'] == shade_plot_aux,'water_cent'].values, 
            df.loc[df['shade_cent'] == shade_plot_aux,'blooms_std'].values, 
            'o', c='blue')
    
plt.show()
_images/parte_8_84_0.png _images/parte_8_84_1.png

R Code 8.26

# ================
# Plot without interactions

# Prior
alpha_prior =  np.random.normal(0.5, 0.25, 20)
beta_w_prior =  np.random.normal(0, 0.25, 20)
beta_s_prior =  np.random.normal(0, 0.25, 20)

water_seq = np.linspace(-1, 1, 30)

fig = plt.figure(figsize=(17, 6))

gs = GridSpec(1, 3)

shade_cent_values = [-1, 0, 1]

ax = [None] * len(shade_cent_values)

for j, shade_plot_aux in enumerate(shade_cent_values):
    ax[j] = fig.add_subplot(gs[j])

    for i in range(len(alpha_prior)):
        mu = alpha_prior[i] + \
             beta_w_prior[i] * water_seq + \
             beta_s_prior[i] * (shade_plot_aux)

        ax[j].plot(water_seq, mu, c='gray')
        ax[j].set_ylim(0, 1)
        ax[j].set_title(f'8.4 prior: Shade = ${shade_plot_aux}$')
        ax[j].set_xlabel('water')
        ax[j].set_ylabel('blooms')
        ax[j].set_xticks(shade_cent_values, shade_cent_values)
        ax[j].set_yticks([0, 0.5, 1.0], [0, 0.5, 1])
        ax[j].set_ylim(-1, 2)
        ax[j].axhline(y=1, ls='--', color='red')
        ax[j].axhline(y=0, ls='--', color='red')

    ax[j].plot(
            df.loc[df['shade_cent'] == shade_plot_aux,'water_cent'].values, 
            df.loc[df['shade_cent'] == shade_plot_aux,'blooms_std'].values, 
            'o', c='blue')
    
plt.show()

# ================
# Plot with interactions

# Prior
alpha_prior =  np.random.normal(0.5, 0.25, 20)
beta_w_prior =  np.random.normal(0, 0.25, 20)
beta_s_prior =  np.random.normal(0, 0.25, 20)
beta_ws_prior =  np.random.normal(0, 0.25, 20)

water_seq = np.linspace(-1, 1, 30)

fig = plt.figure(figsize=(17, 6))

gs = GridSpec(1, 3)

shade_cent_values = [-1, 0, 1]

ax = [None] * len(shade_cent_values)

for j, shade_plot_aux in enumerate(shade_cent_values):
    ax[j] = fig.add_subplot(gs[j])

    for i in range(len(alpha_prior)):
        mu = alpha_prior[i] + \
             beta_w_prior[i] * water_seq + \
             beta_s_prior[i] * (shade_plot_aux) + \
             beta_ws_prior[i] * water_seq * (shade_plot_aux)

        ax[j].plot(water_seq, mu, c='gray')
        ax[j].set_ylim(0, 1)
        ax[j].set_title(f'8.5 post: Shade = ${shade_plot_aux}$')
        ax[j].set_xlabel('water')
        ax[j].set_ylabel('blooms')
        ax[j].set_xticks(shade_cent_values, shade_cent_values)
        ax[j].set_yticks([0, 0.5, 1.0], [0, 0.5, 1])
        ax[j].set_ylim(-1, 2)
        ax[j].axhline(y=1, ls='--', color='red')
        ax[j].axhline(y=0, ls='--', color='red')

    ax[j].plot(
            df.loc[df['shade_cent'] == shade_plot_aux,'water_cent'].values, 
            df.loc[df['shade_cent'] == shade_plot_aux,'blooms_std'].values, 
            'o', c='blue')
    
plt.show()
_images/parte_8_86_0.png _images/parte_8_86_1.png