10 - Big Entropy and Generalized Linear Models¶
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import pandas as pd
import networkx as nx
# from causalgraphicalmodels import CausalGraphicalModel
import arviz as az
# ArviZ ships with style sheets!
# https://python.arviz.org/en/stable/examples/styles.html#example-styles
az.style.use("arviz-darkgrid")
import xarray as xr
import stan
import nest_asyncio
plt.style.use('default')
plt.rcParams['axes.facecolor'] = 'lightgray'
# To draw DAGs
import daft
from causalgraphicalmodels import CausalGraphicalModel
# Add custom fonts to matplotlib (needed for the xkcd style)
from matplotlib import font_manager
font_dirs = ["fonts/"]  # Path to the custom font files
font_files = font_manager.findSystemFonts(fontpaths=font_dirs)
for font_file in font_files:
font_manager.fontManager.addfont(font_file)
# To make plots look hand-drawn (xkcd style)
# plt.xkcd()
# To run Stan inside a Jupyter notebook
nest_asyncio.apply()
R Code 10.1¶
buckets = {
    "A": (0, 0, 10, 0, 0),
    "B": (0, 1, 8, 1, 0),
    "C": (0, 2, 6, 2, 0),
    "D": (1, 2, 4, 2, 1),
    "E": (2, 2, 2, 2, 2),
}
df = pd.DataFrame.from_dict(buckets)
df
|   | A  | B | C | D | E |
|---|----|---|---|---|---|
| 0 | 0  | 0 | 0 | 1 | 2 |
| 1 | 0  | 1 | 2 | 2 | 2 |
| 2 | 10 | 8 | 6 | 4 | 2 |
| 3 | 0  | 1 | 2 | 2 | 2 |
| 4 | 0  | 0 | 0 | 1 | 2 |
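As a quick check (not part of the book code): each bucket distributes the same total of 10 pebbles across the five values, which is what makes the comparison of entropies and counts below fair.

# Each bucket distributes the same 10 pebbles across the 5 values
df.sum(axis=0)  # every column sums to 10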
R Code 10.2¶
# Normalize
p_norm = df / df.sum(axis=0)
p_norm
|   | A   | B   | C   | D   | E   |
|---|-----|-----|-----|-----|-----|
| 0 | 0.0 | 0.0 | 0.0 | 0.1 | 0.2 |
| 1 | 0.0 | 0.1 | 0.2 | 0.2 | 0.2 |
| 2 | 1.0 | 0.8 | 0.6 | 0.4 | 0.2 |
| 3 | 0.0 | 0.1 | 0.2 | 0.2 | 0.2 |
| 4 | 0.0 | 0.0 | 0.0 | 0.1 | 0.2 |
R Code 10.3¶
def entropy(bucket):
    """Shannon entropy H = -sum(q * log(q)), treating 0 * log(0) as 0."""
    uncertainty = []
    for q in bucket:
        if q == 0:
            uncertainty.append(q)  # 0 * log(0) is taken to be 0
        else:
            uncertainty.append(q * np.log(q))
    return (-1) * np.sum(uncertainty)
H = [entropy(p_norm[key]) for key in p_norm.keys()]
df_H = pd.DataFrame(H).T
df_H.columns = p_norm.keys()
df_H
A | B | C | D | E | |
---|---|---|---|---|---|
0 | -0.0 | 0.639032 | 0.950271 | 1.470808 | 1.609438 |
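A quick sanity check on the two extremes: bucket A puts all its probability on one value, so its entropy is 0, while bucket E is uniform over five values, so its entropy reaches log 5, the maximum possible for five outcomes.

# Maximum possible entropy for 5 outcomes (attained by the uniform bucket E)
np.log(5)
1.6094379124341003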
R Code 10.4¶
ways = (1, 90, 1260, 37800, 113400)
logwayspp = np.log(ways) / 10
logwayspp
array([0. , 0.44998097, 0.7138867 , 1.05400644, 1.16386767])
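The hard-coded `ways` values are the multinomial coefficients W = (total pebbles)! / Π nᵢ!, counting how many arrangements of the 10 pebbles produce each bucket. A minimal check against the bucket counts from R Code 10.1 (the helper name n_ways is ours, not from the book):

from math import factorial, prod
def n_ways(counts):
    # Multinomial coefficient: (total pebbles)! / product(count_i!)
    counts = [int(n) for n in counts]
    return factorial(sum(counts)) // prod(factorial(n) for n in counts)
[n_ways(df[col]) for col in df.columns]
[1, 90, 1260, 37800, 113400]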
plt.figure(figsize=(16, 6))
plt.plot(logwayspp, df_H.T.values, '--', c='black')
plt.plot(logwayspp, df_H.T.values, 'o', ms=10)
plt.title('Entropy in Buckets')
plt.xlabel('log(ways) per pebble')
plt.ylabel('entropy')
plt.show()
R Code 10.5¶
# Build list of candidate distributions
p = [
[1/4, 1/4, 1/4, 1/4],
[2/6, 1/6, 1/6, 2/6],
[1/6, 2/6, 2/6, 1/6],
[1/8, 4/8, 2/8, 1/8],
]
# Compute the expected values of each
result = [np.sum(np.multiply(p_i, [0, 1, 1, 2])) for p_i in p]
result
[1.0, 1.0, 1.0, 1.0]
R Code 10.6¶
# Compute the entropy of each distribution
for p_i in p:
    print(-np.sum(p_i * np.log(p_i)))
1.3862943611198906
1.3296613488547582
1.3296613488547582
1.2130075659799042
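The first candidate, the uniform distribution, has the largest entropy, as it must: the maximum possible entropy over four outcomes is log 4, attained only by the uniform distribution.

# Maximum possible entropy for 4 outcomes (the uniform distribution)
np.log(4)
1.3862943611198906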
R Code 10.7¶
p = 0.7
A = [
    (1 - p)**2,
    p * (1 - p),
    (1 - p) * p,
    p**2,
]
np.round(A, 3)
array([0.09, 0.21, 0.21, 0.49])
R Code 10.8¶
- np.sum(A * np.log(A))
1.221728604109787
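This binomial distribution has expected value 2p = 1.4 blue marbles, and that is exactly the constraint G passed to sim_p in R Code 10.9, so every simulated distribution below shares this expected value. A quick check:

# Expected value of A: 0.09*0 + 0.21*1 + 0.21*1 + 0.49*2 = 1.4, the G used in sim_p below
np.sum(np.multiply(A, [0, 1, 1, 2]))  # 1.4 (up to floating-point error)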
R Code 10.9¶
def sim_p(G=1.4):
    x = np.random.uniform(0, 1, size=4)
    x[3] = 0  # placeholder, so np.sum(x) below is the sum of the first three draws
    x[3] = (G * np.sum(x) - x[1] - x[2]) / (2 - G)  # solve for x4 so the expected value equals G
    p = x / np.sum(x)
    return [-np.sum(p * np.log(p)), p]
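By construction, every distribution returned by sim_p satisfies the expected-value constraint G. A quick check on a single draw (the names h_check and p_check are just illustrative):

# Each simulated distribution satisfies the expected-value constraint
h_check, p_check = sim_p(1.4)
np.sum(p_check * np.array([0, 1, 1, 2]))  # 1.4 (up to floating-point error)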
R Code 10.10¶
H = pd.DataFrame([ sim_p(1.4) for _ in range(10000)], columns=('entropies', 'distributions'))
plt.figure(figsize=(17, 6))
plt.hist(H.entropies, density=True, rwidth=0.9)
plt.title('Entropy - Binomial')
plt.xlabel('Entropy')
plt.ylabel('Density')
plt.show()
R Code 10.11¶
# entropies = H.entropies
# distributions = H.distributions
R Code 10.12¶
H.entropies.max()
1.2217257418082403
R Code 10.13¶
H.loc[H.entropies == H.entropies.max(), 'distributions'].values
array([array([0.09039858, 0.21007322, 0.20912962, 0.49039858])],
dtype=object)
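The maximum-entropy distribution found by simulation is essentially the binomial computed analytically in R Code 10.7, and its entropy (about 1.2217) matches the value from R Code 10.8. A side-by-side comparison (the name best is just illustrative):

# Compare the best simulated distribution with the analytic binomial A from R Code 10.7
best = H.loc[H.entropies.idxmax(), 'distributions']
np.round(best, 3), np.round(A, 3)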