In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow_probability.substrates import numpy as tfp

In [2]:
# Make sure a CmdStan toolchain is available before any model is compiled.
# In the recorded run below the call found version 2.33.1 already installed
# and returned True.
from cmdstanpy import install_cmdstan
install_cmdstan()

CmdStan install directory: C:\Users\39339\.cmdstan
CmdStan version 2.33.1 already installed
Test model compilation


True

In [3]:
from cmdstanpy import CmdStanModel, set_cmdstan_path
import arviz as az

In [68]:
# Covariate table; later cells read its 'Type', 'Station', 'Year', 'Month'
# and 'Quota' columns.
covariates_path = "../Datasets/covariates.csv"
X = pd.read_csv(covariates_path)

In [69]:
# Response table holding the count column 'Count_120'.
Y = pd.read_csv("../Datasets/Dataset_120.csv")
# Store the counts with the nullable 'Int64' dtype so that missing entries
# stay representable in an integer column.
count_120 = pd.to_numeric(Y['Count_120'])
Y['Count_120'] = count_120.astype('Int64')

In [70]:
# One-hot encode the categorical 'Type' column into 'type_*' indicators;
# drop_first leaves out the first level so it acts as the reference category.
X = pd.get_dummies(
    X,
    columns=['Type'],
    prefix='type',
    drop_first=True,
)

In [71]:
# Manual override: every row of station 10171 gets Quota = 10.
# NOTE(review): the reason for this correction is not recorded here — document it.
is_station_10171 = X['Station'].eq(10171)
X.loc[is_station_10171, 'Quota'] = 10

In [91]:
# Index labels of the observations whose count is missing.
missing_count = Y['Count_120'].isna()
rows_to_drop_Y = Y.index[missing_count.to_numpy()]

# Remove those labels from both tables and renumber the rows from 0.
# This relies on X and Y sharing the same row labels (assumed row-aligned).
X = X.drop(index=rows_to_drop_Y).reset_index(drop=True)
Y = Y.drop(index=rows_to_drop_Y).reset_index(drop=True)

In [106]:
# Stan program: Poisson regression with a log link. The linear predictor is
# a year effect + a station effect + X * beta, all with normal(0, 2) priors.
#
# NOTE(review): despite the "prior" naming, the model block conditions on y,
# so sampling from it yields posterior draws, not prior draws — confirm intent.
# NOTE(review): xi and eta enter the likelihood only through their sum, so a
# constant shift between them is pinned down by the priors alone; this may
# contribute to slow exploration — consider a sum-to-zero constraint.
prior_elic = """

data {
  int<lower=1> N; // Number of observations
  int<lower=1> P; // Covariate number
  int<lower=1> nyears;
  int<lower=1> nstations;


  array[N] int<lower=1> station;
  array[N] int<lower=1> year;
  // array[N] int<lower=4, upper=10> month;

  array[N] int<lower=0> y; // Count data
  matrix[N, P] X; // Predictor matrix
}

parameters {
  vector[P] beta; // Coefficients for predictors
  vector[nstations] eta; // Random effects for stations
  vector[nyears] xi; // Random effects for years

  //real<lower=0> sigma0; // Standard deviation for beta
  //real<lower=0> sigma1; // Standard deviation for xi
  //real<lower=0> sigma2; // Standard deviation for eta
}

transformed parameters {
    vector[N] lambda;
    vector[N] intercept;
    vector[N] fix_eff;

    intercept = xi[year] + eta[station];
    fix_eff = X * beta;

    // Kept as an output quantity only; the likelihood below works on the
    // log scale and does not depend on this exponentiation.
    lambda = exp(intercept + fix_eff);
}

model {

  beta ~ normal(0, 2);
  xi ~ normal(0, 2);
  eta ~ normal(0, 2);
  // Log-rate parameterisation: same distribution as poisson(lambda), but
  // evaluated on the linear predictor so large values cannot overflow exp().
  y ~ poisson_log(intercept + fix_eff);


}

"""

stan_file = "./priors.stan"

# Write the program to disk so that CmdStan can compile it.
with open(stan_file, "w") as f:
    f.write(prior_elic)

priors = CmdStanModel(stan_file=stan_file)

15:02:07 - cmdstanpy - INFO - compiling stan file C:\Users\39339\OneDrive\Desktop\ISEMES~1\BAYESI~2\main\Modello\priors.stan to exe file C:\Users\39339\OneDrive\Desktop\I semestre\Bayesian_Project\main\Modello\priors.exe
15:03:04 - cmdstanpy - INFO - compiled model executable: C:\Users\39339\OneDrive\Desktop\I semestre\Bayesian_Project\main\Modello\priors.exe


In [94]:
# Build the index vectors passed to the Stan model.
stations = X['Station']
years = X['Year'] - 2009  # calendar year shifted by the fixed base 2009

# Number the distinct station codes 1, 2, ... in ascending code order, then
# translate every row's station code into that number.
unique_values = sorted(set(stations))
mapping_dict = dict(zip(unique_values, range(1, len(unique_values) + 1)))
transformed_stations = list(map(mapping_dict.__getitem__, stations))
stations = transformed_stations

In [110]:
# 'Year', 'Station' and 'Month' are removed so that only the remaining
# columns are used as predictors.
X = X.drop(columns=['Year', 'Station', 'Month'])

In [124]:
from sklearn.preprocessing import StandardScaler

# Standardise every predictor column. The fitted scaler is kept in `scaler`
# and the result is wrapped back into a DataFrame with the original column names.
scaler = StandardScaler().fit(X)
X_std = pd.DataFrame(scaler.transform(X), columns=X.columns)

In [127]:
# Input for the Stan program. The group counts are taken from the index
# vectors themselves rather than hard-coded, so they cannot drift out of sync
# with the data after rows are filtered: the largest index present is the
# smallest vector length that keeps every xi[year] / eta[station] lookup in
# bounds.
data = {
    "N": len(Y),
    "P": X_std.shape[1],
    "nyears": int(max(years)),
    "nstations": int(max(stations)),
    "station": stations,
    "year": years,
    "y": Y['Count_120'],
    "X": X_std
}

# Fixed seed so that a re-run reproduces the same draws.
fit = priors.sample(data=data, chains=4, parallel_chains=4,
    iter_warmup=1000, iter_sampling=1000, seed=42)
# Convert to ArviZ InferenceData for diagnostics and plotting.
prior_az = az.from_cmdstanpy(fit)

15:11:39 - cmdstanpy - INFO - CmdStan start processing


chain 1 |          | 00:00 Status

chain 2 |          | 00:00 Status

chain 3 |          | 00:00 Status

chain 4 |          | 00:00 Status

                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                

15:30:53 - cmdstanpy - INFO - CmdStan done processing.





	Chain 1 had 468 iterations at max treedepth (46.8%)
	Chain 2 had 292 iterations at max treedepth (29.2%)
	Chain 3 had 234 iterations at max treedepth (23.4%)
	Chain 4 had 379 iterations at max treedepth (37.9%)
	Use the "diagnose()" method on the CmdStanMCMC object to see further information.


In [None]:
# Trace only the sampled parameters. The Stan program also exposes lambda,
# intercept and fix_eff, each with one entry per observation; with
# compact=False every scalar would get its own panel, which is not useful.
az.plot_trace(prior_az, var_names=["beta", "xi", "eta"], compact=False)
plt.tight_layout()
plt.show()