In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow_probability.substrates import numpy as tfp

In [None]:
!pip install cmdstanpy
!pip install git+https://github.com/OriolAbril/arviz.git@ci

In [None]:
# Run cmdstanpy's CmdStan installer so that Stan models can be compiled later
# in this notebook (see the CmdStanModel(...) call further down).
# NOTE(review): this runs on every "Run All"; presumably it is a no-op when
# CmdStan is already installed — confirm against the cmdstanpy docs.
from cmdstanpy import install_cmdstan
install_cmdstan()

In [None]:
from cmdstanpy import CmdStanModel, set_cmdstan_path
import arviz as az

In [None]:
# Read the covariate table, then order its rows by station, year and month.
X = pd.read_csv("X.csv")
X = X.sort_values(by=['Station', 'Year', 'Month'])

In [None]:
# Set the grouping columns aside: they are removed from X in a later cell but
# are still needed as the station / year indices of the model.
stations, years = X['Station'], X['Year']

In [None]:
# Treat 'Type' as a categorical column.
# NOTE(review): 'Type' is dropped from X a few cells below, so this cast has
# no effect on the matrix that reaches the model.
X = X.astype({'Type': 'category'})

In [None]:
# Year and Station were saved separately above; remove them from the
# covariate table and preview what is left.
X = X.drop(columns=['Year', 'Station'])
X.head()

In [None]:
# Remove the 'Type' column from the covariate table.
X = X.drop(columns='Type')

In [None]:
# Read the response table, order its rows by sensor id, year and month,
# and preview the first rows.
Y = pd.read_csv("Dataset_120.csv")
Y = Y.sort_values(by=['idSensore', 'Year', 'Month'])
Y.head()

In [None]:
# Stan program: Poisson regression with a log link, fixed effects `beta` and
# additive year (`xi`) and station (`eta`) effects.
#
# Changes with respect to the previous version of this cell:
#   * calendar years (2010..2022) are mapped to 1..nyears before indexing
#     `xi`; indexing `xi[year]` directly is out of bounds for a vector of
#     length nyears;
#   * every prior is stated exactly once (they used to be applied twice, once
#     vectorised and once in a loop, which silently tightened them);
#   * sigma0/sigma1/sigma2 were declared without a prior and never used, which
#     leaves the posterior improper; they are now the scales of beta / xi /
#     eta, as their comments describe, with half-normal(0, 10) priors;
#   * the likelihood is vectorised and uses `poisson_log`, which works on the
#     log scale instead of exponentiating first.
prior_elic = """

data {
  int<lower=1> N; // Number of observations
  int<lower=1> P; // Number of covariates (columns of X)
  int<lower=1> nyears; // Number of year effects (13 for 2010..2022)
  int<lower=1> nstations; // Number of station effects


  array[N] int<lower=1, upper=nstations> station; // Station index in 1..nstations
  array[N] int<lower=2010, upper=2022> year; // Calendar year
  // array[N] int<lower=4, upper=10> month;

  array[N] int<lower=0> y; // Count data
  matrix[N, P] X; // Predictor matrix
}

transformed data {
  // Position of each calendar year inside xi: 2010 -> 1, ..., 2022 -> 13.
  array[N] int<lower=1, upper=nyears> year_idx;

  for (n in 1:N) {
    year_idx[n] = year[n] - 2009;
  }
}

parameters {
  vector[P] beta; // Coefficients for predictors
  vector[nstations] eta; // Random effects for stations
  vector[nyears] xi; // Random effects for years

  real<lower=0> sigma0; // Standard deviation for beta
  real<lower=0> sigma1; // Standard deviation for xi
  real<lower=0> sigma2; // Standard deviation for eta
}

transformed parameters {
    vector[N] lambda;
    vector[N] intercept;
    vector[N] fix_eff;

    intercept = xi[year_idx] + eta[station];
    fix_eff = X * beta;

    lambda = exp(intercept + fix_eff);
}

model {

  // Half-normal priors (the parameters are constrained to be positive).
  sigma0 ~ normal(0, 10);
  sigma1 ~ normal(0, 10);
  sigma2 ~ normal(0, 10);

  beta ~ normal(0, sigma0);
  xi ~ normal(0, sigma1);
  eta ~ normal(0, sigma2);

  // Same likelihood as poisson(lambda), evaluated on the log scale.
  y ~ poisson_log(intercept + fix_eff);
}

"""

stan_file = "./priors.stan"

# Write the program to disk so CmdStan can compile it.
with open(stan_file, "w") as f:
    f.write(prior_elic)

priors = CmdStanModel(stan_file=stan_file)

In [None]:
# Sensor ids whose rows are excluded from the response table.
ids_to_remove = [17288, 17295, 17297, 20041, 20154, 30165]

# Remove rows with specified ids.
# `.copy()` makes the result an independent frame, so that assigning columns
# on it later does not write into a slice of the original table (which pandas
# reports as SettingWithCopyWarning).
Y = Y.loc[~Y['idSensore'].isin(ids_to_remove)].copy()

In [None]:
# Convert the counts to pandas' nullable integer dtype.
# `assign` builds a new frame instead of writing a column into Y in place;
# Y is the result of a row filter in the previous cell, and in-place column
# assignment on such a frame triggers SettingWithCopyWarning.
Y = Y.assign(Count_120=pd.to_numeric(Y['Count_120']).astype('Int64'))

In [None]:
# Assemble the Stan data and draw samples.
#
# cmdstanpy serialises `data` to JSON. Passing pandas objects (Series,
# DataFrame, nullable Int64 columns) straight through made the recorded run of
# this cell fail with RecursionError, so everything is converted to plain
# NumPy arrays first.

SEED = 42      # fixed sampler seed so the draws are reproducible
N_YEARS = 13   # the Stan program restricts `year` to 2010..2022

# Re-code stations as consecutive integers 1..nstations (sorted by original
# code), which is what indexing `eta[station]` in the Stan program requires.
# If the codes already are 1..nstations this leaves them unchanged.
station_idx = pd.factorize(stations, sort=True)[0] + 1
year_values = years.to_numpy(dtype=int)
# Raises if any count is missing: Stan integer data cannot contain NA.
y_counts = Y['Count_120'].to_numpy(dtype=int)
X_matrix = X.to_numpy(dtype=float)

# Y is filtered after X / stations / years were prepared, so make sure the
# two tables still describe the same number of observations.
n_obs = len(y_counts)
if not (len(station_idx) == len(year_values) == X_matrix.shape[0] == n_obs):
    raise ValueError(
        "Row mismatch between covariates and response: "
        f"X has {X_matrix.shape[0]} rows, station has {len(station_idx)}, "
        f"year has {len(year_values)}, y has {n_obs}."
    )

data = {
    "N": n_obs,
    "P": X_matrix.shape[1],
    "nyears": N_YEARS,
    "nstations": int(station_idx.max()),
    "station": station_idx,
    "year": year_values,
    "y": y_counts,
    "X": X_matrix,
}


fit = priors.sample(data=data, chains=1, seed=SEED)
prior_az = az.from_cmdstanpy(fit)

DEBUG:cmdstanpy:cmd: /content/priors info
cwd: None
DEBUG:cmdstanpy:input tempfile: /tmp/tmpxd3lsxpj/h0b37jcx.json


RecursionError: ignored