In [7]:
import pymc as pm
import arviz as az
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pymc as pm
import xarray as xr
import pytensor.tensor as at
import sys
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error


sys.path.append('../../')
from util.preprocessing_utils import standardize_column
from util.dataframe_utils import analyse_columns

In [8]:
df = pd.read_csv('../../data/processed/ProcessedTweets.csv')

In [9]:
with pm.Model() as model:
    alpha = pm.Normal('Intercept', mu=0, sigma=10)
    airline_coeff = pm.Normal('airline_coeff', mu=0, sigma=10, shape=len(df['airline'].unique()))
    conf_coeff = pm.Normal('conf_coeff', mu=0, sigma=10)
    sine_coeff = pm.Normal('sine_coeff', mu=0, sigma=10)
    cosine_coeff = pm.Normal('cosine_coeff', mu=0, sigma=10)
    sent_coeff = pm.Normal('sent_coeff', mu=0, sigma=10, shape=len(df['airline_sentiment'].unique()))
    sigma = pm.HalfNormal('sigma', sigma=10)

    airline_index = df['airline'].astype(int)
    sentiment_index = df['airline_sentiment'].astype(int)

    mu = (alpha + 
          airline_coeff[airline_index] +
          conf_coeff * df['airline_sentiment_confidence'] +
          sine_coeff * df['hour_sin'] +
          cosine_coeff * df['hour_cos']
    )

    sentiment_obs = pm.Normal('sentiment_obs', mu=mu, sigma=sigma, observed=df['airline_sentiment'])

    nuts_sampler_kwargs = {'target_accept': 0.95, 'max_treedepth': 20}
    trace = pm.sample(1000, tune=1000, nuts_sampler_kwargs=nuts_sampler_kwargs, return_inferencedata=True)

az.summary(trace)


Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [Intercept, airline_coeff, conf_coeff, sine_coeff, cosine_coeff, sent_coeff, sigma]


Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 1157 seconds.
The effective sample size per chain is smaller than 100 for some parameters.  A higher number is needed for reliable rhat and ess computation. See https://arxiv.org/abs/1903.08008 for details
Chain 0 reached the maximum tree depth. Increase `max_treedepth`, increase `target_accept` or reparameterize.
Chain 1 reached the maximum tree depth. Increase `max_treedepth`, increase `target_accept` or reparameterize.
Chain 2 reached the maximum tree depth. Increase `max_treedepth`, increase `target_accept` or reparameterize.
Chain 3 reached the maximum tree depth. Increase `max_treedepth`, increase `target_accept` or reparameterize.


Unnamed: 0,mean,sd,hdi_3%,hdi_97%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
Intercept,0.575,3.778,-6.803,7.838,0.245,0.173,241.0,427.0,1.01
airline_coeff[0],-0.655,3.778,-7.883,6.731,0.245,0.173,241.0,436.0,1.01
airline_coeff[1],-0.21,3.778,-7.538,7.094,0.245,0.173,241.0,434.0,1.01
airline_coeff[2],-0.474,3.778,-7.766,6.883,0.245,0.173,241.0,438.0,1.01
airline_coeff[3],-0.566,3.778,-7.81,6.81,0.245,0.173,241.0,445.0,1.01
airline_coeff[4],-0.094,3.778,-7.394,7.255,0.245,0.173,241.0,445.0,1.01
airline_coeff[5],-0.194,3.778,-7.448,7.191,0.245,0.173,241.0,445.0,1.01
conf_coeff,1.275,0.038,1.203,1.344,0.001,0.0,3095.0,2847.0,1.0
sine_coeff,-0.019,0.009,-0.036,-0.003,0.0,0.0,2361.0,2045.0,1.0
cosine_coeff,0.038,0.01,0.018,0.057,0.0,0.0,2371.0,2617.0,1.0
