In [None]:
# Directories
import os

# Switch the working directory to the project's notebooks folder.
# NOTE(review): this is a machine-specific absolute path; the relative paths
# used later in this notebook ('datos/...', 'graphs/...', 'metrics/...') are
# resolved against it, so the notebook will not run elsewhere without editing it.
new_directory = r'c://Users//Fer//TESIS_ARCHIVOS//TESIS_AIRE//MP_Forecasting//mp_forecasting//notebooks'
os.chdir(new_directory)

# Data Manipulation
import pandas as pd # for data manipulation
import numpy as np # for data manipulation

# Guardar resultados

import pickle

# Tiempo
import datetime as dt
from dateutil.relativedelta import relativedelta, MO


# Visualizations
import plotly.graph_objects as go # for data visualization
import plotly.express as px # for data visualization
import matplotlib.pyplot as plt

# Warnings: silence every warning for the rest of the session.
# NOTE(review): this blanket filter also hides pandas/statsmodels warnings
# raised by the cells below — consider narrowing it to specific categories.
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Load the station readings: 'FECHAHORA' is parsed as datetime and the first
# CSV column is used as the row index.
datos = pd.read_csv('datos/230209_ESTACIONES.csv', parse_dates=['FECHAHORA'], index_col=[0])
# Preview the first rows to check the load.
datos.head()

In [None]:
# For every station (ESTACION 1..10) plot the AQI series together with its
# rolling mean and rolling standard deviation, and save the figure as HTML.

# Rolling window in samples. The notebook labels 288 samples as one day
# elsewhere, so this is 30 "days" (assumes 5-minute sampling — TODO confirm).
ventana_mensual = 288*30

# write_html does not create missing folders, so make sure the target exists.
os.makedirs('graphs/TSA', exist_ok=True)

for i in range(1,11):

    # Select the station and index its AQI column by timestamp in one step
    # (avoids mutating a slice of `datos`).
    df = (
        datos.loc[datos['ESTACION'] == i, ['FECHAHORA','AQI_MP2_5']]
        .set_index('FECHAHORA')
    )

    # The raw series is plotted as-is; a rolling(1).mean() would just return it.
    rolling_mean = df.rolling(ventana_mensual).mean()
    rolling_std = df.rolling(ventana_mensual).std()

    fig = go.Figure()

    fig.add_trace(
        go.Scatter( x = df.index, y = df.AQI_MP2_5,  name = 'AQI'))

    fig.add_trace(
        go.Scatter( x = df.index, y = rolling_mean.AQI_MP2_5,  name = 'Media'))

    fig.add_trace(
        go.Scatter( x = df.index, y = rolling_std.AQI_MP2_5, name = 'Desviacion estandar'))

    # Separators added: the original concatenation rendered "Estacion1AQI, ...".
    fig.update_layout( title_text = "Estacion " + str(i) + ": AQI, media y desviacion estandar (mensual)")

    fig.write_html('graphs/TSA/30dias_mean_std_estacion'+str(i)+'.html')


In [None]:
from statsmodels.tsa.stattools import adfuller


In [None]:
# Run the Augmented Dickey-Fuller test on each station's AQI series and keep
# one long-format table (value column + metric-name column) per station.
etiquetas_adf = ["Estación","ADF","valor-p","No. de lags utilizado","Numero de observaciones", "valor critico (1%)", "valor crítico (5%)", "valor crítico (10%)"]

adfuller_dfs = []
for estacion in range(1, 11):
    # Station subset, indexed by timestamp, holding only the AQI column.
    serie_estacion = (
        datos.loc[datos['ESTACION'] == estacion, ['FECHAHORA', 'AQI_MP2_5']]
        .set_index('FECHAHORA')
    )

    # First five entries of the result tuple: statistic, p-value, lags used,
    # number of observations, and the dict of critical values.
    estadistico, valor_p, lags_usados, n_obs, criticos = adfuller(serie_estacion, autolag="AIC")[:5]

    valores = [
        estacion,
        estadistico,
        valor_p,
        lags_usados,
        n_obs,
        criticos['1%'],
        criticos['5%'],
        criticos['10%'],
    ]

    # Column order ("Valores" first, "Metricas" second) is relied on by the
    # reshaping cell further down.
    adfuller_dfs.append(pd.DataFrame({"Valores": valores, "Metricas": etiquetas_adf}))

In [None]:
# Inspect the ADF table stored at position 2 of the list (third loop
# iteration above, i.e. ESTACION == 3).
adfuller_dfs[2]

In [None]:
# Reshape every per-station ADF table from long format (one row per metric)
# into a single wide row, then stack all stations into one summary table.
df_arreglada = []
for tabla_adf in adfuller_dfs:
    # Metric names become the column labels; the single remaining row
    # ("Valores") carries the numbers for that station.
    df_arreglada.append(tabla_adf.set_index('Metricas').T)

# reset_index returns a new frame, so the result must be assigned: the
# original call discarded it and every row kept the label "Valores".
df_completo = pd.concat(df_arreglada).reset_index(drop = True)

df_completo

In [None]:
# Persist the combined ADF summary table (path is relative to the working directory).
df_completo.to_csv('metrics/TSA/ADF_test.csv')

In [None]:
# Autocorrelation of each station's AQI series at a fixed set of lags.
# Each entry is (printed label, key in the result dict, lag in samples).
# The labels treat 12 samples as one hour and 288 as one day, i.e. they
# assume 5-minute sampling — TODO confirm against the data.
lags_autocorrelacion = [
    ("Hourly Lag", '1 hora', 12),
    ("6 hour Lag", '6 horas', 72),
    ("12 hour Lag", '12 horas', 144),
    ("Daily Lag", '1 dia', 288),
    ("Weekly Lag", '1 semana', 288*7),
    ("One Month Lag", '1 mes', 288*30),
    ("Three Month Lag", '3 meses', 288*30*3),
    ("Six Month Lag", '6 meses', 288*30*6),
    ("Nine Month Lag", '9 meses', 288*30*9),
]

# Maps station number -> dict with 'ESTACION' plus one entry per lag key.
autocorrelaciones = {}

for i in range(1,11):

    # AQI series of the current station, indexed by timestamp.
    serie = datos.loc[datos['ESTACION'] == i].set_index('FECHAHORA')['AQI_MP2_5']

    print('Estacion '+ str(i))
    print('\n')

    fila = {'ESTACION' : i}
    for etiqueta, clave, lag in lags_autocorrelacion:
        fila[clave] = serie.autocorr(lag=lag)
        # Each label prints its own value; the original printed the 6-hour
        # value under the "12 hour Lag" label.
        print(etiqueta + ": ", fila[clave])

    print('\n')

    autocorrelaciones[i] = fila

In [None]:
# Turn the per-station autocorrelation dicts into a table (orient='index':
# each outer key becomes a row) and persist it as CSV.
df_prueba = pd.DataFrame.from_dict(autocorrelaciones, orient='index')
df_prueba.to_csv('metrics/TSA/autocorrelaciones.csv')


In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.seasonal import DecomposeResult, seasonal_decompose
from plotly.subplots import make_subplots

def plot_seasonal_decompose(result: DecomposeResult, title="Seasonal Decomposition"):
    """Draw the four components of a seasonal decomposition as stacked line plots.

    Parameters
    ----------
    result : DecomposeResult
        Decomposition whose ``observed``, ``trend``, ``seasonal`` and ``resid``
        attributes are plotted; ``trend``, ``seasonal`` and ``resid`` must
        expose an ``index`` used as the x axis.
    title : str, optional
        Figure title, centred above the subplots.

    Returns
    -------
    The plotly figure with one subplot row per component.
    """
    # (subplot title, x values, y values) for each row, top to bottom.
    # The observed row is drawn against the seasonal component's index.
    paneles = [
        ("Observed", result.seasonal.index, result.observed),
        ("Trend", result.trend.index, result.trend),
        ("Seasonal", result.seasonal.index, result.seasonal),
        ("Residuals", result.resid.index, result.resid),
    ]

    figura = make_subplots(
        rows=4,
        cols=1,
        subplot_titles=[nombre for nombre, _, _ in paneles],
    )

    for fila, (_, eje_x, eje_y) in enumerate(paneles, start=1):
        figura.add_trace(go.Scatter(x=eje_x, y=eje_y, mode="lines"), row=fila, col=1)

    figura.update_layout(
        height=900, title=title, margin=dict(t=100), title_x=0.5, showlegend=False
    )
    return figura


# Additive seasonal decomposition of every station's AQI series at several
# candidate periods (in "days" of 288 samples each); one HTML figure is
# written per station and period.

# write_html does not create missing folders, so make sure the target exists.
os.makedirs('graphs/TSA/seasonal_decompose', exist_ok=True)

for i in range(1,11):
    data = datos[datos['ESTACION'] == i]

    # Index the AQI column by timestamp without mutating a slice of `datos`.
    df = data[['FECHAHORA','AQI_MP2_5']].set_index('FECHAHORA')

    for j in [1, 7, 15, 30, 30*3, 30*6, 30*9]:

        periodo = 288*j

        # seasonal_decompose raises when the series holds fewer than two
        # complete cycles; skip that period instead of aborting the
        # remaining periods and stations.
        if len(df) < 2*periodo:
            print('Estacion '+str(i)+': se omite el periodo de '+str(j)+' dias ('
                  +str(len(df))+' observaciones, se requieren '+str(2*periodo)+')')
            continue

        decompose = seasonal_decompose(df['AQI_MP2_5'],model='additive', period=periodo)

        print(decompose.resid.shape)


        fig = plot_seasonal_decompose(decompose)

        fig.write_html('graphs/TSA/seasonal_decompose/'+str(j)+'dias_seasonal_decompose_estacion_'+str(i)+'.html')