In [1]:
# Directories
import os

new_directory = r'c://Users//Fer//TESIS_ARCHIVOS//TESIS_AIRE//MP_Forecasting//mp_forecasting//notebooks'
os.chdir(new_directory)

# Data Manipulation
import pandas as pd # for data manipulation
import numpy as np # for data manipulation

# Training utils for SVR
from training_code.utils import utils_svr

# Optuna
import optuna
import joblib
import pickle

# Tiempo
import datetime as dt
from dateutil.relativedelta import relativedelta, MO

# Modelos
from sklearn.linear_model import LinearRegression # for building a linear regression model
from sklearn.svm import SVR # for building SVR model
from sklearn.preprocessing import MinMaxScaler
from sklearn.multioutput import MultiOutputRegressor
import xgboost as xgb
from sklearn.model_selection import train_test_split

# Metricas
from sklearn.metrics import mean_absolute_error #MAE
from sklearn.metrics import mean_absolute_percentage_error #MAPE
from sklearn.metrics import mean_squared_error #MSE, para RMSE: squared = False

# Visualizations
import plotly.graph_objects as go # for data visualization
import plotly.express as px # for data visualization
import matplotlib.pyplot as plt

# Advertencias
import warnings
warnings.filterwarnings("ignore")

  from pandas import MultiIndex, Int64Index


In [2]:
validacion = pd.read_csv('datos/230127_test_ESTACIONES.csv', parse_dates=['FECHAHORA'])

In [3]:
study = joblib.load("optuna_studies/SVR/study_SVR_e4_MP2.5_correcto_7_days.pkl")

params = study.best_params

variables = ["ANHO", 'DIA', 'MES', 'HORA', 'MINUTO', 'MP1', 'MP2_5', 'MP10', 
                'TEMPERATURA', 'HUMEDAD', 'PRESION', 'TEMPERATURA_PRONOSTICO', 
                'HUMEDAD_PRONOSTICO', 'PRESION_PRONOSTICO', 'DIA_SEM', 'TRAFICO' , 'AQI_MP10', 'AQI_MP2_5']

dependent = ['AQI_MP2_5']

number_of_features = len(variables)

training_days = 7 
forecast_days = 1 
samples_per_day = 288
step = 288/4

input_samples = int(samples_per_day * training_days) # cantidad de muestras en 7 dias
output_samples = int(samples_per_day * forecast_days) # cantidad de muestras en 1 dia
train_test_samples = int(input_samples + output_samples) # cantidad de datos para el train_test

In [9]:
%%time

predicciones = {}
metricas = {}

for i in range(1, 11):

    estacion = i

    X_val, y_val = utils_svr.get_validation(validacion, 
                                            estacion, 
                                            variables, 
                                            dependent, 
                                            train_test_samples, 
                                            input_samples, 
                                            output_samples, 
                                            number_of_features,
                                            step)


    trained_svr_model = joblib.load('models/models_svr/7_dias/svr_7dias_estacion_'+str(i)+'.pkl')

    # trained_svr_model = MultiOutputRegressor(svr_model).fit(X_train , y_train)

    prediction = trained_svr_model.predict(X_val)
    
    # guardamos los valores predecidos vs reales en un diccionario
    
    predicciones[i] = {'real' : y_val, 'prediccion': prediction}

    df_predicciones = pd.DataFrame.from_dict(predicciones)

    df_predicciones.to_csv('predicciones_10estaciones_'+str(training_days)+'dias.csv')

    # guardamos los modelos

    #pickle.dump(trained_svr_model, open('models/models_svr/'+str(training_days)+'_dias/svr_'+str(training_days)+'dias_estacion_'+str(i) + '.pkl', 'wb'))

    mean_real = y_val.mean()
    mean_prediction = prediction.mean()

    MAPE = mean_absolute_percentage_error(prediction, y_val)
    MAE = mean_absolute_error(prediction, y_val)
    RMSE = mean_squared_error(prediction, y_val, squared = False)
    
    # guardamos las metricas en un diccionario
    
    metricas[i] = {'MAE': MAE, "MAPE": MAPE, 'RMSE': RMSE, 'Media real' : mean_real, 'Media predecida': mean_prediction}

    df_metricas = pd.DataFrame.from_dict(metricas)

    # guardamos las metricas en un csv
    df_metricas.to_csv('metrics/SVR/metricas_10estaciones_'+str(training_days)+'dias.csv')
    
    print('ESTACION '+ str(i) + ':')
    print('prediction shape: ', prediction.shape)
    print('test shape: ', y_val.shape)
    print('MAE :', MAE)
    print('MAPE: ', MAPE)
    print('RMSE: ', RMSE)
    print('\n')
    print('media real: ', mean_real)
    print('media predecida: ', mean_prediction)
    print('\n')

ESTACION 1:
prediction shape:  (337, 288)
test shape:  (337, 288)
MAE : 9.559312886531911
MAPE:  0.29749799940677935
RMSE:  13.285440214013969


media real:  37.44718513023409
media predecida:  38.5603562100277


ESTACION 2:
prediction shape:  (337, 288)
test shape:  (337, 288)
MAE : 8.155986562324658
MAPE:  0.26523779540795106
RMSE:  11.546929539305392


media real:  34.25979846686449
media predecida:  31.68108576713498


ESTACION 3:
prediction shape:  (337, 288)
test shape:  (337, 288)
MAE : 9.647774506558834
MAPE:  0.22632267181090335
RMSE:  14.300879295432335


media real:  46.554484009231786
media predecida:  43.70503990750268


ESTACION 4:
prediction shape:  (337, 288)
test shape:  (337, 288)
MAE : 10.064554921991503
MAPE:  0.35622336550496464
RMSE:  14.800891083091711


media real:  35.52617045829212
media predecida:  30.888299317168833


ESTACION 5:
prediction shape:  (337, 288)
test shape:  (337, 288)
MAE : 8.062395227646483
MAPE:  0.2777493753733374
RMSE:  12.449997083393997


In [20]:
prediction = predicciones[3]['prediccion']
y_val = predicciones[3]['real']

y_val.shape

(337, 288)

In [21]:
pred_1 = []

for i in range(3, len(prediction), 4):
    pred_1.append(prediction[i])
    
pred_1 = np.asarray(pred_1)
    
test_1 = []

for i in range(3, len(y_val), 4):
    test_1.append(y_val[i])
    
test_1 = np.asarray(test_1)

print(pred_1.shape)
print(test_1.shape)

y_pred = np.reshape(pred_1, ( len(pred_1) * len(pred_1[0])))
y_test_1 = np.reshape(test_1, (len(test_1) * len(test_1[0])))

print(y_pred)
print(y_test_1)

fig_val = go.Figure()

fig_val.add_trace(
    go.Scatter( y = list(y_test_1), name = 'Target'))

fig_val.add_trace(
     go.Scatter( y = list(y_pred), name = 'Forecasts'))

fig_val.update_layout( title_text = "Validation - Forecasts vs Targets")

(84, 288)
(84, 288)
[41.95597458 42.01900059 42.05805332 ... 37.15889558 37.35378187
 37.52108727]
[42. 42. 42. ... 33. 33. 33.]
