The purpose of this notebook is to try out the AutoGluon package, open-sourced by AWS. Project details are available at the link below.

https://autogluon.mxnet.io/


In [14]:
# Installation commands, left disabled; uncomment and run once if the packages are missing.
#!pip install --upgrade mxnet
# !pip install autogluon

In [15]:
import autogluon as ag

# Data handling
# ==============================================================================
import numpy as np
import pandas as pd
from scipy import stats

# Plotting
# ==============================================================================
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
plt.rcParams['lines.linewidth'] = 1.5
plt.rcParams['font.size'] = 10

# Modelling and forecasting
# ==============================================================================
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler

from sklearn.neural_network import MLPRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import  KNeighborsRegressor
from sklearn import linear_model
from sklearn.model_selection import KFold, cross_val_score, train_test_split

from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.ar_model import AutoReg

# Metric computation
# ==============================================================================
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, median_absolute_error
from sklearn import metrics

# Warning configuration
# ==============================================================================
import warnings
# Silence pandas' chained-assignment warning for the whole notebook.
pd.options.mode.chained_assignment = None  # default='warn'
# warnings.filterwarnings('ignore')

In [16]:
from autogluon.timeseries import TimeSeriesDataFrame, TimeSeriesPredictor

In [17]:
# Input location and the columns to read from the CSV.
folder = ""
parametro = 'temperatura'
output = parametro
inputs = ['fecha', 'temperatura', 'ambiente', 'nivel']
usecols = list(inputs)

datos = pd.read_csv(folder + "lagoon_hourly_filled" + ".csv", sep=',', usecols=usecols)

# Data preparation
# ==============================================================================
# 1. parse 'fecha' into a datetime column named 'time_stamp' and drop the raw column
# 2. add the previous row's temperature as the lag feature "t-1"
# 3. drop rows containing NaN (at least the first row, whose lag is undefined)
# 4. tag every row with the single series identifier "H1"
datos = (
    datos
    .assign(time_stamp=lambda df: pd.to_datetime(df['fecha'], format='%d/%m/%Y %H:%M'))
    .drop(columns=["fecha"])
    .assign(**{"t-1": lambda df: df["temperatura"].shift(1)})
    .dropna()
    .assign(item_id="H1")
)

datos.info()

datos

In [18]:
# Trimming the tail here controls which observation is the last one, so the
# series can be left ending on an upward or a downward trend:
#datos = datos.iloc[:-10]

# Independent snapshot of the prepared data, so later changes to `datos`
# cannot alter it.
datos_originales = datos.copy()

# Overview of the whole target series, plotted against its timestamps so the
# x-axis shows dates instead of row numbers.
fig, ax = plt.subplots(figsize=(20, 8))
datos.plot(x='time_stamp', y='temperatura', ax=ax, label='temperatura')
ax.set_xlabel('Fecha')
ax.set_ylabel('Temperatura (ºC)')
ax.set_title('Serie completa de temperatura')
ax.legend();


In [19]:
# Number of trailing rows held out of the training set; the same value is
# passed to the predictor as its forecast horizon (prediction_length).
entrenar = 420

In [20]:
# Training set: every row except the last `entrenar` ones, converted to
# AutoGluon's time-series frame (series id in "item_id", time in "time_stamp").
train_data = TimeSeriesDataFrame.from_data_frame(
    datos.head(-entrenar),
    timestamp_column="time_stamp",
    id_column="item_id",
)
train_data.head()

In [21]:
# Forecaster for the "temperatura" column, with a horizon of `entrenar` steps
# and models ranked by MASE. Artifacts are stored under `path`.
predictor = TimeSeriesPredictor(
    target="temperatura",
    prediction_length=entrenar,
    eval_metric="MASE",
    path="autogluon-m4-hourly",
)

# Fit with the "best_quality" preset under a time budget of 600.
predictor.fit(train_data, presets="best_quality", time_limit=600)

In [22]:
# Forecast the steps that follow the end of train_data (horizon = the
# predictor's prediction_length) and preview the first rows of the result.
predictions = predictor.predict(train_data)
predictions.head()

In [23]:
# Full series (training rows plus the held-out tail) in AutoGluon format;
# it provides the actual values to compare the forecast against.
test_data = TimeSeriesDataFrame.from_data_frame(
    datos, id_column="item_id", timestamp_column="time_stamp"
)

# Last 200 training observations, shown as context before the forecast.
y_train = train_data.loc["H1"]["temperatura"].tail(200)
# Held-out tail plus one extra leading row (the last training observation).
y_test = test_data.loc["H1"]["temperatura"].tail(entrenar + 1)

# Mean forecast for series "H1".
y_pred = predictions.loc["H1"]["mean"]

plt.figure(figsize=(20, 8))

for serie, etiqueta in (
    (y_train, "Past time series values"),
    (y_pred, "Mean forecast"),
    (y_test, "Future time series values"),
):
    plt.plot(serie, label=etiqueta)

plt.legend();


In [26]:
# Inspect the training slice currently bound to y_train.
y_train

In [38]:
# Range between two hard-coded timestamps, built with the 'H' frequency alias.
# NOTE(review): `date_range` is reassigned in the final plotting cell (from the
# union of the y_train and y_test indexes), so this value is only inspected here.
start_date = "2023-08-08 16:00"
end_date = "2023-09-13 00:00"
date_range = pd.date_range(start=start_date, end=end_date, freq='H')

date_range

In [49]:
# Inspect the actual values used for comparison with the forecast.
y_test

In [50]:
# Inspect the mean forecast.
y_pred

In [39]:
# Re-slice the training series for the final figure.
# NOTE(review): 1264 is a hard-coded positional offset into the training
# series, so the selected window depends on the length of the loaded data —
# confirm it still picks the intended start.
y_train = train_data.loc["H1"]["temperatura"].iloc[1264:]
# Same slice as in the first comparison plot: the last `entrenar` rows plus one
# extra leading row.
y_test = test_data.loc["H1"]["temperatura"].iloc[-entrenar-1:]

In [40]:
# Restore Matplotlib's default rcParams; this discards the 'fivethirtyeight'
# style and the line-width / font-size overrides set in the imports cell.
plt.rcdefaults()

In [51]:
# Final figure: training context, mean forecast and held-out actual values.

# Union of both timestamp indexes; used only to place the x-axis ticks.
date_range = y_train.index.union(y_test.index)
# One tick every 168 points (one week of hourly data).
weekly_ticks = date_range[::168]

y_pred = predictions.loc["H1"]["mean"]

fig, ax = plt.subplots(figsize=(15, 7))

ax.plot(y_train, label="Datos entrenamiento", linestyle='--')
ax.plot(y_pred, label='Predicción')
# y_test carries one extra leading row (the last training observation);
# skip it so only the forecast window is drawn as 'Original'.
ax.plot(y_test.iloc[1:], label='Original')

# Axis labels and title; the horizon in the title follows `entrenar` instead
# of being hard-coded.
ax.set_xlabel('Fecha', fontsize=14)
ax.set_ylabel('Temperatura (ºC)', fontsize=14)
ax.set_title(f'Experimento 5 - AutoGluon ({entrenar} horas)', fontsize=16)

# Hide the top and right borders.
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

# Legend and tick-label sizes.
ax.legend(fontsize=12)
ax.tick_params(axis='both', which='major', labelsize=12)

ax.set_xticks(weekly_ticks)
ax.set_xticklabels(weekly_ticks.strftime('%Y-%m-%d %H:%M'))

# Light dashed grid.
ax.grid(True, which='both', linestyle='--', linewidth=0.7, color='grey', alpha = 0.5)

# Save the figure at high resolution, then display it.
fig.savefig("exp5.png", dpi=300, bbox_inches='tight')
plt.show()


In [52]:
# Forecast accuracy on the held-out window.
# y_test carries one extra leading row (the last training observation), so it
# is dropped once here and the same actual values feed every metric.
y_true = y_test.iloc[1:]

# Mean squared error
mse = mean_squared_error(y_true, y_pred)
print(f"MSE: {mse}")

# Mean absolute error. Computed with scikit-learn like the other two metrics,
# so all three pair the values by position. The previous index-aligned
# subtraction would silently skip any timestamps that did not match.
mae = mean_absolute_error(y_true, y_pred)
print(f"MAE: {mae}")

# Coefficient of determination (R2)
r2 = metrics.r2_score(y_true, y_pred)
print(f"R2: {r2}")

In [24]:
# The test score is computed on the last prediction_length (= entrenar) time
# steps of each series in the data passed here. test_data is used so that the
# models are scored on the held-out tail, which train_data does not contain.
predictor.leaderboard(test_data, silent=True)