In [36]:
import pandas as pd
import sys
import os
from pathlib import Path
import xgboost as xgb
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from utilsforecast.evaluation import evaluate
from utilsforecast.losses import mae
sys.path.append(str(Path("..").resolve()))
from src.data.feature_engineering import date_features, lags, partial
from src.data.data_preprocessing import prepare_df
from utilsforecast.feature_engineering import pipeline, fourier
from functools import partial
from src.data.feature_engineering import  exg_features
from fine_tuning.models import xgboost_space
from fine_tuning.features import feature_space
from mlforecast.auto import AutoMLForecast
from mlforecast.auto import AutoModel


## 1️⃣ Data preparation

In [87]:
# -------------------------------
# 1.1 Data Prepartion for Nixtla
# -------------------------------


raw_path =r'C:\Users\Guest\Desktop\ds-projects\Energy-Forecasting\data\raw\PJME_hourly.csv'


df= pd.read_csv(raw_path)  # raw CSV  
df = prepare_df(df)                      # clean, rename, add unique_id
df.head()

Unnamed: 0,ds,y,unique_id
8734,2002-01-01 01:00:00,30393.0,A
8735,2002-01-01 02:00:00,29265.0,A
8736,2002-01-01 03:00:00,28357.0,A
8737,2002-01-01 04:00:00,27899.0,A
8738,2002-01-01 05:00:00,28057.0,A


In [69]:
# -------------------------------
# 1.2 Split into train and test
# -------------------------------
split_date = '2018-08-02'

train = df[df['ds'] < split_date ]
test  = df[df['ds'] >= split_date ]

print(f'Train shape: {train.shape}, Test shape: {test.shape}')

Train shape: (145341, 3), Test shape: (25, 3)


In [73]:
train =df[:-10]
test  =df [-10:]


(10, 3)

In [None]:
train 

## Feature Engineering 

In [97]:
# -------------------------------
# 2. Apply exogenous features
# -------------------------------
# exg_features() returns list of feature functions (Fourier, etc.)




exg_df, future_df = pipeline(train , freq='h', h=len(test), features=exg_features())
print(f'Exogenous df shape: {exg_df.shape}')


Exogenous df shape: (145356, 7)


In [98]:
exg_df.tail()

Unnamed: 0,ds,y,unique_id,sin1_24,sin2_24,cos1_24,cos2_24
140240,2018-08-02 10:00:00,39902.0,A,0.865129,-0.867811,-0.50155,-0.496895
140241,2018-08-02 11:00:00,42189.0,A,0.705896,-0.999994,-0.708315,0.00342
140242,2018-08-02 12:00:00,43954.0,A,0.498588,-0.864391,-0.866839,0.50282
140243,2018-08-02 13:00:00,45372.0,A,0.257323,-0.497315,-0.966326,0.86757
140244,2018-08-02 14:00:00,46534.0,A,-0.001468,0.002937,-0.999999,0.999996


In [79]:
future_df

Unnamed: 0,unique_id,ds,sin1_24,sin2_24,cos1_24,cos2_24
0,A,2018-08-02 15:00:00,-0.260159,0.502402,-0.965566,0.864634
1,A,2018-08-02 16:00:00,-0.501131,0.867329,-0.865371,0.497734
2,A,2018-08-02 17:00:00,-0.707973,0.999997,-0.706239,-0.002453
3,A,2018-08-02 18:00:00,-0.866598,0.864877,-0.499007,-0.501983
4,A,2018-08-02 19:00:00,-0.966201,0.498154,-0.25779,-0.867089
5,A,2018-08-02 20:00:00,-1.0,-0.001969,0.000984,-0.999998
6,A,2018-08-02 21:00:00,-0.965692,-0.501565,0.259692,-0.86512
7,A,2018-08-02 22:00:00,-0.865614,-0.866847,0.500713,-0.498573
8,A,2018-08-02 23:00:00,-0.706582,-0.999999,0.707632,0.001485
9,A,2018-08-03 00:00:00,-0.499427,-0.865363,0.866356,0.501146


In [80]:
test

Unnamed: 0,ds,y,unique_id
140245,2018-08-02 15:00:00,47154.0,A
140246,2018-08-02 16:00:00,46989.0,A
140247,2018-08-02 17:00:00,46816.0,A
140248,2018-08-02 18:00:00,46760.0,A
140249,2018-08-02 19:00:00,45641.0,A
140250,2018-08-02 20:00:00,44057.0,A
140251,2018-08-02 21:00:00,43256.0,A
140252,2018-08-02 22:00:00,41552.0,A
140253,2018-08-02 23:00:00,38500.0,A
140254,2018-08-03 00:00:00,35486.0,A


In [60]:
len(test)

97

## 2️⃣ Models Training

In [128]:
# ===============================
# 
# ===============================


# Configure AutoMLForecast with both model and feature tuning


auto_mlf = AutoMLForecast(
                models={'custom_xgb': AutoModel(model=xgb.XGBRegressor(), config=xgboost_space)}, 
                freq='h',                                             
                init_config=feature_space,    
                fit_config=fit_config
                
                
                
            )

In [129]:
auto_mlf.fit(df=exg_df , 
                     n_windows= 2,
                     h= len(test), 
                     num_samples= 10,
                     
 )
#print(f'cross validation results', cv_df.head())
#print(f'✅ Models trained successfully!')


[I 2026-01-10 19:33:43,261] A new study created in memory with name: no-name-6097c462-fb91-406e-a98d-7c0fe89c1a98


[I 2026-01-10 19:33:57,137] Trial 0 finished with value: 0.009040503731813221 and parameters: {'learning_rate': 0.0646642271741456, 'n_estimators': 829, 'extra_lag': 0}. Best is trial 0 with value: 0.009040503731813221.
[I 2026-01-10 19:34:08,304] Trial 1 finished with value: 0.009428449812890314 and parameters: {'learning_rate': 0.08996299860535256, 'n_estimators': 662, 'extra_lag': 100}. Best is trial 0 with value: 0.009040503731813221.
[I 2026-01-10 19:34:19,364] Trial 2 finished with value: 0.009676136729605569 and parameters: {'learning_rate': 0.1477317633429356, 'n_estimators': 717, 'extra_lag': 100}. Best is trial 0 with value: 0.009040503731813221.
[I 2026-01-10 19:34:27,719] Trial 3 finished with value: 0.008485520683399205 and parameters: {'learning_rate': 0.013449326615130807, 'n_estimators': 412, 'extra_lag': 1200}. Best is trial 3 with value: 0.008485520683399205.
[I 2026-01-10 19:34:41,294] Trial 4 finished with value: 0.008707329833682527 and parameters: {'learning_rate'

AutoMLForecast(models={'custom_xgb': AutoModel(model=XGBRegressor)})

In [130]:
df_pred = auto_mlf.predict(h=len(future_df), 
                           X_df=future_df,
                           )
df_pred.head()


Unnamed: 0,unique_id,ds,custom_xgb
0,A,2018-08-02 15:00:00,47391.773438
1,A,2018-08-02 16:00:00,48220.835938
2,A,2018-08-02 17:00:00,48526.132812
3,A,2018-08-02 18:00:00,47927.351562
4,A,2018-08-02 19:00:00,46944.585938


In [131]:
auto_mlf.results_['custom_xgb'].best_params

{'learning_rate': 0.03396540216391729, 'n_estimators': 662, 'extra_lag': 0}

## 4️⃣ Evaluation and plotting

In [132]:
from utilsforecast.evaluation import evaluate


merg_df = pd.merge(df_pred,test, on =['unique_id', 'ds'],how='left')
eval_df = evaluate(merg_df,metrics= [mae])
eval_df.head()

Unnamed: 0,unique_id,metric,custom_xgb
0,A,mae,1007.557812


In [None]:
merg_df

In [133]:
# 3.2 plotting

from utilsforecast.plotting import plot_series


plot_series(df=exg_df, 
                forecasts_df= cv_df.drop(columns=['cutoff', 'y']),
                max_insample_length=500,
                palette='viridis', 
                models=['xgb']
        )

NameError: name 'cv_df' is not defined

In [None]:
import os
os.getcwd()

'c:\\Users\\Guest\\Desktop\\ds-projects\\Energy-Forecasting\\notbooks'