In [2]:
import os
# Move the working directory one level up so that the relative 'data/' paths
# used further down resolve (assumes the notebook sits one level below the
# project root -- TODO confirm). The guard makes the cell safe to re-run:
# without it every execution would climb one directory higher.
if not os.path.isdir('data'):
    os.chdir(os.path.dirname(os.getcwd()))

In [3]:
import pandas as pd
import numpy as np
import lightgbm as lgb

# Name of the column the models are trained to predict.
target = 'net_target'

# Columns fed to every model as inputs.
features = [
    'Month', 'Hour',
    'hour_x', 'hour_y',
    'month_x', 'month_y',
    'net_target-1',
    'diffuse_solar_radiation+1', 'relative_humidity+1', 'drybulb_temp+1',
]

# Quantile levels to fit: 0.001, then 0.05 .. 0.95 in steps of 0.05, then 0.999
# (the 0.951 stop keeps 0.95 inside the half-open arange interval).
qts = np.r_[0.001, np.arange(0.05, 0.951, 0.05), 0.999]

  _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)
  _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)
  _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)


In [5]:
# Read the train and test tables; the first CSV column becomes the index.
data_train, data_test = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in ('data/extra_train.csv', 'data/extra_test.csv')
)
# Show the training table as the cell output.
data_train

Unnamed: 0,Month,Hour,Day Type,cons_target-1,Solar Generation [W/kW],Outdoor Drybulb Temperature [C],Relative Humidity [%],Diffuse Solar Radiation [W/m2],Direct Solar Radiation [W/m2],building,...,net_target-1_min_lag3,net_target-1_std_lag3,cons_target,gen_target,cons_target-2,gen_target-2,diffuse_solar_radiation+1,drybulb_temp+1,relative_humidity+1,net_target-23
2,8.0,0.0,2.0,0.116313,0.0,19.4,87.0,0.0,0.0,2,...,0.3982,0.119400,0.106422,0.000000,0.170776,0.000000,0.000000,19.4,87.0,0.382273
3,8.0,1.0,2.0,0.106422,0.0,19.4,87.0,0.0,0.0,2,...,0.3884,0.023770,0.109127,0.000000,0.116313,0.000000,0.000000,19.4,90.0,0.437649
4,8.0,2.0,2.0,0.109127,0.0,19.4,90.0,0.0,0.0,2,...,0.3880,0.003506,0.107621,0.000000,0.106422,0.000000,0.000000,18.9,90.0,0.418428
5,8.0,3.0,2.0,0.107621,0.0,18.9,90.0,0.0,0.0,2,...,0.3877,0.000930,0.120207,0.000000,0.109127,0.000000,0.000000,18.3,93.0,0.466455
6,8.0,4.0,2.0,0.120207,0.0,18.3,93.0,0.0,0.0,2,...,0.3887,0.004715,0.156445,14.095833,0.107621,0.000000,2.772589,18.9,90.0,0.341153
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43653,7.0,19.0,7.0,0.092567,0.0,18.3,90.0,0.0,0.0,1,...,0.4750,0.217000,0.144716,0.000000,0.703661,2.858333,0.000000,18.9,87.0,0.525154
43654,7.0,20.0,7.0,0.144716,0.0,18.9,87.0,0.0,0.0,1,...,0.4410,0.209100,0.335914,0.000000,0.092567,0.000000,0.000000,19.4,84.0,0.596310
43655,7.0,21.0,7.0,0.335914,0.0,19.4,84.0,0.0,0.0,1,...,0.4410,0.079400,0.223369,0.000000,0.144716,0.000000,0.000000,18.9,90.0,0.457615
43656,7.0,22.0,7.0,0.223369,0.0,18.9,90.0,0.0,0.0,1,...,0.4749,0.059540,0.272052,0.000000,0.335914,0.000000,0.000000,20.5,76.0,0.453849


In [6]:
# fit one lgb model for a single quantile level and predict with it
def run_lgb(data, datat, features, target, quantile, params, seed=42):
    """Train a LightGBM quantile model for one quantile level.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain `features`, `target` and a 'day_year' column. Rows with
        ``day_year < 7000`` are used for fitting, rows with
        ``day_year >= 7000`` for validation / early stopping.
    datat : pandas.DataFrame
        Frame to predict on; must contain `features`.
    features : list
        Column names used as model inputs.
    target : str
        Name of the column to predict.
    quantile : float
        Quantile level; passed to LightGBM as ``alpha``. Plain Python floats
        and NumPy floats are both accepted.
    params : dict
        Base LightGBM parameters. The dict is NOT modified; 'seed', 'metric'
        and 'alpha' are set on an internal copy.
    seed : int, default 42
        Seed for NumPy's global RNG and for LightGBM.

    Returns
    -------
    (y_pred, y_pred_test)
        Predictions for every row of `data` (fitting and validation rows
        alike) and for every row of `datat`, both taken at the best iteration.

    Side effects
    ------------
    Seeds NumPy's global RNG and writes the model to
    ``models/lgb_<quantile rounded to 3 decimals>.txt`` (the ``models``
    directory is created when missing).
    """
    # set seed
    np.random.seed(seed)
    # set params on a copy so seed/metric/alpha do not leak into the caller's dict
    model_params = dict(params)
    model_params['seed'] = seed
    model_params['metric'] = 'quantile'
    model_params['alpha'] = quantile
    # train / validation split on the 'day_year' column
    train = data[data['day_year'] < 7000]
    valid = data[data['day_year'] >= 7000]
    lgb_train = lgb.Dataset(train[features], train[target])
    lgb_valid = lgb.Dataset(valid[features], valid[target])
    # NOTE(review): the `early_stopping_rounds` / `verbose_eval` keywords were
    # removed from `lgb.train` in LightGBM 4.0; switch to the
    # `lgb.early_stopping` / `lgb.log_evaluation` callbacks when upgrading.
    model = lgb.train(model_params, lgb_train, valid_sets=[lgb_train, lgb_valid], num_boost_round=1000, early_stopping_rounds=50, verbose_eval=100)
    # save model in the model folder, with the name of the quantile;
    # round(float(...)) also works for plain floats, which have no .round()
    os.makedirs('models', exist_ok=True)
    model.save_model('models/lgb_' + str(round(float(quantile), 3)) + '.txt')
    # predict on the full input frame and on the test frame
    y_pred = model.predict(data[features], num_iteration=model.best_iteration)
    y_pred_test = model.predict(datat[features], num_iteration=model.best_iteration)
    return y_pred, y_pred_test

# fit one model per quantile level and collect the forecasts, one column per level
def run_lgb_ensemble(data, datat, features, target, params, seed=42, quantiles=None):
    """Run `run_lgb` for every quantile level and tabulate the predictions.

    Parameters
    ----------
    data, datat : pandas.DataFrame
        Training and test frames, forwarded to `run_lgb`. Both must have a
        'timestamp' column, which becomes the index of the returned frames.
    features, target, seed
        Forwarded unchanged to `run_lgb`.
    params : dict
        Base LightGBM parameters. Each model receives its own shallow copy,
        so the caller's dict is left untouched.
    quantiles : sequence of float, optional
        Quantile levels to fit. Defaults to the module-level `qts` grid.

    Returns
    -------
    (y_preds, y_preds_test) : tuple of pandas.DataFrame
        One row per row of `data` / `datat`, one column per quantile level
        (column labels are the levels themselves).

    Raises
    ------
    ValueError
        If no quantile levels are given.
    """
    # np.float64 elements keep `.round()` available to the per-quantile routine
    levels = np.asarray(qts if quantiles is None else quantiles, dtype=float)
    if levels.size == 0:
        raise ValueError('at least one quantile level is required')
    y_preds = []
    y_preds_test = []
    for quantile in levels:
        # fresh copy per model: whatever the callee sets stays out of `params`
        y_pred, y_pred_test = run_lgb(data, datat, features, target, quantile, dict(params), seed)
        y_preds.append(y_pred)
        y_preds_test.append(y_pred_test)
    # stack to (rows, quantiles)
    y_preds = pd.DataFrame(np.array(y_preds).T, columns=levels, index=data['timestamp'])
    y_preds_test = pd.DataFrame(np.array(y_preds_test).T, columns=levels, index=datat['timestamp'])
    return y_preds, y_preds_test

In [7]:
# Base LightGBM settings shared by every quantile model.
params = dict(
    objective='quantile',
    boosting='gbdt',
    num_leaves=31,
    learning_rate=0.05,
    bagging_fraction=0.7,
    bagging_freq=1,
    feature_fraction=0.7,
    verbose=-1,
)
# Fit the whole quantile grid; `output` covers data_train, `output_test` covers data_test.
output, output_test = run_lgb_ensemble(
    data_train, data_test, features, target, params, seed=42
)

Training until validation scores don't improve for 50 rounds
[100]	training's quantile: 0.00019888	valid_1's quantile: 0.000265543
Early stopping, best iteration is:
[128]	training's quantile: 0.000180011	valid_1's quantile: 0.000260544
Training until validation scores don't improve for 50 rounds
[100]	training's quantile: 0.00559065	valid_1's quantile: 0.00751702
Early stopping, best iteration is:
[119]	training's quantile: 0.00543929	valid_1's quantile: 0.00748835
Training until validation scores don't improve for 50 rounds
[100]	training's quantile: 0.00949362	valid_1's quantile: 0.0127763
[200]	training's quantile: 0.00885746	valid_1's quantile: 0.0126423
Early stopping, best iteration is:
[186]	training's quantile: 0.00889786	valid_1's quantile: 0.0126362
Training until validation scores don't improve for 50 rounds
[100]	training's quantile: 0.0124695	valid_1's quantile: 0.0170324
[200]	training's quantile: 0.0118041	valid_1's quantile: 0.0169228
[300]	training's quantile: 0.01149

In [8]:
def post_process(data):
    """Tidy a quantile-forecast frame and return it as a new DataFrame.

    The quantile columns are sorted, their labels are rounded to three
    decimals, the index is parsed to datetimes and an integer 'hour' column
    derived from that index is appended. The input frame is left untouched.

    The function is idempotent: an existing 'hour' column (e.g. from an
    earlier call) is set aside before sorting and rounding, then recomputed.

    Parameters
    ----------
    data : pandas.DataFrame
        Columns are numeric quantile levels (optionally plus 'hour'); the
        index holds values `pd.to_datetime` can parse.

    Returns
    -------
    pandas.DataFrame
        Sorted quantile columns followed by 'hour'.
    """
    # keep 'hour' out of the sort/round: mixing str and float labels would raise
    quantile_cols = sorted(
        col for col in data.columns
        if not (isinstance(col, str) and col == 'hour')
    )
    # explicit copy so the assignments below never write into a view of `data`
    result = data.loc[:, quantile_cols].copy()
    result.columns = np.round(np.asarray(quantile_cols, dtype=float), 3)
    result.index = pd.to_datetime(result.index)
    result['hour'] = result.index.hour
    return result

# Tidy both forecast tables (train first, then test) and rebind the names.
output, output_test = post_process(output), post_process(output_test)

In [9]:
# Persist both forecast tables, keeping the datetime index as the first CSV column.
# Create the target folder first so a fresh checkout does not fail here
# after the models have already been trained.
os.makedirs('./data/quantile', exist_ok=True)
output.to_csv('./data/quantile/year_qs.csv', index=True)
output_test.to_csv('./data/quantile/year_qs_test.csv', index=True)

In [19]:
# Display the test-set forecast table as the cell output.
# NOTE(review): this cell's execution count jumps ahead of the previous one,
# so the shown state may not come from a clean top-to-bottom run -- confirm.
output_test

Unnamed: 0_level_0,0.001,0.05,0.1,0.15,0.2,0.25,0.3,0.35,0.4,0.45,...,0.6,0.65,0.7,0.75,0.8,0.85,0.9,0.95,0.999,hour
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-08-01 22:00:00,0.320683,0.368781,0.381283,0.397807,0.416420,0.411705,0.416610,0.416430,0.427399,0.440339,...,0.477432,0.496801,0.499934,0.520188,0.544241,0.578775,0.608012,0.655557,0.832597,22
2021-08-01 23:00:00,0.328759,0.369254,0.377965,0.391575,0.401106,0.402392,0.401573,0.404627,0.407656,0.416549,...,0.424501,0.431466,0.434378,0.444150,0.472103,0.490045,0.506456,0.568569,0.825682,23
2021-08-02 00:00:00,0.301606,0.366134,0.373308,0.383290,0.395295,0.398025,0.396642,0.399859,0.407817,0.414659,...,0.419391,0.419753,0.423620,0.432694,0.450319,0.448865,0.468456,0.506220,0.816416,0
2021-08-02 01:00:00,0.301606,0.364939,0.371483,0.379608,0.387677,0.395874,0.395273,0.399465,0.404344,0.411493,...,0.417078,0.418930,0.423436,0.431064,0.447701,0.442232,0.448732,0.502665,0.816416,1
2021-08-02 02:00:00,0.304875,0.365058,0.371539,0.379511,0.382475,0.394412,0.394633,0.399465,0.406140,0.411931,...,0.417256,0.420741,0.423319,0.431119,0.444874,0.440907,0.449360,0.502665,0.816416,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-07-31 17:00:00,0.267956,0.363932,0.388768,0.398471,0.407761,0.434331,0.435361,0.461604,0.471482,0.502968,...,0.541716,0.569130,0.580174,0.602705,0.622065,0.642781,0.659407,0.706190,0.849819,17
2022-07-31 18:00:00,0.288000,0.381748,0.389868,0.402354,0.401551,0.432020,0.443493,0.460860,0.473927,0.500870,...,0.551361,0.576017,0.596761,0.595950,0.634007,0.651974,0.654649,0.698798,0.847097,18
2022-07-31 19:00:00,0.305377,0.379938,0.390698,0.400881,0.402097,0.425121,0.431481,0.455304,0.462376,0.478621,...,0.519625,0.552346,0.570080,0.573213,0.627739,0.641396,0.670363,0.714076,0.841118,19
2022-07-31 20:00:00,0.304233,0.378396,0.387278,0.399609,0.401668,0.418799,0.439881,0.457029,0.461813,0.480400,...,0.530474,0.564428,0.581834,0.585303,0.637957,0.662953,0.666194,0.715289,0.841118,20
