# WiDS

## Data Loading

__Importing the libraries__

In [1]:
from sklearn.preprocessing import (
    OneHotEncoder,
    StandardScaler,
    FunctionTransformer,
    PolynomialFeatures
)

from sklearn.impute import SimpleImputer
# from sklearn.impute import IterativeImputer

from sklearn.pipeline import make_pipeline
from sklearn.compose import make_column_transformer

from sklearn.feature_selection import RFECV

from sklearn.model_selection import (
    cross_val_score,
    cross_validate,
    train_test_split,
    RandomizedSearchCV
)

from sklearn.linear_model import Ridge, Lasso

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

from catboost import CatBoostRegressor
from lightgbm.sklearn import LGBMRegressor
from xgboost import XGBRegressor

from sklearn.ensemble import StackingRegressor

from sklearn.metrics import make_scorer
# from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import roc_curve

In [2]:
import numpy as np
import pandas as pd
import time
from torchvision import transforms, datasets, utils
import torch
from torch.utils.data import DataLoader, TensorDataset, random_split, Dataset
from torch import Tensor
from torch.nn import Linear
from torch.nn import Sigmoid
from torch.nn import Module
from torch.optim import SGD
from torch.nn import MSELoss
import torch.utils.data as data
from torch import nn

In [3]:
# train_path = "../input/wids2022/train.csv"
# test_path = "../input/wids2022/test.csv"
train_path = "data/train.csv"
test_path = "data/test.csv"

In [4]:
class Dataprep(Dataset):

    def __init__(self, train_path, test_path, size=0.1, is_train=True):
        df = pd.read_csv(train_path)
        test_df = pd.read_csv(test_path)

        TARGET_COLUMN = "site_eui"

        self.is_train = is_train

        train_df, val_df = train_test_split(
            df,
            test_size=0.1,
            random_state=123
        )

        self.X_train, self.y_train = train_df.drop(columns=[TARGET_COLUMN]), train_df[TARGET_COLUMN]
        self.X_val, self.y_val = val_df.drop(columns=[TARGET_COLUMN]), val_df[TARGET_COLUMN]
        self.X_test = test_df

    def engineer_features(self):
        # Find Standard Deviation of min, max and avg temp among months
        min_temps = [
            "january_min_temp",
            "february_min_temp",
            "march_min_temp",
            "april_min_temp",
            "may_min_temp",
            "june_min_temp",
            "july_min_temp",
            "august_min_temp",
            "september_min_temp",
            "october_min_temp",
            "november_min_temp",
            "december_min_temp"
        ]

        max_temps = [
            "january_max_temp",
            "february_max_temp",
            "march_max_temp",
            "april_max_temp",
            "may_max_temp",
            "june_max_temp",
            "july_max_temp",
            "august_max_temp",
            "september_max_temp",
            "october_max_temp",
            "november_max_temp",
            "december_max_temp"
        ]

        avg_temps = [
            "january_avg_temp",
            "february_avg_temp",
            "march_avg_temp",
            "april_avg_temp",
            "may_avg_temp",
            "june_avg_temp",
            "july_avg_temp",
            "august_avg_temp",
            "september_avg_temp",
            "october_avg_temp",
            "november_avg_temp",
            "december_avg_temp",
        ]

        self.numeric_features = [
            "floor_area",
            "year_built",
            "energy_star_rating",
            "ELEVATION",
            "cooling_degree_days",
            "heating_degree_days",
            "precipitation_inches",
            "snowfall_inches",
            "snowdepth_inches",
            "avg_temp",
            "days_below_30F",
            "days_below_20F",
            "days_below_10F",
            "days_below_0F",
            "days_above_80F",
            "days_above_90F",
            "days_above_100F",
            "days_above_110F",
            "max_wind_speed",
            "days_with_fog",
            "building_age",
            "min_temp_std",
            "max_temp_std",
            "avg_temp_std",
            "0-10",
            "10-20",
            "20-30",
            "30-80",
            "80-90",
            "90-100",
            "100-110"
        ] + min_temps + max_temps + avg_temps

        self.categorical_features = [
            "Year_Factor",
            "State_Factor",
            "building_class",
            "facility_type",
            "direction_max_wind_speed",
            "direction_peak_wind_speed"
        ]

        self.drop_columns = [
            "id"
        ]

        for X in [self.X_train, self.X_val, self.X_test]:
            X["building_age"] = 2022 - X["year_built"]

        for X in [self.X_train, self.X_val, self.X_test]:
            X["min_temp_std"] = X[min_temps].T.std()
            X["max_temp_std"] = X[max_temps].T.std()
            X["avg_temp_std"] = X[avg_temps].T.std()

        days_above_below = [
            "days_below_30F",
            "days_below_20F",
            "days_below_10F",
            "days_below_0F",
            "days_above_80F",
            "days_above_90F",
            "days_above_100F",
            "days_above_110F",
        ]

        for X in [self.X_train, self.X_val, self.X_test]:
            X["0-10"] = X["days_below_10F"] - X["days_below_0F"]
            X["10-20"] = X["days_below_20F"] - X["days_below_10F"]
            X["20-30"] = X["days_below_30F"] - X["days_below_20F"]
            X["80-90"] = X["days_above_80F"] - X["days_above_90F"]
            X["90-100"] = X["days_above_90F"] - X["days_above_100F"]
            X["100-110"] = X["days_above_100F"] - X["days_above_110F"]
            X["30-80"] = (366 - X[days_above_below].sum(axis=1)).clip(lower=0)

#         residential = [
#             "Multifamily_Uncategorized",
#             "2to4_Unit_Building",
#             "5plus_Unit_Building",
#             "Religious_worship",
#             "Parking_Garage",
#             "Mixed_Use_Predominantly_Residential"
#         ]

#         industrial = [
#             "Warehouse_Nonrefrigerated",
#             "Warehouse_Distribution_or_Shipping_center",
#             "Warehouse_Selfstorage",
#             "Industrial",
#             "Warehouse_Uncategorized",
#             "Warehouse_Refrigerated",
#             "Laboratory",
#             "Data_Center"
#         ]

#         commercial = list(
#             set(self.X_train["facility_type"].value_counts().index) -
#             set(residential) - set(industrial)
#         )

#         temp = pd.concat([self.X_train["facility_type"], feature_set.y_train], axis=1)
#         convert_dic = dict(temp[["facility_type", "site_eui"]].groupby("facility_type")["site_eui"].mean().sort_values())

#         for X in [self.X_train, self.X_val, self.X_test]:
#             for facility_type in [
#                 "Food",
#                 "Education",
#                 "Health_Care",
#                 "Public_Assembly",
#                 "Public_Safety",
#                 "Lodging",
#                 "Warehouse",
#                 "Office",
#                 "Service",
#                 "Retail"
#             ]:
#                 X["facility_type"] = [
#                     facility_type if f_type.lower().startswith(facility_type.lower())
#                     else f_type
#                     for f_type in X["facility_type"]
#                 ]
# #             X.replace({"facility_type": convert_dic}, inplace=True)
# #         #     X["facility_type"] = [
# #         #             "commercial" if f_type in commercial
# #         #             else "residential" if f_type in residential
# #         #             else "industrial"
# #         #         for f_type in X["facility_type"]]


    def preprocess(self):
        pipe_numeric_feats = make_pipeline(
           SimpleImputer(strategy="mean"),
           StandardScaler()
        )
        pipe_cat_feats = make_pipeline(
            SimpleImputer(strategy="most_frequent"),
            OneHotEncoder(handle_unknown="ignore", sparse=False)
        )
        self.column_transformer = make_column_transformer(
            (pipe_numeric_feats, self.numeric_features),
            (pipe_cat_feats, self.categorical_features)
        )

        self.X_train_raw = self.X_train
        self.X_val_raw = self.X_val
        self.X_test_raw = self.X_test
        self.X_train = self.column_transformer.fit_transform(self.X_train)
        self.X_val = self.column_transformer.transform(self.X_val)
        self.X_test = self.column_transformer.transform(self.X_test)

        self.X_train_tensor = torch.tensor(self.X_train, dtype=torch.float32)
        self.y_train_tensor = torch.tensor(self.y_train.values, dtype=torch.float32)
        self.X_val_tensor = torch.tensor(self.X_val, dtype=torch.float32)
        self.y_val_tensor = torch.tensor(self.y_val.values, dtype=torch.float32)
        self.X_test_tensor = torch.tensor(self.X_test, dtype=torch.float32)

In [5]:
feature_set = Dataprep(train_path, test_path)
feature_set.engineer_features()
feature_set.preprocess()

In [6]:
############################################ Temp zone #################################################

# Apply imputation in all columns

# Tried: X_train["facility_type"].value_counts() -> Categorize in 3-4 categories
# energy level / floor area group by mean and variance

# Time series seasonal component in the monthly data -> Try feeding to RNNs
# Take three month windows to calculate average -> Repeat it to narrow 12 features down to 1 or try SVD

# use site_eui to order facility_type

In [7]:
########################################################################################################

In [8]:
def mse(predictions, targets):
    return np.mean((predictions - targets) ** 2)


def rmse(predictions, targets):
    return np.sqrt(((predictions - targets) ** 2).mean())


def mape(true, pred):
    return 100.0 * np.mean(np.abs((pred - true) / true))


def r2score_torch(predictions, target):
    target_mean = torch.mean(target)
    ss_tot = torch.sum((target - target_mean) ** 2)
    ss_res = torch.sum((target - predictions) ** 2)
    r2 = 1 - ss_res / ss_tot
    return r2


def mse_torch(predictions, targets):
    return torch.mean((predictions - targets) ** 2)


def rmse_torch(predictions, targets):
    return torch.sqrt(((predictions - targets) ** 2).mean())

## Sklearn Models

In [9]:
for dt in [
    feature_set.X_train,
    feature_set.X_val,
    feature_set.y_train,
    feature_set.y_val
]:
    print(dt.shape)

(68181, 160)
(7576, 160)
(68181,)
(7576,)


In [10]:
def cross_val_scores(model, X_train, y_train, X_val, y_val, return_train_score=False):

    model.fit(X_train, y_train)
    y_val_pred = model.predict(X_val)

    score_dict = {
        "r2_val": model.score(X_val, y_val),
        "mse_val": mse(y_val, y_val_pred),
        "rmse_val": rmse(y_val, y_val_pred),
        "mape_val": mape(y_val, y_val_pred)
    }

    if return_train_score:
        y_train_pred = model.predict(X_train)

        score_dict["r2_train"] = model.score(X_train, y_train)
        score_dict["mse_train"] = mse(y_train, y_train_pred)
        score_dict["rmse_train"] = rmse(y_train, y_train_pred)
        score_dict["mape_train"] = mape(y_train, y_train_pred)

    scores_result = pd.Series(score_dict)

    return model, scores_result

In [11]:
pipe_ridge = make_pipeline(feature_set.column_transformer, Ridge(max_iter=10000))
pipe_lasso = make_pipeline(feature_set.column_transformer, Lasso())
pipe_rf = make_pipeline(feature_set.column_transformer, RandomForestRegressor())
pipe_xgb = make_pipeline(feature_set.column_transformer, XGBRegressor(verbosity=0))#, eta=0.01, max_depth=7, n_estimators=1000))
pipe_lgbm = make_pipeline(feature_set.column_transformer, LGBMRegressor())
pipe_catboost = make_pipeline(feature_set.column_transformer, CatBoostRegressor(verbose=False))

In [61]:
results = {}

def train(models, results):
    for name, model in models.items():
        print(f"Start {name}!")
        start_time = time.time()
        _, results[name] = cross_val_scores(
            model,
            feature_set.X_train_raw,
            feature_set.y_train,
            feature_set.X_val_raw,
            feature_set.y_val,
            return_train_score=True
        )

        print(f"Done {name} in {round(time.time() - start_time)} secs!")

    return results


mape_scorer = make_scorer(mape, greater_is_better=False)

scoring_metrics = {
    "neg RMSE": "neg_root_mean_squared_error",
    "r2": "r2",
    "mape": mape_scorer
}

In [13]:
models = {
    "Ridge": pipe_ridge,
    "Lasso": pipe_lasso,
    "Random Forest": pipe_rf,
    "XGB": pipe_xgb,
    "LGBM": pipe_lgbm,
    "Cat Boost": pipe_catboost,
}

In [None]:
results = train(models, results)

In [14]:
pd.DataFrame(results)

In [15]:
feature_set.X_train.shape

(68181, 160)

In [52]:
rfecv = RFECV(Ridge(), min_features_to_select=120, n_jobs=-1)

pipe_xgb_rfecv = make_pipeline(
    feature_set.column_transformer, rfecv, XGBRegressor(verbosity=0)
)

In [53]:
poly_feats = PolynomialFeatures(degree=2)

pipe_poly_ridge = make_pipeline(
    feature_set.column_transformer, rfecv, poly_feats, Ridge()
)

In [54]:
models_rfe_poly_ridge = {
    "XGB rfecv": pipe_xgb_rfecv,
    # "Poly Ridge": pipe_poly_ridge
}

In [55]:
results = train(models_rfe_poly_ridge, results)

Start XGB rfecv!
Done XGB rfecv in 22 secs!


In [56]:
pd.DataFrame(results)

Unnamed: 0,XGB rfecv,Poly Ridge
r2_val,0.51463,0.381328
mse_val,1506.993403,1920.875815
rmse_val,38.820013,43.827797
mape_val,44.149664,48.838148
r2_train,0.631292,0.415892
mse_train,1263.082954,2000.981302
rmse_train,35.539878,44.732329
mape_train,45.894592,55.636894


In [None]:
# Hyperparam Tune Random Forest 

params_rf = {
    'randomforestregressor__n_estimators': [10, 100, 500, 1000],
    'randomforestregressor__max_depth': [5, 10, 12],
    'randomforestregressor__max_features': ['auto', 'sqrt']
}

In [None]:
random_search_rf = RandomizedSearchCV(
    pipe_rf,
    params_rf,
    n_jobs=-1,
    n_iter=20,
    return_train_score=True,
    scoring=scoring_metrics,
    refit="r2"
)

In [None]:
# random_search_rf.fit(X_train, y_train)

In [None]:
pd.DataFrame(random_search_rf.cv_results_)
# pd.DataFrame(random_search.cv_results_)[[
#         "mean_fit_time",
#         "mean_score_time",
#         "param_ridge__alpha",
#         "mean_train_neg RMSE",
#         "std_train_neg RMSE",
#         "mean_test_mape",
#         "mean_train_mape",
#         "mean_test_r2",
#         "mean_train_r2"
#     ]
# ].sort_values(by='mean_test_r2', ascending=False)

In [None]:
params_lgbm = {
    'lgbmregressor__n_estimators': [10, 100, 1000],
    'lgbmregressor__max_depth': [5, 10, 15]
}

In [None]:
random_search_lgbm = RandomizedSearchCV(
    pipe_lgbm,
    params_lgbm,
    n_jobs=-1,
    n_iter=20,
    return_train_score=True,
    scoring=scoring_metrics,
    refit="r2"
)

In [None]:
# random_search_lgbm.fit(X_train, y_train)

In [None]:
# pd.DataFrame(random_search_lgbm.cv_results_)
pd.DataFrame(random_search_lgbm.cv_results_)[[
        "mean_fit_time",
        "mean_score_time",
        "params",
        "mean_train_neg RMSE",
        "mean_test_neg RMSE",
        "mean_test_mape",
        "mean_train_mape",
        "mean_test_r2",
        "mean_train_r2"
    ]
].sort_values(by="mean_test_r2", ascending=False)

In [None]:
print(random_search_lgbm.best_params_)
print(random_search_lgbm.best_score_)
# {'lgbmregressor__n_estimators': 1000, 'lgbmregressor__max_depth': 20}
# 0.49699611783887543

In [63]:
# HyperparamTune XGBoost
params_xgb = {
    'xgbregressor__n_estimators': [10, 100, 1000],
    'xgbregressor__max_depth': [3, 5, 7, 12],
    'xgbregressor__eta': [0.01, 0.03, 0.01, 0.3]
}

In [64]:
random_search_xgb = RandomizedSearchCV(
    pipe_xgb,
    params_xgb,
    n_jobs=-1,
    n_iter=20,
    return_train_score=True,
    scoring=scoring_metrics,
    refit="r2"
)

In [65]:
random_search_xgb.fit(feature_set.X_train_raw, feature_set.y_train)

RandomizedSearchCV(estimator=Pipeline(steps=[('columntransformer',
                                              ColumnTransformer(transformers=[('pipeline-1',
                                                                               Pipeline(steps=[('simpleimputer',
                                                                                                SimpleImputer()),
                                                                                               ('standardscaler',
                                                                                                StandardScaler())]),
                                                                               ['floor_area',
                                                                                'year_built',
                                                                                'energy_star_rating',
                                                                                'ELEVATION',

In [67]:
# pd.DataFrame(random_search_xgb.cv_results_)
pd.DataFrame(random_search_xgb.cv_results_)[[
        "mean_fit_time",
        "mean_score_time",
        "params",
        "mean_train_neg RMSE",
        "mean_test_neg RMSE",
        "mean_test_mape",
        "mean_train_mape",
        "mean_test_r2",
        "mean_train_r2"
    ]
].sort_values(by="mean_test_r2", ascending=False)

Unnamed: 0,mean_fit_time,mean_score_time,params,mean_train_neg RMSE,mean_test_neg RMSE,mean_test_mape,mean_train_mape,mean_test_r2,mean_train_r2
13,574.659637,0.307976,"{'xgbregressor__n_estimators': 1000, 'xgbregre...",-10.903566,-38.522429,-40.791748,-15.783641,0.565928,0.965286
1,356.662588,0.292019,"{'xgbregressor__n_estimators': 1000, 'xgbregre...",-19.550576,-38.947062,-43.210219,-28.68754,0.55645,0.888399
2,1175.295331,0.454385,"{'xgbregressor__n_estimators': 1000, 'xgbregre...",-20.642279,-39.257042,-42.864195,-29.014406,0.549508,0.875529
17,563.405348,0.279453,"{'xgbregressor__n_estimators': 1000, 'xgbregre...",-32.168396,-40.598057,-47.114703,-42.491214,0.518292,0.697873
8,49.140569,0.167951,"{'xgbregressor__n_estimators': 100, 'xgbregres...",-30.877795,-40.947238,-47.232692,-41.330082,0.509899,0.721631
4,307.116411,0.296407,"{'xgbregressor__n_estimators': 1000, 'xgbregre...",-36.729475,-42.003204,-49.669794,-46.950941,0.484413,0.606148
0,628.82279,0.373404,"{'xgbregressor__n_estimators': 1000, 'xgbregre...",-36.973773,-42.16841,-50.787486,-48.62163,0.480431,0.600853
10,134.556621,0.192685,"{'xgbregressor__n_estimators': 100, 'xgbregres...",-35.357155,-42.826538,-48.781719,-45.436484,0.463993,0.635034
18,319.171959,0.258908,"{'xgbregressor__n_estimators': 1000, 'xgbregre...",-40.370169,-43.3751,-53.796786,-52.469732,0.450265,0.524188
11,17.446537,0.161568,"{'xgbregressor__n_estimators': 100, 'xgbregres...",-40.689313,-43.54518,-52.544649,-51.480214,0.445782,0.51665


In [69]:
print(random_search_xgb.best_params_)
print(random_search_xgb.best_score_)

{'xgbregressor__n_estimators': 1000, 'xgbregressor__max_depth': 7, 'xgbregressor__eta': 0.3}
0.5659283801380433


#### Stacking

In [57]:
models_selected = {
    "Ridge": pipe_ridge,
    "XGB": pipe_xgb,
    "LGBM": pipe_lgbm,
    "CatBoost": pipe_catboost
#     "<>_rfecv": pipe_<>_rfecv,
#     "Poly Ridge": pipe_poly_ridge,
}

In [58]:
stacking_model = StackingRegressor(list(models_selected.items()))

In [59]:
name = "Stacking"

print(f"Start {name}!")
start_time = time.time()

_, results[name] = cross_val_scores(
    stacking_model,
    feature_set.X_train_raw,
    feature_set.y_train,
    feature_set.X_val_raw,
    feature_set.y_val,
    return_train_score=True
)

print(f"Done {name} in {round(time.time() - start_time)} secs!")

Start Stacking!
Done Stacking in 166 secs!


In [60]:
pd.DataFrame(results)

Unnamed: 0,XGB rfecv,Poly Ridge,Stacking
r2_val,0.51463,0.381328,0.546436
mse_val,1506.993403,1920.875815,1408.242515
rmse_val,38.820013,43.827797,37.526557
mape_val,44.149664,48.838148,41.849547
r2_train,0.631292,0.415892,0.667832
mse_train,1263.082954,2000.981302,1137.908182
rmse_train,35.539878,44.732329,33.732895
mape_train,45.894592,55.636894,43.783458


# FCNN

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

In [None]:
def linear_block(input_size, output_size):
    return nn.Sequential(
        nn.Linear(input_size, output_size),
        nn.ReLU(),
        nn.Dropout(0.2)
    )

class Extractlastcell(nn.Module):
    def forward(self, x):
        out, _ = x
        return out[-1]


class EnergyRegressor(nn.Module):
    def __init__(self, input_size):
        super(EnergyRegressor, self).__init__()
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=2 * input_size, num_layers=2)
        self.tanh = nn.Tanh()
        self.layers = nn.Sequential(
            linear_block(2 * input_size, 3 * input_size),
            linear_block(3 * input_size, 5 * input_size),
            linear_block(5 * input_size, 10 * input_size),
            linear_block(10 * input_size, 7 * input_size),
            linear_block(7 * input_size, 5 * input_size),
            linear_block(5 * input_size, 3 * input_size),
            linear_block(3 * input_size, input_size),
            nn.Linear(input_size, 1000),
            nn.Linear(1000, 700),
            nn.Linear(700, 400), 
            nn.Linear(400, 256),
            nn.Linear(256, 128),
            nn.Linear(128, 64),
            nn.Linear(64, 1)
        )

    def forward(self, X):
        X = X.to(device)
        # X (sequence length, batch size, input size)
        X = X.reshape(1, X.shape[0], X.shape[1])
        X, _ = self.lstm(X)
        X = X[-1]
        X = self.tanh(X)
        X = self.layers(X)
        return X

In [None]:
trainloader = DataLoader(TensorDataset(feature_set.X_train_tensor, feature_set.y_train_tensor), batch_size=32, shuffle=True)
validloader = DataLoader(TensorDataset(feature_set.X_val_tensor, feature_set.y_val_tensor), batch_size=32, shuffle=True)

In [None]:
model = EnergyRegressor(feature_set.X_train_tensor.shape[1])
model.to(device)

In [None]:
def trainer(model, criterion, optimizer, trainloader, validloader, epochs):
    train_mse = 0
    train_rmse = 0
    train_r2 = 0
    val_mse = 0
    val_rmse = 0
    val_r2 = 0
    
    for epoch in range(epochs):
        train_batch_mse = []
        train_batch_rmse = []
        train_batch_r2 = []
        val_batch_mse = []
        val_batch_rmse = []
        val_batch_r2 = []

        model.train(True)

        for X, y in trainloader:
            X = X.to(device)
            y = y.to(device)
            y_hat = model(X).flatten()
            
            optimizer.zero_grad()
            loss = criterion(y_hat, y)
            loss = loss.to(device)
            loss.backward()
            optimizer.step()
            mse_train = mse_torch(y_hat, y)
            rmse_train = rmse_torch(y_hat, y)
            r2_train = r2score_torch(y_hat, y)
            train_batch_mse.append(mse_train)
            train_batch_rmse.append(rmse_train)
            train_batch_r2.append(r2_train)
        
        train_mse = torch.sum(torch.Tensor(train_batch_mse)) / len(trainloader)
        train_rmse = torch.sum(torch.Tensor(train_batch_rmse)) / len(trainloader)
        train_r2 = torch.sum(torch.Tensor(train_batch_r2)) / len(trainloader)

        model.eval()

        with torch.no_grad():
            for X_valid, y_valid in validloader:
                X_valid = X_valid.to(device)
                y_valid = y_valid.to(device).flatten()
                y_hat_val = model(X_valid)
                mse_val = mse_torch(y_hat_val, y_valid)
                rmse_val = rmse_torch(y_hat_val, y_valid)
                r2_val = r2score_torch(y_hat_val, y_valid)
                val_batch_mse.append(mse_val)
                val_batch_rmse.append(rmse_val)
                val_batch_r2.append(r2_val)
            val_mse = torch.sum(torch.Tensor(val_batch_mse)) / len(validloader)
            val_rmse = torch.sum(torch.Tensor(val_batch_rmse)) / len(validloader)
            val_r2 = torch.sum(torch.Tensor(val_batch_r2)) / len(validloader) 

        print(f"Epoch {epoch + 1}:\tTrain:\tMSE: {round(train_mse.item(), 4)}. RMSE: {round(train_rmse.item(), 4)}, R2: {round(train_r2.item(), 4)}.")
        print(f"\t\tVal:\tMSE: {round(val_mse.item(), 4)}, RMSE: {round(val_rmse.item(), 4)}, R2: {round(val_r2.item(), 4)}.")
        print("-" * 80)
    return model

In [None]:
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
criterion = criterion.to(device)
trained_model = trainer(model, criterion, optimizer, trainloader, validloader, epochs=50)

In [None]:
def predict(model, X):
    return model(X.type(torch.float32))

In [None]:
predictions = predict(trained_model, feature_set.X_test_tensor)
predictions

In [None]:
results_dict = {"id": feature_set.X_test_raw["id"],
               "site_eui": predictions.cpu().detach().numpy().flatten()}
pd.DataFrame(results_dict).set_index("id").to_csv("submission.csv")