In [None]:
import matplotlib.pyplot as plt
from matplotlib import gridspec
import seaborn as sns
import pandas as pd
from tqdm.notebook import tqdm
import copy

from scipy.special import expit
import numpy as np
import torch

from sklearn.linear_model import (LogisticRegression, LinearRegression)
from sklearn.model_selection import LeaveOneOut, KFold
from sklearn.metrics import classification_report
from sklearn.model_selection import ParameterGrid

from sklearn.metrics import r2_score

from torch.utils.data import TensorDataset, DataLoader
import time as time

from torchvision import datasets
from torchvision import transforms
from sklearn.model_selection import train_test_split

In [None]:
import random

# Seed every random number generator the notebook relies on (PyTorch CPU,
# all CUDA devices, NumPy and the stdlib) with the same value.
seed = 0
for seed_rng in (torch.manual_seed, torch.cuda.manual_seed_all,
                 np.random.seed, random.seed):
    seed_rng(seed)

In [None]:
# Prefer the GPU when one is visible to PyTorch, otherwise stay on the CPU.
has_gpu = torch.cuda.is_available()
device = torch.device("cuda") if has_gpu else torch.device("cpu")
device

# BOSTON dataset

In [None]:
import warnings
from sklearn.datasets import load_boston
# NOTE(review): load_boston is deprecated upstream (the warning is silenced
# below) and is reported as removed in scikit-learn 1.2 -- this cell presumably
# needs scikit-learn < 1.2; confirm the pinned version.
with warnings.catch_warnings():
    # Ignore every warning raised while loading; the filter is scoped to this block.
    warnings.filterwarnings("ignore")
    X, y = load_boston(return_X_y=True)
# Reshape the target into a single-column 2-D array.
y = y.reshape((-1, 1))

In [None]:
# Hold out 30% of the samples for testing; random_state pins the shuffle.
split_parts = train_test_split(X, y, test_size=0.3, random_state=0)
X_train, X_test, y_train, y_test = split_parts

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Fit the feature and target scalers on the training split only, then apply
# the fitted scalers to both splits.
xScaler = StandardScaler().fit(X_train)
yScaler = StandardScaler().fit(y_train)
X_train_val, X_test_val = (xScaler.transform(part)
                           for part in (X_train, X_test))
y_train_val, y_test_val = (yScaler.transform(part)
                           for part in (y_train, y_test))

In [None]:
# Wrap the standardized arrays as float tensors, paired as (features, target).
BOSTON_train = TensorDataset(*(torch.tensor(arr, dtype=torch.float)
                               for arr in (X_train_val, y_train_val)))
BOSTON_test = TensorDataset(*(torch.tensor(arr, dtype=torch.float)
                              for arr in (X_test_val, y_test_val)))

# Functions and classes

In [None]:
class convex_weighing_machine(torch.nn.Module):
    '''
    Convex combination of the models in ``trained_list``.

    One learnable logit per model is stored in ``conv_weights`` (initialized
    to ones, i.e. a uniform mixture). A softmax over the logits yields
    non-negative mixing coefficients that sum to one.

    Parameters
    ----------
    trained_list : indexable collection of callables (modules)
        The models to combine; every model is called on the same input.
    input_dim, num_layers, hidden_dim, output_dim, p :
        Accepted but not used by this class.
    '''
    def __init__(self,
                 trained_list,
                 input_dim=784,
                 num_layers=0,
                 hidden_dim=64,
                 output_dim=1,
                 p=0.0):

        super().__init__()
        self.trained_list = trained_list
        # torch.nn.Parameter already has requires_grad=True.
        self.conv_weights = torch.nn.Parameter(torch.ones(len(trained_list)))

    def forward(self, x):
        # torch.softmax is the numerically stable form of exp(w) / sum(exp(w)):
        # with large logits the hand-written version evaluates inf / inf = nan.
        coefs = torch.softmax(self.conv_weights, dim=0)
        outs = self.trained_list[0](x) * coefs[0]
        for model_id in range(1, len(self.trained_list)):
            # Out-of-place accumulation keeps autograd free of in-place edits.
            outs = outs + self.trained_list[model_id](x) * coefs[model_id]
        return outs

In [None]:
class line_weighing_machine(torch.nn.Module):
    '''
    Unconstrained linear combination of the models in ``trained_list``.

    One learnable coefficient per model is stored in ``conv_weights``; all
    coefficients start at ``1 / len(trained_list)``, so the initial output is
    the plain average of the models.

    Parameters
    ----------
    trained_list : indexable collection of callables (modules)
        The models to combine; every model is called on the same input.
    input_dim, num_layers, hidden_dim, output_dim, p :
        Accepted but not used by this class.
    '''
    def __init__(self,
                 trained_list,
                 input_dim=784,
                 num_layers=0,
                 hidden_dim=64,
                 output_dim=1,
                 p=0.0):

        super().__init__()
        self.trained_list = trained_list
        n_models = len(trained_list)
        # Create the coefficients in torch's default float dtype; building
        # them through a NumPy float64 array produced a float64 parameter.
        self.conv_weights = torch.nn.Parameter(
            torch.full((n_models,), 1.0 / n_models))

    def forward(self, x):
        outs = self.trained_list[0](x) * self.conv_weights[0]
        for model_id in range(1, len(self.trained_list)):
            outs = outs + self.trained_list[model_id](x) * (
                self.conv_weights[model_id])
        return outs

In [None]:
def make_list_of_models(num_of_models,
                        dataset,
                        loss_function,
                        epochs,
                        lr,
                        input_dim=784,
                        num_layers=0,
                        hidden_dim=20,
                        grad=True,
                        output_dim=1,
                        p=0.0):
    '''
    Create ``num_of_models`` independent Perceptron networks.

    The ``grad`` flag selects what happens to each network:
      * grad == False: it is trained with Adam through ``trainer`` for
        ``epochs`` epochs, switched to eval mode, and its parameters are frozen;
      * grad == True: it keeps its random initialization and its parameters
        stay trainable.

    Returns a torch.nn.ModuleList holding a deep copy of every network.
    '''
    architecture = dict(input_dim=input_dim,
                        num_layers=num_layers,
                        hidden_dim=hidden_dim,
                        output_dim=output_dim,
                        p=p)
    models = torch.nn.ModuleList()
    for _model_idx in range(num_of_models):
        net = Perceptron(**architecture)
        if grad == False:
            # Train first, then freeze so later optimizers leave it untouched.
            net.to(device)
            net.train()
            trainer(model=net,
                    dataset=dataset,
                    lr=lr,
                    loss_function=loss_function,
                    optimizer=torch.optim.Adam,
                    epochs=epochs)
            net.eval()
            for weight in net.parameters():
                weight.requires_grad = False
        if grad == True:
            net.to(device)
            for weight in net.parameters():
                weight.requires_grad = True
        models.append(copy.deepcopy(net))
    return models

In [None]:
def classic_star_models(num_of_models,
                        dataset,
                        loss_function,
                        epochs,
                        lr,
                        warmup=False,
                        prop_warmup=0.0,
                        input_dim=784,
                        num_layers=0,
                        hidden_dim=20,
                        grad=False,
                        output_dim=1,
                        p=0.0):
    '''
    Build the star-procedure models: Classic Star (no wrmp) and, when
    ``warmup`` is true, Classic Star (new wrmp).

    Steps:
      1. ``num_of_models`` base Perceptrons are trained and frozen through
         ``make_list_of_models(grad=False)``.
      2. Classic Star (no wrmp): a fresh, untrained Perceptron is appended to
         the base list and the convex combination is trained for ``epochs``.
      3. Classic Star (new wrmp), only if ``warmup``: a fresh Perceptron is
         first trained alone for ``int(epochs * prop_warmup)`` epochs, appended
         to a copy of the base list, and the convex combination is trained for
         ``int(epochs * (1 - prop_warmup))`` epochs.

    ``grad`` is accepted but not used.

    Returns
    -------
    (star_nowrmp, star_newwrmp, star_nowrmp_time, star_newwrmp_time)
        Times are whole seconds and include the time spent on the base list.
        ``star_newwrmp`` and ``star_newwrmp_time`` are None when ``warmup`` is
        false.
    '''

    # Train and freeze the base models (timed, shared by both variants).
    start_time = time.time()
    trained_list = make_list_of_models(num_of_models=num_of_models,
                                       dataset=dataset,
                                       loss_function=loss_function,
                                       epochs=epochs,
                                       lr=lr,
                                       input_dim=input_dim,
                                       num_layers=num_layers,
                                       hidden_dim=hidden_dim,
                                       grad=False,
                                       output_dim=output_dim,
                                       p=p)
    trained_list_time = round(time.time() - start_time)
    # Keep an untouched copy of the base list for the warm-up variant.
    trained_list_forwrmp = copy.deepcopy(trained_list)

    # Append a new, untrained model: the only trainable network in the star.
    model = Perceptron(input_dim=input_dim,
                       num_layers=num_layers,
                       hidden_dim=hidden_dim,
                       output_dim=output_dim,
                       p=p)

    trained_list.append(copy.deepcopy(model))

    # Train the convex combination (Classic Star, no warm-up).
    star_nowrmp = convex_weighing_machine(trained_list=trained_list,
                                          input_dim=input_dim,
                                          num_layers=num_layers,
                                          hidden_dim=hidden_dim,
                                          output_dim=output_dim,
                                          p=p)
    star_nowrmp.to(device)
    _ = star_nowrmp.train()
    start_time = time.time()
    trainer(model=star_nowrmp,
            dataset=dataset,
            lr=lr,
            loss_function=loss_function,
            optimizer=torch.optim.Adam,
            epochs=epochs)
    _ = star_nowrmp.eval()
    star_nowrmp_time = round(time.time() - start_time) + trained_list_time
    if warmup:
        # Warm-up phase: train the new model on its own first.
        start_time = time.time()
        model = Perceptron(input_dim=input_dim,
                           num_layers=num_layers,
                           hidden_dim=hidden_dim,
                           output_dim=output_dim,
                           p=p)
        model.to(device)
        _ = model.train()
        trainer(model=model,
                dataset=dataset,
                lr=lr,
                loss_function=loss_function,
                optimizer=torch.optim.Adam,
                epochs=int(epochs * prop_warmup))
        _ = model.eval()
        # Record the warm-up duration before the timer is restarted below;
        # previously this measurement was overwritten and never reported.
        warmup_time = round(time.time() - start_time)
        trained_list_forwrmp.append(model)
        star_newwrmp = convex_weighing_machine(
            trained_list=trained_list_forwrmp,
            input_dim=input_dim,
            num_layers=num_layers,
            hidden_dim=hidden_dim,
            output_dim=output_dim,
            p=p)
        star_newwrmp.to(device)
        _ = star_newwrmp.train()
        start_time = time.time()
        trainer(model=star_newwrmp,
                dataset=dataset,
                lr=lr,
                loss_function=loss_function,
                optimizer=torch.optim.Adam,
                epochs=int(epochs * (1 - prop_warmup)))
        _ = star_newwrmp.eval()
        star_newwrmp_time = (round(time.time() - start_time) + warmup_time +
                             trained_list_time)
    else:
        star_newwrmp, star_newwrmp_time = None, None

    return star_nowrmp, star_newwrmp, star_nowrmp_time, star_newwrmp_time

In [None]:
def snapshot_list(num_of_models,
                  dataset,
                  loss_function,
                  budget,
                  lr,
                  input_dim=784,
                  num_layers=0,
                  hidden_dim=20,
                  grad=True,
                  output_dim=1,
                  p=0.0):
    '''
    Train a single Perceptron over consecutive ``cycle_trainer`` cycles and
    collect a snapshot of it after each cycle.

    Every cycle lasts ``budget // num_of_models`` epochs. Only
    ``num_of_models - 1`` cycles are run here (the last cycle is left to the
    caller, who chooses the aggregation procedure). After each cycle a frozen
    deep copy of the network is stored; finally one more, unfrozen deep copy
    of the network in its last state is appended, so the latest model appears
    in the list twice.

    ``grad`` is accepted but not used.

    Returns a torch.nn.ModuleList of snapshots.
    '''
    epochs_per_cycle = budget // num_of_models
    snapshots = torch.nn.ModuleList()
    net = Perceptron(input_dim=input_dim,
                     num_layers=num_layers,
                     hidden_dim=hidden_dim,
                     output_dim=output_dim,
                     p=p)
    for _cycle in range(num_of_models - 1):
        net.to(device)
        net.train()
        cycle_trainer(model=net,
                      dataset=dataset,
                      lr=lr,
                      loss_function=loss_function,
                      optimizer=torch.optim.Adam,
                      epochs=epochs_per_cycle)
        net.eval()
        # Store a frozen copy; the live network keeps training next cycle.
        frozen = copy.deepcopy(net)
        for weight in frozen.parameters():
            weight.requires_grad = False
        snapshots.append(frozen)
    # Trainable duplicate of the final state, for the caller's last cycle.
    snapshots.append(copy.deepcopy(net))
    return snapshots

In [None]:
def snap_models(num_of_models,
                dataset,
                loss_function,
                budget,
                lr,
                input_dim=784,
                num_layers=0,
                hidden_dim=20,
                grad=False,
                output_dim=1,
                p=0.0,
                warmup=True,
                prop_warmup=0.4):
    '''
    Build three models from one shared snapshot list: Snap Star (shot wrmp),
    Snap Ensemble and Snap Star (new wrmp).

    The snapshot list comes from ``snapshot_list`` and is deep-copied once per
    variant, so the three variants do not share weights:
      * Snap Ensemble: the last snapshot is trained for one more cycle of
        ``budget // num_of_models`` epochs with ``cycle_trainer``.
      * Snap Star (shot wrmp): a convex combination of the snapshot list is
        trained for one cycle with ``cycle_trainer``.
      * Snap Star (new wrmp): the last snapshot is replaced by a fresh
        Perceptron trained for ``int(prop_warmup * budget // num_of_models)``
        epochs, then the convex combination is trained with ``trainer`` for
        ``int((1 - prop_warmup) * budget // num_of_models)`` epochs.

    ``grad`` and ``warmup`` are accepted but not read in this function.

    Returns
    -------
    (snapstar_shotwrmp, snapensemble_trained_list, snapstar_newwrmp,
     snapstar_shotwrmp_time, snapensemble_time, snapstar_newwrmp_time)
        The times are whole seconds; each one is also printed.
    '''
    # Build the shared list of snapshots (timed).
    start_time = time.time()
    trained_list = snapshot_list(num_of_models=num_of_models,
                                 dataset=dataset,
                                 loss_function=loss_function,
                                 budget=budget,
                                 lr=lr,
                                 input_dim=input_dim,
                                 num_layers=num_layers,
                                 hidden_dim=hidden_dim,
                                 grad=False,
                                 output_dim=output_dim,
                                 p=p)

    # Independent copies, one per variant.
    snapstar_shotwrmp_list = copy.deepcopy(trained_list)
    snapstar_newwrmp_list = copy.deepcopy(trained_list)
    snapensemble_trained_list = copy.deepcopy(trained_list)

    # Snap Ensemble: run the remaining cycle on the last snapshot.
    # Note: this elapsed time is taken after the deep copies above, so it
    # includes them as well as the snapshot training.
    training_list_time = round(time.time() - start_time)
    model = snapensemble_trained_list[-1]
    model.to(device)
    _ = model.train()
    start_time = time.time()
    cycle_trainer(model=model,
                  dataset=dataset,
                  lr=lr,
                  loss_function=loss_function,
                  optimizer=torch.optim.Adam,
                  epochs=budget // num_of_models)
    _ = model.eval()
    snapensemble_trained_list[-1] = model
    last_model_ensemble = round(time.time() - start_time)
    snapensemble_time = training_list_time + last_model_ensemble
    print("Training time: Snap Ensemble", snapensemble_time, "seconds")

    # Snap Star (shot wrmp): convex combination over the snapshot list,
    # trained for one cosine cycle.
    snapstar_shotwrmp = convex_weighing_machine(
        trained_list=snapstar_shotwrmp_list,
        input_dim=input_dim,
        num_layers=num_layers,
        hidden_dim=hidden_dim,
        output_dim=output_dim,
        p=p)
    snapstar_shotwrmp.to(device)
    start_time = time.time()
    _ = snapstar_shotwrmp.train()
    cycle_trainer(model=snapstar_shotwrmp,
                  dataset=dataset,
                  lr=lr,
                  loss_function=loss_function,
                  optimizer=torch.optim.Adam,
                  epochs=budget // num_of_models)
    _ = snapstar_shotwrmp.eval()
    last_model_snapstar_shotwrmp = round(time.time() - start_time)
    snapstar_shotwrmp_time = training_list_time + last_model_snapstar_shotwrmp
    print("Training time: Snap Star (shot wrmp)", snapstar_shotwrmp_time,
          "seconds")

    # Snap Star (new wrmp): warm up a fresh model on its own first.
    model = Perceptron(input_dim=input_dim,
                       num_layers=num_layers,
                       hidden_dim=hidden_dim,
                       output_dim=output_dim,
                       p=p)
    model.to(device)
    _ = model.train()
    start_time = time.time()
    trainer(model=model,
            dataset=dataset,
            lr=lr,
            loss_function=loss_function,
            optimizer=torch.optim.Adam,
            epochs=int((prop_warmup) * budget // num_of_models))
    _ = model.eval()
    warmup_time = round(time.time() - start_time)
    # The warmed-up model takes the place of the duplicated last snapshot.
    snapstar_newwrmp_list[-1] = model
    snapstar_newwrmp = convex_weighing_machine(
        trained_list=snapstar_newwrmp_list,
        input_dim=input_dim,
        num_layers=num_layers,
        hidden_dim=hidden_dim,
        output_dim=output_dim,
        p=p)
    snapstar_newwrmp.to(device)
    start_time = time.time()
    _ = snapstar_newwrmp.train()
    trainer(model=snapstar_newwrmp,
            dataset=dataset,
            lr=lr,
            loss_function=loss_function,
            optimizer=torch.optim.Adam,
            epochs=int((1 - prop_warmup) * budget // num_of_models))
    _ = snapstar_newwrmp.eval()
    # Warm-up time is counted as part of this variant's last-model time.
    last_model_time = round(time.time() - start_time) + warmup_time
    snapstar_newwrmp_time = training_list_time + last_model_time
    print("Training time: Snap Star (new wrmp)", snapstar_newwrmp_time,
          "seconds")

    return snapstar_shotwrmp, snapensemble_trained_list, snapstar_newwrmp,\
           snapstar_shotwrmp_time, snapensemble_time, snapstar_newwrmp_time

In [None]:
def ensemble(num_of_models,
             dataset,
             loss_function,
             epochs,
             lr,
             input_dim=784,
             num_layers=0,
             hidden_dim=20,
             grad=False,
             output_dim=1,
             p=0.0):
    '''
    Build the Ensemble baseline.

    ``num_of_models`` Perceptrons are trained and frozen through
    ``make_list_of_models(grad=False)``; one more Perceptron is trained here
    for ``epochs`` epochs and appended, so the returned list holds
    ``num_of_models + 1`` trained models.

    ``grad`` is accepted but not used.

    Returns the list (torch.nn.ModuleList) of models whose outputs are to be
    averaged.
    '''

    # Train and freeze the first num_of_models members.
    trained_list = make_list_of_models(num_of_models=num_of_models,
                                       dataset=dataset,
                                       loss_function=loss_function,
                                       epochs=epochs,
                                       lr=lr,
                                       input_dim=input_dim,
                                       num_layers=num_layers,
                                       hidden_dim=hidden_dim,
                                       grad=False,
                                       output_dim=output_dim,
                                       p=p)
    # Train one additional member.
    model = Perceptron(input_dim=input_dim,
                       num_layers=num_layers,
                       hidden_dim=hidden_dim,
                       output_dim=output_dim,
                       p=p)
    model.to(device)
    model.train()
    trainer(model=model,
            dataset=dataset,
            lr=lr,
            loss_function=loss_function,
            optimizer=torch.optim.Adam,
            epochs=epochs)
    model.eval()
    # Append the model by name rather than through the throwaway `_`.
    trained_list.append(copy.deepcopy(model))
    return trained_list

In [None]:
def big_nn(num_of_models,
           dataset,
           loss_function,
           epochs,
           lr,
           input_dim=784,
           num_layers=0,
           hidden_dim=20,
           grad=False,
           output_dim=1,
           p=0.0):
    '''
    Build the Big NN baseline.

    ``num_of_models + 1`` untrained, fully trainable Perceptrons are combined
    by a ``line_weighing_machine`` and the whole combination (sub-models and
    mixing coefficients) is trained jointly for ``epochs`` epochs.

    ``grad`` is accepted but not used.

    Returns the trained ``line_weighing_machine`` in eval mode.
    '''

    # Randomly initialized, trainable sub-models (grad=True skips training).
    trained_list = make_list_of_models(num_of_models=num_of_models,
                                       dataset=dataset,
                                       loss_function=loss_function,
                                       epochs=epochs,
                                       lr=lr,
                                       input_dim=input_dim,
                                       num_layers=num_layers,
                                       hidden_dim=hidden_dim,
                                       grad=True,
                                       output_dim=output_dim,
                                       p=p)
    # One more untrained sub-model.
    model = Perceptron(input_dim=input_dim,
                       num_layers=num_layers,
                       hidden_dim=hidden_dim,
                       output_dim=output_dim,
                       p=p)
    trained_list.append(copy.deepcopy(model))
    # Linear weighing of all sub-models, trained end to end.
    final_model = line_weighing_machine(trained_list=trained_list,
                                        input_dim=input_dim,
                                        num_layers=num_layers,
                                        hidden_dim=hidden_dim,
                                        output_dim=output_dim,
                                        p=p)
    final_model.to(device)
    final_model.train()
    trainer(model=final_model,
            dataset=dataset,
            lr=lr,
            loss_function=loss_function,
            optimizer=torch.optim.Adam,
            epochs=epochs)
    final_model.eval()

    # Return the model by name rather than through the throwaway `_`.
    return final_model

In [None]:
class Perceptron(torch.nn.Module):
    '''
    Fully connected network.

    ``num_layers`` hidden stages of Linear -> BatchNorm1d -> ReLU -> Dropout
    are followed by a final Linear layer named ``classifier``. The first
    hidden stage has ``hidden_dim`` units and every following stage has half
    the units of the previous one (never fewer than one).
    '''
    def __init__(self,
                 input_dim=784,
                 num_layers=0,
                 hidden_dim=15,
                 output_dim=10,
                 p=0.0):
        super().__init__()

        self.layers = torch.nn.Sequential()

        in_features, width = input_dim, hidden_dim
        for depth in range(1, num_layers + 1):
            stage = (
                (f'layer{depth}', torch.nn.Linear(in_features, width)),
                (f'batch_norm{depth}', torch.nn.BatchNorm1d(width)),
                (f'relu{depth}', torch.nn.ReLU()),
                (f'dropout{depth}', torch.nn.Dropout(p=p)),
            )
            for module_name, module in stage:
                self.layers.add_module(module_name, module)
            # The next stage consumes this stage's output and is half as wide.
            in_features, width = width, max(1, width // 2)
        self.layers.add_module('classifier',
                               torch.nn.Linear(in_features, output_dim))

    @property
    def device(self):
        # Device of the first parameter; None when there are no parameters.
        first_param = next(iter(self.parameters()), None)
        return None if first_param is None else first_param.device

    def forward(self, input):
        return self.layers(input)

# Train-functions

In [None]:
def trainer(epochs,
            dataset,
            model,
            loss_function,
            optimizer,
            batch_size=32,
            lr=0.01,
            callback=None):
    '''
    Train ``model`` on ``dataset`` for ``epochs`` epochs at a fixed learning rate.

    ``optimizer`` is an optimizer class (not an instance); it is instantiated
    once over ``model.parameters()`` with ``lr``. Every epoch iterates a freshly
    built shuffled DataLoader that drops the last incomplete batch, and the
    mean epoch loss is shown on the progress bar. ``callback`` is forwarded to
    ``train_epoch``.
    '''
    optima = optimizer(model.parameters(), lr=lr)
    loss_key = 'train epoch loss'

    epoch_bar = tqdm(range(epochs), desc='epoch')
    epoch_bar.set_postfix({loss_key: np.nan})
    for _epoch in epoch_bar:
        loader = torch.utils.data.DataLoader(dataset=dataset,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             drop_last=True)
        mean_loss = train_epoch(train_generator=tqdm(loader, leave=False),
                                model=model,
                                loss_function=loss_function,
                                optimizer=optima,
                                callback=callback)
        epoch_bar.set_postfix({loss_key: mean_loss})

In [None]:
def cycle_trainer(epochs,
                  dataset,
                  model,
                  loss_function,
                  optimizer,
                  batch_size=32,
                  lr=0.01,
                  callback=None):
    '''
    Train ``model`` for one cosine-annealed learning-rate cycle (the schedule
    used by snapshot ensembles). The cycle length is ``epochs``.

    In epoch ``it`` the learning rate is
    ``(lr / 2) * (cos(pi * it / epochs) + 1)``, i.e. it starts at ``lr`` and
    decays towards zero over the cycle.

    ``optimizer`` is an optimizer class (not an instance). It is instantiated
    once per call and only its learning rate is updated between epochs, so
    stateful optimizers (e.g. Adam's moment estimates) keep their state within
    the cycle instead of being reset every epoch. ``callback`` is forwarded to
    ``train_epoch``.
    '''
    optima = optimizer(model.parameters(), lr=lr)

    iterations = tqdm(range(epochs), desc='epoch')
    iterations.set_postfix({'train epoch loss': np.nan})
    for it in iterations:
        batch_generator = tqdm(torch.utils.data.DataLoader(
            dataset=dataset,
            batch_size=batch_size,
            shuffle=True,
            drop_last=True),
                               leave=False)
        # Cosine-annealed learning rate for this epoch.
        lr_ = float((lr / 2) * (np.cos(np.pi * (it / epochs)) + 1))
        for param_group in optima.param_groups:
            param_group['lr'] = lr_

        epoch_loss = train_epoch(train_generator=batch_generator,
                                 model=model,
                                 loss_function=loss_function,
                                 optimizer=optima,
                                 callback=callback)

        iterations.set_postfix({'train epoch loss': epoch_loss})

In [None]:
def train_epoch(train_generator,
                model,
                loss_function,
                optimizer,
                callback=None):
    '''
    Run one pass over ``train_generator`` and return the mean per-sample loss.

    Every batch is moved to the global ``device`` and optimized with
    ``train_on_batch``. If ``callback`` is given, it is called as
    ``callback(model, batch_loss)`` after every batch. Batch losses are
    weighted by batch size when averaged.

    Returns NaN when the generator yields no batches (for example when
    ``drop_last=True`` and the dataset is smaller than one batch) instead of
    raising ZeroDivisionError.
    '''
    epoch_loss = 0
    total = 0
    for batch_of_x, batch_of_y in train_generator:
        batch_loss = train_on_batch(model, batch_of_x.to(device),
                                    batch_of_y.to(device), optimizer,
                                    loss_function)

        if callback is not None:
            callback(model, batch_loss)

        epoch_loss += batch_loss * len(batch_of_x)
        total += len(batch_of_x)

    if total == 0:
        # Nothing was processed, so there is no meaningful average.
        return float('nan')
    return epoch_loss / total

In [None]:
def train_on_batch(model, x_batch, y_batch, optimizer, loss_function):
    '''
    Perform a single optimization step on one batch.

    Puts ``model`` in training mode, clears gradients, computes
    ``loss_function(model(x_batch), y_batch)``, backpropagates and steps
    ``optimizer``. Returns the batch loss as a Python float.
    '''
    model.train()
    # Clear gradients on both the module and the optimizer's parameters.
    model.zero_grad()
    optimizer.zero_grad()
    batch_loss = loss_function(model(x_batch), y_batch)
    batch_loss.backward()
    optimizer.step()
    return batch_loss.cpu().item()

In [None]:
def loss(model, dataset, loss_function):
    '''
    Evaluate ``loss_function`` for ``model`` on the whole ``dataset``.

    The dataset is served as one batch of size ``len(dataset)`` on the global
    ``device``; gradients are disabled. Returns the loss as a Python float.
    '''
    full_batch = torch.utils.data.DataLoader(dataset,
                                             batch_size=len(dataset),
                                             shuffle=True,
                                             drop_last=True)
    with torch.no_grad():
        for features, targets in tqdm(full_batch, leave=False):
            batch_loss = loss_function(model(features.to(device)),
                                       targets.to(device))

    return batch_loss.cpu().item()

In [None]:
def ensemble_loss(trained_list, dataset, loss_function):
    '''
    Evaluate ``loss_function`` for the averaged prediction of ``trained_list``
    on the whole ``dataset``.

    The dataset is served as one batch of size ``len(dataset)``. Everything is
    evaluated on the CPU (each model is moved there) with gradients disabled.
    Returns the loss as a Python float.
    '''
    full_batch = torch.utils.data.DataLoader(dataset,
                                             batch_size=len(dataset),
                                             shuffle=True,
                                             drop_last=True)
    with torch.no_grad():
        for features, targets in tqdm(full_batch, leave=False):
            features, targets = features.to('cpu'), targets.to('cpu')
            summed = sum(member.to('cpu')(features) for member in trained_list)
            mean_prediction = summed / len(trained_list)
            batch_loss = loss_function(mean_prediction, targets)

    return batch_loss.cpu().item()

In [None]:
def test(X_test, y_test, model):
    '''
    Score ``model`` on a test set, in the original (un-standardized) target units.

    Inputs and the model are moved to the CPU. Predictions and targets are
    mapped back through the global ``yScaler`` before the metrics are computed.
    The model's train/eval mode is left as the caller set it.

    Returns
    -------
    (mse, mape, mae, r2)
        ``mse``, ``mape`` and ``mae`` are NumPy arrays with one entry per
        target column; ``mape`` is a fraction, not a percentage. ``r2`` is the
        value returned by ``r2_score``.
    '''
    X_test, y_test, model = X_test.to('cpu'), y_test.to('cpu'), model.to('cpu')
    y_pred = model(X_test).detach()
    y_test = y_test.reshape(-1, 1)

    # Undo the target scaling once instead of re-running it for every sample.
    y_true = yScaler.inverse_transform(y_test)
    y_hat = yScaler.inverse_transform(y_pred)

    r2 = r2_score(y_true, y_hat)
    errors = y_true - y_hat
    n_samples = len(y_test)
    mse = (errors**2).sum(axis=0) / n_samples
    mape = np.abs(errors / y_true).sum(axis=0) / n_samples
    mae = np.abs(errors).sum(axis=0) / n_samples
    return mse, mape, mae, r2

In [None]:
def ensemble_test(trained_list, X_test, y_test):
    '''
    Score the averaged prediction of ``trained_list`` on a test set, in the
    original (un-standardized) target units.

    Inputs and every model are moved to the CPU. The models' outputs are
    averaged, then predictions and targets are mapped back through the global
    ``yScaler`` before the metrics are computed. The models' train/eval mode
    is left as the caller set it.

    Returns
    -------
    (mse, mape, mae, r2)
        ``mse``, ``mape`` and ``mae`` are NumPy arrays with one entry per
        target column; ``mape`` is a fraction, not a percentage. ``r2`` is the
        value returned by ``r2_score``.
    '''
    X_test, y_test = X_test.to('cpu'), y_test.to('cpu')
    y_pred = 0
    for model in trained_list:
        y_pred = y_pred + model.to('cpu')(X_test).detach()
    y_pred = y_pred / len(trained_list)
    y_test = y_test.reshape(-1, 1)

    # Undo the target scaling once instead of re-running it for every sample.
    y_true = yScaler.inverse_transform(y_test)
    y_hat = yScaler.inverse_transform(y_pred)

    r2 = r2_score(y_true, y_hat)
    errors = y_true - y_hat
    n_samples = len(y_test)
    mse = (errors**2).sum(axis=0) / n_samples
    mape = np.abs(errors / y_true).sum(axis=0) / n_samples
    mae = np.abs(errors).sum(axis=0) / n_samples
    return mse, mape, mae, r2

In [None]:
def print_params_of_model(list_of_model):
    r'''
    Print the parameter list of every model in ``list_of_model``, one numbered
    entry per model (numbering starts at 0).
    '''
    for idm, model in enumerate(list_of_model):
        print('params model num:', idm, list(model.parameters()), '\n')


def print_convex_params(model):
    r'''
    Print the convex mixing coefficients of ``model``.

    The coefficients are the softmax of ``model.conv_weights``. The weights
    are copied to the CPU first, so this also works for a model living on a
    GPU, and the numerically stable softmax avoids NaN for large weights.
    '''
    logits = model.conv_weights.detach().cpu()
    convex_coefs = torch.softmax(logits, dim=0).numpy()
    print('convex params of model:', convex_coefs, '\n')

# Classic models

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Standardize the features for the classic models; fitted on the training split only.
pipeline = Pipeline(steps=[('std_scalar', StandardScaler())])
pipeline.fit(X_train)

X_train_, X_test_ = pipeline.transform(X_train), pipeline.transform(X_test)
# Targets keep their original scale here.
y_train_, y_test_ = y_train, y_test

In [None]:
from sklearn import metrics
from sklearn.model_selection import cross_val_score


def cross_val(model):
    '''Return the mean 10-fold cross-validation score of ``model`` on the global ``X`` and ``y``.'''
    return cross_val_score(model, X, y, cv=10).mean()

def print_evaluate(true, predicted):
    '''Print MAE, MSE, RMSE and R2 of ``predicted`` against ``true``, followed by a separator line.'''
    mse = metrics.mean_squared_error(true, predicted)
    report = (
        ('MAE:', metrics.mean_absolute_error(true, predicted)),
        ('MSE:', mse),
        ('RMSE:', np.sqrt(mse)),
        ('R2 Square', metrics.r2_score(true, predicted)),
    )
    for label, value in report:
        print(label, value)
    print('__________________________________')
    
def evaluate(true, predicted):
    '''Return the tuple (MAE, MSE, RMSE, R2) of ``predicted`` against ``true``.'''
    mse = metrics.mean_squared_error(true, predicted)
    return (metrics.mean_absolute_error(true, predicted), mse, np.sqrt(mse),
            metrics.r2_score(true, predicted))

### Linear Regression

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline

# Ordinary least squares on features scaled to unit variance (no centering).
unit_variance_scaler = StandardScaler(with_mean=False)
lin_reg = make_pipeline(unit_variance_scaler, LinearRegression())
lin_reg.fit(X_train, y_train)

In [None]:
# Predict on both splits and report the metrics, test split first.
test_pred, train_pred = lin_reg.predict(X_test), lin_reg.predict(X_train)

for header, truth, guess in (
        ('Test set evaluation:\n_____________________________________',
         y_test, test_pred),
        ('Train set evaluation:\n_____________________________________',
         y_train, train_pred)):
    print(header)
    print_evaluate(truth, guess)

In [None]:
# Start the comparison table with the linear-regression test metrics.
linear_row = ["Linear Regression", *evaluate(y_test, test_pred)]
results_df = pd.DataFrame([linear_row],
                          columns=['Model', 'MAE', 'MSE', 'RMSE', 'R2 Square'])
results_df

### Polynomial regression

In [None]:
from sklearn.preprocessing import PolynomialFeatures

# Expand the raw features with degree-2 polynomial terms (fitted on train only).
poly_reg = PolynomialFeatures(degree=2)
poly_reg.fit(X_train)
X_train_2_d, X_test_2_d = (poly_reg.transform(X_train),
                           poly_reg.transform(X_test))

# Note: this rebinds `lin_reg`, replacing the plain linear model fitted earlier.
lin_reg = make_pipeline(StandardScaler(with_mean=False),
                        LinearRegression()).fit(X_train_2_d, y_train)

test_pred, train_pred = (lin_reg.predict(X_test_2_d),
                         lin_reg.predict(X_train_2_d))

print('Test set evaluation:\n_____________________________________')
print_evaluate(y_test, test_pred)
print('====================================')
print('Train set evaluation:\n_____________________________________')
print_evaluate(y_train, train_pred)

In [None]:
results_df_2 = pd.DataFrame(data=[["Polynomial (deg 2)", *evaluate(y_test, test_pred)]], 
                          columns=['Model', 'MAE', 'MSE', 'RMSE', 'R2 Square'])

# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat builds the same table on old and new pandas alike.
results_df = pd.concat([results_df, results_df_2], ignore_index=True)
results_df

### Random Forest

In [None]:
from sklearn.ensemble import RandomForestRegressor
rf_reg = RandomForestRegressor(n_estimators=2000, random_state = 0)
# y_train is a single-column 2-D array; flatten it to the 1-D target the
# estimator expects so that fit does not emit a DataConversionWarning.
rf_reg.fit(X_train, y_train.ravel())

test_pred = rf_reg.predict(X_test)
train_pred = rf_reg.predict(X_train)

print('Test set evaluation:\n_____________________________________')
print_evaluate(y_test, test_pred)

print('Train set evaluation:\n_____________________________________')
print_evaluate(y_train, train_pred)

In [None]:
results_df_3 = pd.DataFrame(data=[["Random Forest (n_est: 2000)", *evaluate(y_test, test_pred)]], 
                          columns=['Model', 'MAE', 'MSE', 'RMSE', 'R2 Square'])

# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat builds the same table on old and new pandas alike.
results_df = pd.concat([results_df, results_df_3], ignore_index=True)
results_df

### SVM

In [None]:
from sklearn.svm import SVR

# `degree` only affects the 'poly' kernel, so it is not passed for the RBF kernel.
svm_reg = SVR(kernel='rbf', C=105, epsilon=0.001)
# NOTE(review): this model is fitted on the unscaled X_train while the XGBoost
# cells use the standardized X_train_ -- confirm that this is intended.
# y_train is a single-column 2-D array; SVR expects a 1-D target.
svm_reg.fit(X_train, y_train.ravel())

test_pred = svm_reg.predict(X_test)
train_pred = svm_reg.predict(X_train)

print('Test set evaluation:\n_____________________________________')
print_evaluate(y_test_, test_pred)

print('Train set evaluation:\n_____________________________________')
print_evaluate(y_train_, train_pred)

In [None]:
results_df_4 = pd.DataFrame(data=[["Support Vector Machine", *evaluate(y_test, test_pred)]], 
                          columns=['Model', 'MAE', 'MSE', 'RMSE', 'R2 Square'])

# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat builds the same table on old and new pandas alike.
results_df = pd.concat([results_df, results_df_4], ignore_index=True)

### XGBRegressor


In [None]:
import xgboost as xgb
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score, KFold
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt 

In [None]:
# Gradient-boosted tree baseline: fit on the `*_` split, then score both splits.
xgbr = xgb.XGBRegressor(
    n_estimators=200,
    max_depth=3,
    verbosity=0,
).fit(X_train_, y_train_)
test_pred, train_pred = xgbr.predict(X_test_), xgbr.predict(X_train_)

# (banner, ground truth, prediction) triples, reported in the order test -> train.
for banner, y_true, y_hat in (
        ('Test set evaluation:\n_____________________________________', y_test_, test_pred),
        ('Train set evaluation:\n_____________________________________', y_train_, train_pred)):
    print(banner)
    print_evaluate(y_true, y_hat)


In [None]:
# Record the XGBoost model's test-set metrics as one row of the baseline
# comparison table. The model was fitted on (X_train_, y_train_) and its
# predictions come from X_test_, so it is scored against y_test_ -- the same
# target the evaluation printout of this model uses.
results_df_5 = pd.DataFrame(data=[["XGBR (n_est: 200, max_depth:3)", *evaluate(y_test_, test_pred)]],
                            columns=['Model', 'MAE', 'MSE', 'RMSE', 'R2 Square'])

# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat
# with ignore_index=True produces the same table.
results_df = pd.concat([results_df, results_df_5], ignore_index=True)
results_df

### Gridsearch XGB (bad results)

In [None]:
from xgboost import XGBRegressor
from sklearn.model_selection import GridSearchCV

xgb1 = XGBRegressor()
# GridSearchCV expands this dict into a grid, so every value is a list;
# single-element lists are simply held fixed.
parameters = {'n_jobs': [4],  # threads per booster (replaces deprecated `nthread`); with hyper-threading xgboost may become slower
              'objective': ['reg:squarederror'],  # current name of the deprecated 'reg:linear'
              'learning_rate': [.01, .03, 0.05, .07],  # so called `eta` value
              'max_depth': [3, 5, 6, 7],
              'min_child_weight': [4],
              'verbosity': [0],  # replaces deprecated `silent`; same setting as the XGBRegressor cell above
              'subsample': [0.7],
              'colsample_bytree': [0.7],
              'n_estimators': [100, 300, 1000]}

xgb_grid = GridSearchCV(xgb1,
                        parameters,
                        cv=2,
                        n_jobs=5,
                        verbose=True)

xgb_grid.fit(X_train_, y_train_)
# Estimator refitted on the whole training split with the best parameter set.
best_xgb = xgb_grid.best_estimator_

test_pred = best_xgb.predict(X_test_)
train_pred = best_xgb.predict(X_train_)

print('Test set evaluation:\n_____________________________________')
print_evaluate(y_test_, test_pred)

print('Train set evaluation:\n_____________________________________')
print_evaluate(y_train_, train_pred)

## Results of other models:

In [None]:
# Display the comparison table accumulated by the baseline-model cells above
# (one row per model: MAE, MSE, RMSE, R2 Square).
results_df

# NN models

## Let's train all models with lr = 0.01

In [None]:
from sklearn.model_selection import ParameterGrid

# Hyper-parameter grid for the NN experiments. Each value is a list of
# candidates; the training loop reads one combination at a time as `item`.
grid = ParameterGrid(dict(
    # architecture
    inp_dim=[13],
    hid_dim=[128],
    out_dim=[1],
    num_layers=[4],
    # optimisation
    loss=[torch.nn.MSELoss()],
    lr=[0.01],
    epochs=[30, 75, 150, 200],
    p=[0.0, 0.1],  # presumably the dropout probability -- confirm in the model definition
    grad=[False],
    # number of models per method (stored as column `d` of the results)
    num_mod=[1, 2, 3, 4, 5],
))

In short, the proposed procedure can be described as follows. We run $d$ independent training processes of neural networks, obtaining empirical risk minimizers $\widehat{g}_1,\, \dots,\,\widehat{g}_d$, and freeze their weights. We then initialize a new model, connect all $d + 1$ models with a layer of convex coefficients, and optimize all non-frozen parameters. The whole procedure can be viewed as a search for an empirical risk minimizer over all possible $d$-dimensional simplices spanned by the $d$ minimizers and a class of neural networks.
It is known that empirical risk minimization over the convex hull is not optimal, just as minimization over the original class of functions is not. Our method, however, minimizes over a set intermediate between the original class of functions and its convex hull, which allows us to combine the advantages of model ensembling and the star procedure.

In [None]:
# Empty accumulator for the NN experiments: the training loops add one row per
# evaluated model and run.
result_columns = [
    'Name', 'd', 'epochs',
    'MSE', 'MAE', 'MAPE', 'R2 Square',
    'TRAIN MSE', 'TIME',
    'p', 'lr',
]
results = pd.DataFrame(data=[], columns=result_columns)

In [None]:
RESULT_COLUMNS = ['Name', 'd', 'epochs', 'MSE', 'MAE', 'MAPE', 'R2 Square',
                  'TRAIN MSE', 'TIME', 'p', 'lr']


def _score_row(name, item, eval_fn, elapsed, **model_kwarg):
    """Evaluate one trained model and return its scores as a one-row DataFrame.

    Args:
        name: label stored in the 'Name' column.
        item: current hyper-parameter combination taken from `grid`.
        eval_fn: `test` (called with `model=...`) or `ensemble_test`
            (called with `trained_list=...`).
        elapsed: training time stored in the 'TIME' column.
        **model_kwarg: the single keyword argument that carries the trained
            model(s) into `eval_fn`.

    Returns:
        pd.DataFrame with columns RESULT_COLUMNS; test metrics and the train
        MSE are rounded to 5 digits.
    """
    def as_float_tensor(array):
        return torch.tensor(array, dtype=torch.float)

    # The evaluators return (mse, mape, mae, r2); on the training split only
    # the MSE is reported.
    train_mse, _train_mape, _train_mae, _train_r2 = eval_fn(
        X_test=as_float_tensor(X_train_val),
        y_test=as_float_tensor(y_train_val),
        **model_kwarg)
    test_mse, test_mape, test_mae, test_r2 = eval_fn(
        X_test=as_float_tensor(X_test_val),
        y_test=as_float_tensor(y_test_val),
        **model_kwarg)

    return pd.DataFrame(data=[[name, item['num_mod'], item['epochs'],
                               round(test_mse[0], ndigits=5),
                               round(test_mae[0], ndigits=5),
                               round(test_mape[0], ndigits=5),
                               round(test_r2, ndigits=5),
                               round(train_mse[0], ndigits=5),
                               elapsed, item['p'], item['lr']]],
                        columns=RESULT_COLUMNS)


starts = 5  # independent repetitions of the whole grid
for start in tqdm(range(starts)):
    for item in tqdm(grid):

        # Arguments shared by every training routine for this grid point.
        shared_kwargs = dict(dataset=BOSTON_train,
                             loss_function=item['loss'],
                             output_dim=item['out_dim'],
                             num_layers=item['num_layers'],
                             hidden_dim=item['hid_dim'],
                             input_dim=item['inp_dim'],
                             lr=item['lr'],
                             p=item['p'],
                             grad=item['grad'])

        # Let's train Snap Star (new wrmp), Snap Ensemble and Snap Star (shot wrmp)
        (snap_star_shotwarmup, snap_ensemble, snap_star_newwarmup,
         snap_star_shotwarmup_time, ensemble_time,
         snap_star_newwarmup_time) = snap_models(
            num_of_models=item['num_mod'] + 1,
            budget=item['epochs'] * (item['num_mod'] + 1),
            **shared_kwargs)

        # Let's test Snap Star (new wrmp)
        # Every other model is switched to eval mode before scoring; do the same
        # here so all methods are compared under the same conditions
        # (assumes this model exposes .eval() like snap_star_shotwarmup does).
        snap_star_newwarmup.eval()
        row = _score_row("Snap Star (new wrmp)", item, test,
                         snap_star_newwarmup_time, model=snap_star_newwarmup)
        # New rows are put in front of the existing ones, as before.
        results = pd.concat([row, results], ignore_index=False)
        print('Snap Star (new wrmp) params:')
        print_convex_params(snap_star_newwarmup)

        # Let's test Snap Star (shot wrmp)
        snap_star_shotwarmup.eval()
        print('Snap Star (shot wrmp) params:')
        print_convex_params(snap_star_shotwarmup)
        row = _score_row("Snap Star (shot wrmp)", item, test,
                         snap_star_shotwarmup_time, model=snap_star_shotwarmup)
        results = pd.concat([row, results], ignore_index=False)

        # Let's test Snap Ensemble
        snap_ensemble.eval()
        row = _score_row("Snap Ensemble", item, ensemble_test,
                         ensemble_time, trained_list=snap_ensemble)
        results = pd.concat([row, results], ignore_index=False)

        # Let's train Ensemble
        start_time = time.time()
        nn_ensemble = ensemble(num_of_models=item['num_mod'],
                               epochs=item['epochs'],
                               **shared_kwargs)
        nn_ensemble.eval()
        training_time = round(time.time() - start_time)

        # Let's test Ensemble
        row = _score_row("Ensemble", item, ensemble_test,
                         training_time, trained_list=nn_ensemble)
        results = pd.concat([row, results], ignore_index=False)

        # Let's train Big NN
        start_time = time.time()
        bignn = big_nn(num_of_models=item['num_mod'],
                       epochs=item['epochs'],
                       **shared_kwargs)
        bignn.eval()
        training_time = round(time.time() - start_time)

        # Let's test Big NN
        row = _score_row("Big NN", item, test, training_time, model=bignn)
        results = pd.concat([row, results], ignore_index=False)
        print('Big NN params:')
        print_convex_params(bignn)

        # Let's train Classic Star (no wrmp) and Classic Star (new wrmp).
        # classic_star_models reports its own timings, so no timer is started here.
        star_nowrmp, star_newwrmp, star_nowrmp_time, star_newwrmp_time = classic_star_models(
            num_of_models=item['num_mod'],
            epochs=item['epochs'],
            warmup=True,
            prop_warmup=0.4,
            **shared_kwargs)

        # Let's test Classic Star (no wrmp)
        star_nowrmp.eval()
        row = _score_row("Classic Star (no wrmp)", item, test,
                         star_nowrmp_time, model=star_nowrmp)
        results = pd.concat([row, results], ignore_index=False)
        print('Classic Star (no wrmp) params:')
        print_convex_params(star_nowrmp)

        # Let's test Classic Star (new wrmp)
        star_newwrmp.eval()
        row = _score_row("Classic Star (new wrmp)", item, test,
                         star_newwrmp_time, model=star_newwrmp)
        results = pd.concat([row, results], ignore_index=False)
        print('Classic Star (new wrmp):')
        print_convex_params(star_newwrmp)

## Let's train Snap models with lr = 0.1

In [None]:
# Same grid as the lr = 0.01 run, but with learning rate 0.1: this section
# re-trains the Snap models at lr = 0.1, and the export step writes separate
# tables for lr 0.1 and lr 0.01.
grid = ParameterGrid({
    'num_layers': [4],
    'epochs': [30,75,150,200],
    'p': [0.0,0.1],
    'loss': [torch.nn.MSELoss()],
    'out_dim': [1],
    'lr': [0.1],
    'inp_dim': [13],
    'num_mod': [1,2,3,4,5],
    'hid_dim': [128],
    'grad': [False]
})

In [None]:
RESULT_COLUMNS = ['Name', 'd', 'epochs', 'MSE', 'MAE', 'MAPE', 'R2 Square',
                  'TRAIN MSE', 'TIME', 'p', 'lr']


def _score_row(name, item, eval_fn, elapsed, **model_kwarg):
    """Evaluate one trained model and return its scores as a one-row DataFrame.

    Args:
        name: label stored in the 'Name' column.
        item: current hyper-parameter combination taken from `grid`.
        eval_fn: `test` (called with `model=...`) or `ensemble_test`
            (called with `trained_list=...`).
        elapsed: training time stored in the 'TIME' column.
        **model_kwarg: the single keyword argument that carries the trained
            model(s) into `eval_fn`.

    Returns:
        pd.DataFrame with columns RESULT_COLUMNS; test metrics and the train
        MSE are rounded to 5 digits.
    """
    def as_float_tensor(array):
        return torch.tensor(array, dtype=torch.float)

    # The evaluators return (mse, mape, mae, r2); on the training split only
    # the MSE is reported.
    train_mse, _train_mape, _train_mae, _train_r2 = eval_fn(
        X_test=as_float_tensor(X_train_val),
        y_test=as_float_tensor(y_train_val),
        **model_kwarg)
    test_mse, test_mape, test_mae, test_r2 = eval_fn(
        X_test=as_float_tensor(X_test_val),
        y_test=as_float_tensor(y_test_val),
        **model_kwarg)

    return pd.DataFrame(data=[[name, item['num_mod'], item['epochs'],
                               round(test_mse[0], ndigits=5),
                               round(test_mae[0], ndigits=5),
                               round(test_mape[0], ndigits=5),
                               round(test_r2, ndigits=5),
                               round(train_mse[0], ndigits=5),
                               elapsed, item['p'], item['lr']]],
                        columns=RESULT_COLUMNS)


starts = 5  # independent repetitions of the whole grid
for start in tqdm(range(starts)):
    for item in tqdm(grid):

        # Let's train Snap Star (new wrmp), Snap Ensemble and Snap Star (shot wrmp)
        (snap_star_shotwarmup, snap_ensemble, snap_star_newwarmup,
         snap_star_shotwarmup_time, ensemble_time,
         snap_star_newwarmup_time) = snap_models(
            num_of_models=item['num_mod'] + 1,
            dataset=BOSTON_train,
            loss_function=item['loss'],
            output_dim=item['out_dim'],
            num_layers=item['num_layers'],
            hidden_dim=item['hid_dim'],
            budget=item['epochs'] * (item['num_mod'] + 1),
            input_dim=item['inp_dim'],
            lr=item['lr'],
            p=item['p'],
            grad=item['grad'])

        # Let's test Snap Star (new wrmp)
        # The other two snap models are switched to eval mode before scoring;
        # do the same here so all three are compared under the same conditions
        # (assumes this model exposes .eval() like snap_star_shotwarmup does).
        snap_star_newwarmup.eval()
        row = _score_row("Snap Star (new wrmp)", item, test,
                         snap_star_newwarmup_time, model=snap_star_newwarmup)
        # New rows are put in front of the existing ones, as before.
        results = pd.concat([row, results], ignore_index=False)
        print('Snap Star (new wrmp) params:')
        print_convex_params(snap_star_newwarmup)

        # Let's test Snap Star (shot wrmp)
        snap_star_shotwarmup.eval()
        print('Snap Star (shot wrmp) params:')
        print_convex_params(snap_star_shotwarmup)
        row = _score_row("Snap Star (shot wrmp)", item, test,
                         snap_star_shotwarmup_time, model=snap_star_shotwarmup)
        results = pd.concat([row, results], ignore_index=False)

        # Let's test Snap Ensemble
        snap_ensemble.eval()
        row = _score_row("Snap Ensemble", item, ensemble_test,
                         ensemble_time, trained_list=snap_ensemble)
        results = pd.concat([row, results], ignore_index=False)

## Let's save our results

In [None]:
def build_table(results_df=None):
    """Aggregate the per-run scores into one formatted row per configuration.

    Runs are grouped by ('Name', 'd', 'epochs', 'p', 'lr'); each metric is
    averaged over the group and the test MSE is reported as "mean±std".

    Args:
        results_df: frame with the columns 'Name', 'd', 'epochs', 'p', 'lr',
            'MSE', 'MAE', 'MAPE', 'R2 Square', 'TRAIN MSE' and 'TIME'.
            Defaults to the notebook-level `results` frame.

    Returns:
        pd.DataFrame sorted by ('d', 'epochs', 'p', 'lr', 'Name') with columns
        Name, d, epochs, p, lr, MSE, MAE, MAPE, R2 Square, TRAIN MSE, TIME.
        Metric columns are strings with three decimals; TIME is the mean
        rounded to three decimals. A group with a single run has an undefined
        standard deviation and shows up as "±nan".
    """
    if results_df is None:
        results_df = results  # notebook-level accumulator filled by the training loops

    group_keys = ['Name', 'd', 'epochs', 'p', 'lr']
    metric_cols = ['MSE', 'MAE', 'MAPE', 'R2 Square', 'TRAIN MSE']
    value_cols = metric_cols + ['TIME']

    # Make sure the aggregated columns are numeric even if the accumulator
    # ended up with object dtype, then group once and reuse the groupby.
    numeric = results_df.astype({col: float for col in value_cols})
    grouped = numeric.groupby(group_keys)

    t = grouped[value_cols].mean()
    t['MSE STD'] = grouped['MSE'].std()
    t = t.reset_index().sort_values(by=['d', 'epochs', 'p', 'lr', 'Name'])

    # Round only the reported values: rounding the whole frame would also
    # round the key columns and could collapse small learning rates.
    rounded_cols = value_cols + ['MSE STD']
    t[rounded_cols] = t[rounded_cols].round(3)
    for col in metric_cols + ['MSE STD']:
        t[col] = t[col].apply(lambda x: "%.3f" % x)

    t['MSE'] = t['MSE'] + u"\u00B1" + t['MSE STD']
    return t[['Name', 'd', 'epochs', 'p', 'lr', 'MSE', 'MAE', 'MAPE',
              'R2 Square', 'TRAIN MSE', 'TIME']]

In [None]:
def save_table(t, epochs, p, lrs, out_dir='final_res', prefix='BOSTON'):
    """Write one CSV per (epochs, p, lr) combination of the aggregated table.

    Args:
        t: table with at least the columns 'Name', 'd', 'epochs', 'p', 'lr',
            'MSE', 'MAE', 'MAPE', 'R2 Square', 'TRAIN MSE' and 'TIME'.
        epochs: epoch counts to export.
        p: values of the `p` column to export.
        lrs: learning rates to export.
        out_dir: output directory; created if it does not exist.
        prefix: file-name prefix.

    Each file is named '<prefix>_ep(<epochs>)_p(<p>)_lr(<lr>).csv' and holds
    the matching rows ordered by ('d', 'Name') descending. A combination with
    no matching rows still produces a header-only file.
    """
    import os
    from itertools import product

    # to_csv does not create missing directories.
    os.makedirs(out_dir, exist_ok=True)

    export_cols = ['Name', 'd', 'MSE', 'MAE', 'MAPE', 'R2 Square', 'TRAIN MSE', 'TIME']
    for epoch, i, lr in product(epochs, p, lrs):
        rslt_df = t.loc[(t['p'] == i) & (t['epochs'] == epoch) & (t['lr'] == lr)]
        # Sort ascending, then reverse, so rows come out in descending order.
        rslt_df = rslt_df.sort_values(by=['d', 'Name'])[::-1]
        path = os.path.join(out_dir, f'{prefix}_ep({epoch})_p({i})_lr({lr}).csv')
        rslt_df[export_cols].to_csv(path, index=False)


In [None]:
# Aggregate the per-run rows accumulated in `results` into the summary table.
table = build_table()

In [None]:
# Export one CSV per (epochs, p, lr) combination of the summary table.
save_table(
    table,
    epochs=[30, 75, 150, 200],
    p=[0.0, 0.1],
    lrs=[0.1, 0.01],
)

In [None]:
import os

# Persist the raw per-run rows next to the aggregated tables. to_csv does not
# create missing directories, so make sure the output folder exists first.
os.makedirs('final_res', exist_ok=True)
results.to_csv('final_res/BOSTON_results.csv', index=False)