In [1]:
import shutil
import apiquery
import pandas as pd
import sys
import seaborn as sns
import os
import numpy as np
import random
import torch
import gc
DATA_PATH = '../01.Data'
shutil.copy("apiquery_pyc.py", "apiquery.pyc")

module_path = "../src"
if module_path not in sys.path:
    sys.path.append(module_path)
    
from utils.training import *
from utils.encoding import *
from utils.utils import *
from utils.fetch import *
from dataset.dataset import BNPParibasText
from models.models import XLMRoberta
from utils.EarlyStopping import EarlyStopping
from utils.LoopFunctions import train_fn,valid_fn
from utils.prediction import get_prediction
pd.options.display.max_rows = 100
pd.options.display.max_columns = 100
import math
from collections import Counter
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import mean_squared_error
import math
import time
import lightgbm as lgbm
import matplotlib.pyplot as plt
import torch.nn as nn
import config_xml
import transformers

In [2]:
%%time
df_train     = pd.read_csv(os.path.join("../01.Data",'fold.csv'))
y_submission = pd.read_csv(os.path.join(DATA_PATH,'y_test_submission_example.tsv'), index_col='Index', encoding='utf-8', sep='\t')

CPU times: user 2.24 s, sys: 256 ms, total: 2.49 s
Wall time: 2.49 s


In [3]:
def generate_col_unique(df,cols_encode):
    col_unique = ''
    suma       = ''
    for i in cols_encode:
        col_unique = col_unique + i + '_'
        suma = suma + ', '+df[i]

    df[col_unique] = suma
    return col_unique

In [4]:
def calc_oof(df,config):
    df.loc[:,'oof'] = -1
    for fold in np.sort(df.fold.unique()):
        print(f'Predicting Model: {fold}')
        valid       = df[df['fold']==fold]
        valid_index = valid.index.to_list()
        valid = valid.reset_index(drop=True)
        # Defining DataSet
        col_unique = generate_col_unique(valid,config.COLUMNS_ENCODE)
        tokenizer = transformers.XLMRobertaTokenizer.from_pretrained(config.PRETRAINED)
        valid_dataset = BNPParibasText(valid,config.MAX_LENGTH,tokenizer,col_unique)
        valid_loader = torch.utils.data.DataLoader(
            valid_dataset,
            batch_size  = config.BATCH_SIZE,
            num_workers = config.NUM_WORKERS,
            shuffle     = False,
            pin_memory  = True,
        )

       # Defining Device
        model = XLMRoberta(pretrained_model=config.PRETRAINED,dropout = config.DROPOUT)
        model.load_state_dict(torch.load(f'../03.Models/BNP_PARIBAS_ROBERTA_MULTILINGUAL_FOLD_{fold}'))
        model.to(config.DEVICE)
        preds = get_prediction(valid_loader, model,config.DEVICE)
        df.loc[valid_index,'oof'] = preds
    oof_score = np.sqrt(mean_squared_error(df['target'],df['oof']))
    print('OOF_SCORE (RMSE): ',oof_score)
    return oof_score

In [5]:
# Calculating predictions for test
def calculate_test(test,config):
    col_unique = generate_col_unique(test,config.COLUMNS_ENCODE)   
    tokenizer     = transformers.XLMRobertaTokenizer.from_pretrained(config.PRETRAINED)
    test_dataset = BNPParibasText(test,config.MAX_LENGTH,tokenizer,col_unique)
    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size  = config.BATCH_SIZE,
        pin_memory  = True,
        num_workers = config.NUM_WORKERS
    )
    preds = 0
    for fold in range(0,5):
        model = XLMRoberta(pretrained_model=config.PRETRAINED,dropout = config.DROPOUT)
        model.load_state_dict(torch.load(f'../03.Models/BNP_PARIBAS_ROBERTA_MULTILINGUAL_FOLD_{fold}'))
        model.to(config.DEVICE)
        preds = preds + get_prediction(test_loader, model,config.DEVICE)
    test['preds'] = preds/5
    print(f'Real RMSE: ',math.sqrt(mean_squared_error(test['preds'].values,test['Target'].values)))

In [6]:
def run(data,fold,output_path,config,run=None):
    print(f'******************** Model Fold {fold}  *****************')
    seed_everything(seed=config.SEED)
    train = data[data['fold']!=fold].reset_index(drop=True)
    valid = data[data['fold']==fold].reset_index(drop=True)
    col_unique = generate_col_unique(train,config.COLUMNS_ENCODE)
    col_unique = generate_col_unique(valid,config.COLUMNS_ENCODE)
    
    
    print('Train: ',train.shape[0], 'Valid: ',valid.shape[0])
    # Defining DataSet
    tokenizer     = transformers.XLMRobertaTokenizer.from_pretrained(config.PRETRAINED)
    train_dataset = BNPParibasText(train,config.MAX_LENGTH,tokenizer,col_unique)
    valid_dataset = BNPParibasText(valid,config.MAX_LENGTH,tokenizer,col_unique)
        
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size  = config.BATCH_SIZE,
        pin_memory  = True,
        num_workers = config.NUM_WORKERS
    )

    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size  = config.BATCH_SIZE,
        num_workers = config.NUM_WORKERS,
        shuffle     = False,
        pin_memory  = True,
    )
    
    # Defining Device
    model = XLMRoberta(pretrained_model=config.PRETRAINED,dropout = config.DROPOUT)
    model.to(config.DEVICE)
    criterion = nn.MSELoss()
    criterion.to(config.DEVICE)

    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {
            "params": [
                p for n, p in param_optimizer if not any(nd in n for nd in no_decay)
            ],
            "weight_decay": config.WEIGHT_DECAY,
        },
        {
            "params": [
                p for n, p in param_optimizer if any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.0,
        },
    ]

    num_train_steps = int((len(train) / config.BATCH_SIZE )* config.EPOCHS)
    config.SCHEDULER_PARAMETERS['NUM_TRAIN_STEPS'] = num_train_steps
    print(f'num_train_steps: {num_train_steps}')
    optimizer = fetch_optimizer(config.OPTIMIZER_NAME,config.LEARNING_RATE,optimizer_parameters)
    scheduler = fetch_scheduler(config.SCHEDULER_NAME,optimizer,config.SCHEDULER_PARAMETERS)
    
    es = EarlyStopping (patience = config.EARLY_STOPPING, mode = config.MODE,delta=0)
      
    for epoch in range(config.EPOCHS):
        print('Epoch {}, lr {}'.format(epoch, optimizer.param_groups[0]['lr']))        
        training_loss = train_fn(train_loader,model,criterion,optimizer,config.DEVICE,scheduler,mode_sched = config.MODE_SCHEDULER)
        valid_loss    = valid_fn(valid_loader,model,criterion,config.DEVICE)
        if run:
            run.log({'training_loss':training_loss,'valid_loss':valid_loss})
            
        es(valid_loss, model,output_path)
        
        if es.early_stop:
            print('Meet early stopping')
            return es.get_best_val_score()
        gc.collect()
        torch.cuda.empty_cache()
    print("Didn't meet early stopping")
    return es.get_best_val_score()

In [7]:
for i in range(0,5):
    output_path = f'../03.Models/BNP_PARIBAS_ROBERTA_MULTILINGUAL_FOLD_{i}'
    run(df_train,i,output_path,config_xml)

******************** Model Fold 0  *****************
Train:  81622 Valid:  20406


  0%|          | 0/1276 [00:00<?, ?it/s]

num_train_steps: 15304
Epoch 0, lr 0.0
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:21<00:00,  3.97it/s, LR=1.67e-5, Train_Loss=72.1]
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 72.14168079071284


100%|██████████| 319/319 [00:27<00:00, 11.54it/s, Eval_Loss=34.2]


Validation -> Loss: 34.24715764395496
Validation score improved (inf --> 34.24715764395496). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 1, lr 1.6679738562091504e-05
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:22<00:00,  3.95it/s, LR=1.85e-5, Train_Loss=30.6]
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 30.575736187094805


100%|██████████| 319/319 [00:28<00:00, 11.18it/s, Eval_Loss=26.7]


Validation -> Loss: 26.723944888219563
Validation score improved (34.24715764395496 --> 26.723944888219563). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 2, lr 1.8516044721939887e-05
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:24<00:00,  3.94it/s, LR=1.67e-5, Train_Loss=25.4]
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 25.446724702572002


100%|██████████| 319/319 [00:29<00:00, 10.96it/s, Eval_Loss=25.6]


Validation -> Loss: 25.558437634411277
Validation score improved (26.723944888219563 --> 25.558437634411277). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 3, lr 1.6663278640917674e-05
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:24<00:00,  3.93it/s, LR=1.48e-5, Train_Loss=22.8]
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 22.820410957904446


100%|██████████| 319/319 [00:28<00:00, 11.00it/s, Eval_Loss=26]  
  0%|          | 0/1276 [00:00<?, ?it/s]

Validation -> Loss: 25.988142964234548
EarlyStopping counter: 1 out of 5
Epoch 4, lr 1.4810512559895456e-05
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:23<00:00,  3.95it/s, LR=1.3e-5, Train_Loss=20.9] 
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 20.933106670559013


100%|██████████| 319/319 [00:28<00:00, 11.02it/s, Eval_Loss=25.2]


Validation -> Loss: 25.191499677197687
Validation score improved (25.558437634411277 --> 25.191499677197687). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 5, lr 1.2957746478873242e-05
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:24<00:00,  3.93it/s, LR=1.11e-5, Train_Loss=19.6]
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 19.5825565874016


100%|██████████| 319/319 [00:29<00:00, 10.96it/s, Eval_Loss=25.2]


Validation -> Loss: 25.16567113930155
Validation score improved (25.191499677197687 --> 25.16567113930155). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 6, lr 1.1104980397851025e-05
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:24<00:00,  3.93it/s, LR=9.25e-6, Train_Loss=18.4]
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 18.41613114440703


100%|██████████| 319/319 [00:29<00:00, 10.99it/s, Eval_Loss=24.8]


Validation -> Loss: 24.8374388105817
Validation score improved (25.16567113930155 --> 24.8374388105817). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 7, lr 9.252214316828807e-06
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:24<00:00,  3.93it/s, LR=7.4e-6, Train_Loss=17.5] 
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 17.47047094827909


100%|██████████| 319/319 [00:29<00:00, 10.95it/s, Eval_Loss=23.5]


Validation -> Loss: 23.525701809826316
Validation score improved (24.8374388105817 --> 23.525701809826316). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 8, lr 7.399448235806593e-06
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:24<00:00,  3.93it/s, LR=5.55e-6, Train_Loss=16.7]
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 16.711096798364647


100%|██████████| 319/319 [00:28<00:00, 11.06it/s, Eval_Loss=23.3]


Validation -> Loss: 23.297262559490143
Validation score improved (23.525701809826316 --> 23.297262559490143). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 9, lr 5.546682154784376e-06
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:24<00:00,  3.93it/s, LR=3.69e-6, Train_Loss=16.1]
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 16.11922672148035


100%|██████████| 319/319 [00:29<00:00, 10.94it/s, Eval_Loss=22.7]


Validation -> Loss: 22.69593770974856
Validation score improved (23.297262559490143 --> 22.69593770974856). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 10, lr 3.693916073762161e-06
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:24<00:00,  3.93it/s, LR=1.84e-6, Train_Loss=15.6]
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 15.613884917994652


100%|██████████| 319/319 [00:29<00:00, 10.97it/s, Eval_Loss=22.2]


Validation -> Loss: 22.247629913043077
Validation score improved (22.69593770974856 --> 22.247629913043077). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 11, lr 1.841149992739945e-06
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:24<00:00,  3.93it/s, LR=0, Train_Loss=15.3]      
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 15.273804533444228


100%|██████████| 319/319 [00:28<00:00, 11.09it/s, Eval_Loss=22.4]


Validation -> Loss: 22.424253230558293
EarlyStopping counter: 1 out of 5
Didn't meet early stopping
******************** Model Fold 1  *****************
Train:  81622 Valid:  20406


  0%|          | 0/1276 [00:00<?, ?it/s]

num_train_steps: 15304
Epoch 0, lr 0.0
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:25<00:00,  3.92it/s, LR=1.67e-5, Train_Loss=71.7]
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 71.74262105409628


100%|██████████| 319/319 [00:28<00:00, 11.28it/s, Eval_Loss=35]  


Validation -> Loss: 35.04295958396409
Validation score improved (inf --> 35.04295958396409). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 1, lr 1.6679738562091504e-05
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:23<00:00,  3.95it/s, LR=1.85e-5, Train_Loss=30.3]
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 30.271344462905933


100%|██████████| 319/319 [00:28<00:00, 11.28it/s, Eval_Loss=27.1]


Validation -> Loss: 27.070906172725476
Validation score improved (35.04295958396409 --> 27.070906172725476). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 2, lr 1.8516044721939887e-05
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:23<00:00,  3.95it/s, LR=1.67e-5, Train_Loss=25]  
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 24.958066819977237


100%|██████████| 319/319 [00:27<00:00, 11.42it/s, Eval_Loss=25.3]


Validation -> Loss: 25.26345479002567
Validation score improved (27.070906172725476 --> 25.26345479002567). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 3, lr 1.6663278640917674e-05
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:23<00:00,  3.94it/s, LR=1.48e-5, Train_Loss=22.6]
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 22.601844803292924


100%|██████████| 319/319 [00:28<00:00, 11.33it/s, Eval_Loss=24.8]


Validation -> Loss: 24.767041257182633
Validation score improved (25.26345479002567 --> 24.767041257182633). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 4, lr 1.4810512559895456e-05
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:23<00:00,  3.95it/s, LR=1.3e-5, Train_Loss=20.9] 
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 20.86537601207865


100%|██████████| 319/319 [00:27<00:00, 11.41it/s, Eval_Loss=24.5]


Validation -> Loss: 24.50147569889559
Validation score improved (24.767041257182633 --> 24.50147569889559). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 5, lr 1.2957746478873242e-05
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:23<00:00,  3.94it/s, LR=1.11e-5, Train_Loss=19.5]
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 19.511992819256918


100%|██████████| 319/319 [00:28<00:00, 11.38it/s, Eval_Loss=23.8]


Validation -> Loss: 23.839570153095877
Validation score improved (24.50147569889559 --> 23.839570153095877). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 6, lr 1.1104980397851025e-05
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:23<00:00,  3.94it/s, LR=9.25e-6, Train_Loss=18.3]
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 18.285801823983746


100%|██████████| 319/319 [00:27<00:00, 11.41it/s, Eval_Loss=23.7]


Validation -> Loss: 23.66585559306847
Validation score improved (23.839570153095877 --> 23.66585559306847). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 7, lr 9.252214316828807e-06
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:23<00:00,  3.95it/s, LR=7.4e-6, Train_Loss=17.3] 
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 17.315743133939545


100%|██████████| 319/319 [00:28<00:00, 11.37it/s, Eval_Loss=23.4]


Validation -> Loss: 23.356474975059772
Validation score improved (23.66585559306847 --> 23.356474975059772). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 8, lr 7.399448235806593e-06
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:23<00:00,  3.94it/s, LR=5.55e-6, Train_Loss=16.6]
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 16.61854158150365


100%|██████████| 319/319 [00:27<00:00, 11.46it/s, Eval_Loss=23.5]
  0%|          | 0/1276 [00:00<?, ?it/s]

Validation -> Loss: 23.53880213644811
EarlyStopping counter: 1 out of 5
Epoch 9, lr 5.546682154784376e-06
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:23<00:00,  3.94it/s, LR=3.69e-6, Train_Loss=16]  
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 15.954463119043451


100%|██████████| 319/319 [00:28<00:00, 11.38it/s, Eval_Loss=23.1]


Validation -> Loss: 23.09646807045772
Validation score improved (23.356474975059772 --> 23.09646807045772). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 10, lr 3.693916073762161e-06
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:23<00:00,  3.94it/s, LR=1.84e-6, Train_Loss=15.5]
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 15.480697954990273


100%|██████████| 319/319 [00:28<00:00, 11.36it/s, Eval_Loss=23]  


Validation -> Loss: 22.976819418067095
Validation score improved (23.09646807045772 --> 22.976819418067095). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 11, lr 1.841149992739945e-06
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:23<00:00,  3.94it/s, LR=0, Train_Loss=15.1]      
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 15.127453786452362


100%|██████████| 319/319 [00:27<00:00, 11.42it/s, Eval_Loss=22.7]


Validation -> Loss: 22.697544011202726
Validation score improved (22.976819418067095 --> 22.697544011202726). Saving model!
Didn't meet early stopping
******************** Model Fold 2  *****************
Train:  81622 Valid:  20406


  0%|          | 0/1276 [00:00<?, ?it/s]

num_train_steps: 15304
Epoch 0, lr 0.0
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:23<00:00,  3.95it/s, LR=1.67e-5, Train_Loss=73]  
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 73.01953844291663


100%|██████████| 319/319 [00:27<00:00, 11.44it/s, Eval_Loss=33.8]


Validation -> Loss: 33.83570984986882
Validation score improved (inf --> 33.83570984986882). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 1, lr 1.6679738562091504e-05
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:23<00:00,  3.95it/s, LR=1.85e-5, Train_Loss=30]  
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 30.00392857240659


100%|██████████| 319/319 [00:27<00:00, 11.58it/s, Eval_Loss=26.6]


Validation -> Loss: 26.635949239461773
Validation score improved (33.83570984986882 --> 26.635949239461773). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 2, lr 1.8516044721939887e-05
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:22<00:00,  3.96it/s, LR=1.67e-5, Train_Loss=25]  
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 24.98349136058066


100%|██████████| 319/319 [00:27<00:00, 11.50it/s, Eval_Loss=24.4]


Validation -> Loss: 24.383064945663403
Validation score improved (26.635949239461773 --> 24.383064945663403). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 3, lr 1.6663278640917674e-05
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:22<00:00,  3.95it/s, LR=1.48e-5, Train_Loss=22.6]
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 22.562884160717452


100%|██████████| 319/319 [00:27<00:00, 11.48it/s, Eval_Loss=24.6]
  0%|          | 0/1276 [00:00<?, ?it/s]

Validation -> Loss: 24.613327555521902
EarlyStopping counter: 1 out of 5
Epoch 4, lr 1.4810512559895456e-05
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:22<00:00,  3.95it/s, LR=1.3e-5, Train_Loss=20.7] 
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 20.701737792140637


100%|██████████| 319/319 [00:27<00:00, 11.52it/s, Eval_Loss=24.1]


Validation -> Loss: 24.12882026983279
Validation score improved (24.383064945663403 --> 24.12882026983279). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 5, lr 1.2957746478873242e-05
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:22<00:00,  3.96it/s, LR=1.11e-5, Train_Loss=19.3]
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 19.321181885501062


100%|██████████| 319/319 [00:27<00:00, 11.46it/s, Eval_Loss=23.5]


Validation -> Loss: 23.450917118395385
Validation score improved (24.12882026983279 --> 23.450917118395385). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 6, lr 1.1104980397851025e-05
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:22<00:00,  3.96it/s, LR=9.25e-6, Train_Loss=18.2]
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 18.244385788059535


100%|██████████| 319/319 [00:28<00:00, 11.37it/s, Eval_Loss=23.1]


Validation -> Loss: 23.06654219791807
Validation score improved (23.450917118395385 --> 23.06654219791807). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 7, lr 9.252214316828807e-06
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:23<00:00,  3.95it/s, LR=7.4e-6, Train_Loss=17.4] 
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 17.35863978081736


100%|██████████| 319/319 [00:27<00:00, 11.47it/s, Eval_Loss=22.8]


Validation -> Loss: 22.781064682246
Validation score improved (23.06654219791807 --> 22.781064682246). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 8, lr 7.399448235806593e-06
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:23<00:00,  3.95it/s, LR=5.55e-6, Train_Loss=16.6]
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 16.56329779864105


100%|██████████| 319/319 [00:27<00:00, 11.46it/s, Eval_Loss=22.6]


Validation -> Loss: 22.55104812335071
Validation score improved (22.781064682246 --> 22.55104812335071). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 9, lr 5.546682154784376e-06
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:22<00:00,  3.96it/s, LR=3.69e-6, Train_Loss=16]  
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 16.000642076181393


100%|██████████| 319/319 [00:27<00:00, 11.58it/s, Eval_Loss=22]  


Validation -> Loss: 21.991469245719312
Validation score improved (22.55104812335071 --> 21.991469245719312). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 10, lr 3.693916073762161e-06
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:22<00:00,  3.95it/s, LR=1.84e-6, Train_Loss=15.5]
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 15.524369444219296


100%|██████████| 319/319 [00:27<00:00, 11.50it/s, Eval_Loss=21.9]


Validation -> Loss: 21.94037721747515
Validation score improved (21.991469245719312 --> 21.94037721747515). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 11, lr 1.841149992739945e-06
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:23<00:00,  3.95it/s, LR=0, Train_Loss=15.2]      
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 15.183205813543177


100%|██████████| 319/319 [00:27<00:00, 11.55it/s, Eval_Loss=22]  


Validation -> Loss: 21.97310228258091
EarlyStopping counter: 1 out of 5
Didn't meet early stopping
******************** Model Fold 3  *****************
Train:  81623 Valid:  20405


  0%|          | 0/1276 [00:00<?, ?it/s]

num_train_steps: 15304
Epoch 0, lr 0.0
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:23<00:00,  3.94it/s, LR=1.67e-5, Train_Loss=72.2]
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 72.21252906883025


100%|██████████| 319/319 [00:28<00:00, 11.31it/s, Eval_Loss=35.8]


Validation -> Loss: 35.7909181289912
Validation score improved (inf --> 35.7909181289912). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 1, lr 1.6679738562091504e-05
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:22<00:00,  3.95it/s, LR=1.85e-5, Train_Loss=30]  
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 29.983493896113668


100%|██████████| 319/319 [00:27<00:00, 11.61it/s, Eval_Loss=27.8]


Validation -> Loss: 27.773884285953724
Validation score improved (35.7909181289912 --> 27.773884285953724). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 2, lr 1.8516044721939887e-05
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:22<00:00,  3.96it/s, LR=1.67e-5, Train_Loss=24.9]
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 24.89728327305713


100%|██████████| 319/319 [00:27<00:00, 11.67it/s, Eval_Loss=26.1]


Validation -> Loss: 26.063523627373865
Validation score improved (27.773884285953724 --> 26.063523627373865). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 3, lr 1.6663278640917674e-05
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:22<00:00,  3.95it/s, LR=1.48e-5, Train_Loss=22.5]
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 22.53069764469111


100%|██████████| 319/319 [00:27<00:00, 11.51it/s, Eval_Loss=25.1]


Validation -> Loss: 25.134219023127542
Validation score improved (26.063523627373865 --> 25.134219023127542). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 4, lr 1.4810512559895456e-05
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:22<00:00,  3.95it/s, LR=1.3e-5, Train_Loss=20.7] 
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 20.71786942676317


100%|██████████| 319/319 [00:27<00:00, 11.54it/s, Eval_Loss=24.6]


Validation -> Loss: 24.591851587205845
Validation score improved (25.134219023127542 --> 24.591851587205845). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 5, lr 1.2957746478873242e-05
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:22<00:00,  3.96it/s, LR=1.11e-5, Train_Loss=19.2]
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 19.21526801847739


100%|██████████| 319/319 [00:27<00:00, 11.60it/s, Eval_Loss=24.3]


Validation -> Loss: 24.26546380512393
Validation score improved (24.591851587205845 --> 24.26546380512393). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 6, lr 1.1104980397851025e-05
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:22<00:00,  3.96it/s, LR=9.25e-6, Train_Loss=18.2]
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 18.150394995384456


100%|██████████| 319/319 [00:27<00:00, 11.56it/s, Eval_Loss=23.8]


Validation -> Loss: 23.801059809598055
Validation score improved (24.26546380512393 --> 23.801059809598055). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 7, lr 9.252214316828807e-06
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:23<00:00,  3.95it/s, LR=7.4e-6, Train_Loss=17.2] 
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 17.231132318981015


100%|██████████| 319/319 [00:27<00:00, 11.57it/s, Eval_Loss=23.6]


Validation -> Loss: 23.64374775722109
Validation score improved (23.801059809598055 --> 23.64374775722109). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 8, lr 7.399448235806593e-06
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:22<00:00,  3.96it/s, LR=5.55e-6, Train_Loss=16.6]
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 16.5570808779857


100%|██████████| 319/319 [00:27<00:00, 11.62it/s, Eval_Loss=24]  
  0%|          | 0/1276 [00:00<?, ?it/s]

Validation -> Loss: 24.00534340131993
EarlyStopping counter: 1 out of 5
Epoch 9, lr 5.546682154784376e-06
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:23<00:00,  3.95it/s, LR=3.69e-6, Train_Loss=15.9]
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 15.93255532535266


100%|██████████| 319/319 [00:27<00:00, 11.53it/s, Eval_Loss=23.4]


Validation -> Loss: 23.38432802301963
Validation score improved (23.64374775722109 --> 23.38432802301963). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 10, lr 3.693916073762161e-06
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:23<00:00,  3.95it/s, LR=1.84e-6, Train_Loss=15.4]
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 15.381379615176808


100%|██████████| 319/319 [00:27<00:00, 11.57it/s, Eval_Loss=23.2]


Validation -> Loss: 23.226445924525724
Validation score improved (23.38432802301963 --> 23.226445924525724). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 11, lr 1.841149992739945e-06
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:22<00:00,  3.96it/s, LR=0, Train_Loss=15]        
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 15.021322443567474


100%|██████████| 319/319 [00:27<00:00, 11.50it/s, Eval_Loss=23.1]


Validation -> Loss: 23.124623618529508
Validation score improved (23.226445924525724 --> 23.124623618529508). Saving model!
Didn't meet early stopping
******************** Model Fold 4  *****************
Train:  81623 Valid:  20405


  0%|          | 0/1276 [00:00<?, ?it/s]

num_train_steps: 15304
Epoch 0, lr 0.0
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:23<00:00,  3.94it/s, LR=1.67e-5, Train_Loss=73.5]
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 73.5010647026349


100%|██████████| 319/319 [00:27<00:00, 11.51it/s, Eval_Loss=33.5]


Validation -> Loss: 33.457854507111456
Validation score improved (inf --> 33.457854507111456). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 1, lr 1.6679738562091504e-05
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:22<00:00,  3.95it/s, LR=1.85e-5, Train_Loss=30.3]
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 30.33029447977072


100%|██████████| 319/319 [00:27<00:00, 11.47it/s, Eval_Loss=26.8]


Validation -> Loss: 26.768773853218295
Validation score improved (33.457854507111456 --> 26.768773853218295). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 2, lr 1.8516044721939887e-05
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:22<00:00,  3.95it/s, LR=1.67e-5, Train_Loss=25.3]
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 25.29649917533779


100%|██████████| 319/319 [00:27<00:00, 11.44it/s, Eval_Loss=25.3]


Validation -> Loss: 25.28253132870952
Validation score improved (26.768773853218295 --> 25.28253132870952). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 3, lr 1.6663278640917674e-05
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:23<00:00,  3.95it/s, LR=1.48e-5, Train_Loss=22.9]
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 22.940265870393258


100%|██████████| 319/319 [00:27<00:00, 11.52it/s, Eval_Loss=25.9]
  0%|          | 0/1276 [00:00<?, ?it/s]

Validation -> Loss: 25.91538207433814
EarlyStopping counter: 1 out of 5
Epoch 4, lr 1.4810512559895456e-05
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:23<00:00,  3.95it/s, LR=1.3e-5, Train_Loss=21]   
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 21.03903352018434


100%|██████████| 319/319 [00:27<00:00, 11.42it/s, Eval_Loss=24.9]


Validation -> Loss: 24.935529646081235
Validation score improved (25.28253132870952 --> 24.935529646081235). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 5, lr 1.2957746478873242e-05
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:22<00:00,  3.95it/s, LR=1.11e-5, Train_Loss=19.6]
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 19.62182917789232


100%|██████████| 319/319 [00:27<00:00, 11.54it/s, Eval_Loss=23.7]


Validation -> Loss: 23.736058946687226
Validation score improved (24.935529646081235 --> 23.736058946687226). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 6, lr 1.1104980397851025e-05
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:23<00:00,  3.95it/s, LR=9.25e-6, Train_Loss=18.5]
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 18.479689816322445


100%|██████████| 319/319 [00:27<00:00, 11.52it/s, Eval_Loss=22.7]


Validation -> Loss: 22.674996692932513
Validation score improved (23.736058946687226 --> 22.674996692932513). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 7, lr 9.252214316828807e-06
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:22<00:00,  3.95it/s, LR=7.4e-6, Train_Loss=17.5] 
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 17.497332436911364


100%|██████████| 319/319 [00:27<00:00, 11.43it/s, Eval_Loss=22.7]
  0%|          | 0/1276 [00:00<?, ?it/s]

Validation -> Loss: 22.71947129467812
EarlyStopping counter: 1 out of 5
Epoch 8, lr 7.399448235806593e-06
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:22<00:00,  3.95it/s, LR=5.55e-6, Train_Loss=16.7]
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 16.73222555486386


100%|██████████| 319/319 [00:28<00:00, 11.31it/s, Eval_Loss=23]  
  0%|          | 0/1276 [00:00<?, ?it/s]

Validation -> Loss: 23.018047063701953
EarlyStopping counter: 2 out of 5
Epoch 9, lr 5.546682154784376e-06
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:22<00:00,  3.96it/s, LR=3.69e-6, Train_Loss=16.1]
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 16.081167665768568


100%|██████████| 319/319 [00:27<00:00, 11.53it/s, Eval_Loss=22.4]


Validation -> Loss: 22.371608115288904
Validation score improved (22.674996692932513 --> 22.371608115288904). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 10, lr 3.693916073762161e-06
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:23<00:00,  3.95it/s, LR=1.84e-6, Train_Loss=15.6]
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 15.59901050004092


100%|██████████| 319/319 [00:27<00:00, 11.47it/s, Eval_Loss=22.4]


Validation -> Loss: 22.360721881113083
Validation score improved (22.371608115288904 --> 22.360721881113083). Saving model!


  0%|          | 0/1276 [00:00<?, ?it/s]

Epoch 11, lr 1.841149992739945e-06
Mode Scheduler: OK


100%|██████████| 1276/1276 [05:22<00:00,  3.96it/s, LR=0, Train_Loss=15.2]      
  0%|          | 0/319 [00:00<?, ?it/s]

Training -> Loss: 15.228412944134499


100%|██████████| 319/319 [00:27<00:00, 11.52it/s, Eval_Loss=22.5]


Validation -> Loss: 22.469302141554305
EarlyStopping counter: 1 out of 5
Didn't meet early stopping


In [9]:
calc_oof(df_train,config_xml)

Predicting Model: 0


HBox(children=(FloatProgress(value=0.0, max=319.0), HTML(value='')))


Predicting Model: 1


HBox(children=(FloatProgress(value=0.0, max=319.0), HTML(value='')))


Predicting Model: 2


HBox(children=(FloatProgress(value=0.0, max=319.0), HTML(value='')))


Predicting Model: 3


HBox(children=(FloatProgress(value=0.0, max=319.0), HTML(value='')))


Predicting Model: 4


HBox(children=(FloatProgress(value=0.0, max=319.0), HTML(value='')))


OOF_SCORE (RMSE):  4.7406423398095985


4.7406423398095985

In [10]:
test           = pd.read_csv(os.path.join(DATA_PATH,'test_preprocessed.csv'))
test['target'] = -1
calculate_test(test,config_xml)

HBox(children=(FloatProgress(value=0.0, max=399.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=399.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=399.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=399.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=399.0), HTML(value='')))


Real RMSE:  4.6459127404990275


## Combining Word Embeddings with More Features

In [None]:
def get_embedding(data_loader, model, device):
    from tqdm.notebook import tqdm
    # Put the model in eval mode
    model.to(device)
    model.eval()
    # List for store final predictions
    final_predictions = []
    with torch.no_grad():
        tk0 = tqdm(data_loader, total=len(data_loader))
        for b_idx, data in enumerate(tk0):
            for key,value in data.items():
                data[key] = value.to(device)
            predictions = model._embeddings(data['ids'],data['mask'])
            predictions = predictions.cpu()
            final_predictions.append(predictions)
    return np.vstack(final_predictions)

In [None]:
col_unique = generate_col_unique(df_train,config.COLUMNS_ENCODE)
tokenizer     = transformers.RobertaTokenizer.from_pretrained(config.PRETRAINED)
train_dataset = BNPParibasText(df_train,config.MAX_LENGTH,tokenizer,col_unique)
train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size  = 32,
            pin_memory  = True,
            num_workers = 72
        )
embedding_all = 0
for fold in np.sort(df_train['fold'].unique()):
    model         = Roberta_Model(pretrained_model=config.PRETRAINED)
    model.load_state_dict(torch.load(f'../03.Models/BNP_PARIBAS_ROBERTA_FOLD_{fold}'))
    embedding_all += get_embedding(train_loader, model, 'cuda')
    del model
    torch.cuda.empty_cache()
embedding_all = embedding_all/len(df_train['fold'].unique())
df_train[[f'emb_{col_unique}_{i}' for i in range(embedding_all.shape[1])]] = embedding_all

In [None]:
df_test      = pd.read_csv(os.path.join(DATA_PATH,'test_preprocessed.csv'))
df_test['target'] = -1

col_unique = generate_col_unique(df_test,config.COLUMNS_ENCODE)
tokenizer     = transformers.RobertaTokenizer.from_pretrained(config.PRETRAINED)
test_dataset = BNPParibasText(df_test,config.MAX_LENGTH,tokenizer,col_unique)
test_loader = torch.utils.data.DataLoader(
            test_dataset,
            batch_size  = 32,
            pin_memory  = True,
            num_workers = 72
        )
embedding_all = 0
for fold in np.sort(df_train['fold'].unique()):
    model         = Roberta_Model(pretrained_model=config.PRETRAINED)
    model.load_state_dict(torch.load(f'../03.Models/BNP_PARIBAS_ROBERTA_FOLD_{fold}'))
    embedding_all += get_embedding(test_loader, model, 'cuda')
    del model
    torch.cuda.empty_cache()
embedding_all = embedding_all/len(df_train['fold'].unique())
df_test[[f'emb_{col_unique}_{i}' for i in range(embedding_all.shape[1])]] = embedding_all

In [None]:
columns_modeling = ['additives_n','ingredients_from_palm_oil_n',
                    'ingredients_that_may_be_from_palm_oil_n','target',
                    'states_en_brands','states_en_categories','states_en_characteristics','states_en_expiration date',
                    'states_en_general_complete','states_en_ingredients','pnns_groups_1','pnns_groups_2',
                    'states_en_packaging','states_en_packaging-code-','states_en_photo_upload',
                    'states_en_photo_validate','states_en_product name','states_en_quantity','diff_t'] + [f'emb_{col_unique}_{i}' for i in range(embedding_all.shape[1])]
columns_label = df_train[columns_modeling].select_dtypes(include=['object']).columns.to_list()
print(columns_label)

In [None]:
df_train,dict_le = label_encoding(df_train,label_cols = columns_label, drop_original = True, missing_new_cat = True)

In [None]:
df_test = apply_label_encoder(df_test,dict_le,drop_original = True, missing_new_cat = True)

In [None]:
params = {
        'task': 'train',
        'boosting_type': 'gbdt',
        'objective': 'regression',
        'metric': {'rmse'},
        'num_leaves':12,
        'learning_rate': 0.001,
        "min_child_samples": 150,
        "max_depth" : 5,
        'feature_fraction':  0.5,
        "bagging_freq": 1,
        'bagging_fraction': 0.75,
        "is_unbalance" : False,
        'force_col_wise':True,
        'num_threads':18,
        #"scale_pos_weight":5 -> Generally  is the ratio of number of negative class to the positive class.
        'bagging_seed':42,
        'lambda_l1':1.5,
        'lambda_l2':1,
        'verbose': 1

}
cat_columns = [i for i in df_train.columns.to_list() if i.startswith('label_')]
columns_modeling_last = list(set(columns_modeling)-set(columns_label)) + ['fold'] + cat_columns 

In [None]:
results,models,importances,oof,feature_list = Training_Lightgbm(df_train[columns_modeling_last],params,fold_column = 'fold',target_column = 'target',cat_vars = cat_columns ,metric = 'RMSE',early_stopping = 200,max_boost_round = 8000)

In [None]:
probs = 0
for i in models:
    probs = probs + (i.predict(df_test[feature_list]))
    print('fin_predict')
y_test_pred = probs/5.0
print(f'Real: ',math.sqrt(mean_squared_error(y_test_pred,df_test['Target'].values)))