# Import

In [1]:
import os
import gc
import copy
import re
import time
import random
import string
import warnings

warnings.filterwarnings("ignore")
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim 
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader


from sklearn.metrics import mean_squared_error
from sklearn.model_selection import StratifiedKFold, KFold

from bs4 import BeautifulSoup
from tqdm import tqdm
from collections import defaultdict
import copy
from copy import deepcopy

import nltk
# from nltk.stem import SnowballStemmer, WordNetLemmatizer
from nltk.stem.snowball import SnowballStemmer
from nltk.stem.wordnet import WordNetLemmatizer

from transformers import AutoTokenizer, AutoModel, AdamW

from colorama import Fore, Back, Style
b_ = Fore.BLUE
y_ = Fore.YELLOW
sr_ = Style.RESET_ALL

# id_generator

In [2]:
def id_generator(size = 12, chars = string.ascii_lowercase + string.digits):
    return ''.join(random.SystemRandom().choice(chars) for _ in range(size))

HASH_NAME = id_generator(size = 12)
print(HASH_NAME)

spi33b520z76


# Config

In [3]:
CONFIG = {"seed": 2021,
          "epochs": 10,
#           "model_name": "GroNLP/hateBERT",
          "model_name": "roberta-base",
          "train_batch_size": 32,
          "valid_batch_size": 64,
          "max_length": 128,
          "learning_rate": 1e-4,
          "scheduler": 'CosineAnnealingLR',
          "min_lr": 1e-6,
          "T_max": 500,
          "weight_decay": 1e-6,
          "n_fold": 5,
          "n_accumulate": 1,
          "num_classes": 1,
          "margin": 0.5,
          "device": torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
          "hash_name": HASH_NAME
          }

CONFIG["tokenizer"] = AutoTokenizer.from_pretrained(CONFIG['model_name'])
CONFIG['group'] = f'{HASH_NAME}-Baseline'

Downloading:   0%|          | 0.00/481 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

# Random Seed

In [4]:
def set_seed(seed = 42):
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deteministic = True
    torch.backends.cudnn.benchmark = False
    os.environ['PYTHONHASHSEED'] = str(seed)

set_seed(CONFIG['seed'])

# Data

In [5]:
df = pd.read_csv('../input/ruddit-jigsaw-dataset/Dataset/ruddit_with_text.csv')
print(df.shape)
df.head(10)

(5838, 5)


Unnamed: 0,post_id,comment_id,txt,url,offensiveness_score
0,42g75o,cza1q49,> The difference in average earnings between m...,https://www.reddit.com/r/changemyview/comments...,-0.083
1,42g75o,cza1wdh,"The myth is that the ""gap"" is entirely based o...",https://www.reddit.com/r/changemyview/comments...,-0.022
2,42g75o,cza23qx,[deleted],https://www.reddit.com/r/changemyview/comments...,0.167
3,42g75o,cza2bw8,The assertion is that women get paid less for ...,https://www.reddit.com/r/changemyview/comments...,-0.146
4,42g75o,cza2iji,You said in the OP that's not what they're mea...,https://www.reddit.com/r/changemyview/comments...,-0.083
5,42g75o,cza2jj3,>Men and women are not payed less for the same...,https://www.reddit.com/r/changemyview/comments...,-0.042
6,42g75o,cza31e2,"> All the wage gap is, is an average of total ...",https://www.reddit.com/r/changemyview/comments...,-0.021
7,42g75o,cza321d,[deleted],https://www.reddit.com/r/changemyview/comments...,-0.021
8,42g75o,cza336e,[deleted],https://www.reddit.com/r/changemyview/comments...,0.208
9,42g75o,cza34dq,[deleted],https://www.reddit.com/r/changemyview/comments...,-0.191


In [6]:
# # Give more weight to severe toxic 
# df['severe_toxic'] = df.severe_toxic * 10
# df['toxic'] = df.toxic * 6 
# df['y'] = (df[['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']].sum(axis=1) ).astype(int)
# df['y'] = df['y']/df['y'].max()

# df = df[['comment_text', 'y']].rename(columns={'comment_text': 'text'})
# df.sample(5)

In [7]:
df = df.dropna()
df = df[['txt', 'offensiveness_score']]
df.columns = ['text', 'score']
df.head()

Unnamed: 0,text,score
0,> The difference in average earnings between m...,-0.083
1,"The myth is that the ""gap"" is entirely based o...",-0.022
2,[deleted],0.167
3,The assertion is that women get paid less for ...,-0.146
4,You said in the OP that's not what they're mea...,-0.083


In [8]:
df.score.value_counts()

-0.021    166
 0.000    154
-0.167    139
-0.125    130
-0.208    127
         ... 
-0.044      1
-0.761      1
 0.422      1
-0.543      1
 0.578      1
Name: score, Length: 288, dtype: int64

In [9]:
# import numpy as np
# import pandas as pd

# df1 = pd.read_csv('../input/ruddit-jigsaw-dataset-combined-cleaned/toxic_train.csv')
# df1 = df1[['txt', 'offensiveness_score']]
# df1.columns = ['text', 'score']
# print(df1.shape)
# df1.head()

In [10]:
# from copy import deepcopy
# print(df1.loc[df1.score!=0.0].shape)
# df2 = deepcopy(df1.loc[df1.score!=0.0])
# df2.head()

In [11]:
# from copy import deepcopy
# df3 = deepcopy(df1.loc[df1.score == 0])
# print(df3.shape)
# df3.head()

In [12]:
# df = pd.concat([df, df2],axis = 0)
# df = df.reset_index(drop=True)
# print(df.shape)
# df.tail()

# Preprocessing

In [13]:
# nltk.download('stopwords')
# STOPWORDS = nltk.corpus.stopwords.words('english')

# ## kesha_mandal's code
# def washing(comment):

#     comment = re.sub('[^a-zA-Z]', ' ', comment)
#     comment = comment.lower()
#     comment = comment.split()
#     stemmer = SnowballStemmer('english')
#     lemmatizer = WordNetLemmatizer()
#     comment = [stemmer.stem(word) for word in comment if not word in set(STOPWORDS)]
#     comment = [lemmatizer.lemmatize(word) for word in comment]
#     comment = ' '.join(comment)
#     # corpus.append(comment)
#     # return corpus
#     return comment


In [14]:
# ##  https://www.kaggle.com/manabendrarout/pytorch-roberta-ranking-baseline-jrstc-infer/notebook
# def text_cleaning(text):
    
#     template = re.compile(r'https?://\S+|www\.\S+') #Removes website links
#     text = template.sub(r'', text)
    
#     soup = BeautifulSoup(text, 'lxml') #Removes HTML tags
#     only_text = soup.get_text()
#     text = only_text
    
#     emoji_pattern = re.compile("["
#                                u"\U0001F600-\U0001F64F"  # emoticons
#                                u"\U0001F300-\U0001F5FF"  # symbols & pictographs
#                                u"\U0001F680-\U0001F6FF"  # transport & map symbols
#                                u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
#                                u"\U00002702-\U000027B0"
#                                u"\U000024C2-\U0001F251"
#                                "]+", flags=re.UNICODE)
#     text = emoji_pattern.sub(r'', text)
    
#     text = re.sub(r"[^a-zA-Z\d]", " ", text) #Remove special Charecters
#     text = re.sub(' +', ' ', text) #Remove Extra Spaces
#     text = text.strip() # remove spaces at the beginning and at the end of string

#     return text

In [15]:
## https://www.kaggle.com/kishalmandal/most-detailed-eda-tf-idf-and-logistic-reg

# df["less_toxic"] = df["less_toxic"].str.replace('fk', 'fuck')
# df["less_toxic"] = df["less_toxic"].str.replace('fuk', 'fuck')

# df.head(10)

In [16]:
# df['less_toxic'] = df['less_toxic'].apply(text_cleaning)
# df['more_toxic'] = df['more_toxic'].apply(text_cleaning)

# df.head(10)

In [17]:
# df['less_toxic'] = df['less_toxic'].apply(washing)
# df['more_toxic'] = df['more_toxic'].apply(washing)

# df.head(10)

# KFold

In [18]:
k = CONFIG['n_fold']
skf = KFold(n_splits = k, shuffle = True, random_state = CONFIG['seed'])
for fold, (k, v) in enumerate(skf.split(X = df)):
    df.loc[v, 'kfold'] = int(fold)

df['kfold'] = df['kfold'].astype(int)
df.head()

Unnamed: 0,text,score,kfold
0,> The difference in average earnings between m...,-0.083,0
1,"The myth is that the ""gap"" is entirely based o...",-0.022,0
2,[deleted],0.167,4
3,The assertion is that women get paid less for ...,-0.146,1
4,You said in the OP that's not what they're mea...,-0.083,2


# Dataset

In [19]:
class JDataset(Dataset):
    def __init__(self, df, tokenizer, max_length):
        
        self.df = df
        self.max_len = max_length
        self.tokenizer = tokenizer
        self.text = df['text'].values
        self.score = df['score']
        
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, index):
        text = self.text[index]
        inputs = self.tokenizer.encode_plus(text, truncation = True,
                                            add_special_tokens = True, 
                                            max_length = self.max_len,
                                            padding = 'max_length')
        
        ids = inputs['input_ids']
        mask = inputs['attention_mask']
        target = self.score[index]
        
        return {'ids' : torch.tensor(ids, dtype = torch.long), 
                'mask' : torch.tensor(mask, dtype = torch.long),
                'target' : torch.tensor(target, dtype = torch.float)
               }


# prepare_loader Function

In [20]:
def prepare_loaders(fold):
    
    df_train = df[df.kfold != fold].reset_index(drop = True)
    df_valid = df[df.kfold == fold].reset_index(drop = True)
    
    train_dataset = JDataset(df_train, tokenizer = CONFIG['tokenizer'], max_length = CONFIG['max_length'])
    valid_dataset = JDataset(df_valid, tokenizer = CONFIG['tokenizer'], max_length = CONFIG['max_length'])
    
    train_loader = DataLoader(train_dataset, 
                              batch_size = CONFIG['train_batch_size'],
                              num_workers = os.cpu_count(),
                              shuffle = True, 
                              pin_memory = True,
                              drop_last = True)
    
    valid_loader = DataLoader(valid_dataset, 
                              batch_size = CONFIG['train_batch_size'],
                              num_workers = os.cpu_count(),
                              shuffle = False,
                              pin_memory = True)
    
    return train_loader, valid_loader

# Model

In [21]:
class Model(nn.Module):
    
    def __init__(self, model_name):
        super().__init__()
        self.model = AutoModel.from_pretrained(model_name)
        self.dropout = nn.Dropout(p = 0.2)
        self.linear = nn.Linear(768, CONFIG['num_classes'])
#         self.sigmoid = nn.Sigmoid()
    
    def forward(self, ids, mask):
        model_out = self.model(input_ids = ids,
                               attention_mask = mask,
                               output_hidden_states = False)
        
        out = self.dropout(model_out[1])
        output = self.linear(out)
#         outputs = self.sigmoid(output)
        return output

# Loss Function

In [22]:
loss_fn = nn.MSELoss()
# loss_fn = nn.BCELoss()
# loss_fn= nn.BCEWithLogitsLoss()

# Train one Epoch Function

In [23]:
def train_one_epoch(model, optimizer, scheduler, dataloader, device, epoch):
    
    model.train()
    
    dataset_size = 0
    running_loss = 0.
    
    bar = tqdm(enumerate(dataloader), total = len(dataloader))
    
    for step, data in bar:
        
        ids = data['ids'].to(device, dtype = torch.long)
        mask = data['mask'].to(device, dtype = torch.long)
        
        targets = data['target'].to(device, dtype = torch.float)
        targets= targets.reshape(-1, 1)
        
        batch_size = ids.size(0)
        
        outputs = model(ids, mask)
        
        loss = loss_fn(outputs, targets)
        loss = loss / CONFIG['n_accumulate']
        loss.backward()
        
        if (step + 1) % CONFIG['n_accumulate'] == 0:
            optimizer.step()
            optimizer.zero_grad()
            
            if scheduler is not None:
                scheduler.step()
        
        running_loss += (loss.item() * batch_size)
        dataset_size += batch_size
        
        epoch_loss = running_loss / dataset_size
        bar.set_postfix(Epoch = epoch, Train_Loss = epoch_loss, LR = optimizer.param_groups[0]['lr'])
        
    gc.collect()
    
    return epoch_loss

# Validation Function

In [24]:
@torch.no_grad()
def valid_one_epoch(model, dataloader, device, epoch):
    
    model.eval()
    
    dataset_size = 0
    running_loss = 0.
    
    bar = tqdm(enumerate(dataloader), total = len(dataloader))
    
    for step, data in bar:
        
        ids = data['ids'].to(device, dtype = torch.long)
        mask = data['mask'].to(device, dtype = torch.long)
        
        targets = data['target'].to(device, dtype = torch.float)
        targets= targets.reshape(-1, 1)
        batch_size = ids.size(0)
        
        outputs = model(ids, mask)
#         outputs
        loss = loss_fn(outputs, targets)
        
        running_loss += (loss.item() * batch_size)
        dataset_size += batch_size
        
        epoch_loss = running_loss / dataset_size
        bar.set_postfix(Epoch = epoch, Train_Loss = epoch_loss, LR = optimizer.param_groups[0]['lr'])
        
    gc.collect()
    
    return epoch_loss

# Run Training Function

In [25]:
def run_training(model, optimizer, scheduler, device, num_epochs, fold):
    
    if torch.cuda.is_available():
        print(f"Using GPU: {torch.cuda.get_device_name()}")
        print()
        
    start = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_epoch_loss = np.inf
    history = defaultdict(list)
    
    for epoch in range(1, num_epochs + 1):
        gc.collect()
        train_epoch_loss = train_one_epoch(model, optimizer, scheduler, dataloader = train_loader, device = CONFIG['device'], epoch = epoch)
        valid_epoch_loss = valid_one_epoch(model, dataloader = valid_loader, device = CONFIG['device'], epoch = epoch)
        
        history['Train Loss'].append(train_epoch_loss)
        history['Valid Loss'].append(valid_epoch_loss)
        
        if valid_epoch_loss <= best_epoch_loss:
            print(f"{b_} Validation Loss Improved: [{best_epoch_loss} ---> {valid_epoch_loss}]")
            best_epoch_loss = valid_epoch_loss
#             run.summary['Best Loss'] = best_epoch_loss
            best_model_wts = copy.deepcopy(model.state_dict())
            PATH = f"Loss-Fold-{fold}.bin"
            torch.save(model.state_dict(), PATH)
            print(f"Model Saved{sr_}")
            
        print()
        
    end = time.time()
    time_elapsed = end - start
    print('Training complete in {:.0f}h {:.0f}m {:.0f}s'.format(
        time_elapsed // 3600, (time_elapsed % 3600) // 60, (time_elapsed % 3600) % 60))
    print("Best Loss: {:.4f}".format(best_epoch_loss))
    
    # load best model weights
    model.load_state_dict(best_model_wts)
    
    return model, history

# fetch_scheduler function

In [26]:
def fetch_scheduler(optimizer):
    if CONFIG['scheduler'] == 'CosineAnnealingLR':
        scheduler = lr_scheduler.CosineAnnealingLR(optimizer, 
                                                   T_max = CONFIG['T_max'],
                                                   eta_min = CONFIG['min_lr'])
        
    elif CONFIG['scheduler'] == 'CosineAnnealingWarmRestarts':
        scheduler = lr_scheduler.CosineAnnealingWarmRestarts(optimizer,
                                                             T_0 = CONFIG['T_0'],
                                                             eta_min = CONFIG['min_lr'])
        
    
    elif CONFIG['scheduler'] == None:
        return None
    
    return scheduler

# Let's Try Run

In [27]:
foldss = CONFIG['n_fold']

for fold in range(0, foldss):

    print(f"{y_}===== Fold: {fold} ====={sr_}")
    
    train_loader, valid_loader = prepare_loaders(fold = fold)
    
    model = Model(CONFIG['model_name'])
    model.to(CONFIG['device'])
    
    optimizer = AdamW(model.parameters(), lr = CONFIG['learning_rate'], weight_decay = CONFIG['weight_decay'])
    scheduler = fetch_scheduler(optimizer)
    
    model, history = run_training(model,
                                  optimizer,
                                  scheduler,
                                  device = CONFIG['device'],
                                  num_epochs = CONFIG['epochs'],
                                  fold = fold)
    
    del model, train_loader, valid_loader
    gc.collect()
    print()

[33m===== Fold: 0 =====[0m


Downloading:   0%|          | 0.00/501M [00:00<?, ?B/s]

Using GPU: Tesla P100-PCIE-16GB



100%|██████████| 145/145 [01:06<00:00,  2.18it/s, Epoch=1, LR=8.08e-5, Train_Loss=0.118]
100%|██████████| 37/37 [00:05<00:00,  6.85it/s, Epoch=1, LR=8.08e-5, Train_Loss=0.0882]


[34m Validation Loss Improved: [inf ---> 0.08819059553007556]
Model Saved[0m



100%|██████████| 145/145 [01:05<00:00,  2.21it/s, Epoch=2, LR=3.82e-5, Train_Loss=0.112]
100%|██████████| 37/37 [00:05<00:00,  7.14it/s, Epoch=2, LR=3.82e-5, Train_Loss=0.0771]


[34m Validation Loss Improved: [0.08819059553007556 ---> 0.07707314509643268]
Model Saved[0m



100%|██████████| 145/145 [01:05<00:00,  2.22it/s, Epoch=3, LR=5.07e-6, Train_Loss=0.0681]
100%|██████████| 37/37 [00:05<00:00,  7.03it/s, Epoch=3, LR=5.07e-6, Train_Loss=0.05]


[34m Validation Loss Improved: [0.07707314509643268 ---> 0.0500470710127321]
Model Saved[0m



100%|██████████| 145/145 [01:05<00:00,  2.20it/s, Epoch=4, LR=7.12e-6, Train_Loss=0.0489]
100%|██████████| 37/37 [00:05<00:00,  7.13it/s, Epoch=4, LR=7.12e-6, Train_Loss=0.0439]


[34m Validation Loss Improved: [0.0500470710127321 ---> 0.04393011469342937]
Model Saved[0m



100%|██████████| 145/145 [01:05<00:00,  2.21it/s, Epoch=5, LR=4.28e-5, Train_Loss=0.048]
100%|██████████| 37/37 [00:05<00:00,  7.14it/s, Epoch=5, LR=4.28e-5, Train_Loss=0.0478]





100%|██████████| 145/145 [01:05<00:00,  2.22it/s, Epoch=6, LR=8.44e-5, Train_Loss=0.0436]
100%|██████████| 37/37 [00:05<00:00,  6.48it/s, Epoch=6, LR=8.44e-5, Train_Loss=0.0391]


[34m Validation Loss Improved: [0.04393011469342937 ---> 0.039109795724284156]
Model Saved[0m



100%|██████████| 145/145 [01:06<00:00,  2.19it/s, Epoch=7, LR=9.98e-5, Train_Loss=0.0438]
100%|██████████| 37/37 [00:05<00:00,  7.15it/s, Epoch=7, LR=9.98e-5, Train_Loss=0.0485]





100%|██████████| 145/145 [01:05<00:00,  2.21it/s, Epoch=8, LR=7.7e-5, Train_Loss=0.047]
100%|██████████| 37/37 [00:05<00:00,  7.00it/s, Epoch=8, LR=7.7e-5, Train_Loss=0.0402]





100%|██████████| 145/145 [01:05<00:00,  2.21it/s, Epoch=9, LR=3.37e-5, Train_Loss=0.0276]
100%|██████████| 37/37 [00:05<00:00,  7.09it/s, Epoch=9, LR=3.37e-5, Train_Loss=0.0457]





100%|██████████| 145/145 [01:06<00:00,  2.20it/s, Epoch=10, LR=3.42e-6, Train_Loss=0.0216]
100%|██████████| 37/37 [00:05<00:00,  6.56it/s, Epoch=10, LR=3.42e-6, Train_Loss=0.0375]


[34m Validation Loss Improved: [0.039109795724284156 ---> 0.03746478637195613]
Model Saved[0m

Training complete in 0h 12m 5s
Best Loss: 0.0375

[33m===== Fold: 1 =====[0m
Using GPU: Tesla P100-PCIE-16GB



100%|██████████| 145/145 [01:05<00:00,  2.21it/s, Epoch=1, LR=8.08e-5, Train_Loss=0.0706]
100%|██████████| 37/37 [00:05<00:00,  6.95it/s, Epoch=1, LR=8.08e-5, Train_Loss=0.0396]


[34m Validation Loss Improved: [inf ---> 0.03961909751165403]
Model Saved[0m



100%|██████████| 145/145 [01:05<00:00,  2.21it/s, Epoch=2, LR=3.82e-5, Train_Loss=0.0347]
100%|██████████| 37/37 [00:05<00:00,  7.12it/s, Epoch=2, LR=3.82e-5, Train_Loss=0.0324]


[34m Validation Loss Improved: [0.03961909751165403 ---> 0.03241206674951397]
Model Saved[0m



100%|██████████| 145/145 [01:05<00:00,  2.21it/s, Epoch=3, LR=5.07e-6, Train_Loss=0.0286]
100%|██████████| 37/37 [00:05<00:00,  6.90it/s, Epoch=3, LR=5.07e-6, Train_Loss=0.0282]


[34m Validation Loss Improved: [0.03241206674951397 ---> 0.028225470334291458]
Model Saved[0m



100%|██████████| 145/145 [01:05<00:00,  2.21it/s, Epoch=4, LR=7.12e-6, Train_Loss=0.0225]
100%|██████████| 37/37 [00:05<00:00,  6.98it/s, Epoch=4, LR=7.12e-6, Train_Loss=0.0285]





100%|██████████| 145/145 [01:06<00:00,  2.19it/s, Epoch=5, LR=4.28e-5, Train_Loss=0.0231]
100%|██████████| 37/37 [00:05<00:00,  7.10it/s, Epoch=5, LR=4.28e-5, Train_Loss=0.0293]





100%|██████████| 145/145 [01:06<00:00,  2.19it/s, Epoch=6, LR=8.44e-5, Train_Loss=0.0269]
100%|██████████| 37/37 [00:05<00:00,  6.86it/s, Epoch=6, LR=8.44e-5, Train_Loss=0.0356]





100%|██████████| 145/145 [01:06<00:00,  2.20it/s, Epoch=7, LR=9.98e-5, Train_Loss=0.0246]
100%|██████████| 37/37 [00:05<00:00,  7.15it/s, Epoch=7, LR=9.98e-5, Train_Loss=0.0292]





100%|██████████| 145/145 [01:06<00:00,  2.20it/s, Epoch=8, LR=7.7e-5, Train_Loss=0.0193]
100%|██████████| 37/37 [00:05<00:00,  6.66it/s, Epoch=8, LR=7.7e-5, Train_Loss=0.0307]





100%|██████████| 145/145 [01:06<00:00,  2.19it/s, Epoch=9, LR=3.37e-5, Train_Loss=0.0149]
100%|██████████| 37/37 [00:05<00:00,  6.96it/s, Epoch=9, LR=3.37e-5, Train_Loss=0.0317]





100%|██████████| 145/145 [01:05<00:00,  2.20it/s, Epoch=10, LR=3.42e-6, Train_Loss=0.011]
100%|██████████| 37/37 [00:05<00:00,  7.10it/s, Epoch=10, LR=3.42e-6, Train_Loss=0.0294]



Training complete in 0h 12m 3s
Best Loss: 0.0282

[33m===== Fold: 2 =====[0m
Using GPU: Tesla P100-PCIE-16GB



100%|██████████| 145/145 [01:05<00:00,  2.20it/s, Epoch=1, LR=8.08e-5, Train_Loss=0.0626]
100%|██████████| 37/37 [00:05<00:00,  6.90it/s, Epoch=1, LR=8.08e-5, Train_Loss=0.0389]


[34m Validation Loss Improved: [inf ---> 0.03894520563724106]
Model Saved[0m



100%|██████████| 145/145 [01:06<00:00,  2.19it/s, Epoch=2, LR=3.82e-5, Train_Loss=0.0332]
100%|██████████| 37/37 [00:05<00:00,  6.97it/s, Epoch=2, LR=3.82e-5, Train_Loss=0.0306]


[34m Validation Loss Improved: [0.03894520563724106 ---> 0.030592967233020966]
Model Saved[0m



100%|██████████| 145/145 [01:06<00:00,  2.19it/s, Epoch=3, LR=5.07e-6, Train_Loss=0.024]
100%|██████████| 37/37 [00:05<00:00,  6.58it/s, Epoch=3, LR=5.07e-6, Train_Loss=0.0283]


[34m Validation Loss Improved: [0.030592967233020966 ---> 0.02832654488515364]
Model Saved[0m



100%|██████████| 145/145 [01:06<00:00,  2.20it/s, Epoch=4, LR=7.12e-6, Train_Loss=0.0197]
100%|██████████| 37/37 [00:05<00:00,  7.03it/s, Epoch=4, LR=7.12e-6, Train_Loss=0.0285]





100%|██████████| 145/145 [01:06<00:00,  2.19it/s, Epoch=5, LR=4.28e-5, Train_Loss=0.0209]
100%|██████████| 37/37 [00:05<00:00,  7.06it/s, Epoch=5, LR=4.28e-5, Train_Loss=0.0275]


[34m Validation Loss Improved: [0.02832654488515364 ---> 0.0275099923808689]
Model Saved[0m



100%|██████████| 145/145 [01:06<00:00,  2.18it/s, Epoch=6, LR=8.44e-5, Train_Loss=0.0263]
100%|██████████| 37/37 [00:05<00:00,  6.78it/s, Epoch=6, LR=8.44e-5, Train_Loss=0.0597]





100%|██████████| 145/145 [01:06<00:00,  2.18it/s, Epoch=7, LR=9.98e-5, Train_Loss=0.0301]
100%|██████████| 37/37 [00:05<00:00,  7.00it/s, Epoch=7, LR=9.98e-5, Train_Loss=0.0409]





100%|██████████| 145/145 [01:06<00:00,  2.19it/s, Epoch=8, LR=7.7e-5, Train_Loss=0.0232]
100%|██████████| 37/37 [00:05<00:00,  7.10it/s, Epoch=8, LR=7.7e-5, Train_Loss=0.0305]





100%|██████████| 145/145 [01:06<00:00,  2.18it/s, Epoch=9, LR=3.37e-5, Train_Loss=0.0158]
100%|██████████| 37/37 [00:05<00:00,  7.04it/s, Epoch=9, LR=3.37e-5, Train_Loss=0.0285]





100%|██████████| 145/145 [01:07<00:00,  2.16it/s, Epoch=10, LR=3.42e-6, Train_Loss=0.0123]
100%|██████████| 37/37 [00:05<00:00,  7.03it/s, Epoch=10, LR=3.42e-6, Train_Loss=0.0284]



Training complete in 0h 12m 9s
Best Loss: 0.0275

[33m===== Fold: 3 =====[0m
Using GPU: Tesla P100-PCIE-16GB



100%|██████████| 145/145 [01:06<00:00,  2.18it/s, Epoch=1, LR=8.08e-5, Train_Loss=0.0681]
100%|██████████| 37/37 [00:05<00:00,  6.59it/s, Epoch=1, LR=8.08e-5, Train_Loss=0.0339]


[34m Validation Loss Improved: [inf ---> 0.03393202022535227]
Model Saved[0m



100%|██████████| 145/145 [01:06<00:00,  2.20it/s, Epoch=2, LR=3.82e-5, Train_Loss=0.0364]
100%|██████████| 37/37 [00:05<00:00,  6.41it/s, Epoch=2, LR=3.82e-5, Train_Loss=0.0291]


[34m Validation Loss Improved: [0.03393202022535227 ---> 0.029087875114998045]
Model Saved[0m



100%|██████████| 145/145 [01:06<00:00,  2.19it/s, Epoch=3, LR=5.07e-6, Train_Loss=0.0238]
100%|██████████| 37/37 [00:05<00:00,  6.86it/s, Epoch=3, LR=5.07e-6, Train_Loss=0.0258]


[34m Validation Loss Improved: [0.029087875114998045 ---> 0.02576421913726777]
Model Saved[0m



100%|██████████| 145/145 [01:06<00:00,  2.18it/s, Epoch=4, LR=7.12e-6, Train_Loss=0.02]
100%|██████████| 37/37 [00:05<00:00,  7.02it/s, Epoch=4, LR=7.12e-6, Train_Loss=0.0274]





100%|██████████| 145/145 [01:06<00:00,  2.18it/s, Epoch=5, LR=4.28e-5, Train_Loss=0.0211]
100%|██████████| 37/37 [00:05<00:00,  7.02it/s, Epoch=5, LR=4.28e-5, Train_Loss=0.0338]





100%|██████████| 145/145 [01:07<00:00,  2.15it/s, Epoch=6, LR=8.44e-5, Train_Loss=0.0219]
100%|██████████| 37/37 [00:05<00:00,  7.05it/s, Epoch=6, LR=8.44e-5, Train_Loss=0.0312]





100%|██████████| 145/145 [01:06<00:00,  2.17it/s, Epoch=7, LR=9.98e-5, Train_Loss=0.0249]
100%|██████████| 37/37 [00:05<00:00,  7.05it/s, Epoch=7, LR=9.98e-5, Train_Loss=0.0294]





100%|██████████| 145/145 [01:06<00:00,  2.18it/s, Epoch=8, LR=7.7e-5, Train_Loss=0.0208]
100%|██████████| 37/37 [00:05<00:00,  7.17it/s, Epoch=8, LR=7.7e-5, Train_Loss=0.0411]





100%|██████████| 145/145 [01:06<00:00,  2.18it/s, Epoch=9, LR=3.37e-5, Train_Loss=0.0154]
100%|██████████| 37/37 [00:05<00:00,  6.86it/s, Epoch=9, LR=3.37e-5, Train_Loss=0.0278]





100%|██████████| 145/145 [01:06<00:00,  2.17it/s, Epoch=10, LR=3.42e-6, Train_Loss=0.0105]
100%|██████████| 37/37 [00:05<00:00,  7.16it/s, Epoch=10, LR=3.42e-6, Train_Loss=0.0276]



Training complete in 0h 12m 11s
Best Loss: 0.0258

[33m===== Fold: 4 =====[0m
Using GPU: Tesla P100-PCIE-16GB



100%|██████████| 145/145 [01:07<00:00,  2.16it/s, Epoch=1, LR=8.08e-5, Train_Loss=0.0658]
100%|██████████| 37/37 [00:05<00:00,  7.16it/s, Epoch=1, LR=8.08e-5, Train_Loss=0.0335]


[34m Validation Loss Improved: [inf ---> 0.0334996381565456]
Model Saved[0m



100%|██████████| 145/145 [01:06<00:00,  2.17it/s, Epoch=2, LR=3.82e-5, Train_Loss=0.0332]
100%|██████████| 37/37 [00:05<00:00,  7.16it/s, Epoch=2, LR=3.82e-5, Train_Loss=0.0304]


[34m Validation Loss Improved: [0.0334996381565456 ---> 0.030379997025265486]
Model Saved[0m



100%|██████████| 145/145 [01:06<00:00,  2.17it/s, Epoch=3, LR=5.07e-6, Train_Loss=0.0228]
100%|██████████| 37/37 [00:05<00:00,  6.93it/s, Epoch=3, LR=5.07e-6, Train_Loss=0.027]


[34m Validation Loss Improved: [0.030379997025265486 ---> 0.026985657705439394]
Model Saved[0m



100%|██████████| 145/145 [01:07<00:00,  2.16it/s, Epoch=4, LR=7.12e-6, Train_Loss=0.0193]
100%|██████████| 37/37 [00:05<00:00,  7.04it/s, Epoch=4, LR=7.12e-6, Train_Loss=0.0273]





100%|██████████| 145/145 [01:06<00:00,  2.17it/s, Epoch=5, LR=4.28e-5, Train_Loss=0.0201]
100%|██████████| 37/37 [00:05<00:00,  7.11it/s, Epoch=5, LR=4.28e-5, Train_Loss=0.0276]





100%|██████████| 145/145 [01:06<00:00,  2.18it/s, Epoch=6, LR=8.44e-5, Train_Loss=0.0234]
100%|██████████| 37/37 [00:05<00:00,  7.02it/s, Epoch=6, LR=8.44e-5, Train_Loss=0.0339]





100%|██████████| 145/145 [01:06<00:00,  2.19it/s, Epoch=7, LR=9.98e-5, Train_Loss=0.0242]
100%|██████████| 37/37 [00:05<00:00,  7.16it/s, Epoch=7, LR=9.98e-5, Train_Loss=0.0297]





100%|██████████| 145/145 [01:06<00:00,  2.18it/s, Epoch=8, LR=7.7e-5, Train_Loss=0.0186]
100%|██████████| 37/37 [00:05<00:00,  6.85it/s, Epoch=8, LR=7.7e-5, Train_Loss=0.0393]





100%|██████████| 145/145 [01:06<00:00,  2.19it/s, Epoch=9, LR=3.37e-5, Train_Loss=0.0139]
100%|██████████| 37/37 [00:05<00:00,  6.61it/s, Epoch=9, LR=3.37e-5, Train_Loss=0.0282]





100%|██████████| 145/145 [01:05<00:00,  2.20it/s, Epoch=10, LR=3.42e-6, Train_Loss=0.00992]
100%|██████████| 37/37 [00:05<00:00,  6.46it/s, Epoch=10, LR=3.42e-6, Train_Loss=0.0287]



Training complete in 0h 12m 11s
Best Loss: 0.0270



In [28]:
print("Training Over")

Training Over
