In [None]:
!pip install torchtext==0.11.0
!pip install torch==1.10.0
!pip install -U deep_translator

In [2]:
# Custom BERT model (https://www.kaggle.com/code/shineiarakawa/pytorch-bert-by-transformers)
# Feature engineering basic (https://github.com/codemunic/Natural-Language-Inference/blob/main/bert_nli_pytorch.ipynb)

import os
import re
import math
import time
import datetime

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from tqdm import tqdm
import torch
import torch.nn as nn
import torch.optim as optim
from torchtext.legacy import data
from transformers import BertTokenizer, BertModel, BertConfig, AdamW, get_constant_schedule_with_warmup
from deep_translator import GoogleTranslator
from dask import bag, diagnostics

SEED = 8071
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

df_train = pd.read_csv('../input/contradictory-my-dear-watson/train.csv')
df_train

Unnamed: 0,id,premise,hypothesis,lang_abv,language,label
0,5130fd2cb5,and these comments were considered in formulat...,The rules developed in the interim were put to...,en,English,0
1,5b72532a0b,These are issues that we wrestle with in pract...,Practice groups are not permitted to work on t...,en,English,2
2,3931fbe82a,Des petites choses comme celles-là font une di...,J'essayais d'accomplir quelque chose.,fr,French,0
3,5622f0c60b,you know they can't really defend themselves l...,They can't defend themselves because of their ...,en,English,0
4,86aaa48b45,ในการเล่นบทบาทสมมุติก็เช่นกัน โอกาสที่จะได้แสด...,เด็กสามารถเห็นได้ว่าชาติพันธุ์แตกต่างกันอย่างไร,th,Thai,1
...,...,...,...,...,...,...
12115,2b78e2a914,The results of even the most well designed epi...,All studies have the same amount of uncertaint...,en,English,2
12116,7e9943d152,But there are two kinds of the pleasure of do...,But there are two kinds of the pleasure of doi...,en,English,0
12117,5085923e6c,The important thing is to realize that it's wa...,"It cannot be moved, now or ever.",en,English,2
12118,fc8e2fd1fe,At the west end is a detailed model of the who...,The model temple complex is at the east end.,en,English,2


In [3]:
class FeatureEngineeringNLI:

    TRIM_CHARACTER = 128

    def __init__(self, df_path: str, is_train: bool):
        self.df_path = df_path
        self.df = pd.read_csv(self.df_path)
        self.is_train = is_train
        self.tokenizer = BertTokenizer.from_pretrained('../input/bert-base-multilingual-cased/bert-base-multilingual-cased', local_files_only=True)

    def tokenize_bert(self, sentence):
        tokens = self.tokenizer.tokenize(sentence)
        return tokens
    
    def trim_sentence(self, sentence):
        try:
            sent = re.sub(r'\(|\)|\[|\]|\{|\}', '', sentence)
            sent = sent.split()
            sent = sent[:self.TRIM_CHARACTER]
            return " ".join(sent)
        except:
            return sentence
    
    def premise_token_type(self, sentence):
        return [0] * len(sentence)
    
    def hypothesis_token_type(self, sentence):
        return [1] * len(sentence)

    def combine_sequence(self, sentence):
        return " ".join(sentence)

    def combine_mask(self, mask):
        mask = [str(m) for m in mask]
        return " ".join(mask)
    
    #TODO: use a dask dataframe instead of all this
    def trans_parallel(self, df, translator):
        premise_bag = bag.from_sequence(df.premise.tolist()).map(translator.translate)
        hypo_bag =  bag.from_sequence(df.hypothesis.tolist()).map(translator.translate)
        with diagnostics.ProgressBar():
            premises = premise_bag.compute()
            hypos = hypo_bag.compute()
        df[['premise', 'hypothesis']] = list(zip(premises, hypos))
        return df
    
    def english_translate(self, df):
        translator = GoogleTranslator(target='en')
        df[df.lang_abv != "en"] =  df.loc[df.lang_abv != "en"].copy().pipe(self.trans_parallel, translator)
        df['lang_abv'] = ['en']*len(df)
        df['language'] = ['English']*len(df)
        return df
    
    def process_data(self):
        self.df = self.english_translate(self.df)
        self.df['premise_trim'] = self.df['premise'].apply(self.trim_sentence)
        self.df['hypothesis_trim'] = self.df['hypothesis'].apply(self.trim_sentence)

        self.df['premise_format'] = '[CLS] '  + self.df['premise_trim'] + ' [SEP] '
        self.df['hypothesis_format'] = self.df['hypothesis_trim'] + ' [SEP]'

        self.df['premise_tokenizer'] = self.df['premise_format'].apply(self.tokenize_bert)
        self.df['hypothesis_tokenizer'] = self.df['hypothesis_format'].apply(self.tokenize_bert)
        self.df['sequence'] = self.df['premise_tokenizer'] + self.df['hypothesis_tokenizer']

        self.df['premise_token_type'] = self.df['premise_tokenizer'].apply(self.premise_token_type)
        self.df['hypothesis_token_type'] = self.df['hypothesis_tokenizer'].apply(self.hypothesis_token_type)
        self.df['token_type'] = self.df['premise_token_type'] + self.df['hypothesis_token_type']

        self.df['attention_mask'] = self.df['sequence'].apply(self.hypothesis_token_type)
        self.df['attention_mask'] = self.df['attention_mask'].apply(self.combine_mask)
        self.df['token_type'] = self.df['token_type'].apply(self.combine_mask)
        self.df['sequence'] = self.df['sequence'].apply(self.combine_sequence)
        
        if self.is_train:
            self.df = self.df[['label', 'sequence', 'attention_mask', 'token_type']]
            random_state = np.random.RandomState()
            train = self.df.sample(frac=0.8, random_state=random_state)
            train.to_csv('train_feature_engineering.csv', index=False)
            valid = self.df.loc[~self.df.index.isin(train.index)]
            valid.to_csv('valid_feature_engineering.csv', index=False)
        else:
            self.df = self.df[['sequence', 'attention_mask', 'token_type']]
            self.df.to_csv('test_feature_engineering.csv', index=False)

In [4]:
class BERTNLIModel(nn.Module):
    def __init__(self, output_dim, dropout_rate):
        super(BERTNLIModel, self).__init__()
        self.bertnli_model = SingleBERT(use_pooling=False)
        self.bert_config = self.bertnli_model.get_config()
        self.hidden_size = self.bert_config.hidden_size
        self.output_dim = output_dim
        self.classifier = Classifier(hidden_size=self.hidden_size, num_classes=self.output_dim, dropout_rate=dropout_rate)

    def forward(self, sequence, attention_mask, token_type):
        output = self.bertnli_model(input=sequence, attention_mask=attention_mask, token_type_ids=token_type)
        output = self.classifier(output)
        return output

    def count_parameters(self):
        total_params = sum(p.numel() for p in self.bertnli_model.parameters() if p.requires_grad)
        return print(f'The model has {total_params:,} trainable parameters')

class SingleBERT(nn.Module):
    def __init__(self, use_pooling=False):
        super(SingleBERT, self).__init__()
        self.use_pooling = use_pooling
        self.bert_config = BertConfig.from_pretrained('../input/bert-base-multilingual-cased/bert-base-multilingual-cased', local_files_only=True)
        self.bert_model = BertModel.from_pretrained('../input/bert-base-multilingual-cased/bert-base-multilingual-cased', local_files_only=True)

        for param in self.bert_model.parameters():
            param.requires_grad = True

    def forward(self, input, attention_mask, token_type_ids):
        last_layer_output, pooling_output = self.bert_model(input, attention_mask=attention_mask, token_type_ids=token_type_ids, return_dict=False)
        if self.use_pooling:
            return pooling_output
        return last_layer_output[:, 0, :]

    def get_config(self):
        return self.bert_config

        
class Classifier(nn.Module):
    def __init__(self, hidden_size, num_classes, dropout_rate):
        super(Classifier, self).__init__()
        self.dropout1 = nn.Dropout(p=dropout_rate)
        self.linear1 = nn.Linear(in_features=hidden_size, out_features=hidden_size)
        self.batchNorm = nn.BatchNorm1d(num_features=hidden_size, eps=1e-05, momentum=0.1, affine=False)
        self.activation = nn.LeakyReLU(negative_slope=0.2, inplace=True)
        self.dropout2 = nn.Dropout(p=dropout_rate)
        self.linear2 = nn.Linear(in_features=hidden_size, out_features=num_classes)

        nn.init.normal_(self.linear1.weight, std=0.04)
        nn.init.normal_(self.linear2.weight, mean=0.5, std=0.04)
        nn.init.normal_(self.linear1.bias, 0)
        nn.init.normal_(self.linear2.bias, 0)

    def forward(self, input):
        output = self.dropout1(input)
        output = self.linear1(output)
        output = self.batchNorm(output)
        output = self.activation(output)
        output = self.dropout2(output)
        output = self.linear2(output)
        return output


In [5]:
MAX_INPUT_LENGTH = 512

def convert_to_int(tok_ids):
    tok_ids = [int(x) for x in tok_ids]
    return tok_ids

def split_and_cut(sentence):
    tokens = sentence.strip().split(" ")
    tokens = tokens[:MAX_INPUT_LENGTH]
    return tokens

def save_logs(logs, NUM_EPOCHS):
    time_now = datetime.datetime.now()
    time_info = f'{time_now.year}-{time_now.month}-{time_now.day}_{time_now.hour}-{time_now.minute}-{time_now.second}'

    save_logs_path = './logs/' + str(time_info)
    if not os.path.exists(save_logs_path):
        os.makedirs(save_logs_path)

    save_path_loss = save_logs_path  + '/loss' + '.jpg'
    save_path_acurracy = save_logs_path  + '/accuracy' + '.jpg'

    x = [num for num in range(NUM_EPOCHS)]
    epoch_train_losses = logs[0].tolist()
    epoch_eval_losses = logs[1].tolist()
    epoch_train_accuracies = logs[2].tolist()
    epoch_eval_accuracies = logs[3].tolist()

    # Plot Loss
    plt.plot(x, epoch_train_losses, color='red', label='Train Loss')
    plt.plot(x, epoch_eval_losses, color='blue', label='Eval Loss')

    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend(loc='upper right')
    plt.grid()

    plt.savefig(save_path_loss)
    plt.clf()

    # Plot Accuracy
    plt.plot(x, epoch_train_accuracies, color='red', label='Train Accuracy')
    plt.plot(x, epoch_eval_accuracies, color='blue', label='Eval Accuracy')

    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend(loc='lower right')
    plt.grid()
 
    plt.savefig(save_path_acurracy)
    
    return None

In [12]:
class BERTNLITrainer:

    BATCH_SIZE = 16
    # HIDDEN_DIM = 512
    DROPOUT_RATE = 0.3
    OUTPUT_DIM = 3
    LEARNING_RATE = 2e-5
    WEIGHT_DECAY = 0.001
    EPSILON = 1e-6
    WARMUP_PERCENT = 0.2
    NUM_EPOCHS = 30
    SAVE_PATH_MODEL = './weight'
    PATIENCE = 10
    best_val_loss = None
    counter = 0

    def __init__(self):
        super(BERTNLITrainer, self).__init__()
        self.tokenizer = BertTokenizer.from_pretrained('../input/bert-base-multilingual-cased/bert-base-multilingual-cased', local_files_only=True)
        self.label_data = data.LabelField()
        self.text_data = data.Field(batch_first = True,
                        use_vocab = False,
                        tokenize = split_and_cut,
                        preprocessing = self.tokenizer.convert_tokens_to_ids,
                        pad_token = self.tokenizer.pad_token_id,
                        unk_token = self.tokenizer.unk_token_id)
        self.attention_data = data.Field(batch_first = True,
                        use_vocab = False,
                        tokenize = split_and_cut,
                        preprocessing = convert_to_int,
                        pad_token = self.tokenizer.pad_token_id)
        self.token_type_data = data.Field(batch_first = True,
                        use_vocab = False,
                        tokenize = split_and_cut,
                        preprocessing = convert_to_int,
                        pad_token = 1)
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def setup_data_training(self):
        fields = [
            ('label', self.label_data), \
            ('sequence', self.text_data), \
            ('attention_mask', self.attention_data), \
            ('token_type', self.token_type_data) \
        ]
        self.train_data, self.valid_data = data.TabularDataset.splits(
            path='../input/contradictory-my-dear-watson-feature-engineering', 
            train='train_feature_engineering.csv',
            validation ='valid_feature_engineering.csv',
            # test='test_feature_engineering.csv',
            format='csv',
            fields=fields,
            skip_header=True
        )

        self.label_data.build_vocab(self.train_data)
        self.train_iterator = data.BucketIterator(
            (self.train_data), 
            batch_size = self.BATCH_SIZE,
            sort_key = lambda x: len(x.sequence),
            sort=False,
            shuffle=True,
            sort_within_batch = False, 
            device = self.device)
        
        self.valid_iterator = data.BucketIterator(
            (self.valid_data), 
            batch_size = self.BATCH_SIZE,
            sort_key = lambda x: len(x.sequence),
            sort=False,
            shuffle=False,
            sort_within_batch = False, 
            device = self.device)
        return self.train_iterator, self.valid_iterator

    def setup_data_inference(self):
        fields = [
            ('sequence', self.text_data), \
            ('attention_mask', self.attention_data), \
            ('token_type', self.token_type_data) \
            ]
        self.test_data = data.TabularDataset.splits(
            path='/kaggle/working',
            test='test_feature_engineering.csv',
            format='csv',
            fields=fields,
            skip_header=True
        )[0]

        self.test_iterator = data.BucketIterator(
            (self.test_data), 
            batch_size = self.BATCH_SIZE,
            sort_key = lambda x: len(x.sequence),
            sort=False,
            shuffle=False,
            sort_within_batch = False, 
            device = self.device)
        return self.test_iterator

    def __len__(self):
        print(f"Number of training data: {len(self.train_data)}")
        print(f"Number of validation data: {len(self.valid_data)}")
        # print(f"Number of testing data: {len(self.test_data)}")
        return len(self.train_data), len(self.valid_data)# , len(self.test_data)

    def compute_accuracy(self, preds, ground_truth):
        max_preds = preds.argmax(dim = 1, keepdim = True)
        correct = (max_preds.squeeze(1)==ground_truth).float()
        return correct.sum() / len(ground_truth)

    def epoch_time(self, start_time, end_time):
        elapsed_time = end_time - start_time
        elapsed_mins = int(elapsed_time / 60)
        elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
        return elapsed_mins, elapsed_secs

    def setup_training(self):
        self.model = BERTNLIModel(self.OUTPUT_DIM, self.DROPOUT_RATE)
        self.model.count_parameters()
        self.model.to(self.device)
        self.criterion = nn.CrossEntropyLoss().to(self.device)
        # self.optimizer = optim.Adam(self.model.parameters(), lr=self.LEARNING_RATE, eps=self.EPSILON)
        self.optimizer = AdamW(self.model.parameters(), lr=self.LEARNING_RATE, weight_decay=self.WEIGHT_DECAY, eps=self.EPSILON, correct_bias=False)
        total_steps = math.ceil(self.NUM_EPOCHS*len(self.train_data)*1./self.BATCH_SIZE)
        warmup_steps = int(total_steps*self.WARMUP_PERCENT)
        self.scheduler = get_constant_schedule_with_warmup(self.optimizer, num_warmup_steps=warmup_steps)
        return self.model, self.criterion, self.optimizer, self.scheduler
    
    def early_stopping(self, val_loss, model):
        current_loss = val_loss
        early_stopping = False
        
        if self.best_val_loss is None:
            self.best_val_loss = current_loss
            if not os.path.exists(self.SAVE_PATH_MODEL):
                os.makedirs(self.SAVE_PATH_MODEL)
            torch.save(model.state_dict(), self.SAVE_PATH_MODEL + '/bert-nli.pt')
        elif current_loss > self.best_val_loss:
            self.counter += 1
            print(f'[Early Stopping Counter]: {self.counter} out of {self.PATIENCE}')
            if self.counter >= self.PATIENCE:
                early_stopping = True
        else:
            self.best_val_loss = current_loss
            if not os.path.exists(self.SAVE_PATH_MODEL):
                os.makedirs(self.SAVE_PATH_MODEL)
            torch.save(model.state_dict(), self.SAVE_PATH_MODEL + '/bert-nli.pt')
            self.counter = 0
        return early_stopping

    def train(self, model, iterator, criterion, optimizer, scheduler):
        epoch_loss = 0
        epoch_accuracy = 0
        model.train()
        
        for batch in iterator:
            optimizer.zero_grad()
            torch.cuda.empty_cache()
            
            sequence = batch.sequence
            attn_mask = batch.attention_mask
            token_type = batch.token_type
            label = batch.label
            
            predictions = model(sequence, attn_mask, token_type)
            loss = criterion(predictions, label)
            accuracy = self.compute_accuracy(predictions, label)

            loss.backward()
            optimizer.step()
            scheduler.step()
            
            epoch_loss += loss.item()
            epoch_accuracy += accuracy.item()
        return epoch_loss / len(iterator), epoch_accuracy / len(iterator)
    
    def evaluate(self, model, iterator, criterion):
        epoch_loss = 0
        epoch_accuracy = 0
        model.eval()
        
        with torch.no_grad():
            for batch in iterator:
                sequence = batch.sequence
                attn_mask = batch.attention_mask
                token_type = batch.token_type
                labels = batch.label
                            
                predictions = model(sequence, attn_mask, token_type)
                loss = criterion(predictions, labels)
                accuracy = self.compute_accuracy(predictions, labels)
                
                epoch_loss += loss.item()
                epoch_accuracy += accuracy.item()
        return epoch_loss / len(iterator), epoch_accuracy / len(iterator)
    
    def predict_submission(self, iterator):
        self.model = BERTNLIModel(self.OUTPUT_DIM, self.DROPOUT_RATE)
        self.model.to(self.device)
        self.model.load_state_dict(torch.load('../input/bert-nli/bert-nli.pt'))
        self.model.eval()
        predictions = []
        df_submission = pd.read_csv('../input/contradictory-my-dear-watson/sample_submission.csv')
        with torch.no_grad():
            for batch in iterator:
                sequence = batch.sequence
                attn_mask = batch.attention_mask
                token_type = batch.token_type
                            
                prediction = self.model(sequence, attn_mask, token_type)
                _, prediction = torch.max(prediction, dim=1)
                prediction = prediction.flatten().tolist()
                predictions += prediction
        
        df_submission['prediction'] = predictions
        df_submission.to_csv('submission.csv', index=False)
    
    def predict_submission_df(self):
        df_submission = pd.read_csv('../input/contradictory-my-dear-watson/sample_submission.csv')
        df_test = pd.read_csv('../input/contradictory-my-dear-watson/test.csv')
        tokenizer = BertTokenizer.from_pretrained('../input/bert-base-multilingual-cased/bert-base-multilingual-cased', local_files_only=True)
        self.model = BERTNLIModel(self.OUTPUT_DIM, self.DROPOUT_RATE)
        self.model.to(self.device)
        self.model.load_state_dict(torch.load('../input/bert-nli/bert-nli.pt'))
        self.model.eval()
        predictions = []
        premises = df_test['premise'].to_list()
        hypothesises = df_test['hypothesis'].to_list()
        
        for i in range(len(premises)):
            premise = '[CLS] ' + premises[i] + ' [SEP] '
            hypothesis = hypothesises[i] + ' [SEP]'

            premise_token = tokenizer.tokenize(premise)
            hypothesis_token = tokenizer.tokenize(hypothesis)

            premise_type = [0] * len(premise_token)
            hypothesis_type = [1] * len(hypothesis_token)

            indexes = premise_token + hypothesis_token
            indexes = tokenizer.convert_tokens_to_ids(indexes)
            indexes_type = premise_type + hypothesis_type
            attn_mask = [1] * len(indexes)
            indexes = torch.LongTensor(indexes).unsqueeze(0).to(self.device)
            indexes_type = torch.LongTensor(indexes_type).unsqueeze(0).to(self.device)
            attn_mask = torch.LongTensor(attn_mask).unsqueeze(0).to(self.device)

            prediction = self.model(indexes, attn_mask, indexes_type)
            prediction = prediction.argmax(dim=-1).item()
            predictions.append(prediction)
        
        df_submission['prediction'] = predictions
        df_submission.to_csv('submission.csv', index=False)
    
    def predict_inference(self, premise, hypothesis):
        tokenizer = BertTokenizer.from_pretrained('../input/bert-base-multilingual-cased/bert-base-multilingual-cased', local_files_only=True)
        label = ['entailment', 'neutral', 'contradiction']
        self.model.load_state_dict(torch.load('../input/bert-nli/bert-nli.pt'))
        self.model.eval()
        
        premise = '[CLS] ' + premise + ' [SEP] '
        hypothesis = hypothesis + ' [SEP]'

        premise_token = tokenizer.tokenize(premise)
        hypothesis_token = tokenizer.tokenize(hypothesis)

        premise_type = [0] * len(premise_token)
        hypothesis_type = [1] * len(hypothesis_token)

        indexes = premise_token + hypothesis_token
        indexes = tokenizer.convert_tokens_to_ids(indexes)
        indexes_type = premise_type + hypothesis_type
        attn_mask = [1] * len(indexes)
        indexes = torch.LongTensor(indexes).unsqueeze(0).to(self.device)
        indexes_type = torch.LongTensor(indexes_type).unsqueeze(0).to(self.device)
        attn_mask = torch.LongTensor(attn_mask).unsqueeze(0).to(self.device)
        
        prediction = self.model(indexes, attn_mask, indexes_type)
        prediction = prediction.argmax(dim=-1).item()
        return label[prediction]

    def train_process(self, model, train_iterator, valid_iterator, optimizer, criterion, scheduler):
        start_time = time.time()
        early_stopping = False
        
        # For progress record.
        train_loss_logs = np.zeros(shape=self.NUM_EPOCHS, dtype=np.float)
        eval_loss_logs = np.zeros(shape=self.NUM_EPOCHS, dtype=np.float)
        train_accuracy_logs = np.zeros(shape=self.NUM_EPOCHS, dtype=np.float)
        eval_accuracy_logs = np.zeros(shape=self.NUM_EPOCHS, dtype=np.float)

        for epoch in tqdm(range(self.NUM_EPOCHS)):
            train_loss, train_acc = self.train(model, train_iterator, criterion, optimizer, scheduler)
            valid_loss, valid_acc = self.evaluate(model, valid_iterator, criterion)
            
            end_time = time.time()
            epoch_mins, epoch_secs = self.epoch_time(start_time, end_time)
            
            print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
            print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
            print(f'\tValid Loss: {valid_loss:.3f} | Valid Acc: {valid_acc*100:.2f}%')

            train_loss_logs[epoch] = train_loss
            train_accuracy_logs[epoch] = train_acc
            eval_loss_logs[epoch] = valid_loss
            eval_accuracy_logs[epoch] = valid_acc
            
            early_stopping = self.early_stopping(valid_loss, model)
            if early_stopping:
                print("Early stopping")
                break
            
        logs = [train_loss_logs, eval_loss_logs, train_accuracy_logs, eval_accuracy_logs]
        save_logs(logs=logs, NUM_EPOCHS=self.NUM_EPOCHS)


In [7]:
tokenizer = BertTokenizer.from_pretrained('../input/bert-base-multilingual-cased/bert-base-multilingual-cased', local_files_only=True)
init_token_idx = tokenizer.cls_token_id
eos_token_idx = tokenizer.sep_token_id
pad_token_idx = tokenizer.pad_token_id
unk_token_idx = tokenizer.unk_token_id

print(init_token_idx, eos_token_idx, pad_token_idx, unk_token_idx)

101 102 0 100


In [8]:
df_train_path = "../input/contradictory-my-dear-watson/train.csv"
feature_engineering_train = FeatureEngineeringNLI(df_train_path, is_train=True)
feature_engineering_train.process_data()

[########################################] | 100% Completed | 23min 19.9s
[########################################] | 100% Completed | 21min 23.5s


In [13]:
trainer = BERTNLITrainer()
train_iterator, valid_iterator = trainer.setup_data_training()
model, criterion, optimizer, scheduler = trainer.setup_training()
trainer.train_process(model, train_iterator, valid_iterator, optimizer, criterion, scheduler)

<torchtext.legacy.data.dataset.TabularDataset object at 0x7f8c4df25c50>


  0%|          | 0/30 [00:00<?, ?it/s]

The model has 177,853,440 trainable parameters
Epoch: 01 | Epoch Time: 2m 13s
	Train Loss: 1.521 | Train Acc: 37.72%
	Valid Loss: 1.206 | Valid Acc: 54.11%


  3%|▎         | 1/30 [02:15<1:05:25, 135.35s/it]

Epoch: 02 | Epoch Time: 4m 28s
	Train Loss: 1.033 | Train Acc: 56.64%
	Valid Loss: 0.919 | Valid Acc: 60.44%


  7%|▋         | 2/30 [04:30<1:03:14, 135.50s/it]

Epoch: 03 | Epoch Time: 6m 44s
	Train Loss: 0.840 | Train Acc: 64.98%
	Valid Loss: 0.880 | Valid Acc: 64.60%


 10%|█         | 3/30 [06:47<1:01:07, 135.82s/it]

Epoch: 04 | Epoch Time: 9m 1s
	Train Loss: 0.699 | Train Acc: 71.32%
	Valid Loss: 0.800 | Valid Acc: 68.09%


 13%|█▎        | 4/30 [09:03<58:55, 136.00s/it]  

Epoch: 05 | Epoch Time: 11m 17s
	Train Loss: 0.567 | Train Acc: 77.83%
	Valid Loss: 0.903 | Valid Acc: 68.71%


 17%|█▋        | 5/30 [11:19<56:40, 136.02s/it]

Epoch: 06 | Epoch Time: 13m 33s
	Train Loss: 0.458 | Train Acc: 82.55%
	Valid Loss: 1.056 | Valid Acc: 66.37%


 20%|██        | 6/30 [13:35<54:25, 136.05s/it]

Epoch: 07 | Epoch Time: 15m 49s
	Train Loss: 0.343 | Train Acc: 87.60%
	Valid Loss: 1.154 | Valid Acc: 65.87%


 23%|██▎       | 7/30 [15:51<52:06, 135.92s/it]

Epoch: 08 | Epoch Time: 18m 4s
	Train Loss: 0.247 | Train Acc: 91.40%
	Valid Loss: 1.153 | Valid Acc: 67.02%


 27%|██▋       | 8/30 [18:06<49:47, 135.80s/it]

Epoch: 09 | Epoch Time: 20m 20s
	Train Loss: 0.184 | Train Acc: 93.92%
	Valid Loss: 1.294 | Valid Acc: 67.93%


 30%|███       | 9/30 [20:23<47:34, 135.93s/it]

Epoch: 10 | Epoch Time: 22m 36s
	Train Loss: 0.160 | Train Acc: 94.31%
	Valid Loss: 1.367 | Valid Acc: 67.39%


 33%|███▎      | 10/30 [22:39<45:19, 135.95s/it]

Epoch: 11 | Epoch Time: 24m 52s
	Train Loss: 0.130 | Train Acc: 95.69%
	Valid Loss: 1.427 | Valid Acc: 68.83%


 37%|███▋      | 11/30 [24:54<43:01, 135.87s/it]

Epoch: 12 | Epoch Time: 27m 8s
	Train Loss: 0.108 | Train Acc: 96.42%
	Valid Loss: 1.482 | Valid Acc: 68.46%


 40%|████      | 12/30 [27:10<40:46, 135.90s/it]

Epoch: 13 | Epoch Time: 29m 24s
	Train Loss: 0.092 | Train Acc: 96.97%
	Valid Loss: 1.469 | Valid Acc: 67.31%


 43%|████▎     | 13/30 [29:26<38:28, 135.82s/it]

Epoch: 14 | Epoch Time: 31m 40s
	Train Loss: 0.074 | Train Acc: 97.55%
	Valid Loss: 1.612 | Valid Acc: 67.02%


 47%|████▋     | 14/30 [33:37<38:25, 144.10s/it]


KeyboardInterrupt: 

In [None]:
"""df_test_path = "../input/contradictory-my-dear-watson/test.csv"
feature_engineering_test = FeatureEngineeringNLI(df_test_path, is_train=False)
feature_engineering_test.process_data()
predictor = BERTNLITrainer()
test_iterator = predictor.setup_data_inference()
predictor.predict_submission(test_iterator)"""

In [None]:
!zip -r -j /kaggle/working/output.zip ./