In [None]:
# Mount Google Drive (Colab only) so the dataset CSV and the checkpoint folder are reachable.
# The rest of the notebook uses relative 'drive/MyDrive/...' paths, which resolve to this
# mount only when the working directory is /content.
from google.colab import drive
drive.mount("/content/drive/")

In [None]:
# !pip install transformers

In [None]:
import numpy as np
import pandas as pd
import torch.nn as nn
import torch
import shutil
import sys
import pickle
import ast

from torch import cuda
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler
from transformers import DistilBertTokenizer, DistilBertModel
from sklearn.metrics import confusion_matrix, classification_report

In [None]:
# Pick the GPU when torch can see one, otherwise fall back to the CPU.
if cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

In [None]:
# Fixed seed for the initial shuffle: without it the row order (and therefore
# every seeded split made from `dataset` afterwards) changes on each run.
SHUFFLE_SEED = 204

dataset = pd.read_csv('drive/MyDrive/NLP_Final_Project/dataset/final_classification.csv', delimiter=',')
dataset = dataset.sample(frac=1, random_state=SHUFFLE_SEED).reset_index(drop=True)


# 'tags_list' is stored in the CSV as the string form of a Python list; parse it back.
dataset['tags_list'] = dataset['tags_list'].apply(ast.literal_eval)
tags_to_keep = ['love', 'life', 'inspirational', 'philosophy', 'humor']

# One 0/1 indicator column per kept tag (the multi-hot targets used for training).
for tag in tags_to_keep:
    # Bind `tag` as a default argument so the lambda does not rely on late binding.
    dataset[tag] = dataset['tags_list'].apply(lambda tags, tag=tag: int(tag in tags))

print('Shape of dataset : ',dataset.shape)
dataset.head(5)

In [None]:
def split_data(df, random_seed, test_size=0.2, val_size=0.15):
    """Split ``df`` into stratified train / validation / test frames.

    The test set is carved out first; the validation set is then taken from
    the remaining rows, so ``val_size`` is a fraction of the train+validation
    remainder, not of the full frame. Both splits stratify on the ``tags``
    column. The per-tag counts of each split (from ``tags_list``) are printed
    in the order train, validation, test.

    Args:
        df: frame containing at least the ``tags`` and ``tags_list`` columns.
        random_seed: seed passed to both ``train_test_split`` calls.
        test_size: fraction of ``df`` held out for testing.
        val_size: fraction of the non-test rows held out for validation.

    Returns:
        Tuple ``(train_df, val_df, test_df)``, each with a fresh 0..n-1 index.
    """
    train_val, test_df = train_test_split(
        df, test_size=test_size, random_state=random_seed, stratify=df['tags'])
    train_df, val_df = train_test_split(
        train_val, test_size=val_size, random_state=random_seed, stratify=train_val['tags'])

    for d in [train_df, val_df, test_df]:
        tag_count = {}
        for tags in d['tags_list']:
            for t in tags:
                tag_count[t] = tag_count.get(t, 0) + 1
        print('Tag Distribution: ', tag_count)

    # drop=True: do not carry the pre-split row labels along as an extra
    # 'index' column in the returned frames.
    train_df = train_df.reset_index(drop=True)
    val_df = val_df.reset_index(drop=True)
    test_df = test_df.reset_index(drop=True)

    return train_df, val_df, test_df

In [None]:
# One-off split with seed 204; the multi-seed training cell further down calls
# split_data again for every seed and overwrites these three frames.
train_df, valid_df, test_df = split_data(dataset, 204)

In [None]:

# Hyperparameters shared by the dataset, data loaders and training loop.
MAX_LEN = 256  # token length each quote is padded/truncated to by CustomDataset
TRAIN_BATCH_SIZE = 32
VALID_BATCH_SIZE = 32  # also used for the test loader


EPOCHS = 10
LEARNING_RATE = 2e-05  # learning rate handed to torch.optim.Adam

# Alias for the label columns. Note that train_model / eval_model / get_predictions
# each assign a *local* variable with this same name for the batch targets.
target_labels = tags_to_keep

In [None]:
class CustomDataset(Dataset):
    """Torch dataset pairing tokenized quotes with their multi-hot tag targets.

    Args:
        dataframe: frame with a ``quote`` column and one 0/1 column per label.
        tokenizer: object exposing ``encode_plus`` (the notebook passes a
            DistilBertTokenizer).
        max_len: length every encoded quote is padded / truncated to.
        label_columns: names of the target columns. Defaults to the
            notebook-level ``tags_to_keep`` list when omitted.
    """

    def __init__(self, dataframe, tokenizer, max_len, label_columns=None):
        self.tokenizer = tokenizer
        self.data = dataframe
        self.quote = dataframe.quote
        if label_columns is None:
            # Fall back to the notebook-wide label list (original behaviour).
            label_columns = tags_to_keep
        self.targets = self.data[list(label_columns)].values
        self.max_len = max_len

    def __len__(self):
        return len(self.quote)

    def __getitem__(self, index):
        """Return the encoded quote and float targets for the row at position ``index``."""
        # .iloc: DataLoader indices are positions; label-based lookup would
        # fail (or pick the wrong row) on a frame whose index is not 0..n-1.
        quote = str(self.quote.iloc[index])
        # Collapse runs of whitespace (newlines, tabs, double spaces) to single spaces.
        quote = " ".join(quote.split())

        inputs = self.tokenizer.encode_plus(
            quote,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            return_token_type_ids=True,
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt'
        )

        # return_tensors='pt' yields a leading batch axis of 1; flatten drops it
        # so the DataLoader can stack items into (batch, max_len).
        return {
            'input_ids': inputs['input_ids'].flatten(),
            'attention_mask': inputs['attention_mask'].flatten(),
            'token_type_ids': inputs["token_type_ids"].flatten(),
            'targets': torch.tensor(self.targets[index], dtype=torch.float),
            'quote' : quote
        }

In [None]:
# tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

# train_dataset = CustomDataset(train_df, tokenizer, MAX_LEN)
# valid_dataset = CustomDataset(valid_df, tokenizer, MAX_LEN)
# test_dataset = CustomDataset(test_df, tokenizer, MAX_LEN)

In [None]:
class DistilBERTClass(torch.nn.Module):
    """DistilBERT encoder with a two-layer head producing one logit per label.

    Args:
        num_labels: number of output logits (defaults to 5, the number of
            tags this notebook trains on).
        dropout: dropout probability applied before the final linear layer.

    The attribute names ``l1``, ``pre_classifier``, ``dropout`` and
    ``classifier`` are kept as-is because they are the keys of the saved
    ``state_dict``.
    """

    def __init__(self, num_labels=5, dropout=0.1):
        super(DistilBERTClass, self).__init__()
        self.l1 = DistilBertModel.from_pretrained("distilbert-base-uncased")
        self.pre_classifier = torch.nn.Linear(768, 768)
        self.dropout = torch.nn.Dropout(dropout)
        self.classifier = torch.nn.Linear(768, num_labels)

    def forward(self, input_ids, attention_mask, token_type_ids=None):
        """Return raw logits of shape (batch, num_labels).

        ``token_type_ids`` is accepted so callers can pass it positionally,
        but it is not forwarded to the encoder.
        """
        output_1 = self.l1(input_ids=input_ids, attention_mask=attention_mask)
        hidden_state = output_1[0]
        # Use the hidden state of the first token as the sequence summary.
        pooler = hidden_state[:, 0]
        pooler = self.pre_classifier(pooler)
        # Functional tanh: avoids building a new Tanh module on every call.
        pooler = torch.tanh(pooler)
        pooler = self.dropout(pooler)
        output = self.classifier(pooler)
        return output

In [None]:
def loss_fn(outputs, targets):
    """Mean binary cross-entropy between raw logits ``outputs`` and 0/1 ``targets``."""
    bce = torch.nn.functional.binary_cross_entropy_with_logits
    return bce(outputs, targets)

In [None]:
# model = DistilBERTClass()
# model.to(device)

# optimizer = torch.optim.Adam(params =  model.parameters(), lr=LEARNING_RATE)

In [None]:
def load_ckp(checkpoint_fpath, model, optimizer):
    """Restore ``model`` and ``optimizer`` in place from a saved checkpoint.

    Args:
        checkpoint_fpath: path of a file written by ``save_ckp``; it must
            contain the keys ``'state_dict'`` and ``'optimizer'``.
        model: module whose parameters are overwritten.
        optimizer: optimizer whose state is overwritten.

    Returns:
        The same ``(model, optimizer)`` objects, now loaded.
    """
    # map_location='cpu' lets a checkpoint saved on a GPU be opened on a
    # CPU-only runtime; load_state_dict then copies the values onto whatever
    # device the model / optimizer already live on.
    checkpoint = torch.load(checkpoint_fpath, map_location='cpu')
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    # No read of 'valid_loss_min': the checkpoints written by this notebook do
    # not store that key, and the value was never used.
    return model, optimizer

def save_ckp(state, is_best, checkpoint_path, best_model_path):
    """Serialize ``state`` to ``checkpoint_path``; when ``is_best`` is truthy,
    also copy that file to ``best_model_path``."""
    torch.save(state, checkpoint_path)
    if not is_best:
        return
    shutil.copyfile(checkpoint_path, best_model_path)

In [None]:
import tqdm.notebook as tq
from sklearn.metrics import jaccard_score
def train_model(training_loader, model, optimizer):
    """Train ``model`` for one pass over ``training_loader``.

    Each batch: forward pass, BCE-with-logits loss via ``loss_fn``, gradient
    clipping to a max norm of 1.0, then an optimizer step. Predictions are
    the sigmoid of the logits thresholded at 0.5, scored against the targets
    with the sample-averaged Jaccard index.

    Args:
        training_loader: iterable of batches shaped like ``CustomDataset`` items.
        model: module called as ``model(input_ids, attention_mask, token_type_ids)``.
        optimizer: optimizer over ``model``'s parameters.

    Returns:
        ``(model, accuracy, mean_loss)`` where ``accuracy`` is the unweighted
        mean of the per-batch Jaccard scores and ``mean_loss`` is the mean of
        the per-batch losses.
    """
    losses = []

    model.train()

    loop = tq.tqdm(training_loader, total=len(training_loader),
                   leave=True, colour='steelblue')

    label_threshold = 0.5
    count = 0
    accuracy = 0
    for data in loop:
        ids = data['input_ids'].to(device, dtype = torch.long)
        mask = data['attention_mask'].to(device, dtype = torch.long)
        token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
        targets = data['targets'].to(device, dtype = torch.float)

        # forward
        outputs = model(ids, mask, token_type_ids)  # logits: (batch, n_labels)
        loss = loss_fn(outputs, targets)
        losses.append(loss.item())

        # training accuracy (computed on detached values; not part of the graph)
        sigmoid_output = torch.sigmoid(outputs.detach())
        predicted_labels = (sigmoid_output > label_threshold).long()

        count += 1
        accuracy += jaccard_score(targets.cpu().numpy(),
                                  predicted_labels.cpu().numpy(),
                                  average='samples')

        # backward
        optimizer.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        # grad descent step
        optimizer.step()

        # Show the scalar loss: passing the tensor itself prints its full repr
        # (device, grad_fn) in the progress bar.
        loop.set_postfix(batch_loss=loss.item())

    # returning: trained model, model accuracy, mean loss
    return model, float(accuracy)/count, np.mean(losses)

In [None]:
def eval_model(validation_loader, model, optimizer):
    """Score ``model`` on ``validation_loader`` without updating it.

    Returns ``(accuracy, mean_loss)``: the unweighted mean of the per-batch
    sample-averaged Jaccard scores (sigmoid thresholded at 0.5) and the mean
    of the per-batch losses. ``optimizer`` is accepted but not used.
    """
    # eval mode: dropout is disabled for the whole pass
    model.eval()

    threshold = 0.5
    batch_losses = []
    n_batches = 0
    jaccard_total = 0

    with torch.no_grad():
        for batch in validation_loader:
            input_ids = batch['input_ids'].to(device, dtype=torch.long)
            attention_mask = batch['attention_mask'].to(device, dtype=torch.long)
            segment_ids = batch['token_type_ids'].to(device, dtype=torch.long)
            labels = batch['targets'].to(device, dtype=torch.float)

            logits = model(input_ids, attention_mask, segment_ids)
            batch_losses.append(loss_fn(logits, labels).item())

            # validation accuracy
            probabilities = torch.sigmoid(logits)
            hard_labels = torch.where(probabilities > threshold,
                                      torch.tensor(1), torch.tensor(0))

            jaccard_total += jaccard_score(labels.cpu(), hard_labels.cpu(),
                                           average='samples')
            n_batches += 1

    mean_accuracy = float(jaccard_total) / n_batches
    return mean_accuracy, np.mean(batch_losses)

In [None]:
# for epoch in range(1, EPOCHS+1):
#     print(f'Epoch {epoch}/{EPOCHS}')
#     model, train_acc, train_loss = train_model(train_data_loader, model, optimizer)
#     val_acc, val_loss = eval_model(val_data_loader, model, optimizer)

#     print(f'train_loss={train_loss:.4f}, val_loss={val_loss:.4f} train_acc={train_acc:.4f}, val_acc={val_acc:.4f}')



In [None]:
def get_predictions(model, data_loader):
    """Run ``model`` over ``data_loader`` and collect its outputs on the CPU.

    Args:
        model: module called as ``model(input_ids, attention_mask, token_type_ids)``.
        data_loader: iterable of batches shaped like ``CustomDataset`` items.

    Returns:
        ``(quotes, predictions, prediction_probs, target_values)``:
        ``quotes`` is the list of batch ``"quote"`` entries (empty if the
        batches carry no ``"quote"`` key); the other three are CPU tensors
        with one row per sample holding the 0/1 predictions (sigmoid
        thresholded at 0.5), the sigmoid probabilities and the targets.
    """
    model = model.eval()

    label_threshold = 0.5
    quotes = []
    predictions = []
    prediction_probs = []
    target_values = []

    with torch.no_grad():
        for data in data_loader:
            # Previously disabled, which left the returned `quotes` always empty.
            if "quote" in data:
                quotes.extend(data["quote"])

            ids = data["input_ids"].to(device, dtype = torch.long)
            mask = data["attention_mask"].to(device, dtype = torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
            target_labels = data["targets"].to(device, dtype = torch.float)

            outputs = model(ids, mask, token_type_ids)
            sigmoid_output = torch.sigmoid(outputs)
            predicted_labels = (sigmoid_output > label_threshold).long()

            # Move each batch to the CPU right away instead of accumulating
            # one device tensor per row until the end.
            predictions.append(predicted_labels.cpu())
            prediction_probs.append(sigmoid_output.cpu())
            target_values.append(target_labels.cpu())

    # Concatenating (batch, n_labels) chunks gives the same (n_samples, n_labels)
    # result as stacking individual rows.
    predictions = torch.cat(predictions)
    prediction_probs = torch.cat(prediction_probs)
    target_values = torch.cat(target_values)

    return quotes, predictions, prediction_probs, target_values


In [None]:
# quotes, predictions, prediction_probs, target_values = get_predictions(model, test_data_loader)

In [None]:
# print(classification_report(target_values, predictions, target_names=tags_to_keep))

In [None]:
import seaborn as sns
def show_confusion_matrix(confusion_matrix):
    """Draw ``confusion_matrix`` as an annotated blue heatmap.

    Args:
        confusion_matrix: 2-D table of integer counts (the notebook passes a
            DataFrame whose index / columns are the tag names); rows are
            labelled as true categories, columns as predicted ones.
    """
    hmap = sns.heatmap(confusion_matrix, annot=True, fmt="d", cmap="Blues")
    hmap.yaxis.set_ticklabels(hmap.yaxis.get_ticklabels(), rotation=0, ha='right')
    hmap.xaxis.set_ticklabels(hmap.xaxis.get_ticklabels(), rotation=30, ha='right')
    # Label through the Axes that heatmap returns: `plt` is never imported in
    # the active cells, so the pyplot calls raised NameError.
    hmap.set_ylabel('True category')
    hmap.set_xlabel('Predicted category')

In [None]:
# import matplotlib.pyplot as plt
# plt.rcParams["figure.figsize"] = (10,7)
# # cm = confusion_matrix(target_values, predictions)
# cm = confusion_matrix(torch.argmax(target_values, dim=1), torch.argmax(predictions, dim=1))
# df_cm = pd.DataFrame(cm, index=tags_to_keep, columns=tags_to_keep)
# show_confusion_matrix(df_cm)

In [None]:
# Train and evaluate one DistilBERT classifier per seed, logging per-epoch
# metrics, the final test metrics, a checkpoint and the test predictions.
checkpoint_path = 'drive/MyDrive/NLP_Final_Project/checkpoints/'

seeds = [204, 9, 17]
metrics_df = pd.DataFrame(columns=['Seed', 'Epoch', 'Training_Accuracy', 'Training_Loss', 'Validation_Accuracy', 'Validation_Loss'])

test_metrics_df = pd.DataFrame(columns=['Seed', 'Test_Accuracy', 'Test_Loss'])

# The tokenizer does not depend on the seed, so load it once for all runs.
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

for seed in seeds:

    print(f"----------Seed {seed}----------")

    # Seed torch as well as the split, so the head initialisation, dropout and
    # loader shuffling are tied to `seed` (as far as the backend is deterministic).
    torch.manual_seed(seed)

    train_df, valid_df, test_df = split_data(dataset, seed)

    print('Training dataset label proportion:')
    print(train_df[tags_to_keep].mean())
    print('\nTesting dataset label proportion: ')
    print(test_df[tags_to_keep].mean())
    print('\nValidation dataset label proportion: ')
    print(valid_df[tags_to_keep].mean())

    train_dataset = CustomDataset(train_df, tokenizer, MAX_LEN)
    valid_dataset = CustomDataset(valid_df, tokenizer, MAX_LEN)
    test_dataset = CustomDataset(test_df, tokenizer, MAX_LEN)

    train_data_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=TRAIN_BATCH_SIZE,
        shuffle=True,
        num_workers=0
    )

    val_data_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=VALID_BATCH_SIZE,
        shuffle=False,
        num_workers=0
    )

    test_data_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=VALID_BATCH_SIZE,
        shuffle=False,
        num_workers=0
    )

    # Fresh model and optimizer for every seed.
    model = DistilBERTClass()
    model.to(device)

    optimizer = torch.optim.Adam(params =  model.parameters(), lr=LEARNING_RATE)


    for epoch in range(1, EPOCHS+1):
        print(f'Seed - {seed} , Epoch {epoch}/{EPOCHS}')
        model, train_acc, train_loss = train_model(train_data_loader, model, optimizer)
        val_acc, val_loss = eval_model(val_data_loader, model, optimizer)

        print(f'Train Accuracy={train_acc:.4f}, Train Loss={train_loss:.4f}, Validation Accuracy={val_acc:.4f}, Validation Loss={val_loss:.4f}')

        epoch_data = {
            'Seed': seed,
            'Epoch': epoch,
            'Training_Accuracy': train_acc,
            'Training_Loss': train_loss,
            'Validation_Accuracy': val_acc,
            'Validation_Loss': val_loss
        }

        metrics_df.loc[len(metrics_df.index)] = epoch_data

    # The test set is scored with the weights from the last epoch.
    test_acc, test_loss = eval_model(test_data_loader, model, optimizer)

    test_metrics_data = {
            'Seed': seed,
            'Test_Accuracy': test_acc,
            'Test_Loss': test_loss,
        }
    test_metrics_df.loc[len(test_metrics_df.index)] = test_metrics_data

    print(f'Test Accuracy={test_acc:.4f}, Test Loss={test_loss:.4f}')

    checkpoint = {
        # Key fixed: it used to be 'test_accuracy ' with a trailing space.
        'test_accuracy': test_acc,
        # eval_model returns np.mean(...); store a plain Python float instead
        # of a NumPy scalar inside the checkpoint.
        'test_loss': float(test_loss),
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict()
    }

    save_ckp(checkpoint, False, checkpoint_path + f'distilBert-{seed}.pt', '')

    quotes, predictions, prediction_probs, target_values = get_predictions(model, test_data_loader)

    # metrics_df / test_metrics_df are cumulative, so each seed's pickle also
    # contains the rows of the seeds that ran before it.
    with open(checkpoint_path+f'distilBert-{seed}-prediction-data.pkl', 'wb') as f:
        pickle.dump((predictions, prediction_probs, target_values,metrics_df , test_metrics_df), f)


In [None]:
# ----------Seed 204----------
# {'inspirational': 8147, 'humor': 4613, 'life': 7695, 'philosophy': 6535, 'love': 7320}
# {'inspirational': 1437, 'philosophy': 1155, 'love': 1293, 'humor': 814, 'life': 1359}
# {'life': 2266, 'humor': 1357, 'love': 2154, 'inspirational': 2396, 'philosophy': 1923}
# Seed - 204 , Epoch 1/10
# 100%
#  638/638 [07:01<00:00,  1.99it/s, batch_loss=tensor(0.4994, device='cuda:0',        grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)]
# Train Accuracy=0.4829, Train Loss=0.4829, Validation Accuracy=0.5588, Validation Loss=0.4374
# Seed - 204 , Epoch 2/10
# 100%
#  638/638 [07:00<00:00,  1.98it/s, batch_loss=tensor(0.4973, device='cuda:0',        grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)]
# Train Accuracy=0.6131, Train Loss=0.6131, Validation Accuracy=0.5966, Validation Loss=0.4228
# Seed - 204 , Epoch 3/10
# 100%
#  638/638 [07:03<00:00,  1.99it/s, batch_loss=tensor(0.5584, device='cuda:0',        grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)]
# Train Accuracy=0.6871, Train Loss=0.6871, Validation Accuracy=0.6161, Validation Loss=0.4195
# Seed - 204 , Epoch 4/10
# 100%
#  638/638 [07:00<00:00,  2.00it/s, batch_loss=tensor(0.3606, device='cuda:0',        grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)]
# Train Accuracy=0.7547, Train Loss=0.7547, Validation Accuracy=0.6202, Validation Loss=0.4390
# Seed - 204 , Epoch 5/10
# 100%
#  638/638 [07:00<00:00,  1.97it/s, batch_loss=tensor(0.1249, device='cuda:0',        grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)]
# Train Accuracy=0.8113, Train Loss=0.8113, Validation Accuracy=0.6281, Validation Loss=0.4694
# Seed - 204 , Epoch 6/10
# 100%
#  638/638 [07:01<00:00,  2.02it/s, batch_loss=tensor(0.0941, device='cuda:0',        grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)]
# Train Accuracy=0.8619, Train Loss=0.8619, Validation Accuracy=0.6290, Validation Loss=0.5133
# Seed - 204 , Epoch 7/10
# 100%
#  638/638 [07:00<00:00,  2.00it/s, batch_loss=tensor(0.0354, device='cuda:0',        grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)]
# Train Accuracy=0.8985, Train Loss=0.8985, Validation Accuracy=0.6371, Validation Loss=0.5421
# Seed - 204 , Epoch 8/10
# 100%
#  638/638 [07:00<00:00,  2.01it/s, batch_loss=tensor(0.0161, device='cuda:0',        grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)]
# Train Accuracy=0.9251, Train Loss=0.9251, Validation Accuracy=0.6283, Validation Loss=0.6104
# Seed - 204 , Epoch 9/10
# 100%
#  638/638 [07:00<00:00,  2.00it/s, batch_loss=tensor(0.1120, device='cuda:0',        grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)]
# Train Accuracy=0.9439, Train Loss=0.9439, Validation Accuracy=0.6309, Validation Loss=0.6822
# Seed - 204 , Epoch 10/10
# 100%
#  638/638 [07:00<00:00,  2.00it/s, batch_loss=tensor(0.1564, device='cuda:0',        grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)]
# Train Accuracy=0.9584, Train Loss=0.9584, Validation Accuracy=0.6316, Validation Loss=0.7278
# Test Accuracy=0.6199, Test Loss=0.7444
# ----------Seed 9----------
# {'inspirational': 8147, 'love': 7320, 'life': 7695, 'philosophy': 6535, 'humor': 4613}
# {'love': 1293, 'life': 1359, 'inspirational': 1437, 'philosophy': 1155, 'humor': 814}
# {'love': 2154, 'life': 2266, 'inspirational': 2396, 'philosophy': 1923, 'humor': 1357}
# Seed - 9 , Epoch 1/10
# 100%
#  638/638 [07:00<00:00,  1.98it/s, batch_loss=tensor(0.3955, device='cuda:0',        grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)]
# Train Accuracy=0.4821, Train Loss=0.4821, Validation Accuracy=0.5599, Validation Loss=0.4446
# Seed - 9 , Epoch 2/10
# 100%
#  638/638 [07:03<00:00,  1.95it/s, batch_loss=tensor(0.4635, device='cuda:0',        grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)]
# Train Accuracy=0.6112, Train Loss=0.6112, Validation Accuracy=0.5851, Validation Loss=0.4273
# Seed - 9 , Epoch 3/10
# 100%
#  638/638 [07:00<00:00,  1.97it/s, batch_loss=tensor(0.3743, device='cuda:0',        grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)]
# Train Accuracy=0.6819, Train Loss=0.6819, Validation Accuracy=0.5962, Validation Loss=0.4280
# Seed - 9 , Epoch 4/10
# 100%
#  638/638 [07:00<00:00,  1.96it/s, batch_loss=tensor(0.1169, device='cuda:0',        grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)]
# Train Accuracy=0.7524, Train Loss=0.7524, Validation Accuracy=0.6065, Validation Loss=0.4632
# Seed - 9 , Epoch 5/10
# 100%
#  638/638 [07:00<00:00,  2.01it/s, batch_loss=tensor(0.2218, device='cuda:0',        grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)]
# Train Accuracy=0.8127, Train Loss=0.8127, Validation Accuracy=0.6131, Validation Loss=0.4757
# Seed - 9 , Epoch 6/10
# 100%
#  638/638 [07:00<00:00,  2.02it/s, batch_loss=tensor(0.2330, device='cuda:0',        grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)]
# Train Accuracy=0.8637, Train Loss=0.8637, Validation Accuracy=0.6097, Validation Loss=0.5281
# Seed - 9 , Epoch 7/10
# 100%
#  638/638 [07:02<00:00,  2.00it/s, batch_loss=tensor(0.0455, device='cuda:0',        grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)]
# Train Accuracy=0.9015, Train Loss=0.9015, Validation Accuracy=0.6135, Validation Loss=0.6088
# Seed - 9 , Epoch 8/10
# 100%
#  638/638 [07:00<00:00,  2.01it/s, batch_loss=tensor(0.0427, device='cuda:0',        grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)]
# Train Accuracy=0.9261, Train Loss=0.9261, Validation Accuracy=0.6088, Validation Loss=0.6467
# Seed - 9 , Epoch 9/10
# 100%
#  638/638 [07:00<00:00,  2.01it/s, batch_loss=tensor(0.0820, device='cuda:0',        grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)]
# Train Accuracy=0.9453, Train Loss=0.9453, Validation Accuracy=0.6042, Validation Loss=0.7117
# Seed - 9 , Epoch 10/10
# 100%
#  638/638 [07:00<00:00,  1.95it/s, batch_loss=tensor(0.0501, device='cuda:0',        grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)]
# Train Accuracy=0.9579, Train Loss=0.9579, Validation Accuracy=0.6088, Validation Loss=0.7929
# Test Accuracy=0.6178, Test Loss=0.7745
# ----------Seed 17----------
# {'humor': 4613, 'love': 7320, 'inspirational': 8147, 'philosophy': 6535, 'life': 7695}
# {'inspirational': 1437, 'philosophy': 1155, 'humor': 814, 'love': 1293, 'life': 1359}
# {'love': 2154, 'life': 2266, 'inspirational': 2396, 'philosophy': 1923, 'humor': 1357}
# Seed - 17 , Epoch 1/10
# 100%
#  638/638 [07:00<00:00,  2.01it/s, batch_loss=tensor(0.3524, device='cuda:0',        grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)]
# Train Accuracy=0.4768, Train Loss=0.4768, Validation Accuracy=0.5604, Validation Loss=0.4357
# Seed - 17 , Epoch 2/10
# 100%
#  638/638 [07:01<00:00,  2.01it/s, batch_loss=tensor(0.2469, device='cuda:0',        grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)]
# Train Accuracy=0.6103, Train Loss=0.6103, Validation Accuracy=0.5964, Validation Loss=0.4197
# Seed - 17 , Epoch 3/10
# 100%
#  638/638 [07:00<00:00,  2.00it/s, batch_loss=tensor(0.2025, device='cuda:0',        grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)]
# Train Accuracy=0.6820, Train Loss=0.6820, Validation Accuracy=0.6132, Validation Loss=0.4151
# Seed - 17 , Epoch 4/10
# 100%
#  638/638 [07:01<00:00,  2.00it/s, batch_loss=tensor(0.0879, device='cuda:0',        grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)]
# Train Accuracy=0.7483, Train Loss=0.7483, Validation Accuracy=0.6135, Validation Loss=0.4505
# Seed - 17 , Epoch 5/10
# 100%
#  638/638 [07:01<00:00,  2.00it/s, batch_loss=tensor(0.0710, device='cuda:0',        grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)]
# Train Accuracy=0.8082, Train Loss=0.8082, Validation Accuracy=0.6149, Validation Loss=0.4744
# Seed - 17 , Epoch 6/10
# 100%
#  638/638 [07:01<00:00,  1.99it/s, batch_loss=tensor(0.4595, device='cuda:0',        grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)]
# Train Accuracy=0.8570, Train Loss=0.8570, Validation Accuracy=0.6293, Validation Loss=0.5137
# Seed - 17 , Epoch 7/10
# 100%
#  638/638 [07:01<00:00,  2.01it/s, batch_loss=tensor(0.0308, device='cuda:0',        grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)]
# Train Accuracy=0.8948, Train Loss=0.8948, Validation Accuracy=0.6280, Validation Loss=0.5693
# Seed - 17 , Epoch 8/10
# 100%
#  638/638 [07:01<00:00,  1.95it/s, batch_loss=tensor(0.1092, device='cuda:0',        grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)]
# Train Accuracy=0.9233, Train Loss=0.9233, Validation Accuracy=0.6225, Validation Loss=0.6264
# Seed - 17 , Epoch 9/10
# 100%
#  638/638 [07:01<00:00,  1.95it/s, batch_loss=tensor(0.1136, device='cuda:0',        grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)]
# Train Accuracy=0.9409, Train Loss=0.9409, Validation Accuracy=0.6281, Validation Loss=0.6690
# Seed - 17 , Epoch 10/10
# 100%
#  638/638 [07:01<00:00,  1.97it/s, batch_loss=tensor(0.0475, device='cuda:0',        grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)]
# Train Accuracy=0.9528, Train Loss=0.9528, Validation Accuracy=0.6218, Validation Loss=0.7199
# Test Accuracy=0.6309, Test Loss=0.7132