In [1]:

import pandas as pd
import numpy as np
import seaborn as sns
import random
import matplotlib.pyplot as plt
import spacy  # Import spaCy

from sentence_transformers import SentenceTransformer
from transformers import BertTokenizer, BertForSequenceClassification, AdamW

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.metrics import precision_recall_fscore_support

nlp = spacy.load("en_core_web_sm")


In [2]:
df = pd.read_csv('datasetmltask1.csv')

In [3]:
from transformers import BertTokenizer, BertModel

# Load the BERT tokenizer and model
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')



In [4]:

df.shape
df.nunique()

premise       317
hypothesis    309
label           1
dtype: int64

In [5]:
duplicate_hypotheses = df[df.duplicated(subset='hypothesis', keep=False)]

# Print the rows with duplicate hypotheses
print(duplicate_hypotheses)

                                               premise  \
70   A sculptor is shaping a block of marble into a...   
71   A parent is helping their child fly a kite in ...   
72   A soccer player is dribbling the ball down the...   
76   A dancer is performing a graceful ballet routine.   
77   A barber is giving a customer a haircut at the...   
80   A group of friends is laughing and chatting ar...   
89   A sculptor is chiseling a piece of marble into...   
90   A parent is helping their child build a sandca...   
91   A soccer player is taking a penalty kick durin...   
95    A dancer is performing a lively hip-hop routine.   
96   A barber is giving a customer a stylish haircu...   
99   A group of friends is sharing stories around a...   
208  A group of psychologists is conducting studies...   
280  A team of artists is creating a public mural t...   
306  A team of psychologists is researching the eff...   
316  A group of artists is creating a series of pub...   

             

In [6]:
#defining the function
def generate_negative_samples(df, model, sentence_vecs1, sentence_vecs2):
    negative_samples = []

    for i in range(len(df)):
        negative_samples.append((df['premise'][i], df['hypothesis'][i], 1))

    for premise_idx, premise in enumerate(df['premise']):
        premise_vec = sentence_vecs1[premise_idx].reshape(1, -1)
        hypothesis_vecs = sentence_vecs2

        # Compute cosine similarities between the premise and all hypotheses
        similarities = cosine_similarity(premise_vec, hypothesis_vecs)[0]

        # Find the indices of 5 hypotheses with the least similarity
        least_similar_indices = np.argpartition(similarities, 5)[:5]

        # Append these hypotheses to negative_samples with label 0
        for idx in least_similar_indices:
            negative_samples.append((premise, df['hypothesis'][idx], 0))

    return negative_samples


In [7]:


# Load the BERT tokenizer and model
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')

# Tokenize the sentences
tokenized_sentences1 = tokenizer(df['premise'].tolist(), padding=True, truncation=True, return_tensors='pt')
tokenized_sentences2 = tokenizer(df['hypothesis'].tolist(), padding=True, truncation=True, return_tensors='pt')

# Encode the tokenized sequences
encoded_sentences1 = model(**tokenized_sentences1).last_hidden_state
encoded_sentences2 = model(**tokenized_sentences2).last_hidden_state

# Average pooling to get sentence-level embeddings
sentence_vecs1 = np.mean(encoded_sentences1.detach().numpy(), axis=1)
sentence_vecs2 = np.mean(encoded_sentences2.detach().numpy(), axis=1)

# Generate negative samples
negative_samples = generate_negative_samples(df, model, sentence_vecs1, sentence_vecs2)

# Create a DataFrame from the negative_samples list
df_negative_samples = pd.DataFrame(negative_samples, columns=['premise', 'hypothesis', 'label'])

# Specify the output file path
output_file_path = 'augmented_dataset_ml_task1.csv'

# Save the DataFrame to a CSV file
df_negative_samples.to_csv(output_file_path, index=False)

print(f"Negative samples saved to {output_file_path}")


Negative samples saved to augmented_dataset_ml_task1.csv


In [8]:
# Load your dataset from 'augmented_dataset_ml_task1.csv' (replace with your actual file path)
df = pd.read_csv('augmented_dataset_ml_task1.csv')

# Split the dataset into train and test sets (80% train, 20% test)
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

# Identify common rows between train and test datasets
common_rows = pd.merge(train_df, test_df, on=['premise', 'hypothesis'], how='inner')

# Remove common rows from the test dataset
test_df = test_df[~test_df.index.isin(common_rows.index)]

# Save the train and test sets to CSV files
train_df.to_csv('train_dataset.csv', index=False)  # Save train dataset to 'train_dataset.csv'
test_df.to_csv('test_dataset.csv', index=False)    # Save updated test dataset to 'test_dataset.csv'

In [11]:

class CustomDataset(Dataset):
    def __init__(self, dataframe, tokenizer, max_length):
        self.data = dataframe
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        premise = str(self.data.loc[index, 'premise'])
        hypothesis = str(self.data.loc[index, 'hypothesis'])

        encoding = self.tokenizer(
            premise,
            hypothesis,
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )

        return {
            'input_ids': encoding['input_ids'].squeeze(),
            'attention_mask': encoding['attention_mask'].squeeze(),
            'label': torch.tensor(self.data.loc[index, 'label'])
        }


In [12]:
import torch
from torch.utils.data import DataLoader
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
import torch.nn as nn
import matplotlib.pyplot as plt

# Define the model with a custom classifier
def define_model(hidden_size, num_labels):
    model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=num_labels)
    
    classifier = nn.Sequential(
        nn.Linear(model.config.hidden_size, hidden_size),
        nn.BatchNorm1d(hidden_size),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(hidden_size, num_labels)
    )
    
    model.classifier = classifier
    
    return model

# Train and validate the model for one epoch
def train_epoch(model, train_dataloader, val_dataloader, optimizer, loss_fn, device):
    model.train()
    total_train_loss = 0
    correct_predictions_train = 0
    total_train_samples = 0
    
    for batch in train_dataloader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['label'].to(device)

        optimizer.zero_grad()

        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        total_train_loss += loss.item()

        logits = outputs.logits
        predicted_labels = torch.argmax(logits, dim=1)
        correct_predictions_train += torch.sum(predicted_labels == labels).item()
        total_train_samples += labels.size(0)

        loss.backward()
        optimizer.step()

    average_train_loss = total_train_loss / len(train_dataloader)
    train_accuracy = correct_predictions_train / total_train_samples

    model.eval()
    total_val_loss = 0
    correct_predictions_val = 0
    total_val_samples = 0

    with torch.no_grad():
        for batch in val_dataloader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['label'].to(device)

            outputs = model(input_ids, attention_mask=attention_mask)
            logits = outputs.logits

            predicted_labels = torch.argmax(logits, dim=1)
            correct_predictions_val += torch.sum(predicted_labels == labels).item()
            total_val_samples += labels.size(0)

            val_loss = loss_fn(logits, labels)
            total_val_loss += val_loss.item()

    average_val_loss = total_val_loss / len(val_dataloader)
    val_accuracy = correct_predictions_val / total_val_samples

    return average_train_loss, train_accuracy, average_val_loss, val_accuracy

# Perform hyperparameter tuning
def hyperparameter_tuning(learning_rates, batch_sizes, hidden_sizes, num_epochs, max_length, weight_decay, df):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    best_accuracy = 0.0
    best_learning_rate = 0.0
    best_batch_size = 0
    best_hidden_size = 0
    best_model = None

    train_losses_all = []
    val_losses_all = []
    train_accuracies_all = []
    val_accuracies_all = []

    idx = 0

    for learning_rate in learning_rates:
        for batch_size in batch_sizes:
            for hidden_size in hidden_sizes:
                print(f"Training with learning rate: {learning_rate}, batch size: {batch_size}, hidden size: {hidden_size}")

                # Set up the model
                model = define_model(hidden_size, num_labels=2).to(device)
                optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
                loss_fn = torch.nn.CrossEntropyLoss()

                # Create the dataset and dataloaders
                dataset = CustomDataset(df, tokenizer, max_length)
                train_size = int(0.7 * len(dataset))
                val_size = len(dataset) - train_size
                train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])
                train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
                val_dataloader = DataLoader(val_dataset, batch_size=batch_size)

                # Lists to store training and validation metrics for each epoch
                train_losses = []
                val_losses = []
                train_accuracies = []
                val_accuracies = []

                # Training loop
                for epoch in range(num_epochs):
                    train_loss, train_accuracy, val_loss, val_accuracy = train_epoch(
                        model, train_dataloader, val_dataloader, optimizer, loss_fn, device
                    )

                    print(f"Epoch {epoch + 1}/{num_epochs}")
                    print(f"Train Loss: {train_loss:.4f} | Train Accuracy: {train_accuracy:.4f}")
                    print(f"Validation Loss: {val_loss:.4f} | Validation Accuracy: {val_accuracy:.4f}")
                    print("*****************************************************")

                    train_losses.append(train_loss)
                    val_losses.append(val_loss)
                    train_accuracies.append(train_accuracy)
                    val_accuracies.append(val_accuracy)

                # Check if the current validation accuracy is the best so far
                if val_accuracy > best_accuracy:
                    best_accuracy = val_accuracy
                    best_learning_rate = learning_rate
                    best_batch_size = batch_size
                    best_hidden_size = hidden_size
                    best_model = model.state_dict()

                   # Save the model with the best validation accuracy
                    best_model_path = f"best_model_lr_{best_learning_rate}_batch_{best_batch_size}_hidden_{best_hidden_size}.pt"
                    torch.save(model, best_model_path)

                # Store the metrics for this combination
                train_losses_all.append(train_losses)
                val_losses_all.append(val_losses)
                train_accuracies_all.append(train_accuracies)
                val_accuracies_all.append(val_accuracies)

                idx += 1
                # Set the model back to training mode
                model.train()
            print("--------------------------------------------------------------------")

    # Print the best hyperparameters and accuracy
    print(f"Best Learning Rate: {best_learning_rate}")
    print(f"Best Batch Size: {best_batch_size}")
    print(f"Best Hidden Size: {best_hidden_size}")
    print(f"Best Validation Accuracy: {best_accuracy:.4f}")

    # Return relevant information
    return best_model, best_learning_rate, best_batch_size, best_hidden_size, train_losses_all, val_losses_all, train_accuracies_all, val_accuracies_all
                   


In [18]:
# Example hyperparameter values
learning_rates = [1e-4, 2e-4, 3e-4]
batch_sizes = [8, 16, 32]
hidden_sizes = [64, 128, 256]
num_epochs = 5
max_length = 128
weight_decay = 0.01

# Call the hyperparameter tuning function
best_model, best_lr, best_batch_size, best_hidden_size, train_losses, val_losses, train_accuracies, val_accuracies = hyperparameter_tuning(
    learning_rates, batch_sizes, hidden_sizes, num_epochs, max_length, weight_decay, df
)



Training with learning rate: 0.0001, batch size: 8, hidden size: 64


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5
Train Loss: 0.6328 | Train Accuracy: 0.6353
Validation Loss: 0.7148 | Validation Accuracy: 0.1707
*****************************************************
Epoch 2/5
Train Loss: 0.5570 | Train Accuracy: 0.7481
Validation Loss: 0.2924 | Validation Accuracy: 0.8753
*****************************************************
Epoch 3/5
Train Loss: 0.3328 | Train Accuracy: 0.9041
Validation Loss: 0.2212 | Validation Accuracy: 0.9453
*****************************************************
Epoch 4/5
Train Loss: 0.2615 | Train Accuracy: 0.9370
Validation Loss: 0.2198 | Validation Accuracy: 0.9562
*****************************************************
Epoch 5/5
Train Loss: 0.2015 | Train Accuracy: 0.9615
Validation Loss: 0.1396 | Validation Accuracy: 0.9803
*****************************************************
Training with learning rate: 0.0001, batch size: 8, hidden size: 128


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5
Train Loss: 0.5494 | Train Accuracy: 0.7105
Validation Loss: 0.2938 | Validation Accuracy: 0.9300
*****************************************************
Epoch 2/5
Train Loss: 0.4184 | Train Accuracy: 0.8496
Validation Loss: 0.4398 | Validation Accuracy: 0.8315
*****************************************************
Epoch 3/5
Train Loss: 0.3149 | Train Accuracy: 0.8966
Validation Loss: 0.3303 | Validation Accuracy: 0.9059
*****************************************************
Epoch 4/5
Train Loss: 0.1996 | Train Accuracy: 0.9539
Validation Loss: 0.2353 | Validation Accuracy: 0.9475
*****************************************************
Epoch 5/5
Train Loss: 0.1306 | Train Accuracy: 0.9803
Validation Loss: 0.1416 | Validation Accuracy: 0.9869
*****************************************************
Training with learning rate: 0.0001, batch size: 8, hidden size: 256


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5
Train Loss: 0.5940 | Train Accuracy: 0.6889
Validation Loss: 0.4949 | Validation Accuracy: 0.8293
*****************************************************
Epoch 2/5
Train Loss: 0.4877 | Train Accuracy: 0.8233
Validation Loss: 0.5140 | Validation Accuracy: 0.8293
*****************************************************
Epoch 3/5
Train Loss: 0.4847 | Train Accuracy: 0.8299
Validation Loss: 0.6212 | Validation Accuracy: 0.8293
*****************************************************
Epoch 4/5
Train Loss: 0.4802 | Train Accuracy: 0.8252
Validation Loss: 0.5245 | Validation Accuracy: 0.8293
*****************************************************
Epoch 5/5
Train Loss: 0.4935 | Train Accuracy: 0.8327
Validation Loss: 0.5728 | Validation Accuracy: 0.8293
*****************************************************
--------------------------------------------------------------------
Training with learning rate: 0.0001, batch size: 16, hidden size: 64


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5
Train Loss: 0.4675 | Train Accuracy: 0.8083
Validation Loss: 0.2892 | Validation Accuracy: 0.9540
*****************************************************
Epoch 2/5
Train Loss: 0.2824 | Train Accuracy: 0.9605
Validation Loss: 0.1932 | Validation Accuracy: 0.9912
*****************************************************
Epoch 3/5
Train Loss: 0.1810 | Train Accuracy: 0.9887
Validation Loss: 0.0987 | Validation Accuracy: 0.9956
*****************************************************
Epoch 4/5
Train Loss: 0.1664 | Train Accuracy: 0.9821
Validation Loss: 0.0982 | Validation Accuracy: 0.9847
*****************************************************
Epoch 5/5
Train Loss: 0.1599 | Train Accuracy: 0.9784
Validation Loss: 0.0830 | Validation Accuracy: 0.9891
*****************************************************
Training with learning rate: 0.0001, batch size: 16, hidden size: 128


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5
Train Loss: 0.4604 | Train Accuracy: 0.7820
Validation Loss: 0.0895 | Validation Accuracy: 0.9847
*****************************************************
Epoch 2/5
Train Loss: 0.1641 | Train Accuracy: 0.9737
Validation Loss: 0.2462 | Validation Accuracy: 0.9694
*****************************************************
Epoch 3/5
Train Loss: 0.1586 | Train Accuracy: 0.9671
Validation Loss: 0.0786 | Validation Accuracy: 0.9803
*****************************************************
Epoch 4/5
Train Loss: 0.2693 | Train Accuracy: 0.9173
Validation Loss: 0.1701 | Validation Accuracy: 0.9781
*****************************************************
Epoch 5/5
Train Loss: 0.0706 | Train Accuracy: 0.9953
Validation Loss: 0.1138 | Validation Accuracy: 0.9869
*****************************************************
Training with learning rate: 0.0001, batch size: 16, hidden size: 256


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5
Train Loss: 0.5758 | Train Accuracy: 0.7049
Validation Loss: 0.4647 | Validation Accuracy: 0.8337
*****************************************************
Epoch 2/5
Train Loss: 0.4979 | Train Accuracy: 0.8045
Validation Loss: 0.6317 | Validation Accuracy: 0.8337
*****************************************************
Epoch 3/5
Train Loss: 0.4866 | Train Accuracy: 0.8224
Validation Loss: 0.6247 | Validation Accuracy: 0.8337
*****************************************************
Epoch 4/5
Train Loss: 0.4932 | Train Accuracy: 0.8299
Validation Loss: 0.5740 | Validation Accuracy: 0.8337
*****************************************************
Epoch 5/5
Train Loss: 0.4819 | Train Accuracy: 0.8261
Validation Loss: 0.6347 | Validation Accuracy: 0.8337
*****************************************************
--------------------------------------------------------------------
Training with learning rate: 0.0001, batch size: 32, hidden size: 64


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5
Train Loss: 0.5399 | Train Accuracy: 0.7547
Validation Loss: 0.5645 | Validation Accuracy: 0.8337
*****************************************************
Epoch 2/5
Train Loss: 0.3737 | Train Accuracy: 0.8835
Validation Loss: 0.1420 | Validation Accuracy: 0.9540
*****************************************************
Epoch 3/5
Train Loss: 0.1922 | Train Accuracy: 0.9765
Validation Loss: 0.1546 | Validation Accuracy: 0.9781
*****************************************************
Epoch 4/5
Train Loss: 0.1562 | Train Accuracy: 0.9878
Validation Loss: 0.1383 | Validation Accuracy: 0.9891
*****************************************************
Epoch 5/5
Train Loss: 0.1250 | Train Accuracy: 0.9944
Validation Loss: 0.0761 | Validation Accuracy: 0.9912
*****************************************************
Training with learning rate: 0.0001, batch size: 32, hidden size: 128


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5
Train Loss: 0.6873 | Train Accuracy: 0.5949
Validation Loss: 0.5336 | Validation Accuracy: 0.8315
*****************************************************
Epoch 2/5
Train Loss: 0.5735 | Train Accuracy: 0.7321
Validation Loss: 0.4004 | Validation Accuracy: 0.8403
*****************************************************
Epoch 3/5
Train Loss: 0.3459 | Train Accuracy: 0.8919
Validation Loss: 0.2266 | Validation Accuracy: 0.9606
*****************************************************
Epoch 4/5
Train Loss: 0.1314 | Train Accuracy: 0.9887
Validation Loss: 0.2358 | Validation Accuracy: 0.9628
*****************************************************
Epoch 5/5
Train Loss: 0.1404 | Train Accuracy: 0.9840
Validation Loss: 0.0735 | Validation Accuracy: 0.9934
*****************************************************
Training with learning rate: 0.0001, batch size: 32, hidden size: 256


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5
Train Loss: 0.6207 | Train Accuracy: 0.6297
Validation Loss: 0.3421 | Validation Accuracy: 0.9190
*****************************************************
Epoch 2/5
Train Loss: 0.1864 | Train Accuracy: 0.9737
Validation Loss: 0.0789 | Validation Accuracy: 0.9891
*****************************************************
Epoch 3/5
Train Loss: 0.1500 | Train Accuracy: 0.9756
Validation Loss: 0.0728 | Validation Accuracy: 0.9912
*****************************************************
Epoch 4/5
Train Loss: 0.1225 | Train Accuracy: 0.9812
Validation Loss: 0.0701 | Validation Accuracy: 0.9869
*****************************************************
Epoch 5/5
Train Loss: 0.0591 | Train Accuracy: 0.9972
Validation Loss: 0.0561 | Validation Accuracy: 0.9912
*****************************************************
--------------------------------------------------------------------
Training with learning rate: 0.0002, batch size: 8, hidden size: 64


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5
Train Loss: 0.5966 | Train Accuracy: 0.7021
Validation Loss: 0.5484 | Validation Accuracy: 0.8249
*****************************************************
Epoch 2/5
Train Loss: 0.5063 | Train Accuracy: 0.8139
Validation Loss: 0.5974 | Validation Accuracy: 0.8249
*****************************************************
Epoch 3/5
Train Loss: 0.5036 | Train Accuracy: 0.8280
Validation Loss: 0.5805 | Validation Accuracy: 0.8249
*****************************************************
Epoch 4/5
Train Loss: 0.4681 | Train Accuracy: 0.8318
Validation Loss: 0.5043 | Validation Accuracy: 0.8249
*****************************************************
Epoch 5/5
Train Loss: 0.4712 | Train Accuracy: 0.8365
Validation Loss: 0.5339 | Validation Accuracy: 0.8249
*****************************************************
Training with learning rate: 0.0002, batch size: 8, hidden size: 128


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5
Train Loss: 0.5954 | Train Accuracy: 0.6842
Validation Loss: 0.5147 | Validation Accuracy: 0.8118
*****************************************************
Epoch 2/5
Train Loss: 0.4775 | Train Accuracy: 0.8271
Validation Loss: 0.6046 | Validation Accuracy: 0.8118
*****************************************************
Epoch 3/5
Train Loss: 0.4706 | Train Accuracy: 0.8365
Validation Loss: 0.5591 | Validation Accuracy: 0.8118
*****************************************************
Epoch 4/5
Train Loss: 0.4688 | Train Accuracy: 0.8402
Validation Loss: 0.5618 | Validation Accuracy: 0.8118
*****************************************************
Epoch 5/5
Train Loss: 0.4702 | Train Accuracy: 0.8412
Validation Loss: 0.5345 | Validation Accuracy: 0.8118
*****************************************************
Training with learning rate: 0.0002, batch size: 8, hidden size: 256


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5
Train Loss: 0.5281 | Train Accuracy: 0.7820
Validation Loss: 0.5462 | Validation Accuracy: 0.8315
*****************************************************
Epoch 2/5
Train Loss: 0.4882 | Train Accuracy: 0.8336
Validation Loss: 0.4517 | Validation Accuracy: 0.8315
*****************************************************
Epoch 3/5
Train Loss: 0.5053 | Train Accuracy: 0.8261
Validation Loss: 0.5274 | Validation Accuracy: 0.8315
*****************************************************
Epoch 4/5
Train Loss: 0.4866 | Train Accuracy: 0.8289
Validation Loss: 0.5321 | Validation Accuracy: 0.8315
*****************************************************
Epoch 5/5
Train Loss: 0.4924 | Train Accuracy: 0.8299
Validation Loss: 0.5633 | Validation Accuracy: 0.8315
*****************************************************
--------------------------------------------------------------------
Training with learning rate: 0.0002, batch size: 16, hidden size: 64


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5
Train Loss: 0.7191 | Train Accuracy: 0.5395
Validation Loss: 0.6257 | Validation Accuracy: 0.8293
*****************************************************
Epoch 2/5
Train Loss: 0.5884 | Train Accuracy: 0.7039
Validation Loss: 0.6180 | Validation Accuracy: 0.8293
*****************************************************
Epoch 3/5
Train Loss: 0.5395 | Train Accuracy: 0.7829
Validation Loss: 0.6188 | Validation Accuracy: 0.8293
*****************************************************
Epoch 4/5
Train Loss: 0.5171 | Train Accuracy: 0.8130
Validation Loss: 0.5339 | Validation Accuracy: 0.8293
*****************************************************
Epoch 5/5
Train Loss: 0.4937 | Train Accuracy: 0.8271
Validation Loss: 0.5788 | Validation Accuracy: 0.8293
*****************************************************
Training with learning rate: 0.0002, batch size: 16, hidden size: 128


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5
Train Loss: 0.6246 | Train Accuracy: 0.6570
Validation Loss: 0.5447 | Validation Accuracy: 0.7484
*****************************************************
Epoch 2/5
Train Loss: 0.4220 | Train Accuracy: 0.8496
Validation Loss: 0.7973 | Validation Accuracy: 0.4967
*****************************************************
Epoch 3/5
Train Loss: 0.4805 | Train Accuracy: 0.8167
Validation Loss: 0.5115 | Validation Accuracy: 0.8293
*****************************************************
Epoch 4/5
Train Loss: 0.4933 | Train Accuracy: 0.8214
Validation Loss: 0.4492 | Validation Accuracy: 0.8293
*****************************************************
Epoch 5/5
Train Loss: 0.4827 | Train Accuracy: 0.8289
Validation Loss: 0.5004 | Validation Accuracy: 0.8293
*****************************************************
Training with learning rate: 0.0002, batch size: 16, hidden size: 256


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5
Train Loss: 0.5142 | Train Accuracy: 0.7434
Validation Loss: 0.2290 | Validation Accuracy: 0.9475
*****************************************************
Epoch 2/5
Train Loss: 0.2330 | Train Accuracy: 0.9267
Validation Loss: 0.0338 | Validation Accuracy: 0.9891
*****************************************************
Epoch 3/5
Train Loss: 0.2217 | Train Accuracy: 0.9211
Validation Loss: 0.1822 | Validation Accuracy: 0.9125
*****************************************************
Epoch 4/5
Train Loss: 0.1823 | Train Accuracy: 0.9483
Validation Loss: 0.4520 | Validation Accuracy: 0.8403
*****************************************************
Epoch 5/5
Train Loss: 0.5177 | Train Accuracy: 0.8318
Validation Loss: 0.4436 | Validation Accuracy: 0.8403
*****************************************************
--------------------------------------------------------------------
Training with learning rate: 0.0002, batch size: 32, hidden size: 64


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5
Train Loss: 0.7189 | Train Accuracy: 0.5498
Validation Loss: 3.1446 | Validation Accuracy: 0.1707
*****************************************************
Epoch 2/5
Train Loss: 0.4606 | Train Accuracy: 0.8308
Validation Loss: 0.2298 | Validation Accuracy: 0.9431
*****************************************************
Epoch 3/5
Train Loss: 0.2941 | Train Accuracy: 0.9549
Validation Loss: 0.3779 | Validation Accuracy: 0.9781
*****************************************************
Epoch 4/5
Train Loss: 0.2294 | Train Accuracy: 0.9859
Validation Loss: 0.2117 | Validation Accuracy: 0.9759
*****************************************************
Epoch 5/5
Train Loss: 0.2005 | Train Accuracy: 0.9784
Validation Loss: 0.2605 | Validation Accuracy: 0.9606
*****************************************************
Training with learning rate: 0.0002, batch size: 32, hidden size: 128


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5
Train Loss: 0.4458 | Train Accuracy: 0.8214
Validation Loss: 0.1901 | Validation Accuracy: 0.9584
*****************************************************
Epoch 2/5
Train Loss: 0.2272 | Train Accuracy: 0.9511
Validation Loss: 0.2128 | Validation Accuracy: 0.9497
*****************************************************
Epoch 3/5
Train Loss: 0.2161 | Train Accuracy: 0.9455
Validation Loss: 0.2291 | Validation Accuracy: 0.9344
*****************************************************
Epoch 4/5
Train Loss: 0.1889 | Train Accuracy: 0.9521
Validation Loss: 0.1172 | Validation Accuracy: 0.9716
*****************************************************
Epoch 5/5
Train Loss: 0.1411 | Train Accuracy: 0.9690
Validation Loss: 0.0514 | Validation Accuracy: 0.9847
*****************************************************
Training with learning rate: 0.0002, batch size: 32, hidden size: 256


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5
Train Loss: 0.4852 | Train Accuracy: 0.7566
Validation Loss: 0.5710 | Validation Accuracy: 0.8228
*****************************************************
Epoch 2/5
Train Loss: 0.1697 | Train Accuracy: 0.9652
Validation Loss: 0.0428 | Validation Accuracy: 0.9934
*****************************************************
Epoch 3/5
Train Loss: 0.3300 | Train Accuracy: 0.8872
Validation Loss: 0.7724 | Validation Accuracy: 0.8118
*****************************************************
Epoch 4/5
Train Loss: 0.4773 | Train Accuracy: 0.8374
Validation Loss: 0.6564 | Validation Accuracy: 0.8118
*****************************************************
Epoch 5/5
Train Loss: 0.4697 | Train Accuracy: 0.8318
Validation Loss: 0.6653 | Validation Accuracy: 0.8118
*****************************************************
--------------------------------------------------------------------
Training with learning rate: 0.0003, batch size: 8, hidden size: 64


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5
Train Loss: 0.5841 | Train Accuracy: 0.7171
Validation Loss: 0.6485 | Validation Accuracy: 0.7987
*****************************************************
Epoch 2/5
Train Loss: 0.4883 | Train Accuracy: 0.8280
Validation Loss: 0.6074 | Validation Accuracy: 0.7987
*****************************************************
Epoch 3/5
Train Loss: 0.4731 | Train Accuracy: 0.8412
Validation Loss: 0.4981 | Validation Accuracy: 0.7987
*****************************************************
Epoch 4/5
Train Loss: 0.4578 | Train Accuracy: 0.8440
Validation Loss: 0.5904 | Validation Accuracy: 0.7987
*****************************************************
Epoch 5/5
Train Loss: 0.4670 | Train Accuracy: 0.8430
Validation Loss: 0.5573 | Validation Accuracy: 0.7987
*****************************************************
Training with learning rate: 0.0003, batch size: 8, hidden size: 128


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5
Train Loss: 0.5696 | Train Accuracy: 0.7368
Validation Loss: 0.5980 | Validation Accuracy: 0.8359
*****************************************************
Epoch 2/5
Train Loss: 0.4935 | Train Accuracy: 0.8233
Validation Loss: 0.5791 | Validation Accuracy: 0.8359
*****************************************************
Epoch 3/5
Train Loss: 0.4951 | Train Accuracy: 0.8261
Validation Loss: 0.6025 | Validation Accuracy: 0.8359
*****************************************************
Epoch 4/5
Train Loss: 0.4977 | Train Accuracy: 0.8261
Validation Loss: 0.5531 | Validation Accuracy: 0.8359
*****************************************************
Epoch 5/5
Train Loss: 0.4836 | Train Accuracy: 0.8308
Validation Loss: 0.5049 | Validation Accuracy: 0.8359
*****************************************************
Training with learning rate: 0.0003, batch size: 8, hidden size: 256


'(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: 1302540d-33e6-49da-ad4a-7c9c864888b0)')' thrown while requesting HEAD https://huggingface.co/bert-base-uncased/resolve/main/config.json
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5
Train Loss: 0.5618 | Train Accuracy: 0.7538
Validation Loss: 0.4715 | Validation Accuracy: 0.8293
*****************************************************
Epoch 2/5
Train Loss: 0.4790 | Train Accuracy: 0.8289
Validation Loss: 0.5734 | Validation Accuracy: 0.8293
*****************************************************
Epoch 3/5
Train Loss: 0.4834 | Train Accuracy: 0.8299
Validation Loss: 0.4934 | Validation Accuracy: 0.8293
*****************************************************
Epoch 4/5
Train Loss: 0.4821 | Train Accuracy: 0.8299
Validation Loss: 0.5574 | Validation Accuracy: 0.8293
*****************************************************
Epoch 5/5
Train Loss: 0.4731 | Train Accuracy: 0.8327
Validation Loss: 0.4536 | Validation Accuracy: 0.8293
*****************************************************
--------------------------------------------------------------------
Training with learning rate: 0.0003, batch size: 16, hidden size: 64


'(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: 92c5ca7d-a362-4303-929c-6e97c8723596)')' thrown while requesting HEAD https://huggingface.co/bert-base-uncased/resolve/main/config.json
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5
Train Loss: 0.6614 | Train Accuracy: 0.6165
Validation Loss: 0.4521 | Validation Accuracy: 0.8534
*****************************************************
Epoch 2/5
Train Loss: 0.5606 | Train Accuracy: 0.7707
Validation Loss: 0.5604 | Validation Accuracy: 0.8534
*****************************************************
Epoch 3/5
Train Loss: 0.4975 | Train Accuracy: 0.8111
Validation Loss: 0.4529 | Validation Accuracy: 0.8534
*****************************************************
Epoch 4/5
Train Loss: 0.5191 | Train Accuracy: 0.7998
Validation Loss: 0.6819 | Validation Accuracy: 0.8534
*****************************************************
Epoch 5/5
Train Loss: 0.5020 | Train Accuracy: 0.8130
Validation Loss: 0.5449 | Validation Accuracy: 0.8534
*****************************************************
Training with learning rate: 0.0003, batch size: 16, hidden size: 128


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5
Train Loss: 0.5760 | Train Accuracy: 0.7274
Validation Loss: 0.4746 | Validation Accuracy: 0.8359
*****************************************************
Epoch 2/5
Train Loss: 0.4887 | Train Accuracy: 0.8177
Validation Loss: 0.4510 | Validation Accuracy: 0.8359
*****************************************************
Epoch 3/5
Train Loss: 0.4798 | Train Accuracy: 0.8271
Validation Loss: 0.5738 | Validation Accuracy: 0.8359
*****************************************************
Epoch 4/5
Train Loss: 0.4836 | Train Accuracy: 0.8289
Validation Loss: 0.5623 | Validation Accuracy: 0.8359
*****************************************************
Epoch 5/5
Train Loss: 0.4956 | Train Accuracy: 0.8261
Validation Loss: 0.5144 | Validation Accuracy: 0.8359
*****************************************************
Training with learning rate: 0.0003, batch size: 16, hidden size: 256


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5
Train Loss: 0.5166 | Train Accuracy: 0.7697
Validation Loss: 1.0648 | Validation Accuracy: 0.8162
*****************************************************
Epoch 2/5
Train Loss: 0.4655 | Train Accuracy: 0.8374
Validation Loss: 0.5349 | Validation Accuracy: 0.8162
*****************************************************
Epoch 3/5
Train Loss: 0.4699 | Train Accuracy: 0.8383
Validation Loss: 0.5832 | Validation Accuracy: 0.8162
*****************************************************
Epoch 4/5
Train Loss: 0.4723 | Train Accuracy: 0.8393
Validation Loss: 0.4731 | Validation Accuracy: 0.8162
*****************************************************
Epoch 5/5
Train Loss: 0.4713 | Train Accuracy: 0.8355
Validation Loss: 0.5403 | Validation Accuracy: 0.8162
*****************************************************
--------------------------------------------------------------------
Training with learning rate: 0.0003, batch size: 32, hidden size: 64


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5
Train Loss: 0.6298 | Train Accuracy: 0.6466
Validation Loss: 0.6602 | Validation Accuracy: 0.8381
*****************************************************
Epoch 2/5
Train Loss: 0.5497 | Train Accuracy: 0.7641
Validation Loss: 0.4910 | Validation Accuracy: 0.8381
*****************************************************
Epoch 3/5
Train Loss: 0.5205 | Train Accuracy: 0.8148
Validation Loss: 0.4478 | Validation Accuracy: 0.8381
*****************************************************
Epoch 4/5
Train Loss: 0.5140 | Train Accuracy: 0.8205
Validation Loss: 0.4763 | Validation Accuracy: 0.8381
*****************************************************
Epoch 5/5
Train Loss: 0.5202 | Train Accuracy: 0.8233
Validation Loss: 0.4477 | Validation Accuracy: 0.8381
*****************************************************
Training with learning rate: 0.0003, batch size: 32, hidden size: 128


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5
Train Loss: 0.6067 | Train Accuracy: 0.6795
Validation Loss: 0.6225 | Validation Accuracy: 0.8534
*****************************************************
Epoch 2/5
Train Loss: 0.5288 | Train Accuracy: 0.7942
Validation Loss: 0.5285 | Validation Accuracy: 0.8534
*****************************************************
Epoch 3/5
Train Loss: 0.5080 | Train Accuracy: 0.8148
Validation Loss: 0.5395 | Validation Accuracy: 0.8534
*****************************************************
Epoch 4/5
Train Loss: 0.4982 | Train Accuracy: 0.8186
Validation Loss: 0.5016 | Validation Accuracy: 0.8534
*****************************************************
Epoch 5/5
Train Loss: 0.5025 | Train Accuracy: 0.8186
Validation Loss: 0.5654 | Validation Accuracy: 0.8534
*****************************************************
Training with learning rate: 0.0003, batch size: 32, hidden size: 256


SSLError: (MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /bert-base-uncased/resolve/main/config.json (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self-signed certificate in certificate chain (_ssl.c:1006)')))"), '(Request ID: ec4f54d8-e891-4a8a-b785-058762dd5bd9)')

In [36]:
# Determine the index of the best hyperparameter combination
best_idx = np.argmax(val_accuracies)

# Plotting the best combination
plt.figure(figsize=(12, 6))

# Plot training and validation accuracy
plt.subplot(1, 2, 1)
plt.plot(train_accuracies[best_idx], label='Train Accuracy', marker='o')
plt.plot(val_accuracies[best_idx], label='Validation Accuracy', marker='o')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Train and Validation Accuracy')
plt.legend()

# Plot training and validation loss
plt.subplot(1, 2, 2)
plt.plot(train_losses[best_idx], label='Train Loss', marker='o')
plt.plot(val_losses[best_idx], label='Validation Loss', marker='o')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Train and Validation Loss')
plt.legend()

# Save or display the plots
plt.tight_layout()
plt.show()


NameError: name 'val_accuracies' is not defined

In [28]:

# Specify the path to the saved model
best_model_path = 'best_model_lr_0.0001_batch_32_hidden_128.pt'

# Load the model
best_model = torch.load(best_model_path)

# Ensure the model is in evaluation mode
best_model.eval()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
best_model.to(device)


test_df = pd.read_csv('test_dataset.csv')

# Set the tokenizer and the model
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Prepare the test data
test_inputs = tokenizer.batch_encode_plus(
    test_df[['premise', 'hypothesis']].values.tolist(),
    max_length=max_length,
    padding='max_length',
    truncation=True,
    return_tensors='pt'
)

test_input_ids = test_inputs['input_ids'].to(device)
test_attention_mask = test_inputs['attention_mask'].to(device)
test_labels = torch.tensor(test_df['label'].tolist()).to(device)

# Evaluate the model on the test dataset
best_model.eval()
with torch.no_grad():
    outputs = best_model(test_input_ids, attention_mask=test_attention_mask)
    logits = outputs.logits

    predicted_labels = torch.argmax(logits, dim=1)
    predicted_labels = predicted_labels.cpu().numpy()
    test_labels = test_labels.cpu().numpy()

# Calculate TP, TN, FP, FN
TP = np.sum((test_labels == 1) & (predicted_labels == 1))
TN = np.sum((test_labels == 0) & (predicted_labels == 0))
FP = np.sum((test_labels == 0) & (predicted_labels == 1))
FN = np.sum((test_labels == 1) & (predicted_labels == 0))

# Print the confusion matrix
print("Confusion Matrix:")
print("          Predicted 0   Predicted 1")
print(f"Actual 0     {TN}            {FP}")
print(f"Actual 1     {FN}            {TP}")

TP=TP.item()
TN=TN.item()
FP=FP.item()
FN=FN.item()

precision = TP/(TP+FP)
recall = TP/(TP+FN)
accuracy = (TP+TN)/(TP+TN+FP+FN)
f1 = (2*precision*accuracy)/(precision+accuracy)

print("Classification report: ")
print('Precision: ', precision)
print('Recall: ', recall)
print('Test accuracy: ', accuracy)
print('f1 score: ', f1)


Confusion Matrix:
          Predicted 0   Predicted 1
Actual 0     317            0
Actual 1     2            61
Classification report: 
Precision:  1.0
Recall:  0.9682539682539683
Test accuracy:  0.9947368421052631
f1 score:  0.9973614775725593


In [35]:

device = torch.device('cpu')
best_model.to(device)

# Input premise and hypothesis (modify as needed)
premise = input("Enter the premise: ")
hypothesis = input("Enter the hypothesis: ")

# Tokenize and encode the input
inputs = tokenizer(premise, hypothesis, return_tensors="pt")

# Make the prediction
with torch.no_grad():
    logits = best_model(**inputs).logits

# Determine the predicted class
predicted_class = torch.argmax(logits, dim=1).item()

# Map the predicted class to human-readable labels
class_labels = ["Not Satisfying", "Satisfying"]
predicted_label = class_labels[predicted_class]

# Print the prediction
print(f"Prediction: {predicted_label}")

Enter the premise:  Vardan is doing hard work this semester
Enter the hypothesis:  This semester Vardan is working hard


Prediction: Satisfying
