## import librairies


In [1]:
!pip install transformers
!pip install scikit-learn
!pip install pandas


[0m

In [2]:
import pandas as pd
import time  
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertModel
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score


In [3]:
import pandas as pd
df_final = pd.read_csv('/workspace/data/final_diag10.csv')
#batching data into samples
df_final1 = df_final.sample(frac=0.3)
df_final2 = df_final.sample(frac=0.7)
df_final3 = df_final.sample(frac=1)
#checking size
print(df_final1.shape)
print(df_final2.shape)
print(df_final3.shape)


(10111, 3)
(23593, 3)
(33704, 3)


In [4]:
# Load 30% of the dataset
data1 = df_final1.copy()
# Load 70% of the dataset
data2 = df_final2.copy()
# Load full dataset
data3 = df_final3.copy()
#train/test split was 80/20%
# Split the dataset into training and testing sets
train_df1, test_df1 = train_test_split(data1, test_size=0.2, random_state=42)
train_df2, test_df2 = train_test_split(data2, test_size=0.2, random_state=42)
train_df3, test_df3 = train_test_split(data3, test_size=0.2, random_state=42)


In [5]:
from transformers import BertTokenizer

# Load the ClinicalBERT tokenizer
tokenizer = BertTokenizer.from_pretrained('emilyalsentzer/Bio_ClinicalBERT')

def tokenize_texts(texts):
    return tokenizer(texts, padding='max_length', truncation=True, max_length=512, return_tensors='pt')

train_encodings1 = tokenize_texts(train_df1['clean_text'].tolist())
test_encodings1 = tokenize_texts(test_df1['clean_text'].tolist())
train_encodings2 = tokenize_texts(train_df2['clean_text'].tolist())
test_encodings2 = tokenize_texts(test_df2['clean_text'].tolist())
train_encodings3 = tokenize_texts(train_df3['clean_text'].tolist())
test_encodings3 = tokenize_texts(test_df3['clean_text'].tolist())


## Working on first sample=30% dataset

In [6]:
from torch.utils.data import Dataset, DataLoader

class TextDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: val[idx] for key, val in self.encodings.items()}
        item['labels'] = self.labels[idx]
        return item

    def __len__(self):
        return len(self.labels)

train_dataset = TextDataset(train_encodings1, train_df1['labels'].tolist())
test_dataset = TextDataset(test_encodings1, test_df1['labels'].tolist())

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)


In [7]:
##Setting models definition
import torch.nn as nn
from transformers import BertModel

class SimpleRNN(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.RNN(embedding_dim, hidden_dim)
        self.fc = nn.Linear(hidden_dim, output_dim)
        
    def forward(self, x):
        embedded = self.embedding(x)
        output, _ = self.rnn(embedded)
        return self.fc(output[:, -1, :])

class LSTMModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim)
        self.fc = nn.Linear(hidden_dim, output_dim)
        
    def forward(self, x):
        embedded = self.embedding(x)
        output, _ = self.lstm(embedded)
        return self.fc(output[:, -1, :])

class BERTForClassification(nn.Module):
    def __init__(self, output_dim):
        super().__init__()
        self.bert = BertModel.from_pretrained('emilyalsentzer/Bio_ClinicalBERT')
        self.fc = nn.Linear(self.bert.config.hidden_size, output_dim)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids, attention_mask=attention_mask)
        return self.fc(outputs['pooler_output'])


In [8]:
#RNN Tuned MODEL FOR SAMPLE 1
import warnings
import torch
from collections import Counter
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.metrics import f1_score, classification_report
from sklearn.exceptions import UndefinedMetricWarning

warnings.filterwarnings("ignore", category=UndefinedMetricWarning)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class AdjustedRNNModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, dropout=0.4):
        super(AdjustedRNNModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.RNN(embedding_dim, hidden_dim, batch_first=True)  # Use nn.RNN instead of nn.LSTM
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        embedded = self.embedding(x)
        rnn_out, _ = self.rnn(embedded)  # Use rnn_out instead of lstm_out
        # Use the last hidden state for classification
        output = self.fc(self.dropout(rnn_out[:, -1, :]))
        return output

# Instantiate model
model = AdjustedRNNModel(vocab_size=tokenizer.vocab_size, embedding_dim=128, hidden_dim=256, output_dim=10, dropout=0.4).to(device)
optimizer = optim.AdamW(model.parameters(), lr=0.00002)


# Assuming `train_labels` is a list containing all the labels in your training dataset
train_labels = [label for batch in train_loader for label in batch['labels'].tolist()]

# 1. Compute class distribution
class_counts = Counter(train_labels)

# 2. Calculate the weights
max_count = max(class_counts.values())
class_weights = {class_id: max_count / count for class_id, count in class_counts.items()}
weights = [class_weights[class_id] for class_id in sorted(class_weights.keys())]

weights_tensor = torch.tensor(weights, dtype=torch.float32).to(device)

# 3. Use the weights in the loss function
criterion = nn.CrossEntropyLoss(weight=weights_tensor)

#criterion = nn.CrossEntropyLoss()  # use this if data is not imbalanced

# Add Learning Rate Scheduler
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=3, factor=0.5)

# Train the model
best_f1 = 0.0  # for early stopping based on F1 score

for epoch in range(10):
    # Measure the time at the start of the epoch
    epoch_start_time = time.time()
    model.train()
    training_start_time = time.time()  # Start measuring time
    for batch in train_loader:
        optimizer.zero_grad()
        inputs, labels = batch['input_ids'].to(device), batch['labels'].to(device)
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    training_end_time = time.time()  # End measuring time
    training_time = training_end_time - training_start_time  # Calculate elapsed time for training

    model.eval()
    testing_start_time = time.time()  # Start measuring time
    predictions, true_labels = [], []
    val_loss = 0  # to compute average validation loss for scheduler

    for batch in test_loader:
        with torch.no_grad():
            inputs, labels = batch['input_ids'].to(device), batch['labels'].to(device)
            outputs = model(inputs)
            val_loss += criterion(outputs, labels).item()
            predictions.extend(torch.argmax(outputs, dim=1).cpu().tolist())
            true_labels.extend(labels.cpu().tolist())
    testing_end_time = time.time()  # End measuring time
    testing_time = testing_end_time - testing_start_time  # Calculate elapsed time for testing

    val_f1 = f1_score(true_labels, predictions, average='weighted')
    scheduler.step(val_loss / len(test_loader))  # scheduler step based on avg val loss
    report1 = classification_report(true_labels, predictions)

    print(f"Epoch: {epoch}, Training Time: {training_time} seconds, Testing Time: {testing_time} seconds")
    
    #print(report)
    
    # Implementing early stopping based on F1 score
    if val_f1 > best_f1:
        best_f1 = val_f1
        patience_counter = 0
        #torch.save(model.state_dict(), 'best_model.pth')
    else:
        patience_counter += 1
        if patience_counter >= 3:
            print("Early stopping")
            print(report1)
            break
            
else:  # This block will be executed if the for loop completes normally, i.e., if early stopping does not occur.
    print("Training completed.")
    print(report1)  # Print the classification report after the last epoch




Epoch: 0, Training Time: 4.541696548461914 seconds, Testing Time: 0.5304920673370361 seconds
Epoch: 1, Training Time: 4.363293886184692 seconds, Testing Time: 0.5299208164215088 seconds
Epoch: 2, Training Time: 4.361943960189819 seconds, Testing Time: 0.5304784774780273 seconds
Epoch: 3, Training Time: 4.3600006103515625 seconds, Testing Time: 0.5308542251586914 seconds
Epoch: 4, Training Time: 4.360978841781616 seconds, Testing Time: 0.5304121971130371 seconds
Epoch: 5, Training Time: 4.3587048053741455 seconds, Testing Time: 0.5310385227203369 seconds
Epoch: 6, Training Time: 4.361249923706055 seconds, Testing Time: 0.5299441814422607 seconds
Epoch: 7, Training Time: 4.359501123428345 seconds, Testing Time: 0.5310101509094238 seconds
Epoch: 8, Training Time: 4.359088182449341 seconds, Testing Time: 0.5295190811157227 seconds
Epoch: 9, Training Time: 4.359435558319092 seconds, Testing Time: 0.530311107635498 seconds
Training completed.
              precision    recall  f1-score   sup

In [9]:
##LSTM tined MODEL FOR SAMPLE 1
import warnings
import torch
from collections import Counter
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.metrics import f1_score, classification_report
from sklearn.exceptions import UndefinedMetricWarning

warnings.filterwarnings("ignore", category=UndefinedMetricWarning)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


class AdjustedLSTMModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, dropout=0.2):
        super(AdjustedLSTMModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        embedded = self.embedding(x)
        lstm_out, _ = self.lstm(embedded)
        # Use the last hidden state for classification
        output = self.fc(self.dropout(lstm_out[:, -1, :]))
        return output

# Instantiate model
model = AdjustedLSTMModel(vocab_size=tokenizer.vocab_size, embedding_dim=128, hidden_dim=256, output_dim=10, dropout=0.2).to(device)
optimizer = optim.AdamW(model.parameters(),lr=0.001)

# If your dataset is imbalanced, compute class weights
# weights = # Compute based on class distribution
# criterion = nn.CrossEntropyLoss(weight=weights)


# Assuming `train_labels` is a list containing all the labels in your training dataset
train_labels = [label for batch in train_loader for label in batch['labels'].tolist()]

# 1. Compute class distribution
class_counts = Counter(train_labels)

# 2. Calculate the weights
max_count = max(class_counts.values())
class_weights = {class_id: max_count / count for class_id, count in class_counts.items()}
weights = [class_weights[class_id] for class_id in sorted(class_weights.keys())]

weights_tensor = torch.tensor(weights, dtype=torch.float32).to(device)

# 3. Use the weights in the loss function
criterion = nn.CrossEntropyLoss(weight=weights_tensor)

#criterion = nn.CrossEntropyLoss()  # use this if data is not imbalanced

# Add Learning Rate Scheduler
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=3, factor=0.5)

# Train the model
best_f1 = 0.0  # for early stopping based on F1 score

for epoch in range(10):
    epoch_start_time = time.time()
    model.train()
    training_start_time = time.time()  # Start measuring time
    for batch in train_loader:
        optimizer.zero_grad()
        inputs, labels = batch['input_ids'].to(device), batch['labels'].to(device)
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    training_end_time = time.time()  # End measuring time
    training_time = training_end_time - training_start_time  # Calculate elapsed time for training

    model.eval()
    testing_start_time = time.time()  # Start measuring time
    predictions, true_labels = [], []
    val_loss = 0  # to compute average validation loss for scheduler

    for batch in test_loader:
        with torch.no_grad():
            inputs, labels = batch['input_ids'].to(device), batch['labels'].to(device)
            outputs = model(inputs)
            val_loss += criterion(outputs, labels).item()
            predictions.extend(torch.argmax(outputs, dim=1).cpu().tolist())
            true_labels.extend(labels.cpu().tolist())
    testing_end_time = time.time()  # End measuring time
    testing_time = testing_end_time - testing_start_time  # Calculate elapsed time for testing

    val_f1 = f1_score(true_labels, predictions, average='weighted')
    scheduler.step(val_loss / len(test_loader))  # scheduler step based on avg val loss
    report2 = classification_report(true_labels, predictions)

    print(f"Epoch: {epoch}, Training Time: {training_time} seconds, Testing Time: {testing_time} seconds")
    # Implementing early stopping based on F1 score
    if val_f1 > best_f1:
        best_f1 = val_f1
        patience_counter = 0
        #torch.save(model.state_dict(), 'best_model.pth')
    else:
        patience_counter += 1
        if patience_counter >= 3:
            print("Early stopping")
            print(report2)
            
            break  # Exit the loop if patience_counter exceeds the limit
else:  # This block will be executed if the for loop completes normally, i.e., if early stopping does not occur.
    print("Training completed.")
    print(report2)  # Print the classification report after the last epoch




Epoch: 0, Training Time: 13.198179006576538 seconds, Testing Time: 1.5351340770721436 seconds
Epoch: 1, Training Time: 13.199532508850098 seconds, Testing Time: 1.5354061126708984 seconds
Epoch: 2, Training Time: 13.208195447921753 seconds, Testing Time: 1.535597324371338 seconds
Epoch: 3, Training Time: 13.208856344223022 seconds, Testing Time: 1.5362322330474854 seconds
Epoch: 4, Training Time: 13.207075595855713 seconds, Testing Time: 1.5366137027740479 seconds
Epoch: 5, Training Time: 13.207062482833862 seconds, Testing Time: 1.53670334815979 seconds
Epoch: 6, Training Time: 13.211395263671875 seconds, Testing Time: 1.5366365909576416 seconds
Epoch: 7, Training Time: 13.210636615753174 seconds, Testing Time: 1.5361075401306152 seconds
Early stopping
              precision    recall  f1-score   support

           0       0.42      0.14      0.21       596
           1       0.44      0.32      0.37       207
           2       0.59      0.39      0.47       221
           3       

In [10]:
##BiLSTM tuned MODEL FOR SAMPLE 1
import warnings
import torch
from collections import Counter
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.metrics import f1_score, classification_report
from sklearn.exceptions import UndefinedMetricWarning

warnings.filterwarnings("ignore", category=UndefinedMetricWarning)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
import torch
from collections import Counter
from torch.optim.lr_scheduler import ReduceLROnPlateau

class AdjustedBiLSTMModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, dropout=0.2):
        super(AdjustedBiLSTMModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        
        # Add this line to store the hidden_dim as an instance variable
        self.hidden_dim = hidden_dim
        
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True, bidirectional=True)
        self.fc = nn.Linear(2*hidden_dim, output_dim)  # Multiply by 2 because it's bidirectional
        self.dropout = nn.Dropout(dropout)



    def forward(self, x):
        embedded = self.embedding(x)
        lstm_out, _ = self.lstm(embedded)
        # Use the last hidden state for classification. We concatenate the last hidden state from both directions
        output = self.fc(self.dropout(torch.cat((lstm_out[:, -1, :self.hidden_dim], lstm_out[:, 0, self.hidden_dim:]), dim=1)))
        return output



# Instantiate model
model = AdjustedBiLSTMModel(vocab_size=tokenizer.vocab_size, embedding_dim=128, hidden_dim=256, output_dim=10, dropout=0.2).to(device)

#model = AdjustedLSTMModel(vocab_size=tokenizer.vocab_size, embedding_dim=128, hidden_dim=256, output_dim=10, dropout=0.2).to(device)
optimizer = optim.AdamW(model.parameters(),lr=0.001)

# If your dataset is imbalanced, compute class weights
# weights = # Compute based on class distribution
# criterion = nn.CrossEntropyLoss(weight=weights)


# Assuming `train_labels` is a list containing all the labels in your training dataset
train_labels = [label for batch in train_loader for label in batch['labels'].tolist()]

# 1. Compute class distribution
class_counts = Counter(train_labels)

# 2. Calculate the weights
max_count = max(class_counts.values())
class_weights = {class_id: max_count / count for class_id, count in class_counts.items()}
weights = [class_weights[class_id] for class_id in sorted(class_weights.keys())]

weights_tensor = torch.tensor(weights, dtype=torch.float32).to(device)

# 3. Use the weights in the loss function
criterion = nn.CrossEntropyLoss(weight=weights_tensor)

#criterion = nn.CrossEntropyLoss()  # use this if data is not imbalanced

# Add Learning Rate Scheduler
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=3, factor=0.5)

# Train the model
best_f1 = 0.0  # for early stopping based on F1 score

for epoch in range(10):

    epoch_start_time = time.time()
    model.train()
    training_start_time = time.time()  # Start measuring time
    for batch in train_loader:
        optimizer.zero_grad()
        inputs, labels = batch['input_ids'].to(device), batch['labels'].to(device)
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    training_end_time = time.time()  # End measuring time
    training_time = training_end_time - training_start_time  # Calculate elapsed time for training

    model.eval()
    testing_start_time = time.time()  # Start measuring time
    predictions, true_labels = [], []
    val_loss = 0  # to compute average validation loss for scheduler

    for batch in test_loader:
        with torch.no_grad():
            inputs, labels = batch['input_ids'].to(device), batch['labels'].to(device)
            outputs = model(inputs)
            val_loss += criterion(outputs, labels).item()
            predictions.extend(torch.argmax(outputs, dim=1).cpu().tolist())
            true_labels.extend(labels.cpu().tolist())
    testing_end_time = time.time()  # End measuring time
    testing_time = testing_end_time - testing_start_time  # Calculate elapsed time for testing

    val_f1 = f1_score(true_labels, predictions, average='weighted')
    scheduler.step(val_loss / len(test_loader))  # scheduler step based on avg val loss
    report3 = classification_report(true_labels, predictions)
    #print(f"Epoch: {epoch}, F1 Score: {val_f1}")
            # Measure the time at the end of the epoch and calculate the total epoch time
    print(f"Epoch: {epoch}, Training Time: {training_time} seconds, Testing Time: {testing_time} seconds")
    
    # Implementing early stopping based on F1 score
    if val_f1 > best_f1:
        best_f1 = val_f1
        patience_counter = 0
        #torch.save(model.state_dict(), 'best_model.pth')
    else:
        patience_counter += 1
        if patience_counter >= 3:
            print("Early stopping")
            print(report3)
            break
            
else:  # This block will be executed if the for loop completes normally, i.e., if early stopping does not occur.
    print("Training completed.")
    print(report3)  # Print the classification report after the last epoch



Epoch: 0, Training Time: 14.084446668624878 seconds, Testing Time: 1.534104585647583 seconds
Epoch: 1, Training Time: 14.099526643753052 seconds, Testing Time: 1.5337960720062256 seconds
Epoch: 2, Training Time: 14.101390361785889 seconds, Testing Time: 1.5349996089935303 seconds
Epoch: 3, Training Time: 14.145069599151611 seconds, Testing Time: 1.5351905822753906 seconds
Epoch: 4, Training Time: 14.132375955581665 seconds, Testing Time: 1.5357341766357422 seconds
Epoch: 5, Training Time: 14.127297401428223 seconds, Testing Time: 1.5351934432983398 seconds
Epoch: 6, Training Time: 14.126686096191406 seconds, Testing Time: 1.534231424331665 seconds
Epoch: 7, Training Time: 14.122629642486572 seconds, Testing Time: 1.535069465637207 seconds
Epoch: 8, Training Time: 14.112920761108398 seconds, Testing Time: 1.5343022346496582 seconds
Epoch: 9, Training Time: 14.121893644332886 seconds, Testing Time: 1.5345916748046875 seconds
Training completed.
              precision    recall  f1-score

In [11]:
##BERT MODEL FOR SAMPLE 1
import torch.optim as optim
import warnings
import torch
from collections import Counter
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.metrics import f1_score, classification_report
from sklearn.exceptions import UndefinedMetricWarning

warnings.filterwarnings("ignore", category=UndefinedMetricWarning)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = BERTForClassification(10).to(device)  # replace NUM_CLASSES with the number of unique labels in your dataset
optimizer = optim.AdamW(model.parameters(), lr=2e-5)

# Early stopping parameters
patience = 3
best_valid_loss = float('inf')
counter = 0

for epoch in range(10):
    epoch_start_time = time.time()
    model.train()
    training_start_time = time.time()  # Start measuring time
    for batch in train_loader:
        optimizer.zero_grad()
        criterion = nn.CrossEntropyLoss()
        inputs, attention_mask, labels = batch['input_ids'].to(device), batch['attention_mask'].to(device), batch['labels'].to(device)
        outputs = model(inputs, attention_mask)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    training_end_time = time.time()  # End measuring time
    training_time = training_end_time - training_start_time  # Calculate elapsed time for training

    # Validation phase
    model.eval()
    testing_start_time = time.time()  # Start measuring time
    predictions, true_labels = [], []
    for batch in test_loader:
        with torch.no_grad():
            inputs, attention_mask, labels = batch['input_ids'].to(device), batch['attention_mask'].to(device), batch['labels'].to(device)
            outputs = model(inputs, attention_mask)
            predictions.extend(torch.argmax(outputs, dim=1).cpu().tolist())
            true_labels.extend(labels.cpu().tolist())
    testing_end_time = time.time()  # End measuring time
    testing_time = testing_end_time - testing_start_time  # Calculate elapsed time for testing

    val_f1 = f1_score(true_labels, predictions, average='weighted')
    report4 = classification_report(true_labels, predictions)
    #print(f"Epoch: {epoch}, F1 Score for sample1: {val_f1}")
    # Measure the time at the end of the epoch and calculate the total epoch time
    print(f"Epoch: {epoch}, Training Time: {training_time} seconds, Testing Time: {testing_time} seconds")
    # Early stopping logic
    if val_f1 < best_valid_loss:
        best_valid_loss = val_f1
        counter = 0
        #torch.save(model.state_dict(), 'best_model_bert2.pkl')  # Save the model
    else:
        counter += 1
        if counter >= patience:
            print("Early stopping triggered.")
            print(report4)
            break
            
else:  # This block will be executed if the for loop completes normally, i.e., if early stopping does not occur.
    print("Training completed.")
    print(report4)  # Print the classification report after the last epoch



Epoch: 0, Training Time: 945.988831281662 seconds, Testing Time: 88.88681030273438 seconds
Epoch: 1, Training Time: 1067.7041203975677 seconds, Testing Time: 104.17880487442017 seconds
Epoch: 2, Training Time: 1156.3965866565704 seconds, Testing Time: 116.53806471824646 seconds
Epoch: 3, Training Time: 1264.6754972934723 seconds, Testing Time: 186.50725984573364 seconds
Early stopping triggered.
              precision    recall  f1-score   support

           0       0.85      0.81      0.83       596
           1       0.80      0.68      0.74       207
           2       0.78      0.92      0.84       221
           3       0.92      0.82      0.86       109
           4       0.85      0.96      0.90       106
           5       0.66      0.55      0.60       163
           6       0.45      0.52      0.48       126
           7       0.72      0.76      0.74       359
           8       0.83      0.85      0.84       117
           9       1.00      1.00      1.00        19

    a

## Working on first sample=70% dataset

In [12]:
train_dataset = TextDataset(train_encodings2, train_df2['labels'].tolist())
test_dataset = TextDataset(test_encodings2, test_df2['labels'].tolist())

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

In [13]:
#RNN Tuned MODEL FOR SAMPLE 2
import warnings
import torch
from collections import Counter
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.metrics import f1_score, classification_report
from sklearn.exceptions import UndefinedMetricWarning

warnings.filterwarnings("ignore", category=UndefinedMetricWarning)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class AdjustedRNNModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, dropout=0.4):
        super(AdjustedRNNModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.RNN(embedding_dim, hidden_dim, batch_first=True)  # Use nn.RNN instead of nn.LSTM
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        embedded = self.embedding(x)
        rnn_out, _ = self.rnn(embedded)  # Use rnn_out instead of lstm_out
        # Use the last hidden state for classification
        output = self.fc(self.dropout(rnn_out[:, -1, :]))
        return output

# Instantiate model
model = AdjustedRNNModel(vocab_size=tokenizer.vocab_size, embedding_dim=128, hidden_dim=256, output_dim=10, dropout=0.4).to(device)
optimizer = optim.AdamW(model.parameters(), lr=0.00002)


# Assuming `train_labels` is a list containing all the labels in your training dataset
train_labels = [label for batch in train_loader for label in batch['labels'].tolist()]

# 1. Compute class distribution
class_counts = Counter(train_labels)

# 2. Calculate the weights
max_count = max(class_counts.values())
class_weights = {class_id: max_count / count for class_id, count in class_counts.items()}
weights = [class_weights[class_id] for class_id in sorted(class_weights.keys())]

weights_tensor = torch.tensor(weights, dtype=torch.float32).to(device)

# 3. Use the weights in the loss function
criterion = nn.CrossEntropyLoss(weight=weights_tensor)

#criterion = nn.CrossEntropyLoss()  # use this if data is not imbalanced

# Add Learning Rate Scheduler
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=3, factor=0.5)

# Train the model
best_f1 = 0.0  # for early stopping based on F1 score

for epoch in range(10):
    # Measure the time at the start of the epoch
    epoch_start_time = time.time()
    model.train()
    training_start_time = time.time()  # Start measuring time
    for batch in train_loader:
        optimizer.zero_grad()
        inputs, labels = batch['input_ids'].to(device), batch['labels'].to(device)
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    training_end_time = time.time()  # End measuring time
    training_time = training_end_time - training_start_time  # Calculate elapsed time for training

    model.eval()
    testing_start_time = time.time()  # Start measuring time
    predictions, true_labels = [], []
    val_loss = 0  # to compute average validation loss for scheduler

    for batch in test_loader:
        with torch.no_grad():
            inputs, labels = batch['input_ids'].to(device), batch['labels'].to(device)
            outputs = model(inputs)
            val_loss += criterion(outputs, labels).item()
            predictions.extend(torch.argmax(outputs, dim=1).cpu().tolist())
            true_labels.extend(labels.cpu().tolist())
    testing_end_time = time.time()  # End measuring time
    testing_time = testing_end_time - testing_start_time  # Calculate elapsed time for testing

    val_f1 = f1_score(true_labels, predictions, average='weighted')
    scheduler.step(val_loss / len(test_loader))  # scheduler step based on avg val loss
    report5 = classification_report(true_labels, predictions)
    #print(f"Epoch: {epoch}, F1 Score: {val_f1}")
        # Measure the time at the end of the epoch and calculate the total epoch time
    
    print(f"Epoch: {epoch}, Training Time: {training_time} seconds, Testing Time: {testing_time} seconds")
    
    # Implementing early stopping based on F1 score
    if val_f1 > best_f1:
        best_f1 = val_f1
        patience_counter = 0
        #torch.save(model.state_dict(), 'best_model.pth')
    else:
        patience_counter += 1
        if patience_counter >= 3:
            print("Early stopping")
            print(report5)
            break
            
else:  # This block will be executed if the for loop completes normally, i.e., if early stopping does not occur.
    print("Training completed.")
    print(report5)  # Print the classification report after the last epoch



Epoch: 0, Training Time: 57.0829803943634 seconds, Testing Time: 9.094545125961304 seconds
Epoch: 1, Training Time: 56.624675035476685 seconds, Testing Time: 9.064682722091675 seconds
Epoch: 2, Training Time: 56.65733861923218 seconds, Testing Time: 9.00195837020874 seconds
Epoch: 3, Training Time: 56.8730583190918 seconds, Testing Time: 9.017818450927734 seconds
Epoch: 4, Training Time: 56.736207485198975 seconds, Testing Time: 9.016145944595337 seconds
Epoch: 5, Training Time: 56.97301697731018 seconds, Testing Time: 9.023436784744263 seconds
Epoch: 6, Training Time: 57.03428077697754 seconds, Testing Time: 9.010648012161255 seconds
Epoch: 7, Training Time: 57.14561605453491 seconds, Testing Time: 9.0681471824646 seconds
Epoch: 8, Training Time: 57.098625898361206 seconds, Testing Time: 9.121687412261963 seconds
Epoch: 9, Training Time: 57.15428137779236 seconds, Testing Time: 9.118422746658325 seconds
Training completed.
              precision    recall  f1-score   support

       

In [14]:
##LSTM tined MODEL FOR SAMPLE 1
import warnings
import torch
from collections import Counter
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.metrics import f1_score, classification_report
from sklearn.exceptions import UndefinedMetricWarning

warnings.filterwarnings("ignore", category=UndefinedMetricWarning)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


class AdjustedLSTMModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, dropout=0.2):
        super(AdjustedLSTMModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        embedded = self.embedding(x)
        lstm_out, _ = self.lstm(embedded)
        # Use the last hidden state for classification
        output = self.fc(self.dropout(lstm_out[:, -1, :]))
        return output

# Instantiate model
model = AdjustedLSTMModel(vocab_size=tokenizer.vocab_size, embedding_dim=128, hidden_dim=256, output_dim=10, dropout=0.2).to(device)
optimizer = optim.AdamW(model.parameters(),lr=0.001)

# If your dataset is imbalanced, compute class weights
# weights = # Compute based on class distribution
# criterion = nn.CrossEntropyLoss(weight=weights)


# Assuming `train_labels` is a list containing all the labels in your training dataset
train_labels = [label for batch in train_loader for label in batch['labels'].tolist()]

# 1. Compute class distribution
class_counts = Counter(train_labels)

# 2. Calculate the weights
max_count = max(class_counts.values())
class_weights = {class_id: max_count / count for class_id, count in class_counts.items()}
weights = [class_weights[class_id] for class_id in sorted(class_weights.keys())]

weights_tensor = torch.tensor(weights, dtype=torch.float32).to(device)

# 3. Use the weights in the loss function
criterion = nn.CrossEntropyLoss(weight=weights_tensor)

#criterion = nn.CrossEntropyLoss()  # use this if data is not imbalanced

# Add Learning Rate Scheduler
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=3, factor=0.5)

# Train the model
best_f1 = 0.0  # for early stopping based on F1 score

for epoch in range(10):
    epoch_start_time = time.time()
    model.train()
    training_start_time = time.time()  # Start measuring time
    for batch in train_loader:
        optimizer.zero_grad()
        inputs, labels = batch['input_ids'].to(device), batch['labels'].to(device)
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    training_end_time = time.time()  # End measuring time
    training_time = training_end_time - training_start_time  # Calculate elapsed time for training

    model.eval()
    testing_start_time = time.time()  # Start measuring time
    predictions, true_labels = [], []
    val_loss = 0  # to compute average validation loss for scheduler

    for batch in test_loader:
        with torch.no_grad():
            inputs, labels = batch['input_ids'].to(device), batch['labels'].to(device)
            outputs = model(inputs)
            val_loss += criterion(outputs, labels).item()
            predictions.extend(torch.argmax(outputs, dim=1).cpu().tolist())
            true_labels.extend(labels.cpu().tolist())
    testing_end_time = time.time()  # End measuring time
    testing_time = testing_end_time - testing_start_time  # Calculate elapsed time for testing

    val_f1 = f1_score(true_labels, predictions, average='weighted')
    scheduler.step(val_loss / len(test_loader))  # scheduler step based on avg val loss
    report6 = classification_report(true_labels, predictions)
    #print(f"Epoch: {epoch}, F1 Score for sample2: {val_f1}")
    # Measure the time at the end of the epoch and calculate the total epoch time
    print(f"Epoch: {epoch}, Training Time: {training_time} seconds, Testing Time: {testing_time} seconds")
    
    # Implementing early stopping based on F1 score
    if val_f1 > best_f1:
        best_f1 = val_f1
        patience_counter = 0
        #torch.save(model.state_dict(), 'best_model.pth')
    else:
        patience_counter += 1
        if patience_counter >= 3:
            print("Early stopping")
            print(report6)
            break
            
else:  # This block will be executed if the for loop completes normally, i.e., if early stopping does not occur.
    print("Training completed.")
    print(report6)  # Print the classification report after the last epoch


Epoch: 0, Training Time: 141.80748558044434 seconds, Testing Time: 18.859255075454712 seconds
Epoch: 1, Training Time: 141.75663423538208 seconds, Testing Time: 18.7702374458313 seconds
Epoch: 2, Training Time: 140.7245569229126 seconds, Testing Time: 18.531009912490845 seconds
Epoch: 3, Training Time: 140.87819862365723 seconds, Testing Time: 18.686228275299072 seconds
Epoch: 4, Training Time: 141.34877490997314 seconds, Testing Time: 18.936192512512207 seconds
Epoch: 5, Training Time: 141.31698036193848 seconds, Testing Time: 18.768061876296997 seconds
Epoch: 6, Training Time: 141.2521812915802 seconds, Testing Time: 18.670800924301147 seconds
Epoch: 7, Training Time: 141.54669332504272 seconds, Testing Time: 18.845343112945557 seconds
Epoch: 8, Training Time: 142.21890544891357 seconds, Testing Time: 18.731409549713135 seconds
Epoch: 9, Training Time: 147.49264073371887 seconds, Testing Time: 20.34921884536743 seconds
Training completed.
              precision    recall  f1-score  

In [15]:
##BiLSTM tuned MODEL FOR SAMPLE 1
import warnings
import torch
from collections import Counter
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.metrics import f1_score, classification_report
from sklearn.exceptions import UndefinedMetricWarning

warnings.filterwarnings("ignore", category=UndefinedMetricWarning)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class AdjustedBiLSTMModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, dropout=0.2):
        super(AdjustedBiLSTMModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        
        # Add this line to store the hidden_dim as an instance variable
        self.hidden_dim = hidden_dim
        
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True, bidirectional=True)
        self.fc = nn.Linear(2*hidden_dim, output_dim)  # Multiply by 2 because it's bidirectional
        self.dropout = nn.Dropout(dropout)



    def forward(self, x):
        embedded = self.embedding(x)
        lstm_out, _ = self.lstm(embedded)
        # Use the last hidden state for classification. We concatenate the last hidden state from both directions
        output = self.fc(self.dropout(torch.cat((lstm_out[:, -1, :self.hidden_dim], lstm_out[:, 0, self.hidden_dim:]), dim=1)))
        return output



# Instantiate model
model = AdjustedBiLSTMModel(vocab_size=tokenizer.vocab_size, embedding_dim=128, hidden_dim=256, output_dim=10, dropout=0.2).to(device)

#model = AdjustedLSTMModel(vocab_size=tokenizer.vocab_size, embedding_dim=128, hidden_dim=256, output_dim=10, dropout=0.2).to(device)
optimizer = optim.AdamW(model.parameters(),lr=0.001)

# If your dataset is imbalanced, compute class weights
# weights = # Compute based on class distribution
# criterion = nn.CrossEntropyLoss(weight=weights)


# Assuming `train_labels` is a list containing all the labels in your training dataset
train_labels = [label for batch in train_loader for label in batch['labels'].tolist()]

# 1. Compute class distribution
class_counts = Counter(train_labels)

# 2. Calculate the weights
max_count = max(class_counts.values())
class_weights = {class_id: max_count / count for class_id, count in class_counts.items()}
weights = [class_weights[class_id] for class_id in sorted(class_weights.keys())]

weights_tensor = torch.tensor(weights, dtype=torch.float32).to(device)

# 3. Use the weights in the loss function
criterion = nn.CrossEntropyLoss(weight=weights_tensor)

#criterion = nn.CrossEntropyLoss()  # use this if data is not imbalanced

# Add Learning Rate Scheduler
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=3, factor=0.5)

# Train the model
best_f1 = 0.0  # for early stopping based on F1 score

for epoch in range(10):
    epoch_start_time = time.time()
    model.train()
    training_start_time = time.time()  # Start measuring time
    for batch in train_loader:
        optimizer.zero_grad()
        inputs, labels = batch['input_ids'].to(device), batch['labels'].to(device)
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    training_end_time = time.time()  # End measuring time
    training_time = training_end_time - training_start_time  # Calculate elapsed time for training

    model.eval()
    testing_start_time = time.time()  # Start measuring time
    predictions, true_labels = [], []
    val_loss = 0  # to compute average validation loss for scheduler

    for batch in test_loader:
        with torch.no_grad():
            inputs, labels = batch['input_ids'].to(device), batch['labels'].to(device)
            outputs = model(inputs)
            val_loss += criterion(outputs, labels).item()
            predictions.extend(torch.argmax(outputs, dim=1).cpu().tolist())
            true_labels.extend(labels.cpu().tolist())
    testing_end_time = time.time()  # End measuring time
    testing_time = testing_end_time - testing_start_time  # Calculate elapsed time for testing

    val_f1 = f1_score(true_labels, predictions, average='weighted')
    scheduler.step(val_loss / len(test_loader))  # scheduler step based on avg val loss
    report7 = classification_report(true_labels, predictions)
    #print(f"Epoch: {epoch}, F1 Score for sample2: {val_f1}")
    # Measure the time at the end of the epoch and calculate the total epoch time
    print(f"Epoch: {epoch}, Training Time: {training_time} seconds, Testing Time: {testing_time} seconds")
    
    # Implementing early stopping based on F1 score
    if val_f1 > best_f1:
        best_f1 = val_f1
        patience_counter = 0
        #torch.save(model.state_dict(), 'best_model.pth')
    else:
        patience_counter += 1
        if patience_counter >= 3:
            print("Early stopping")
            print(report7)
            break
            
else:  # This block will be executed if the for loop completes normally, i.e., if early stopping does not occur.
    print("Training completed.")
    print(report7)  # Print the classification report after the last epoch


Epoch: 0, Training Time: 178.0331208705902 seconds, Testing Time: 19.486204862594604 seconds
Epoch: 1, Training Time: 168.6105773448944 seconds, Testing Time: 19.171525955200195 seconds
Epoch: 2, Training Time: 168.56853222846985 seconds, Testing Time: 19.549827814102173 seconds
Epoch: 3, Training Time: 169.33317685127258 seconds, Testing Time: 19.34743046760559 seconds
Epoch: 4, Training Time: 168.88169169425964 seconds, Testing Time: 19.31159019470215 seconds
Epoch: 5, Training Time: 168.58642506599426 seconds, Testing Time: 19.321823120117188 seconds
Epoch: 6, Training Time: 168.25208640098572 seconds, Testing Time: 19.324920892715454 seconds
Epoch: 7, Training Time: 168.42181491851807 seconds, Testing Time: 19.4970064163208 seconds
Epoch: 8, Training Time: 167.69868302345276 seconds, Testing Time: 19.209219932556152 seconds
Epoch: 9, Training Time: 167.69934964179993 seconds, Testing Time: 19.322603940963745 seconds
Training completed.
              precision    recall  f1-score   

In [16]:
##BERT MODEL FOR SAMPLE 2
import torch.optim as optim
import warnings
import torch
from collections import Counter
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.metrics import f1_score, classification_report
from sklearn.exceptions import UndefinedMetricWarning

warnings.filterwarnings("ignore", category=UndefinedMetricWarning)


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = BERTForClassification(10).to(device)  # replace NUM_CLASSES with the number of unique labels in your dataset
optimizer = optim.AdamW(model.parameters(), lr=2e-5)

# Early stopping parameters
patience = 3
best_valid_loss = float('inf')
counter = 0

for epoch in range(10):
    epoch_start_time = time.time()
    model.train()
    training_start_time = time.time()  # Start measuring time
    for batch in train_loader:
        optimizer.zero_grad()
        criterion = nn.CrossEntropyLoss()
        inputs, attention_mask, labels = batch['input_ids'].to(device), batch['attention_mask'].to(device), batch['labels'].to(device)
        outputs = model(inputs, attention_mask)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    training_end_time = time.time()  # End measuring time
    training_time = training_end_time - training_start_time  # Calculate elapsed time for training

    # Validation phase
    model.eval()
    testing_start_time = time.time()  # Start measuring time
    predictions, true_labels = [], []
    for batch in test_loader:
        with torch.no_grad():
            inputs, attention_mask, labels = batch['input_ids'].to(device), batch['attention_mask'].to(device), batch['labels'].to(device)
            outputs = model(inputs, attention_mask)
            predictions.extend(torch.argmax(outputs, dim=1).cpu().tolist())
            true_labels.extend(labels.cpu().tolist())
    testing_end_time = time.time()  # End measuring time
    testing_time = testing_end_time - testing_start_time  # Calculate elapsed time for testing

    val_f1 = f1_score(true_labels, predictions, average='weighted')
    report8 = classification_report(true_labels, predictions)

    print(f"Epoch: {epoch}, Training Time: {training_time} seconds, Testing Time: {testing_time} seconds")
    # Early stopping logic
    if val_f1 < best_valid_loss:
        best_valid_loss = val_f1
        counter = 0
        #torch.save(model.state_dict(), 'best_model_bert2.pkl')  # Save the model
    else:
        counter += 1
        if counter >= patience:
            print("Early stopping triggered.")
            print(report8)
            break
            
else:  # This block will be executed if the for loop completes normally, i.e., if early stopping does not occur.
    print("Training completed.")
    print(report8)  # Print the classification report after the last epoch


Epoch: 0, Training Time: 5821.8458042144775 seconds, Testing Time: 523.3689856529236 seconds
Epoch: 1, Training Time: 5841.6384217739105 seconds, Testing Time: 548.2840623855591 seconds
Epoch: 2, Training Time: 2656.381799697876 seconds, Testing Time: 243.84410643577576 seconds
Epoch: 3, Training Time: 2751.931289434433 seconds, Testing Time: 266.76108479499817 seconds
Early stopping triggered.
              precision    recall  f1-score   support

           0       0.88      0.90      0.89      1360
           1       0.71      0.87      0.78       506
           2       0.90      0.77      0.83       569
           3       0.87      0.88      0.87       227
           4       0.96      0.96      0.96       267
           5       0.77      0.75      0.76       407
           6       0.78      0.69      0.73       270
           7       0.85      0.82      0.83       786
           8       0.90      0.87      0.88       283
           9       1.00      1.00      1.00        44

    ac

## working on full dataframe

In [17]:
train_dataset = TextDataset(train_encodings3, train_df3['labels'].tolist())
test_dataset = TextDataset(test_encodings3, test_df3['labels'].tolist())

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

In [19]:
#RNN Tuned MODEL FOR SAMPLE 3

import warnings
import torch
from collections import Counter
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.metrics import f1_score, classification_report
from sklearn.exceptions import UndefinedMetricWarning

warnings.filterwarnings("ignore", category=UndefinedMetricWarning)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class AdjustedRNNModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, dropout=0.4):
        super(AdjustedRNNModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.RNN(embedding_dim, hidden_dim, batch_first=True)  # Use nn.RNN instead of nn.LSTM
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        embedded = self.embedding(x)
        rnn_out, _ = self.rnn(embedded)  # Use rnn_out instead of lstm_out
        # Use the last hidden state for classification
        output = self.fc(self.dropout(rnn_out[:, -1, :]))
        return output

# Instantiate model
model = AdjustedRNNModel(vocab_size=tokenizer.vocab_size, embedding_dim=128, hidden_dim=256, output_dim=10, dropout=0.4).to(device)
optimizer = optim.AdamW(model.parameters(), lr=0.00002)


# Assuming `train_labels` is a list containing all the labels in your training dataset
train_labels = [label for batch in train_loader for label in batch['labels'].tolist()]

# 1. Compute class distribution
class_counts = Counter(train_labels)

# 2. Calculate the weights
max_count = max(class_counts.values())
class_weights = {class_id: max_count / count for class_id, count in class_counts.items()}
weights = [class_weights[class_id] for class_id in sorted(class_weights.keys())]

weights_tensor = torch.tensor(weights, dtype=torch.float32).to(device)

# 3. Use the weights in the loss function
criterion = nn.CrossEntropyLoss(weight=weights_tensor)

#criterion = nn.CrossEntropyLoss()  # use this if data is not imbalanced

# Add Learning Rate Scheduler
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=3, factor=0.5)

# Train the model
best_f1 = 0.0  # for early stopping based on F1 score

for epoch in range(10):
    # Measure the time at the start of the epoch
    epoch_start_time = time.time()
    model.train()
    training_start_time = time.time()  # Start measuring time
    for batch in train_loader:
        optimizer.zero_grad()
        inputs, labels = batch['input_ids'].to(device), batch['labels'].to(device)
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    training_end_time = time.time()  # End measuring time
    training_time = training_end_time - training_start_time  # Calculate elapsed time for training

    model.eval()
    testing_start_time = time.time()  # Start measuring time
    predictions, true_labels = [], []
    val_loss = 0  # to compute average validation loss for scheduler

    for batch in test_loader:
        with torch.no_grad():
            inputs, labels = batch['input_ids'].to(device), batch['labels'].to(device)
            outputs = model(inputs)
            val_loss += criterion(outputs, labels).item()
            predictions.extend(torch.argmax(outputs, dim=1).cpu().tolist())
            true_labels.extend(labels.cpu().tolist())
    testing_end_time = time.time()  # End measuring time
    testing_time = testing_end_time - testing_start_time  # Calculate elapsed time for testing

    val_f1 = f1_score(true_labels, predictions, average='weighted')
    scheduler.step(val_loss / len(test_loader))  # scheduler step based on avg val loss
    report9 = classification_report(true_labels, predictions)

    print(f"Epoch: {epoch}, Training Time: {training_time} seconds, Testing Time: {testing_time} seconds")
    
    # Implementing early stopping based on F1 score
    if val_f1 > best_f1:
        best_f1 = val_f1
        patience_counter = 0
        torch.save(model.state_dict(), '/workspace/outputs/simplernn_model.pth')
    else:
        patience_counter += 1
        if patience_counter >= 3:
            print("Early stopping")
            print(report9)
            break
            
else:  # This block will be executed if the for loop completes normally, i.e., if early stopping does not occur.
    print("Training completed.")
    print(report9)  # Print the classification report after the last epoch



Epoch: 0, Training Time: 81.18975806236267 seconds, Testing Time: 12.927451133728027 seconds
Epoch: 1, Training Time: 81.82534098625183 seconds, Testing Time: 13.028075218200684 seconds
Epoch: 2, Training Time: 81.76583075523376 seconds, Testing Time: 12.9928457736969 seconds
Epoch: 3, Training Time: 82.368008852005 seconds, Testing Time: 13.084489107131958 seconds
Epoch: 4, Training Time: 82.09669613838196 seconds, Testing Time: 12.963333368301392 seconds
Epoch: 5, Training Time: 82.13156628608704 seconds, Testing Time: 13.038267374038696 seconds
Epoch: 6, Training Time: 81.87105989456177 seconds, Testing Time: 13.057200908660889 seconds
Epoch: 7, Training Time: 82.07426834106445 seconds, Testing Time: 12.942949295043945 seconds
Epoch: 8, Training Time: 82.19202995300293 seconds, Testing Time: 13.100719690322876 seconds
Epoch: 9, Training Time: 82.12648129463196 seconds, Testing Time: 12.895895004272461 seconds
Training completed.
              precision    recall  f1-score   support


In [24]:
#RNN Tuned MODEL FOR SAMPLE 3

import warnings
import torch
from collections import Counter
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.metrics import f1_score, classification_report
from sklearn.exceptions import UndefinedMetricWarning

warnings.filterwarnings("ignore", category=UndefinedMetricWarning)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class AdjustedRNNModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, dropout=0.4):
        super(AdjustedRNNModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.RNN(embedding_dim, hidden_dim, batch_first=True)  # Use nn.RNN instead of nn.LSTM
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        embedded = self.embedding(x)
        rnn_out, _ = self.rnn(embedded)  # Use rnn_out instead of lstm_out
        # Use the last hidden state for classification
        output = self.fc(self.dropout(rnn_out[:, -1, :]))
        return output

# Instantiate model
model = AdjustedRNNModel(vocab_size=tokenizer.vocab_size, embedding_dim=128, hidden_dim=256, output_dim=10, dropout=0.4).to(device)
optimizer = optim.AdamW(model.parameters(), lr=0.00002)


# Assuming `train_labels` is a list containing all the labels in your training dataset
train_labels = [label for batch in train_loader for label in batch['labels'].tolist()]

# 1. Compute class distribution
class_counts = Counter(train_labels)

# 2. Calculate the weights
max_count = max(class_counts.values())
class_weights = {class_id: max_count / count for class_id, count in class_counts.items()}
weights = [class_weights[class_id] for class_id in sorted(class_weights.keys())]

weights_tensor = torch.tensor(weights, dtype=torch.float32).to(device)

# 3. Use the weights in the loss function
criterion = nn.CrossEntropyLoss(weight=weights_tensor)

#criterion = nn.CrossEntropyLoss()  # use this if data is not imbalanced

# Add Learning Rate Scheduler
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=3, factor=0.5)

# Train the model
best_f1 = 0.0  # for early stopping based on F1 score

for epoch in range(10):
    # Measure the time at the start of the epoch
    epoch_start_time = time.time()
    model.train()
    training_start_time = time.time()  # Start measuring time
    for batch in train_loader:
        optimizer.zero_grad()
        inputs, labels = batch['input_ids'].to(device), batch['labels'].to(device)
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    training_end_time = time.time()  # End measuring time
    training_time = training_end_time - training_start_time  # Calculate elapsed time for training

    model.eval()
    testing_start_time = time.time()  # Start measuring time
    predictions, true_labels = [], []
    val_loss = 0  # to compute average validation loss for scheduler

    for batch in test_loader:
        with torch.no_grad():
            inputs, labels = batch['input_ids'].to(device), batch['labels'].to(device)
            outputs = model(inputs)
            val_loss += criterion(outputs, labels).item()
            predictions.extend(torch.argmax(outputs, dim=1).cpu().tolist())
            true_labels.extend(labels.cpu().tolist())
    testing_end_time = time.time()  # End measuring time
    testing_time = testing_end_time - testing_start_time  # Calculate elapsed time for testing

    val_f1 = f1_score(true_labels, predictions, average='weighted')
    scheduler.step(val_loss / len(test_loader))  # scheduler step based on avg val loss
    report9 = classification_report(true_labels, predictions)

    print(f"Epoch: {epoch}, Training Time: {training_time} seconds, Testing Time: {testing_time} seconds")
    
    # Implementing early stopping based on F1 score
    if val_f1 > best_f1:
        best_f1 = val_f1
        patience_counter = 0
        torch.save(model.state_dict(), '/workspace/outputs/simplernn_model.pth')
    else:
        patience_counter += 1
        if patience_counter >= 3:
            print("Early stopping")
            print(report9)
            break
            
else:  # This block will be executed if the for loop completes normally, i.e., if early stopping does not occur.
    print("Training completed.")
    print(report9)  # Print the classification report after the last epoch



Epoch: 0, Training Time: 14.653367042541504 seconds, Testing Time: 1.7771806716918945 seconds
Epoch: 1, Training Time: 14.599278688430786 seconds, Testing Time: 1.7768876552581787 seconds
Epoch: 2, Training Time: 14.60213828086853 seconds, Testing Time: 1.7757465839385986 seconds
Epoch: 3, Training Time: 14.596824407577515 seconds, Testing Time: 1.7774076461791992 seconds
Epoch: 4, Training Time: 14.605844736099243 seconds, Testing Time: 1.7760374546051025 seconds
Epoch: 5, Training Time: 14.607850313186646 seconds, Testing Time: 1.7770261764526367 seconds
Epoch: 6, Training Time: 14.605615377426147 seconds, Testing Time: 1.7768173217773438 seconds
Epoch: 7, Training Time: 14.60275912284851 seconds, Testing Time: 1.7767760753631592 seconds
Early stopping
              precision    recall  f1-score   support

           0       0.37      0.33      0.35      1953
           1       0.10      0.00      0.01       705
           2       0.29      0.47      0.36       826
           3      

In [20]:
##LSTM tined MODEL FOR SAMPLE 3
import warnings
import torch
from collections import Counter
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.metrics import f1_score, classification_report
from sklearn.exceptions import UndefinedMetricWarning

warnings.filterwarnings("ignore", category=UndefinedMetricWarning)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


class AdjustedLSTMModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, dropout=0.2):
        super(AdjustedLSTMModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        embedded = self.embedding(x)
        lstm_out, _ = self.lstm(embedded)
        # Use the last hidden state for classification
        output = self.fc(self.dropout(lstm_out[:, -1, :]))
        return output

# Instantiate model
model = AdjustedLSTMModel(vocab_size=tokenizer.vocab_size, embedding_dim=128, hidden_dim=256, output_dim=10, dropout=0.2).to(device)
optimizer = optim.AdamW(model.parameters(),lr=0.001)

# If your dataset is imbalanced, compute class weights
# weights = # Compute based on class distribution
# criterion = nn.CrossEntropyLoss(weight=weights)


# Assuming `train_labels` is a list containing all the labels in your training dataset
train_labels = [label for batch in train_loader for label in batch['labels'].tolist()]

# 1. Compute class distribution
class_counts = Counter(train_labels)

# 2. Calculate the weights
max_count = max(class_counts.values())
class_weights = {class_id: max_count / count for class_id, count in class_counts.items()}
weights = [class_weights[class_id] for class_id in sorted(class_weights.keys())]

weights_tensor = torch.tensor(weights, dtype=torch.float32).to(device)

# 3. Use the weights in the loss function
criterion = nn.CrossEntropyLoss(weight=weights_tensor)

#criterion = nn.CrossEntropyLoss()  # use this if data is not imbalanced

# Add Learning Rate Scheduler
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=3, factor=0.5)

# Train the model
best_f1 = 0.0  # for early stopping based on F1 score

for epoch in range(10):
    epoch_start_time = time.time()
    model.train()
    training_start_time = time.time()  # Start measuring time
    for batch in train_loader:
        optimizer.zero_grad()
        inputs, labels = batch['input_ids'].to(device), batch['labels'].to(device)
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    training_end_time = time.time()  # End measuring time
    training_time = training_end_time - training_start_time  # Calculate elapsed time for training

    model.eval()
    testing_start_time = time.time()  # Start measuring time
    predictions, true_labels = [], []
    val_loss = 0  # to compute average validation loss for scheduler

    for batch in test_loader:
        with torch.no_grad():
            inputs, labels = batch['input_ids'].to(device), batch['labels'].to(device)
            outputs = model(inputs)
            val_loss += criterion(outputs, labels).item()
            predictions.extend(torch.argmax(outputs, dim=1).cpu().tolist())
            true_labels.extend(labels.cpu().tolist())
    testing_end_time = time.time()  # End measuring time
    testing_time = testing_end_time - testing_start_time  # Calculate elapsed time for testing

    val_f1 = f1_score(true_labels, predictions, average='weighted')
    scheduler.step(val_loss / len(test_loader))  # scheduler step based on avg val loss
    report10 = classification_report(true_labels, predictions)

    print(f"Epoch: {epoch}, Training Time: {training_time} seconds, Testing Time: {testing_time} seconds")
    
    # Implementing early stopping based on F1 score
    if val_f1 > best_f1:
        best_f1 = val_f1
        patience_counter = 0
        torch.save(model.state_dict(), '/workspace/outputs/lstm_model.pth')
    else:
        patience_counter += 1
        if patience_counter >= 3:
            print("Early stopping")
            print(report10)
            break
            
else:  # This block will be executed if the for loop completes normally, i.e., if early stopping does not occur.
    print("Training completed.")
    print(report10)  # Print the classification report after the last epoch


Epoch: 0, Training Time: 203.02118039131165 seconds, Testing Time: 26.80011796951294 seconds
Epoch: 1, Training Time: 201.98398566246033 seconds, Testing Time: 26.68457841873169 seconds
Epoch: 2, Training Time: 202.658305644989 seconds, Testing Time: 26.826446771621704 seconds
Epoch: 3, Training Time: 203.49253058433533 seconds, Testing Time: 26.995861530303955 seconds
Epoch: 4, Training Time: 203.52809619903564 seconds, Testing Time: 26.81853699684143 seconds
Epoch: 5, Training Time: 202.4718325138092 seconds, Testing Time: 26.83898162841797 seconds
Epoch: 6, Training Time: 202.39271092414856 seconds, Testing Time: 26.576830863952637 seconds
Epoch: 7, Training Time: 202.02381110191345 seconds, Testing Time: 26.941951036453247 seconds
Epoch: 8, Training Time: 203.0194296836853 seconds, Testing Time: 26.829503059387207 seconds
Epoch: 9, Training Time: 203.93054628372192 seconds, Testing Time: 26.924782276153564 seconds
Training completed.
              precision    recall  f1-score   su

In [21]:
##BiLSTM tuned MODEL FOR SAMPLE 3
import warnings
import torch
from collections import Counter
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.metrics import f1_score, classification_report
from sklearn.exceptions import UndefinedMetricWarning

warnings.filterwarnings("ignore", category=UndefinedMetricWarning)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class AdjustedBiLSTMModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, dropout=0.2):
        super(AdjustedBiLSTMModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        
        # Add this line to store the hidden_dim as an instance variable
        self.hidden_dim = hidden_dim
        
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True, bidirectional=True)
        self.fc = nn.Linear(2*hidden_dim, output_dim)  # Multiply by 2 because it's bidirectional
        self.dropout = nn.Dropout(dropout)



    def forward(self, x):
        embedded = self.embedding(x)
        lstm_out, _ = self.lstm(embedded)
        # Use the last hidden state for classification. We concatenate the last hidden state from both directions
        output = self.fc(self.dropout(torch.cat((lstm_out[:, -1, :self.hidden_dim], lstm_out[:, 0, self.hidden_dim:]), dim=1)))
        return output



# Instantiate model
model = AdjustedBiLSTMModel(vocab_size=tokenizer.vocab_size, embedding_dim=128, hidden_dim=256, output_dim=10, dropout=0.2).to(device)

#model = AdjustedLSTMModel(vocab_size=tokenizer.vocab_size, embedding_dim=128, hidden_dim=256, output_dim=10, dropout=0.2).to(device)
optimizer = optim.AdamW(model.parameters(),lr=0.001)

# If your dataset is imbalanced, compute class weights
# weights = # Compute based on class distribution
# criterion = nn.CrossEntropyLoss(weight=weights)


# Assuming `train_labels` is a list containing all the labels in your training dataset
train_labels = [label for batch in train_loader for label in batch['labels'].tolist()]

# 1. Compute class distribution
class_counts = Counter(train_labels)

# 2. Calculate the weights
max_count = max(class_counts.values())
class_weights = {class_id: max_count / count for class_id, count in class_counts.items()}
weights = [class_weights[class_id] for class_id in sorted(class_weights.keys())]

weights_tensor = torch.tensor(weights, dtype=torch.float32).to(device)

# 3. Use the weights in the loss function
criterion = nn.CrossEntropyLoss(weight=weights_tensor)

#criterion = nn.CrossEntropyLoss()  # use this if data is not imbalanced

# Add Learning Rate Scheduler
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=3, factor=0.5)

# Train the model
best_f1 = 0.0  # for early stopping based on F1 score

for epoch in range(10):
    epoch_start_time = time.time()
    model.train()
    training_start_time = time.time()  # Start measuring time
    for batch in train_loader:
        optimizer.zero_grad()
        inputs, labels = batch['input_ids'].to(device), batch['labels'].to(device)
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    training_end_time = time.time()  # End measuring time
    training_time = training_end_time - training_start_time  # Calculate elapsed time for training

    model.eval()
    testing_start_time = time.time()  # Start measuring time
    predictions, true_labels = [], []
    val_loss = 0  # to compute average validation loss for scheduler

    for batch in test_loader:
        with torch.no_grad():
            inputs, labels = batch['input_ids'].to(device), batch['labels'].to(device)
            outputs = model(inputs)
            val_loss += criterion(outputs, labels).item()
            predictions.extend(torch.argmax(outputs, dim=1).cpu().tolist())
            true_labels.extend(labels.cpu().tolist())
    testing_end_time = time.time()  # End measuring time
    testing_time = testing_end_time - testing_start_time  # Calculate elapsed time for testing

    val_f1 = f1_score(true_labels, predictions, average='weighted')
    scheduler.step(val_loss / len(test_loader))  # scheduler step based on avg val loss
    report11 = classification_report(true_labels, predictions)

    print(f"Epoch: {epoch}, Training Time: {training_time} seconds, Testing Time: {testing_time} seconds")
    
    # Implementing early stopping based on F1 score
    if val_f1 > best_f1:
        best_f1 = val_f1
        patience_counter = 0
        torch.save(model.state_dict(), '/workspace/outputs/bilstm_model.pth')
    else:
        patience_counter += 1
        if patience_counter >= 3:
            print("Early stopping")
            print(report11)
            break

else:  # This block will be executed if the for loop completes normally, i.e., if early stopping does not occur.
    print("Training completed.")
    print(report11)  # Print the classification report after the last epoch


Epoch: 0, Training Time: 241.0042974948883 seconds, Testing Time: 27.989733457565308 seconds
Epoch: 1, Training Time: 241.36780452728271 seconds, Testing Time: 27.740985870361328 seconds
Epoch: 2, Training Time: 257.99055075645447 seconds, Testing Time: 29.94828462600708 seconds
Epoch: 3, Training Time: 260.51057410240173 seconds, Testing Time: 27.734104871749878 seconds
Epoch: 4, Training Time: 241.2057478427887 seconds, Testing Time: 27.58629560470581 seconds
Epoch: 5, Training Time: 241.07780075073242 seconds, Testing Time: 27.801841020584106 seconds
Epoch: 6, Training Time: 240.1743197441101 seconds, Testing Time: 27.680588006973267 seconds
Epoch: 7, Training Time: 240.85477781295776 seconds, Testing Time: 27.55038833618164 seconds
Epoch: 8, Training Time: 240.6198766231537 seconds, Testing Time: 27.818445920944214 seconds
Epoch: 9, Training Time: 240.6929953098297 seconds, Testing Time: 27.44154381752014 seconds
Training completed.
              precision    recall  f1-score   sup

In [23]:
##BERT MODEL FOR SAMPLE 1
import torch.optim as optim
import warnings
import torch
from collections import Counter
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.metrics import f1_score, classification_report
from sklearn.exceptions import UndefinedMetricWarning

warnings.filterwarnings("ignore", category=UndefinedMetricWarning)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = BERTForClassification(10).to(device)  # replace NUM_CLASSES with the number of unique labels in your dataset
optimizer = optim.AdamW(model.parameters(), lr=2e-5)

# Early stopping parameters
patience = 3
best_valid_loss = float('inf')
counter = 0

for epoch in range(10):
    epoch_start_time = time.time()
    model.train()
    training_start_time = time.time()  # Start measuring time
    for batch in train_loader:
        optimizer.zero_grad()
        criterion = nn.CrossEntropyLoss()
        inputs, attention_mask, labels = batch['input_ids'].to(device), batch['attention_mask'].to(device), batch['labels'].to(device)
        outputs = model(inputs, attention_mask)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    training_end_time = time.time()  # End measuring time
    training_time = training_end_time - training_start_time  # Calculate elapsed time for training

    # Validation phase
    model.eval()
    testing_start_time = time.time()  # Start measuring time
    predictions, true_labels = [], []
    for batch in test_loader:
        with torch.no_grad():
            inputs, attention_mask, labels = batch['input_ids'].to(device), batch['attention_mask'].to(device), batch['labels'].to(device)
            outputs = model(inputs, attention_mask)
            predictions.extend(torch.argmax(outputs, dim=1).cpu().tolist())
            true_labels.extend(labels.cpu().tolist())
    testing_end_time = time.time()  # End measuring time
    testing_time = testing_end_time - testing_start_time  # Calculate elapsed time for testing

    val_f1 = f1_score(true_labels, predictions, average='weighted')
    report12 = classification_report(true_labels, predictions)
    print(f"Epoch: {epoch}, Training Time: {training_time} seconds, Testing Time: {testing_time} seconds")
    
    # Early stopping logic
    if val_f1 < best_valid_loss:
        best_valid_loss = val_f1
        counter = 0
        torch.save(model.state_dict(), '/workspace/outputs/Bert_model.pth')  # Save the model
    else:
        counter += 1
        if counter >= patience:
            print("Early stopping triggered.")
            print(report12)
            break

else:  # This block will be executed if the for loop completes normally, i.e., if early stopping does not occur.
    print("Training completed.")
    print(report12)  # Print the classification report after the last epoch


Epoch: 0, Training Time: 3786.356432914734 seconds, Testing Time: 348.7625558376312 seconds
Epoch: 1, Training Time: 4035.283329963684 seconds, Testing Time: 382.64703011512756 seconds
Epoch: 2, Training Time: 5931.065721035004 seconds, Testing Time: 742.067969083786 seconds
Epoch: 3, Training Time: 8218.446682214737 seconds, Testing Time: 745.0084149837494 seconds
Early stopping triggered.
              precision    recall  f1-score   support

           0       0.90      0.91      0.91      1953
           1       0.84      0.84      0.84       705
           2       0.86      0.88      0.87       826
           3       0.89      0.86      0.88       333
           4       0.96      0.98      0.97       364
           5       0.79      0.80      0.80       560
           6       0.70      0.78      0.74       436
           7       0.89      0.84      0.87      1098
           8       0.94      0.88      0.91       402
           9       1.00      1.00      1.00        64

    accura