# Introduction

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'True'

In [None]:
!pip install transformers[torch]

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
[31mERROR: Operation cancelled by user[0m[31m
[0m

# New Section

In [None]:
from transformers import AutoModel, AutoTokenizer
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
from transformers import BertModel, AutoModel, AdamW, get_linear_schedule_with_warmup, BertTokenizer, BertForSequenceClassification
from transformers import Trainer, TrainingArguments
import torch
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

In [None]:
#Loading the final cleaned datasets

merged_file_df = pd.read_TYPE('Merged_File')

In [None]:
merged_file_df["Disease_Severity"] = merged_file_df["Disease_Severity"].apply(lambda x: 1 if x == "Active" else 0)
merged_file_df["Ulcer"] = merged_file_df["Ulcer"].apply(lambda x: "have ulcer." if x == "Yes" else "doesn't have ulcer.")

In [None]:
merged_file_df['combined'] = merged_file_df['Ulcer'].astype(str) + " " + merged_file_df['Exam_Notes'].astype(str)

In [None]:
merged_file_df = merged_file_df.drop(['Exam_ID'],axis=1)

In [None]:
merged_file_df

In [None]:
merged_file_df["combined"][1]

In [None]:
# Split your data into training and validation sets
train_df, val_df = train_test_split(merged_file_df, test_size=0.2, random_state=42)

In [None]:
model_name = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)

Downloading (…)solve/main/vocab.txt: 0.00B [00:00, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly i

In [None]:
train_encodings = tokenizer(
    list(train_df["combined"]),
    additional_column_texts=list(train_df["Image_Info"]),
    truncation=True,
    padding=True,
    max_length=512  # Adjust the max_length value here
)
val_encodings = tokenizer(
    list(val_df["combined"]),
    additional_column_texts=list(val_df["Image_Info"]),
    truncation=True,
    padding=True,
    max_length=512  # Adjust the max_length value here
)
train_labels = list(train_df["Disease_Severity"])
val_labels = list(val_df["Disease_Severity"])

Keyword arguments {'additional_column_texts': ['Cecum Appendiceal Orifice Ileocecal Valve Transverse Colon Descending Colon Sigmoid Colon Rectum Rectum Hemorrhoids', 'Appendiceal Orifice Inflammatory Bowel Disease Cecum Inflammatory Bowel Disease Hepatic Flexure Inflammatory Bowel Disease Transverse Colon Inflammatory Bowel Disease Transverse Colon Inflammatory Bowel Disease Splenic Flexure Inflammatory Bowel Disease Descending Colon Inflammatory Bowel Disease Descending Colon Inflammatory Bowel Disease Sigmoid Colon Inflammatory Bowel Disease Sigmoid Colon Inflammatory Bowel Disease Sigmoid Colon Inflammatory Bowel Disease Sigmoid Colon Inflammatory Bowel Disease Rectum Inflammatory Bowel Disease Rectum Inflammatory Bowel Disease', 'Sigmoid ColonTerminal ileumCecumTransverse ColonTransverse ColonDescending ColonSigmoid ColonSigmoid ColonRectumRectum', 'Transverse Colon NormalIleocecal Valve InflammationAppendiceal Orifice InflammationHepatic Flexure NormalTerminal ileum Normal Termina

In [None]:
class CustomDataset():
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item["labels"] = torch.tensor(self.labels[idx])
        return item

train_dataset = CustomDataset(train_encodings, train_labels)
val_dataset = CustomDataset(val_encodings, val_labels)

In [None]:
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=4,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    learning_rate=0.0005,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    evaluation_strategy='epoch'
)

In [None]:
def compute_metrics(pred):
    labels = pred.label_ids
    predictions = pred.predictions.argmax(-1)

    accuracy = accuracy_score(labels, predictions)
    precision = precision_score(labels, predictions, zero_division=0)
    recall = recall_score(labels, predictions, zero_division=0)
    f1 = f1_score(labels, predictions, zero_division=0)

    return {"accuracy": accuracy, "f1": f1, "precision": precision, "recall": recall}

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics
)

In [None]:
trainer.train()




Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.7379,0.826733,0.472603,0.0,0.0,0.0
2,0.6827,0.693239,0.472603,0.0,0.0,0.0
3,0.7239,0.756875,0.472603,0.0,0.0,0.0
4,0.6961,0.695012,0.472603,0.0,0.0,0.0


TrainOutput(global_step=584, training_loss=0.687436336523866, metrics={'train_runtime': 277.3885, 'train_samples_per_second': 8.421, 'train_steps_per_second': 2.105, 'total_flos': 614627425320960.0, 'train_loss': 0.687436336523866, 'epoch': 4.0})

In [None]:
eval_result = trainer.evaluate()
print(eval_result)


{'eval_loss': 0.6950116157531738, 'eval_accuracy': 0.4726027397260274, 'eval_f1': 0.0, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_runtime': 5.095, 'eval_samples_per_second': 28.655, 'eval_steps_per_second': 7.262, 'epoch': 4.0}


# BioClinicalBert

In [None]:
import pandas as pd
import torch
from transformers import BertTokenizer, BertForSequenceClassification
from torch.utils.data import Dataset, DataLoader

# Load the pre-trained ClinicalBERT tokenizer
tokenizer = BertTokenizer.from_pretrained("emilyalsentzer/Bio_ClinicalBERT")

# Define a custom dataset class
class ClinicalDataset(Dataset):
    def __init__(self, df, tokenizer, max_length):
        self.data = df
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        #text = self.data["Exam_Notes"].iloc[idx]
        label = self.data["Disease_Severity"].iloc[idx]
        text1 = self.data["combined"].iloc[idx]
        text2 = self.data["Image_Info"].iloc[idx]
        # Concatenate the text columns
        text = text1 + " " + text2

        # Tokenize the text and convert to input IDs and attention masks
        encoded_inputs = tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_length,
            padding="max_length",
            truncation=True,
            return_tensors="pt",
        )

        input_ids = encoded_inputs["input_ids"].squeeze()
        attention_mask = encoded_inputs["attention_mask"].squeeze()

        return {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "label": torch.tensor(label),
        }



In [None]:
# Set the maximum sequence length
max_length = 512

# Create train and test datasets
train_dataset = ClinicalDataset(train_df, tokenizer, max_length)
test_dataset = ClinicalDataset(val_df, tokenizer, max_length)

In [None]:


# Define batch size and create data loaders
batch_size = 4
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Define the model
model = BertForSequenceClassification.from_pretrained("emilyalsentzer/Bio_ClinicalBERT", num_labels=2)

# Set device (CPU or GPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Define training settings
num_train_epochs = 4
learning_rate = 2e-5
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
loss_fn = torch.nn.CrossEntropyLoss()

Downloading pytorch_model.bin:   0%|          | 0.00/436M [00:00<?, ?B/s]

Some weights of the model checkpoint at emilyalsentzer/Bio_ClinicalBERT were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model

In [None]:
# Training loop
for epoch in range(num_train_epochs):
    model.train()
    train_loss = 0.0

    for batch in train_loader:
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["label"].to(device)

        optimizer.zero_grad()

        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        train_loss += loss.item()

        loss.backward()
        optimizer.step()

    avg_train_loss = train_loss / len(train_loader)
    print(f"Epoch {epoch+1}: Average training loss: {avg_train_loss}")

Epoch 1: Average training loss: 0.546318781192172
Epoch 2: Average training loss: 0.44765903162833764
Epoch 3: Average training loss: 0.4471554973558204
Epoch 4: Average training loss: 0.42120690756056406


In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Evaluation loop
model.eval()
eval_loss = 0.0
predictions = []
true_labels = []

for batch in test_loader:
    input_ids = batch["input_ids"].to(device)
    attention_mask = batch["attention_mask"].to(device)
    labels = batch["label"].to(device)

    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        logits = outputs.logits
        loss = outputs.loss
        eval_loss += loss.item()

    # Convert logits to predicted labels
    batch_predictions = torch.argmax(logits, dim=1)
    predictions.extend(batch_predictions.cpu().numpy())
    true_labels.extend(labels.cpu().numpy())

eval_loss /= len(test_loader)

accuracy = accuracy_score(true_labels, predictions)
precision = precision_score(true_labels, predictions)
recall = recall_score(true_labels, predictions)
f1 = f1_score(true_labels, predictions)

print(f"Evaluation loss: {eval_loss:.4f}")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")


Evaluation loss: 0.9033
Accuracy: 0.7534
Precision: 0.7500
Recall: 0.6818
F1-score: 0.7143


# Basic BERT

In [None]:
merged_file_df

In [None]:
import torch
from transformers import BertForSequenceClassification, BertTokenizer
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

# Load the pre-trained BERT model and tokenizer
model_name = 'bert-base-uncased'
num_classes = 2  # Number of output classes
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=num_classes)
tokenizer = BertTokenizer.from_pretrained(model_name)

# Example hyperparameters to tune
hyperparameters = {
    'learning_rate': [1e-5, 2e-5, 5e-5],
    'batch_size': [16, 32, 64],
    'epochs': [3, 5, 7]
}



Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly i

In [None]:
# Define your training and validation data
# Assuming your dataset is stored in a pandas DataFrame
# df = ...

X_train = merged_file_df['combined'].values
y_train = merged_file_df['Disease_Severity'].values

X_val = merged_file_df['combined'].values
y_val = merged_file_df['Disease_Severity'].values



In [None]:
# Tokenize the input data
train_encodings = tokenizer.batch_encode_plus(
    X_train,
    truncation=True,
    padding=True,
    max_length=1024
)
val_encodings = tokenizer.batch_encode_plus(
    X_val,
    truncation=True,
    padding=True,
    max_length=1024
)



In [None]:
# Convert the tokenized data into PyTorch tensors
train_dataset = torch.utils.data.TensorDataset(
    torch.tensor(train_encodings['input_ids']),
    torch.tensor(train_encodings['attention_mask']),
    torch.tensor(y_train)
)
val_dataset = torch.utils.data.TensorDataset(
    torch.tensor(val_encodings['input_ids']),
    torch.tensor(val_encodings['attention_mask']),
    torch.tensor(y_val)
)



In [None]:
# Define the model evaluation function
def evaluate(model, dataset):
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size)
    model.eval()
    predictions = []
    true_labels = []
    for batch in dataloader:
        input_ids, attention_mask, labels = batch
        with torch.no_grad():
            outputs = model(input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        predicted_labels = torch.argmax(logits, dim=1)
        predictions.extend(predicted_labels.tolist())
        true_labels.extend(labels.tolist())
    return classification_report(true_labels, predictions)



In [None]:
from transformers import BertForSequenceClassification, BertTokenizer
import torch
from torch.utils.data import DataLoader, TensorDataset
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the tokenizer and model
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

# Prepare the data
X_train = merged_file_df['combined'].values
y_train = merged_file_df['Disease_Severity'].values

X_val = merged_file_df['combined'].values
y_val = merged_file_df['Disease_Severity'].values

# Tokenize the input text
train_encodings = tokenizer(X_train.tolist(), truncation=True, padding=True)
val_encodings = tokenizer(X_val.tolist(), truncation=True, padding=True)

# Convert to PyTorch tensors
train_inputs = torch.tensor(train_encodings['input_ids'])
train_masks = torch.tensor(train_encodings['attention_mask'])
train_labels = torch.tensor(y_train.tolist())

val_inputs = torch.tensor(val_encodings['input_ids'])
val_masks = torch.tensor(val_encodings['attention_mask'])
val_labels = torch.tensor(y_val.tolist())

# Create data loaders
train_dataset = TensorDataset(train_inputs, train_masks, train_labels)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)

val_dataset = TensorDataset(val_inputs, val_masks, val_labels)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)

# Set up the optimizer and loss function
optimizer = optim.AdamW(model.parameters(), lr=2e-5)
loss_fn = torch.nn.CrossEntropyLoss()

# Training loop
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

num_epochs = 3
best_accuracy = 0.0

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for batch in train_loader:
        batch = tuple(t.to(device) for t in batch)
        inputs, masks, labels = batch

        optimizer.zero_grad()

        outputs = model(inputs, attention_mask=masks, labels=labels)
        loss = outputs.loss
        logits = outputs.logits

        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    epoch_loss = running_loss / len(train_loader)
    print(f"Epoch {epoch+1} loss: {epoch_loss}")

    # Evaluation
    model.eval()
    val_predictions = []
    val_targets = []

    with torch.no_grad():
        for batch in val_loader:
            batch = tuple(t.to(device) for t in batch)
            inputs, masks, labels = batch

            outputs = model(inputs, attention_mask=masks)
            logits = outputs.logits

            predicted_labels = torch.argmax(logits, dim=1)
            val_predictions.extend(predicted_labels.cpu().numpy().tolist())
            val_targets.extend(labels.cpu().numpy().tolist())

    val_accuracy = accuracy_score(val_targets, val_predictions)
    print(f"Validation accuracy: {val_accuracy}")

    if val_accuracy > best_accuracy:
        best_accuracy = val_accuracy
        torch.save(model.state_dict(), 'best_model.pt')


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly i

Epoch 1 loss: 0.6678264931492184
Validation accuracy: 0.821917808219178
Epoch 2 loss: 0.4737134588801343
Validation accuracy: 0.821917808219178
Epoch 3 loss: 0.4818429451273835
Validation accuracy: 0.821917808219178


In [None]:
from sklearn.metrics import classification_report

# Load the best model
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)
model.load_state_dict(torch.load('best_model.pt'))
model.to(device)
model.eval()

# Evaluation on the validation set
val_predictions = []
val_targets = []

with torch.no_grad():
    for batch in val_loader:
        batch = tuple(t.to(device) for t in batch)
        inputs, masks, labels = batch

        outputs = model(inputs, attention_mask=masks)
        logits = outputs.logits

        predicted_labels = torch.argmax(logits, dim=1)
        val_predictions.extend(predicted_labels.cpu().numpy().tolist())
        val_targets.extend(labels.cpu().numpy().tolist())

# Generate classification report
target_names = ['Class 0', 'Class 1']
report = classification_report(val_targets, val_predictions, target_names=target_names)
print(report)


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly i

              precision    recall  f1-score   support

     Class 0       0.80      0.86      0.83       362
     Class 1       0.85      0.79      0.82       368

    accuracy                           0.82       730
   macro avg       0.82      0.82      0.82       730
weighted avg       0.82      0.82      0.82       730

