In [10]:
import os

import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, RandomSampler, TensorDataset
from transformers import BertTokenizer, BertForSequenceClassification, AdamW

In [12]:
# Read the three feature tables from their CSV files, in order:
# ultrasound, mammogram, and the combined multimodal features.
data_USG, data_MMG, data_multimodal = (
    pd.read_csv(csv_path)
    for csv_path in (
        "Features/Ultrasound features.csv",
        "Features/Mammogram features.csv",
        "Features/multimodal features.csv",
    )
)

In [14]:
# Separate the features (every column except the last) from the labels (last column)
features = data_USG.iloc[:, :-1]
raw_labels = data_USG.iloc[:, -1]

# Convert the labels to numeric values (0 or 1)
label_to_id = {"class1": 0, "class2": 1}
labels = raw_labels.map(label_to_id)

# Series.map leaves NaN for every value that is not a key of the dict.
# Fail here with a clear message instead of letting NaN labels reach training.
unmapped = labels.isna()
if unmapped.any():
    unknown = sorted(raw_labels[unmapped].astype(str).unique())
    raise ValueError(
        f"Unexpected label value(s) in the last column of data_USG: {unknown}; "
        f"expected one of {sorted(label_to_id)}"
    )

# Keep an integer dtype for the class ids
labels = labels.astype("int64")

# Convert the features to a list of strings (one space-joined string per row)
feature_strings = features.astype(str).apply(' '.join, axis=1).tolist()

In [None]:
# Tokenizer loaded from the 'bert-base-uncased' checkpoint
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Encode every feature string in one call, with padding and truncation enabled,
# and get the result back as PyTorch tensors ('pt').
tokenized_inputs = tokenizer(
    feature_strings,
    padding=True,
    truncation=True,
    return_tensors='pt',
)

In [None]:
# Convert the labels to a tensor.
# Casting to int64 first raises on NaN (unmapped) labels instead of silently
# producing a float tensor, and the result is an explicit torch.long tensor of
# class ids.
labels_tensor = torch.tensor(labels.astype("int64").to_numpy(), dtype=torch.long)

# Create a TensorDataset of (input_ids, attention_mask, label) triples.
# TensorDataset / DataLoader / RandomSampler come from torch.utils.data.
dataset = TensorDataset(
    tokenized_inputs['input_ids'],
    tokenized_inputs['attention_mask'],
    labels_tensor,
)

# Set the batch size and create a DataLoader that draws samples via RandomSampler
batch_size = 16
dataloader = DataLoader(dataset, batch_size=batch_size, sampler=RandomSampler(dataset))

In [None]:
# BERT sequence classifier loaded from the 'bert-base-uncased' checkpoint,
# configured for two output labels (the 0/1 label encoding used above).
model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=2,
)

In [None]:
# Number of full passes over the DataLoader
epochs = 3

# AdamW over all model parameters
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=2e-5,
    eps=1e-8,
)

In [None]:
# Train on the GPU when one is available, otherwise on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for epoch in range(epochs):
    model.train()
    total_loss = 0

    for batch in dataloader:
        # Each batch is (input_ids, attention_mask, labels); move all to the device
        input_ids, attention_mask, batch_labels = (t.to(device) for t in batch)

        optimizer.zero_grad()

        # Passing labels makes the model return the loss alongside its outputs
        outputs = model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            labels=batch_labels,
        )
        loss = outputs.loss

        loss.backward()
        optimizer.step()

        # Accumulate the scalar loss for the per-epoch average
        total_loss += loss.item()

    avg_loss = total_loss / len(dataloader)
    print(f"Epoch {epoch+1}/{epochs} - Average Loss: {avg_loss:.4f}")