In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os
import numpy as np
from PIL import Image

import h5py
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
import torchvision.transforms as transforms

from sklearn.model_selection import train_test_split
train_csv = pd.read_csv("train-metadata.csv")

class SkinCancerDataset(Dataset):
    def __init__(self, csv_file, img_dir, transform=None):
        self.data_frame = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.data_frame)

    def __getitem__(self, idx):
        img_name = os.path.join(self.img_dir,self.data_frame.isic_id[idx])
        img_name = img_name + '.jpg'
        
        image = Image.open(img_name).convert('RGB')
        
        label = self.data_frame.target[idx]
        
        if self.transform:
            image = self.transform(image)
        
        return image, label


# Define the transform to preprocess the images
transform = transforms.Compose([
    transforms.Resize((100, 100)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,0.5,0.5), (0.5,0.5,0.5))
])

# File paths
csv_file_path = 'train-metadata.csv'
image_file_path = 'train-image/image/'

# Load the dataset
full_dataset = SkinCancerDataset(csv_file_path, image_file_path, transform=transform)

# Split the dataset into training and validation sets
train_indices, val_indices = train_test_split(range(len(full_dataset)), test_size=0.2, stratify=full_dataset.data_frame.target)

train_dataset = torch.utils.data.Subset(full_dataset, train_indices)
val_dataset = torch.utils.data.Subset(full_dataset, val_indices)



# Calculate class weights for imbalanced data
labels = full_dataset.data_frame.iloc[:, 1].values
class_counts = pd.Series(labels).value_counts().sort_index()
class_weights = 1. / class_counts
samples_weights = [class_weights[label] for label in labels]


# Create a sampler for the training set
sampler = WeightedRandomSampler(samples_weights, num_samples=len(train_indices), replacement=True)

# Create the DataLoader for training and validation
train_loader = DataLoader(train_dataset, batch_size=32, sampler=sampler, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=4)


# Define the model
class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(100*100*3, 128)  # Adjust based on your image size and channels
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 1)

    def forward(self, x):
        x = x.view(-1, 100*100*3)  # Flatten the image
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Set device to MPS
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

# Instantiate the model, loss function, and optimizer
model = SimpleNN().to(device)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)




# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    print(epoch)
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        print("inside batch")
        inputs, labels = inputs.to(device), labels.to(device).float().unsqueeze(1)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * inputs.size(0)

    epoch_loss = running_loss / len(train_loader.dataset)
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}')

  train_csv = pd.read_csv("train-metadata.csv")
  self.data_frame = pd.read_csv(csv_file)


0


Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/opt/homebrew/Cellar/python@3.12/3.12.3/Frameworks/Python.framework/Versions/3.12/lib/python3.12/multiprocessing/spawn.py", line 122, in spawn_main
    exitcode = _main(fd, parent_sentinel)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/Cellar/python@3.12/3.12.3/Frameworks/Python.framework/Versions/3.12/lib/python3.12/multiprocessing/spawn.py", line 132, in _main
    self = reduction.pickle.load(from_parent)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: Can't get attribute 'SkinCancerDataset' on <module '__main__' (<class '_frozen_importlib.BuiltinImporter'>)>


KeyboardInterrupt: 