# **Imports:**

---



In [None]:
# Ask for an identifier for this training run. Later cells use it to name the
# Google Drive output folder and the saved model / info files.
model_iter = input('Enter model iteration: ')

In [None]:
# basic imports
import sys
import os
import csv
import random

# PyTorch libraries
import torch
from torch import nn
import torch.nn.functional as F
import torchvision
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor
import torchvision.models as models
from torch.utils.data import DataLoader, random_split, ConcatDataset, Subset
from torchvision.datasets import ImageFolder

# For loop
from timeit import default_timer as timer
from tqdm.auto import tqdm
from IPython.display import clear_output

# For visualizing and troubleshooting
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from collections import Counter

# For saving a path and loading
from pathlib import Path
import requests
from google.colab import drive
drive.mount('/content/drive')
import zipfile

print("Imported successfully")

In [None]:
# Report the library and interpreter versions in use so a run can be reproduced later.
print(f"Torch version: {torch.__version__}\n")
print(f"Torchvision version: {torchvision.__version__}\n")
print(f"CUDA version: {torch.version.cuda}\n")
print(f"cuDNN version: {torch.backends.cudnn.version()}\n")
print(f"Python version: {sys.version}")

# **Training loop setup:**

---



In [None]:
# Prefer the GPU when one is visible to PyTorch; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = "cpu"
print(device)

In [None]:
# Fetch helper_functions.py once; later runs reuse the local copy.
if Path("helper_functions.py").is_file():
  print("helper_functions.py already exists, download skipped")
else:
  print("helper_functions.py DNE, download initiated")
  request = requests.get("https://raw.githubusercontent.com/mrdbourke/pytorch-deep-learning/main/helper_functions.py",
                         timeout=30)
  # Fail loudly on an HTTP error instead of saving an error page as a .py file,
  # which would then be "found" and skipped on every later run.
  request.raise_for_status()
  with open("helper_functions.py", "wb") as f:
    f.write(request.content)

from helper_functions import accuracy_fn, print_train_time

# Each name needs its own membership test: `'a' and 'b' in dir()` only checks 'b',
# because the non-empty string 'a' is always truthy.
if 'accuracy_fn' in dir() and 'print_train_time' in dir():
    print("Both functions are imported.")
else:
    print("Accuracy function is not imported.")

In [6]:
def train_step(model: torch.nn.Module,
              data_loader: torch.utils.data.DataLoader,
              loss_fn: torch.nn.Module,
              optimizer: torch.optim.Optimizer,
              accuracy_fn,
              device: torch.device = device,
              scheduler = None,
               ):
  """Train `model` for one pass over `data_loader`.

  Args:
    model: Network to train; it is switched to training mode.
    data_loader: Iterable yielding `(image, label)` batches.
    loss_fn: Called as `loss_fn(pred, label)` on the raw model output.
    optimizer: Optimizer whose gradients are zeroed and stepped once per batch.
    accuracy_fn: Called as `accuracy_fn(y_true=..., y_pred=...)` with the
      rounded sigmoid of the model output; its result is summed per batch.
    device: Device the image and label batches are moved to.
    scheduler: Optional learning-rate scheduler, stepped once at the end of
      the pass. When omitted, a module-level variable named `scheduler` is
      used if one exists, which keeps existing callers working.

  Returns:
    `(train_loss, train_acc)`: the per-batch loss and accuracy averaged over
    `len(data_loader)`. The loss is a detached tensor.
  """
  train_loss, train_acc = 0, 0

  # For training
  model.train()
  for image, label in data_loader:

    # Optimizer zero grad
    optimizer.zero_grad()

    # Forward pass
    image = image.to(device)
    label = label.to(device, dtype=torch.float)
    # Shape the output like the labels. A bare .squeeze() would also drop the
    # batch dimension when the batch holds a single sample, and the loss would
    # then see mismatched shapes.
    pred = model(image).reshape(label.shape)

    # Calculate the loss
    loss = loss_fn(pred, label)
    # Accumulate a detached copy so the running total does not stay attached
    # to the autograd history of every batch.
    train_loss += loss.detach()
    train_acc += accuracy_fn(y_true = label,
                             y_pred = torch.round(torch.sigmoid(pred)))

    # Backprop
    loss.backward()

    # Optimizer step
    optimizer.step()

  # Advance the learning-rate schedule once per pass. Fall back to the
  # notebook-level `scheduler` so callers that do not pass one behave as before.
  if scheduler is None:
    scheduler = globals().get("scheduler")
  if scheduler is not None:
    scheduler.step()

  # Average the accumulated loss and accuracy over the number of batches
  train_loss /= len(data_loader)
  train_acc /= len(data_loader)
  return train_loss, train_acc

In [7]:
def test_step(model: torch.nn.Module,
                  data_loader: torch.utils.data.DataLoader,
                  loss_fn: torch.nn.Module,
                  optimizer: torch.optim.Optimizer,
                  accuracy_fn,
                  device: torch.device = device):
  """Evaluate `model` on one pass over `data_loader` without gradient tracking.

  Args:
    model: Network to evaluate; it is switched to eval mode.
    data_loader: Iterable yielding `(image, label)` batches.
    loss_fn: Called as `loss_fn(pred, label)` on the raw model output.
    optimizer: Not used here; kept so the signature mirrors `train_step` and
      existing callers keep working.
    accuracy_fn: Called as `accuracy_fn(y_true=..., y_pred=...)` with the
      rounded sigmoid of the model output; its result is summed per batch.
    device: Device the image and label batches are moved to.

  Returns:
    `(test_loss, test_acc)`: the per-batch loss and accuracy averaged over
    `len(data_loader)`.
  """

  test_loss, test_acc = 0, 0
  model.eval()

  with torch.inference_mode():
    for image, label in data_loader:
      # Forward pass
      image = image.to(device)
      label = label.to(device, dtype=torch.float)
      # Shape the output like the labels. A bare .squeeze() would also drop the
      # batch dimension for a single-sample batch and break the loss call.
      pred = model(image).reshape(label.shape)

      # Loss calculation
      loss = loss_fn(pred, label)
      test_loss += loss

      # Accuracy calculation
      test_acc += accuracy_fn(y_true = label,
                              y_pred = torch.round(torch.sigmoid(pred)))

    # Keep the in-place averaging inside the inference_mode block: the running
    # loss is a tensor created under inference mode.
    # NOTE(review): PyTorch is understood to reject in-place updates on such tensors outside the block — confirm.
    test_loss /= len(data_loader)
    test_acc /= len(data_loader)
  return test_loss, test_acc

# **Resnet34**

---



In [None]:
# Start from a torchvision ResNet-34 loaded with the 'IMAGENET1K_V1' weights.
model = models.resnet34(weights='IMAGENET1K_V1')
# Replace the final fully connected layer with a single-output layer;
# train_step/test_step pass that output through a sigmoid to get a binary prediction.
num_ftrs = model.fc.in_features
num_classes = 1
model.fc = nn.Linear(num_ftrs, num_classes)
# Move the model onto the device selected earlier in the notebook.
model = model.to(device)

# **Data Loading:**

---



In [9]:
# Unpack the dataset archive from Google Drive into /tmp.
# The context manager closes the archive even if extraction fails part-way.
with zipfile.ZipFile("/content/drive/MyDrive/malarianomalaria/cell_images.zip", 'r') as zip_ref:
    zip_ref.extractall("/tmp")

In [None]:
def load_dataset(dataset_path, transform=None):
    """Build an `ImageFolder` dataset rooted at `dataset_path`.

    Args:
        dataset_path: Directory handed to `ImageFolder` as its root.
        transform: Optional transform forwarded to `ImageFolder` unchanged.

    Returns:
        The constructed `ImageFolder` instance.
    """
    dataset = ImageFolder(root=dataset_path, transform=transform)
    return dataset

# Preprocessing applied to every image: resize to 224x224, then convert to a tensor
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

combined_dataset = load_dataset('/tmp/cell_images/cell_images', transform=transform)

print(f"Class types: {combined_dataset.classes}")


# Split the dataset into train and test sets
train_size = int(0.8 * len(combined_dataset))  # 80% for training
test_size = len(combined_dataset) - train_size  # 20% for testing
# Seed the split so the same images land in train/test on every run; without
# this, results from different runs are measured on different test sets.
split_generator = torch.Generator().manual_seed(42)
train_dataset, test_dataset = random_split(combined_dataset,
                                           [train_size, test_size],
                                           generator=split_generator)

# Create DataLoaders for train and test sets
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
# Evaluation does not benefit from shuffling; a fixed order keeps it repeatable.
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# **Data Checks:**

---



In [None]:
def get_random_images(data_loader, num_images=9):
    """Collect up to `num_images` images (and their labels) from `data_loader`.

    Batches are consumed in loader order; within each batch the samples are
    visited in a random permutation, so the selection is random within the
    leading batches rather than across the whole dataset.

    Args:
        data_loader: Iterable yielding `(image_batch, label_batch)` pairs.
        num_images: Maximum number of images to return. Zero or a negative
            value yields empty lists without touching the loader.

    Returns:
        `(images, labels)`: two lists of equal length. They are shorter than
        `num_images` when the loader holds fewer samples.
    """
    images = []
    labels = []

    # Guard: the old `len(images) == num_images` stop test could never fire
    # for num_images <= 0, so the entire loader was consumed and returned.
    if num_images <= 0:
        return images, labels

    for image_batch, label_batch in data_loader:
        remaining = num_images - len(images)
        # Shuffle the positions inside this batch, keeping only as many as are still needed
        order = torch.randperm(len(image_batch))[:remaining]
        for idx in order:
            images.append(image_batch[idx])
            labels.append(label_batch[idx])
        if len(images) >= num_images:
            break  # Enough images collected

    return images, labels

# Function to display images
def show_images(images, cols=3):
    """Draw `images` on a grid that is `cols` wide and display the figure.

    Args:
        images: Sequence of tensors; each is converted with `.numpy()` and its
            axes reordered with `transpose(1, 2, 0)` before being drawn
            (presumably channel-first image tensors — confirm against caller).
        cols: Number of grid columns; each cell is 3x3 inches.
    """
    n_rows = (len(images) - 1 + cols) // cols
    plt.figure(figsize=(cols * 3, n_rows * 3))
    for position, img in enumerate(images, start=1):
        plt.subplot(n_rows, cols, position)
        pixels = img.numpy().transpose(1, 2, 0)
        plt.imshow(pixels)
        plt.axis('off')
    plt.tight_layout()
    plt.show()

# Visual sanity check: pull nine images from the training loader and show them
# on the default three-column grid. The labels are collected but not displayed.
random_images, random_labels = get_random_images(train_dataloader, 9)
show_images(random_images)

In [None]:
def count_labels(data_loader):
    """Tally how often each label value occurs across all batches.

    Args:
        data_loader: Iterable yielding `(inputs, labels)` pairs, where
            `labels` exposes `.tolist()`.

    Returns:
        A `Counter` mapping each label value to its number of occurrences.
    """
    return Counter(
        label
        for _, batch_labels in data_loader
        for label in batch_labels.tolist()
    )

def calculate_proportions(label_count):
    """Convert label counts into fractions of the total, rounded to 3 decimals.

    Args:
        label_count: Mapping from label to its count.

    Returns:
        A dict with the same keys, each mapped to `count / total`.
    """
    total = sum(label_count.values())
    proportions = {}
    for label, count in label_count.items():
        proportions[label] = round(count / total, 3)
    return proportions

# Count the labels in each split first, then derive the per-label proportions.
train_label_count = count_labels(train_dataloader)
test_label_count = count_labels(test_dataloader)

train_proportions = calculate_proportions(train_label_count)
test_proportions = calculate_proportions(test_label_count)

# Print the same four-part report for each split: counts, blank line, heading, one line per label.
for counts_heading, props_heading, split_counts, split_props in (
    ("Training set label counts:", "Training set label proportions:",
     train_label_count, train_proportions),
    ("Testing set label counts:", "Testing set label proportions:",
     test_label_count, test_proportions),
):
    print(counts_heading, split_counts)
    print("")
    print(props_heading)
    for label, prop in split_props.items():
        print(f"Label {label}: {prop}")


# **Model Training:**

---



In [13]:
# SGD over every model parameter (lr=0.01, weight_decay=0.01).
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, weight_decay=0.01)
# NOTE(review): train_step calls scheduler.step() once per call (once per epoch),
# so with step_size=18 the schedule only takes effect on runs longer than 18
# epochs — confirm this is intended for the short runs configured below.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=18, gamma=0.1)
# Applied to the model's raw output: train_step/test_step pass `pred` to the loss
# without a sigmoid and only apply the sigmoid when computing accuracy.
loss_fn = nn.BCEWithLogitsLoss()

In [None]:
# Per-epoch history, consumed later to build the results DataFrame and plots
epochCount = []
trainLosses = []
testLosses = []
trainAccuracies = []
testAccuracies = []

epochs = 2
timeStart = timer()

for epoch in tqdm(range(epochs)):
  print(f"Epoch: {epoch}")
  print("----------------------------------")
  train_loss, train_acc = train_step(model = model,
                                      data_loader = train_dataloader,
                                      loss_fn = loss_fn,
                                      optimizer = optimizer,
                                      accuracy_fn = accuracy_fn,
                                      device = device)
  # `.5f` (five decimals) replaces the original `: 5f`, which parsed as
  # "space sign, width 5" and silently fell back to the default precision.
  print(f"Train loss: {train_loss:.5f} | Train acc: {train_acc:.2f}")

  test_loss, test_acc = test_step(model = model,
                                      data_loader = test_dataloader,
                                      loss_fn = loss_fn,
                                      optimizer = optimizer,
                                      accuracy_fn = accuracy_fn,
                                      device = device)
  print(f"Test loss: {test_loss:.5f} | Test acc: {test_acc:.2f}\n")

  epochCount.append(epoch)
  # Store detached CPU copies so the history holds no autograd graph or device
  # memory; the stored values are still tensors, as later cells expect.
  trainLosses.append(train_loss.detach().cpu())
  testLosses.append(test_loss.detach().cpu())
  trainAccuracies.append(train_acc)
  testAccuracies.append(test_acc)

timeEnd = timer()
# print_train_time comes from helper_functions.py; its return value is kept
# because save_model_info later formats it as a number of seconds.
totalTrainTime = print_train_time(start = timeStart,
                                  end = timeEnd,
                                  device = str(next(model.parameters()).device))

In [15]:
# Convert the per-epoch history into NumPy arrays for tabulation and plotting.
epochCount1 = np.array(epochCount)
# float() handles 0-d tensors on any device (with or without grad) as well as
# plain Python numbers, so this cell works whichever form the losses were stored in.
trainLosses1 = np.array([float(loss) for loss in trainLosses])
testLosses1 = np.array([float(loss) for loss in testLosses])
trainAccuracies1 = np.array(trainAccuracies)
testAccuracies1 = np.array(testAccuracies)

# One row per epoch; the column names are used by the plotting cells below.
df = pd.DataFrame({"Epoch": epochCount1,
                   "Train Loss": trainLosses1,
                   "Test Loss": testLosses1,
                   "Train Accuracy": trainAccuracies1,
                   "Test Accuracy": testAccuracies1})

# **Model saving:**

---

In [None]:
# Output folder on the mounted Google Drive: one sub-folder per model iteration.
drive_path = Path('/content/drive/My Drive/models' + '/' + model_iter)
# Create the folder and any missing parents; do nothing if it already exists.
drive_path.mkdir(parents=True, exist_ok=True)

# Weights file path: <drive_path>/<model_iter>.pth
modelName = model_iter + '.pth'
modelSavePath = drive_path / modelName

print(f"SAVING MODEL TO: {modelSavePath}")

# Saving model info to text file
def save_model_info(model, epochs, total_train_time, optimizer, loss_fn, file_path, transform):
    """Write a plain-text summary of a training run to `file_path`.

    The file holds six titled sections, in this order: model architecture,
    transformations, optimizer details, loss function, number of epochs and
    total training time. Every object is rendered with `str()`.

    Args:
        model: Model whose string representation is recorded.
        epochs: Number of epochs trained.
        total_train_time: Training duration in seconds; written with two decimals.
        optimizer: Optimizer whose string representation is recorded.
        loss_fn: Loss function whose string representation is recorded.
        file_path: Destination path; an existing file is overwritten.
        transform: Preprocessing pipeline whose string representation is recorded.
    """
    with open(file_path, 'w', encoding='utf-8') as file:
        # Model architecture
        file.write('**Model Architecture:**\n\n')
        file.write(str(model))
        file.write('\n\n')

        # Transformations: write the `transform` argument. The previous code
        # wrote the imported `transforms` module, so the file never described
        # the pipeline that was actually used.
        file.write('**Transformations:**\n\n')
        file.write(str(transform))
        file.write('\n\n')

        # Optimizer details
        file.write('**Optimizer Details:**\n\n')
        file.write(str(optimizer))
        file.write('\n\n')

        # Loss function details
        file.write('**Loss Function:**\n\n')
        file.write(str(loss_fn))
        file.write('\n\n')

        # Number of epochs
        file.write('**Number of Epochs:**\n\n')
        file.write(f'{epochs}\n\n')

        # Total training time
        file.write('**Total Training Time:**\n\n')
        file.write(f'{total_train_time:.2f} seconds\n')

# Run summary path: <drive_path>/<model_iter>.txt
textName = model_iter + '.txt'
modelInfoSavePath = drive_path / textName
save_model_info(model, epochs, totalTrainTime, optimizer, loss_fn, modelInfoSavePath, transform)
print(f"Model information saved to {modelInfoSavePath}")

# Save only the model's state_dict (not the whole model object) next to the summary file.
torch.save(obj=model.state_dict(), f=modelSavePath)

# **Loss Plotting:**

In [None]:
# Plot the per-epoch training and test loss from `df` (built in the results cell above).
sns.set()  # Sets the default seaborn style

plt.figure(figsize=(10, 6))
sns.lineplot(x='Epoch', y='Train Loss', data=df, label='Train Loss', color = 'red')
sns.lineplot(x='Epoch', y='Test Loss', data=df, label='Test Loss', color = 'darkred')

plt.title('Training vs Test Losses Over Epochs')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
# One tick per epoch index
plt.xticks(ticks=range(0, epochs, 1))

# Save the loss plot into the model's Drive folder, then display it
graph_save_path_2 = drive_path / 'loss_plot.png'
plt.savefig(graph_save_path_2)
plt.show()

# **Accuracy Plotting:**

In [None]:
# Plot the per-epoch training and test accuracy from `df` (built in the results cell above).
sns.set()  # Sets the default seaborn style

plt.figure(figsize=(10, 6))
# Legend labels now name the accuracy curves; they were copy-pasted from the
# loss plot and read "Train Loss" / "Test Loss".
sns.lineplot(x='Epoch', y='Train Accuracy', data=df, label='Train Accuracy', color = 'red')
sns.lineplot(x='Epoch', y='Test Accuracy', data=df, label='Test Accuracy', color = 'darkred')

plt.title('Training vs Test Accuracies Over Epochs')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
# One tick per epoch index
plt.xticks(ticks=range(0, epochs, 1))

# Save the accuracy plot into the model's Drive folder, then display it
graph_save_path_2 = drive_path / 'accuracy_plot.png'
plt.savefig(graph_save_path_2)
plt.show()