# 4.2 Supervised Learning with Random Forest

In [None]:
import os
import sys
import re
import csv
import cv2 as cv
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

## Load image paths from Google Drive. To use local files instead, change `google_path` to `.`

In [None]:
# Root of the mounted drive; every dataset folder lives under <root>/QC.
google_path = '/content/drive/MyDrive'
path = os.path.join(google_path, 'QC')

# Sub-folders holding the train/validation split CSVs, the masks and the
# RGB images, all derived from the same dataset root.
train_test_path, mask_path, rgb_path = (
    os.path.join(path, subdir) for subdir in ('split', 'mask', 'rgb')
)

# Load training and validation csv files
def load_subjects(csv_path):
    """Read a subject-list CSV and return its data rows.

    The first row is treated as a header and dropped. Blank lines, which
    ``csv.reader`` yields as empty lists, are skipped so that callers can
    safely index the first column of every returned row.

    Args:
        csv_path: Path to the CSV file.

    Returns:
        A list of rows (each a list of strings) without the header. The
        list is empty when the file has no data rows.
    """
    with open(csv_path, newline='') as f:
        reader = csv.reader(f)
        # Drop the header row; the default keeps an empty file from raising.
        next(reader, None)
        return [row for row in reader if row]

# Read the subject lists for both splits from the split directory.
train_subjects, val_subjects = (
    load_subjects(os.path.join(train_test_path, csv_name))
    for csv_name in ('train_subjects.csv', 'validate_subjects.csv')
)

## Load images and masks and make transformations

In [None]:
# Match images with the labels in the csv file
def get_image_mask_pairs(subjects, image_dir=None, mask_dir=None):
    """Collect image/mask path pairs for the given subjects.

    A file belongs to a subject when its name starts with the subject ID
    (the first column of the subject row). The ID is compared literally,
    not as a regular expression, so characters such as ``.`` or ``+`` in
    an ID cannot match unrelated files.

    Args:
        subjects: Iterable of CSV rows; ``row[0]`` is the subject ID.
            Empty rows and empty IDs are ignored.
        image_dir: Directory to scan for image files. Defaults to the
            module-level ``rgb_path``.
        mask_dir: Directory the mask paths are built under. Defaults to
            the module-level ``mask_path``.

    Returns:
        A tuple ``(images, masks)`` of equally long lists of paths. Each
        mask path uses the same file name as its image. Mask files are
        not checked for existence here.
    """
    image_dir = rgb_path if image_dir is None else image_dir
    mask_dir = mask_path if mask_dir is None else mask_dir

    # List the directory once (not once per subject) and sort it so the
    # resulting order does not depend on the filesystem.
    filenames = sorted(os.listdir(image_dir))

    images, masks = [], []
    for patient in subjects:
        # An empty ID would be a prefix of every file name; skip it.
        if not patient or not patient[0]:
            continue
        subject_id = patient[0]
        for filename in filenames:
            if filename.startswith(subject_id):
                images.append(os.path.join(image_dir, filename))
                masks.append(os.path.join(mask_dir, filename))
    return images, masks

# Resolve the image/mask file lists for the training and validation subjects.
(train_images, train_masks), (val_images, val_masks) = map(
    get_image_mask_pairs, (train_subjects, val_subjects)
)

# Extract features and labels for training and validation sets
def extract_features_labels(images, masks, target_size=(256, 256)):
    """Build per-pixel features and binary labels from image/mask files.

    Each image is read as grayscale and each mask as a 3-channel image;
    both are resized to ``target_size`` and flattened. The only feature
    of a pixel is its grayscale intensity. Its label is 1.0 when the
    first channel of the mask is non-zero, and 0.0 otherwise.

    Args:
        images: Sequence of image file paths.
        masks: Sequence of mask file paths, aligned with ``images``.
        target_size: ``(width, height)`` passed to ``cv.resize``.

    Returns:
        A tuple ``(X, Y)``. ``X`` has shape ``(n_pixels, 1)`` and ``Y``
        has shape ``(n_pixels,)`` with dtype float32, where ``n_pixels``
        is the number of pairs times ``width * height``. Both are empty
        when no pairs are given.

    Raises:
        FileNotFoundError: If an image or mask cannot be read.
    """
    X, Y = [], []
    for img_file, mask_file in zip(images, masks):
        # cv.imread signals failure by returning None rather than raising;
        # fail here with the offending path instead of inside cv.resize.
        img = cv.imread(img_file, cv.IMREAD_GRAYSCALE)
        if img is None:
            raise FileNotFoundError(f"Could not read image: {img_file}")
        mask_color = cv.imread(mask_file, cv.IMREAD_COLOR)
        if mask_color is None:
            raise FileNotFoundError(f"Could not read mask: {mask_file}")

        # Bring every image to a common size so all samples contribute the
        # same number of pixels.
        img_resized = cv.resize(img, target_size)

        # Nearest-neighbour keeps mask values discrete. Interpolating and
        # then thresholding at > 0 would grow the mask at its borders.
        mask_resized = cv.resize(
            mask_color, target_size, interpolation=cv.INTER_NEAREST
        )

        # Convert the non-zero labels of the first channel into 1 to get a
        # binary mask.
        mask_labels = (mask_resized[:, :, 0] > 0).astype(np.float32)

        # Flatten so that every pixel becomes one sample.
        X.append(img_resized.flatten())
        Y.append(mask_labels.flatten())

    if not X:
        # np.concatenate rejects an empty list; return empty arrays instead.
        return (
            np.empty((0, 1), dtype=np.uint8),
            np.empty((0,), dtype=np.float32),
        )

    X = np.concatenate(X, axis=0).reshape(-1, 1)
    Y = np.concatenate(Y, axis=0)
    return X, Y

# Turn both splits into per-pixel feature matrices and label vectors.
x_train, y_train = extract_features_labels(images=train_images, masks=train_masks)
x_val, y_val = extract_features_labels(images=val_images, masks=val_masks)

## Train and evaluate the random forest model

In [None]:
# Fit a random forest on the per-pixel features. random_state fixes the
# forest's randomness and n_jobs=-1 uses all available cores.
clf = RandomForestClassifier(n_estimators=50, random_state=42, n_jobs=-1)
clf.fit(x_train, y_train)

# Predict on training and validation set
y_train_pred = clf.predict(x_train)
y_val_pred = clf.predict(x_val)

# Evaluate performance
train_accuracy = accuracy_score(y_train, y_train_pred)
val_accuracy = accuracy_score(y_val, y_val_pred)

print(f"Training Accuracy: {train_accuracy:.4f}")
# This line needs the f-prefix; without it the placeholder is printed literally.
print(f"Validation Accuracy: {val_accuracy:.4f}")
print("\nTraining Classification Report:\n", classification_report(y_train, y_train_pred))
print("\nValidation Classification Report:\n", classification_report(y_val, y_val_pred))

## Show sample images

In [None]:
# Predictions are one flat vector with a fixed number of pixels per
# validation image, in the order of val_images. This size must match the
# target_size used when the validation features were extracted
# (extract_features_labels defaults to 256x256).
sample_size = (256, 256)  # (width, height)
pixels_per_image = sample_size[0] * sample_size[1]

# Show at most three samples, or fewer if the validation set is smaller.
num_samples = min(3, len(val_images))

for i in range(num_samples):
    # OpenCV loads colour images as BGR; matplotlib expects RGB.
    img = cv.cvtColor(cv.imread(val_images[i]), cv.COLOR_BGR2RGB)
    true_mask = cv.imread(val_masks[i], cv.IMREAD_GRAYSCALE)

    # Cut this image's pixels out of the flat prediction vector and restore
    # the (height, width) layout they were flattened from.
    start = i * pixels_per_image
    pred_mask = y_val_pred[start:start + pixels_per_image].reshape(
        sample_size[1], sample_size[0]
    )

    fig, axes = plt.subplots(1, 3, figsize=(15, 5))
    axes[0].imshow(img)
    axes[0].set_title("Original Image")
    axes[1].imshow(true_mask, cmap="gray")
    axes[1].set_title("Ground Truth Mask")
    axes[2].imshow(pred_mask, cmap="gray")
    axes[2].set_title("Predicted Mask")
    plt.show()