# CAPSTONE PROJECT 2: A COMPUTER VISION MODEL WHICH DETECTS BONE FRACTURES IN THE UPPER EXTREMITIES NAMELY: WRISTS, FOREARMS, UPPER ARM, & SHOULDER FRACTURES
#### Modeling



***
Goal: Build two to three different models and identify the best one. <br>
- Fit your models with a training dataset<br>
Hint: Try a number of different models: you will want to compare their outputs in the
model evaluation stage. For example, if you’re writing a classification model, you should
implement both an entropy model and a Gini impurity model. For hyperparameter tuning,
think of methods like cross-validation.<br>
- Review model outcomes — Iterate over additional models as needed<br>
Hint: you may want to use standard model evaluation metrics such as accuracy, recall,
precision, and F1.<br>
- Identify the final model that you think is the best model for this project<br>
Hint: the most powerful model isn’t always the best one to use. Other considerations
include computational complexity, scalability, and maintenance costs.<br>
Review the following questions and apply them to your analysis:<br>
- Does my data involve a time series or forecasting? If so, am I splitting the train
and test data appropriately?<br>
- Is my response variable continuous or categorical?<br>
***

In [1]:
# Import modules:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from random import sample

# Image Analysis imports
import sklearn as skl
import skimage as ski
import cv2
from skimage.filters import sobel
from skimage import exposure
from skimage.filters import try_all_threshold
from skimage.filters import threshold_yen
from skimage import measure
from skimage.segmentation import slic

# PyTorch Analysis imports
import torch
from torch import nn
from tqdm.auto import tqdm
from torchvision import transforms
from torchvision.utils import make_grid
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import albumentations as A
import torchvision
from torchvision.utils import draw_bounding_boxes, draw_segmentation_masks
import torchvision.transforms.functional as F
from torchvision.transforms import v2

# Feature extraction imports
from torchvision.models import resnet50
from torchvision.models.feature_extraction import get_graph_node_names
from torchvision.models.feature_extraction import create_feature_extractor
from torchvision.models.detection.mask_rcnn import MaskRCNN
from torchvision.models.detection.backbone_utils import LastLevelMaxPool
from torchvision.ops.feature_pyramid_network import FeaturePyramidNetwork
from torch.utils.data import Dataset
from PIL import Image

In [2]:
# Import the data as pandas DataFrames: df_test, df_train, df_val.
# The order of the paths below (test, train, val) must match the order in
# which the returned DataFrames are unpacked further down.
data_links = [r'C:\Users\micha\OneDrive\Documents\GitHub\SpringBoardDataScience\Capstone_Project_2\Data\df_test.csv', 
              r'C:\Users\micha\OneDrive\Documents\GitHub\SpringBoardDataScience\Capstone_Project_2\Data\df_train.csv', 
              r'C:\Users\micha\OneDrive\Documents\GitHub\SpringBoardDataScience\Capstone_Project_2\Data\df_val.csv']

def multi_df(list):
    '''
    Read several CSV sources into DataFrames.

    ``list`` is an iterable of paths (or anything ``pd.read_csv`` accepts).
    Returns a new list holding one DataFrame per entry, in the same order.
    '''
    # NOTE: the parameter name shadows the builtin; kept so callers are unaffected.
    return [pd.read_csv(csv_source) for csv_source in list]
    
# Unpack in the same order as the paths in data_links: test, train, val.
df_test, df_train, df_val = multi_df(data_links)

# Trim the extra index column that was written out alongside each CSV.
for _frame in (df_train, df_test, df_val):
    _frame.drop(columns='Unnamed: 0', inplace=True)
del _frame


In [3]:
# GLOBALS
# Root directory of the project
dir_path = r"C:\Users\micha\OneDrive\Documents\GitHub\SpringBoardDataScience\Capstone_Project_2"

# Directories for each data split
train_dir_path = r"C:\Users\micha\OneDrive\Documents\GitHub\SpringBoardDataScience\Capstone_Project_2\Data\train"
val_dir_path = r"C:\Users\micha\OneDrive\Documents\GitHub\SpringBoardDataScience\Capstone_Project_2\Data\valid"
test_dir_path = r"C:\Users\micha\OneDrive\Documents\GitHub\SpringBoardDataScience\Capstone_Project_2\Data\test"

# Image size constants
Width = 400
Height = 400

# Label vocabulary plus lookup tables in both directions:
#   c2l maps class name -> integer id, l2c maps integer id -> class name.
classes = [
    'elbow positive',
    'fingers positive',
    'forearm fracture',
    'humerus fracture',
    'humerus',
    'shoulder fracture',
    'wrist positive',
    'no fracture',
]
num_classes = 8
c2l = dict(zip(classes, range(num_classes)))
l2c = {class_id: class_name for class_name, class_id in c2l.items()}

# Training (BS is presumably the batch size -- confirm where it is consumed)
BS = 15

### Training Models: <br>
We are going to train three models. The first is a basic ResNet model trained from scratch, without any pretrained weights; the second is another ResNet model that starts from pretrained weights. As a fun exercise, we will also include a model that uses segmentation with bounding boxes instead, to see whether it achieves better performance; for that we will build a UNet() model as well.

##### Declare Dataset (train/validate) and DataLoader classes.

In [4]:
def show_tensor_images(image_tensor, num_images=25, size=(1, 28, 28)):
    '''
    Plot a batch of images as a single grid, five images per row.

    image_tensor: tensor of images; it is detached, moved to the CPU and
                  viewed as (-1, *size) before plotting.
    num_images:   maximum number of images taken from the front of the batch.
    size:         per-image shape used for the view.
    '''
    batch = image_tensor.detach().cpu().view(-1, *size)
    grid = make_grid(batch[:num_images], nrow=5)
    # Move the first axis to the end before handing the grid to imshow.
    plt.imshow(grid.permute(1, 2, 0).squeeze())
    plt.show()

def show(imgs):
    '''
    Draw one image, or a list of images, side by side in a single row with
    all axis ticks and tick labels removed.
    '''
    images = imgs if isinstance(imgs, list) else [imgs]
    _, axes = plt.subplots(ncols=len(images), squeeze=False)
    for column, tensor in enumerate(images):
        pil_image = F.to_pil_image(tensor.detach())
        axis = axes[0, column]
        axis.imshow(np.asarray(pil_image))
        axis.set(xticklabels=[], yticklabels=[], xticks=[], yticks=[])
        
def import_boxes(list):
    '''
    Function for converting the text data in the labels files into functional
    dictionaries:  Given a list of string tokens read from a label file it
    converts that list in place (labels to int, coordinates to float) and
    returns a dictionary with correlated label and coordinate data.

    A token made up only of digits (e.g. '3' or '10') is treated as a class
    label; every other token is parsed as a float coordinate.  Each label owns
    the coordinates that follow it up to the next label.

    Returns {'labels': [...], 'coords': [[...], ...]} where coords[k] is the
    flat coordinate list belonging to labels[k].  Entries are emitted while
    walking the tokens from the end, so they appear in reverse input order.

    Raises ValueError if a token is not a valid number.

    NOTE: a coordinate written without a decimal point (e.g. '0' or '1') is
    indistinguishable from a label and will be read as one.
    '''
    # NOTE: the parameter name shadows the builtin; kept so callers are unaffected.
    # Convert strings to ints and floats (in place, as callers may rely on it).
    for position, token in enumerate(list):
        # isdigit() rather than a length check, so multi-digit class ids
        # such as '10' are still recognised as labels.
        list[position] = int(token) if token.isdigit() else float(token)

    boxes = {'labels': [],
             'coords': []}
    pending = []
    # Walk backwards: coordinates accumulate until the label that owns them.
    for value in reversed(list):
        if isinstance(value, int):
            pending.reverse()  # restore the original left-to-right order
            boxes['labels'].append(value)
            boxes['coords'].append(pending)
            pending = []
        else:
            pending.append(value)

    return boxes

def display_features(img, return_nodes, out, tensor, tensor_index):
    '''
    Plot, for each layer in return_nodes, the image annotated with its
    bounding boxes next to four randomly sampled feature maps of that layer.

    img:          image passed to draw_bounding_boxes.
    return_nodes: iterable of layer names used as keys into out.
    out:          mapping from layer name to that layer's output tensor.
    tensor:       object providing get_boxes(index) and get_labels(index).
    tensor_index: index of the sample inside tensor.

    The figure is a fixed 4 x 5 grid, so at most four layers can be drawn.
    '''
    def _hide_ticks(axis):
        axis.set_xticks([])
        axis.set_yticks([])

    _, axes = plt.subplots(4, 5, figsize=(25, 20))

    for row, node_name in enumerate(return_nodes):
        # Drop the leading axis, then pick four maps at random.
        channel_maps = out[node_name].numpy().squeeze(0)
        picked_maps = sample(list(channel_maps), 4)

        sample_boxes = tensor.get_boxes(tensor_index)
        box_captions = [str(x) for x in tensor.get_labels(tensor_index)]
        annotated = draw_bounding_boxes(img, boxes=sample_boxes, colors="red", labels=box_captions)

        # First column: the annotated input image.
        axes[row][0].imshow(annotated.permute(1, 2, 0))
        _hide_ticks(axes[row][0])

        # Remaining columns: one heatmap per sampled feature map.
        for offset, fmap in enumerate(picked_maps):
            cell = axes[row][offset + 1]
            sns.heatmap(fmap, ax=cell, cbar=False)
            _hide_ticks(cell)
            cell.set_title(f"{node_name}: ({fmap.shape[0]} X {fmap.shape[1]})")

    plt.show()

In [5]:
class ImageDataset(Dataset):
    def __init__(self, root, transform=None, mode='train'):
        self.transform = transform
        self.files = [rf"{root}\Data\{mode}\images\{name}" for name in os.listdir(root + rf"\Data\{mode}\images") if name.endswith(".jpg")]
        self.labels = {}
        self.boxes = {}
        assert len(self.files) > 0, "Make sure you downloaded the images!"

    def get_image(self, index):
        """Load, transform and normalise the image at ``index``.

        The index wraps around the file list. The opened image is passed
        through ``self.transform``; the result is then auto-contrasted and
        normalised with its own per-channel mean and standard deviation.

        Assumes ``self.transform`` returns a channels-first tensor -- TODO confirm.
        """
        path = self.files[index % len(self.files)]
        tensor = self.transform(Image.open(path))

        # Anything without exactly three channels is tiled three times along
        # the channel axis.
        # NOTE(review): this yields 3 channels only for 1-channel input.
        if tensor.shape[0] != 3:
            tensor = tensor.repeat(3, 1, 1)

        # Statistics are taken per channel over the two trailing axes.
        channel_mean = tensor.mean([1, 2])
        channel_std = tensor.std([1, 2])
        pipeline = transforms.Compose([
            v2.RandomAutocontrast(p=1.0),
            transforms.Normalize(channel_mean, channel_std),
        ])
        return pipeline(tensor)
    
    def get_labels(self, index):
        if index in self.labels:
            return self.labels[index]
        labels = []
        boxes = []
        text_file = self.files[index % len(self.files)].replace(".jpg", ".txt").replace("images", "labels")
        with open(text_file, mode="r") as f:
            lines = f.readlines()
            for line in lines:
                values = [value for value in line.split()]
                bboxes = import_boxes(values)
                for i in range(0, len(bboxes['labels'])):
                    coords = bboxes['coords'][i]
                    coords = torch.reshape(torch.FloatTensor(coords), (int(len(coords)/2), 2))
                    size = torch.tensor([self.get_image(index).shape[2], self.get_image(index).shape[1]])
                    min_coords = torch.min(coords, dim=0).values * size
                    max_coords = torch.max(coords, dim=0).values * size
                    box = torch.cat((min_coords, max_coords), dim=0).tolist()
                    labels.append(l2c[bboxes['labels'][i]])
                    boxes.append(box)
        if not labels:
            labels.append('no fracture')
            self.labels[index] = labels
            self.boxes[index] = torch.FloatTensor([[0,0,0,0]])
        else:
            self.labels[index] = labels
            self.boxes[index] = torch.FloatTensor(boxes)
        return labels
    
    def get_boxes(self, index):
        if index in self.boxes:
            return self.boxes[index]
        _ = self.get_labels(index)
        return self.boxes[index]

    def __len__(self):
        return len(self.files)
    