In [1]:
# Required imports
import os
import shutil
import time
import cv2
import numpy as np
import math
import pandas as pd
from PIL import Image
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from torchvision import datasets
from matplotlib import pyplot as plt
from sklearn.metrics import classification_report
from xgboost import XGBClassifier
import seaborn as sns

# Enable matplotlib inline mode for Jupyter Notebook
%matplotlib inline
        
# Constants
# Setting MAX_IMAGE_PIXELS to None turns off Pillow's image-size safety limit,
# so arbitrarily large images can be opened through PIL.
Image.MAX_IMAGE_PIXELS = None
# Expose only the CUDA device with index 3 to this process.
# NOTE(review): machine-specific value; it is set after `import torch`, so it
# only takes effect if CUDA has not been initialised yet - confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "3"
# Image file extension. NOTE(review): not referenced by the cells visible here
# (create_dataframe compares against a literal '.jpg').
IMAGE_EXT = '.jpg'
# Root folder under which all input and output folders of this notebook live.
BASE_PATH = '/workspace/0728tot/last_experiment/'
# Presumably the (width, height) fed to the network - TODO confirm; unused in the visible cells.
RESIZE_DIMS = (224, 224)
# Presumably the number of output classes of the classifier - TODO confirm; unused in the visible cells.
NUM_CLASSES = 2


# Link the project-local resnet.py into the working directory so that it can be
# imported. The lexists() guard keeps this cell re-runnable: a plain `ln -s`
# fails with "File exists" every time after the first run.
if not os.path.lexists('resnet.py'):
    os.symlink(os.path.join(BASE_PATH, 'resnet.py'), 'resnet.py')
from resnet import *

ln: failed to create symbolic link 'resnet.py': File exists


2023-10-25 17:35:34.507112: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0


In [2]:
# Directory paths
# Every location below is derived from BASE_PATH; values are plain strings.

# Folder containing the original DUV WSI (scanned for '.jpg' files by create_dataframe)
ORIGINAL_IMAGES_DIR = os.path.join(BASE_PATH, 'unknown_DUV_WSI')

# Folder containing the reshaped DUV WSI, i.e. the foreground crops written by
# save_foreground_image as '<subject>_<side>.jpg'
RESHAPED_IMAGES_DIR = os.path.join(BASE_PATH, 'reshaped_unknown_DUV_WSI')

# Folders used while processing the GradCAM++ results
# NOTE(review): the exact role of each folder is not visible in this part of the notebook.
RAW_MAP_DIR = os.path.join(BASE_PATH, 'unknown_DenseNet169ImageNetGradCAM_2')
FINAL_HEAT_MAP_DIR = os.path.join(BASE_PATH, 'unknown_DenseNet169ImageNetGradCAM')
# NOTE: unlike the other paths, this one ends with a trailing slash.
FINAL_OVERLAY_HEAT_MAP_DIR = os.path.join(BASE_PATH, 'unknown_DUV_finalheatmap/')

# Folders holding the GradCAM++ results and the patch-level results in a grid system
GRADCAM_GRID_MAP_DIR = os.path.join(BASE_PATH, 'unknown_gradcam_gridmap')
PRED_GRID_MAP_DIR = os.path.join(BASE_PATH, 'unknown_patch_gridmap')

# Folders for processing the patch-level and GradCAM++ results for visualization purposes
PROCESS_PATCHES_DIR = os.path.join(BASE_PATH,'unknown_process_patchmaps')
BOUNDING_BOXES_OVERLAY_DIR = os.path.join(BASE_PATH,'unknown_bounding_boxes_overlays')

# Folders and files for patch-related data
# PATCH_DIR receives one sub-folder per slide side, named 'S<subject>_<side>'.
PATCH_DIR = os.path.join(BASE_PATH, 'unknown_Patches')
PATCH_FEATURES_DIR = os.path.join(BASE_PATH, 'unknown_Patches_features')
# Presumably features and metadata from an earlier (prior) dataset - TODO confirm.
PRIOR_PATCH_FEATURES_DIR = os.path.join(BASE_PATH, 'DUV_features')
PRIOR_PATCH_DATA_CSV = os.path.join(BASE_PATH, 'duvdata.csv')

# Folder holding training splits (an example set)
SPLIT_DIR = os.path.join(BASE_PATH, 'prior_train_splits')

In [22]:
# Ensure all output directories used for patches and predictions exist.
# RESHAPED_IMAGES_DIR is part of the list because the foreground crops are
# written there with cv2.imwrite further down in this cell, and cv2.imwrite
# does not create missing folders (the write just fails and returns False).
all_directories = [
    RAW_MAP_DIR,
    FINAL_HEAT_MAP_DIR,
    GRADCAM_GRID_MAP_DIR,
    PRED_GRID_MAP_DIR,
    PATCH_DIR,
    PATCH_FEATURES_DIR,
    RESHAPED_IMAGES_DIR,
]
for directory in all_directories:
    # exist_ok avoids the check-then-create race and keeps the cell re-runnable
    os.makedirs(directory, exist_ok=True)

# Helper functions
def create_dataframe(data_folder, is_map_format=False):
    '''Index every '.jpg' file found under data_folder (recursively).

    Args:
        data_folder: Root folder to walk.
        is_map_format: Selects how subject and side are read from the file name.
            False (default): the name is split on '_'; the text before the first
                '.' of the first piece is the subject and, if a second piece
                exists, the text before its first '.' is the side (otherwise
                side is 0). Both are converted to int.
            True: every '.' is replaced by '_' and the name is split on '_'; it
                must yield exactly three pieces (subject, side, extension).
                Subject and side are kept as strings in this mode.

    Returns:
        A pandas DataFrame with the columns 'relative_path', 'filename',
        'subject', 'side' and 'full_path', one row per accepted file. The
        columns are present even when no file matches, so callers can sort or
        select on them without special-casing an empty folder.

    Files whose names do not follow the expected pattern are skipped.
    '''
    columns = ['relative_path', 'filename', 'subject', 'side', 'full_path']
    data_entries = []
    for root, _, files in os.walk(data_folder):
        # Identical for every file of this directory, so compute it once.
        relative_path = os.path.relpath(root, data_folder)
        for filename in files:
            if os.path.splitext(filename)[-1] != '.jpg':
                continue
            try:
                if is_map_format:
                    subject, side, _ = filename.replace(".", "_").split('_')
                else:
                    name_parts = filename.split('_')
                    subject = int(name_parts[0].split('.')[0])
                    side = int(name_parts[1].split('.')[0]) if len(name_parts) > 1 else 0
            except ValueError:
                # Raised by int() on non-numeric text and by the 3-way unpacking
                # on a wrong number of pieces: the file does not follow the
                # naming scheme, so it is skipped. Other errors propagate.
                continue
            data_entries.append({
                'relative_path': relative_path,
                'filename': filename,
                'subject': subject,
                'side': side,
                'full_path': os.path.join(root, filename),
            })
    return pd.DataFrame(data_entries, columns=columns)

def save_foreground_image(image_path, output_directory, subject_id, side_id):
    '''Crop an image to the bounding box of its detected edges and save the crop.

    The image is read with OpenCV, converted to grayscale, denoised and passed
    through Canny edge detection. The bounding rectangle of the resulting edge
    map is cut out of the original colour image and written to
    '<output_directory>/<subject_id>_<side_id>.jpg'.

    Args:
        image_path: Path of the image to read.
        output_directory: Folder that receives the crop; created if missing.
        subject_id: First part of the output file name.
        side_id: Second part of the output file name.

    Raises:
        FileNotFoundError: If image_path cannot be read as an image.
        OSError: If the crop cannot be written.
    '''
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {image_path}")

    grayscale_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    denoised_image = cv2.fastNlMeansDenoising(grayscale_image, None, 25, 7, 21)
    edges_image = cv2.Canny(denoised_image, 100, 100, L2gradient=True)

    # Bounding rectangle around all non-zero (edge) pixels.
    x, y, w, h = cv2.boundingRect(edges_image)
    foreground_image = image[y:y+h, x:x+w]

    # cv2.imwrite does not create folders and reports failure only through its
    # return value, so make the folder first and check the result explicitly.
    os.makedirs(output_directory, exist_ok=True)
    output_path = os.path.join(output_directory, f"{subject_id}_{side_id}.jpg")
    if not cv2.imwrite(output_path, foreground_image):
        raise OSError(f"Could not write image: {output_path}")

def compute_foreground_ratio(image, background_threshold=5):
    '''Return the fraction of pixels counted as foreground.

    A pixel counts as foreground when its value in channel index 1 of the last
    axis is greater than or equal to background_threshold.
    NOTE(review): for images loaded with cv2.imread (BGR order) index 1 is the
    green channel - confirm that this channel is the intended one.

    Args:
        image: Array indexed as image[row, column, channel] with at least two channels.
        background_threshold: Smallest channel-1 value treated as foreground
            (default 5, the value previously hard-coded here).

    Returns:
        The foreground fraction in [0, 1]; 0.0 for an image without pixels.
    '''
    foreground_mask = image[:, :, 1] >= background_threshold
    if foreground_mask.size == 0:
        # np.mean of an empty array would give NaN plus a RuntimeWarning.
        return 0.0
    return np.mean(foreground_mask)

# Index the original slides, ordered by subject, and pull out the columns that
# the following steps need.
dataset_frame = create_dataframe(ORIGINAL_IMAGES_DIR).sort_values('subject')
image_paths = dataset_frame['full_path'].to_numpy()
subject_ids = dataset_frame['subject'].to_list()
side_ids = dataset_frame['side'].to_list()

# Getting the recropped images to reduce processing time:
# one foreground crop per original slide, saved into RESHAPED_IMAGES_DIR.
for index, source_path in enumerate(image_paths):
    save_foreground_image(
        source_path,
        RESHAPED_IMAGES_DIR,
        subject_ids[index],
        side_ids[index],
    )

In [6]:
# Patch Extraction
# Start from empty per-slide patch folders named 'S<subject>_<side>'.
subject_side_pairs = [f"{subj}_{side}" for subj, side in zip(subject_ids, side_ids)]
for pair in subject_side_pairs:
    full_patch_directory = os.path.join(PATCH_DIR, f'S{pair}')
    # Drop patches left over from an earlier run; a missing folder is not an error.
    shutil.rmtree(full_patch_directory, ignore_errors=True)
    os.makedirs(full_patch_directory)

# Index the foreground crops. In map format 'subject' and 'side' are strings,
# so this sort is lexicographic.
# NOTE(review): the row position (idx) is used as a prefix of every patch file
# name below, so that ordering ends up in the file names - confirm it is intended.
processed_frame = create_dataframe(RESHAPED_IMAGES_DIR, is_map_format=True)
processed_frame = processed_frame.sort_values(by=['subject'], ignore_index=True)
processed_image_paths = processed_frame.full_path.to_list()
final_subject_ids = processed_frame.subject.to_numpy()
final_side_ids = processed_frame.side.to_numpy()

patch_width, patch_height = 400, 400
start_time = time.time()

for idx, image_path in enumerate(processed_image_paths):
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None instead of raising when a file cannot be decoded.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    image_height, image_width = image.shape[:2]

    # Stretch the image up to the next multiple of the patch size so that the
    # grid below tiles it completely. Uses the patch constants rather than a
    # repeated literal, so changing the patch size keeps both in sync.
    resized_width = math.ceil(image_width / patch_width) * patch_width
    resized_height = math.ceil(image_height / patch_height) * patch_height
    resized_image = cv2.resize(image, (resized_width, resized_height), interpolation=cv2.INTER_AREA)

    # Images present in RESHAPED_IMAGES_DIR but not covered by the folder reset
    # above would otherwise lose all their patches, because cv2.imwrite fails
    # silently when the target folder is missing.
    patch_directory = os.path.join(PATCH_DIR, f'S{final_subject_ids[idx]}_{final_side_ids[idx]}')
    os.makedirs(patch_directory, exist_ok=True)

    for x in range(0, resized_width - patch_width + 1, patch_width):
        for y in range(0, resized_height - patch_height + 1, patch_height):
            cropped_image = resized_image[y:y+patch_height, x:x+patch_width]
            # Keep only patches that are more than 80% foreground.
            if compute_foreground_ratio(cropped_image) > 0.8:
                # File name: <image index>_<grid column>-<grid row>.png
                filename = os.path.join(patch_directory, f'{idx}_{x//patch_width}-{y//patch_height}.png')
                cv2.imwrite(filename, cropped_image)
    # Cumulative seconds elapsed since the extraction started.
    print(time.time() - start_time)

8.811657905578613
17.011672973632812
19.407439947128296
21.95302677154541
24.92561936378479
29.61857557296753
39.890738010406494
48.88908624649048
54.881184101104736
61.17561435699463
63.11239051818848
65.61716294288635
70.35196781158447
74.57070779800415
77.53427457809448
79.96434259414673
82.26051473617554
84.26114797592163
88.1029806137085
90.83645367622375
92.14929461479187
93.40744519233704
96.88540887832642
100.29750967025757
106.00837278366089
111.89108872413635
117.11548328399658
123.82678627967834
135.56371593475342
141.87888503074646
142.6039924621582
143.4754159450531
144.8688383102417
148.20301127433777
151.0614960193634
154.23908162117004
156.50515747070312
159.10237216949463
159.90557098388672
161.298588514328
163.14071822166443
166.92461562156677
182.45984768867493
183.28068232536316
185.19795322418213
188.99002265930176
189.32873558998108
190.72538447380066
193.1427035331726
195.5515193939209
200.62160873413086
205.38245820999146
207.2925591468811
209.41530466079712
211