In [None]:
# Mount Google Drive into the Colab filesystem; the dataset paths used below
# all live under this mount point.
from google.colab import drive

drive.mount(mountpoint='/content/drive/')

## Resize/Crop images

When an image is read with `matplotlib.pyplot.imread`, the returned image data array has one of the following shapes:

- (M, N) for grayscale images.

- (M, N, 3) for RGB images.

- (M, N, 4) for RGBA images.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os

from utils import *

In [None]:
# Root directories of the training and validation splits on the mounted Drive.
train_path = "/content/drive/MyDrive/W281/Final Project/Data/train"
val_path = "/content/drive/MyDrive/W281/Final Project/Data/valid"

# Explore mean image sizes

##### Train

In [None]:
# Gather one mean-size entry per training class. Only element [0] of what
# get_average_image_size returns is kept.
# NOTE(review): the class*_train_path names are not defined in the visible
# cells - presumably they come from `from utils import *`; verify.
mean_sizes = []

train_class_dirs = (
  ("CLASS: NORMAL", classN_train_path),
  ("\nCLASS: A", classA_train_path),
  ("\nCLASS: L", classL_train_path),
  ("\nCLASS: S", classS_train_path),
)

for banner, class_dir in train_class_dirs:
  print(banner)
  mean_sizes.append(get_average_image_size(class_dir)[0])


##### Val

In [None]:
# Validation-split counterpart of the training cell: appends one entry per
# class to the mean_sizes list created there, so running this cell again
# without re-running the training cell adds duplicate entries.
# NOTE(review): the class*_valid_path names are not defined in the visible
# cells - presumably they come from `from utils import *`; verify.
valid_class_dirs = (
  ("CLASS: NORMAL", classN_valid_path),
  ("\nCLASS: A", classA_valid_path),
  ("\nCLASS: L", classL_valid_path),
  ("\nCLASS: S", classS_valid_path),
)

for banner, class_dir in valid_class_dirs:
  print(banner)
  mean_sizes.append(get_average_image_size(class_dir)[0])


In [None]:
# Stack the per-class entries into an array under its own name. Rebinding
# `mean_sizes` itself to an ndarray would make the earlier cells, which call
# mean_sizes.append(...), fail when they are re-run after this one.
mean_sizes_arr = np.array(mean_sizes)

# Unweighted mean over all class/split entries, rounded to whole pixels.
# Columns 0 and 1 are presumably the two image dimensions - confirm against
# get_average_image_size.
# NOTE: the resize step further down uses its own hard-coded output_img_size,
# not this value.
out_img_size = tuple(
  int(np.round(mean_sizes_arr[:, axis].mean())) for axis in range(2)
)

print(f"Mean of all images: {out_img_size}")

#### Crop and Resize images

In [None]:
# Integer label -> name of the class sub-directory inside each split folder.
# Labels are assigned by position, starting at 0.
class_mappings = dict(enumerate((
  "normal",
  "adenocarcinoma_left.lower.lobe_T2_N0_M0_Ib",
  "large.cell.carcinoma_left.hilum_T2_N2_M0_IIIa",
  "squamous.cell.carcinoma_left.hilum_T1_N2_M0_IIIa",
)))

def detect_border_and_crop(image, threshold=50, kernel_size=5, sigma=1.0):
  """Crop an image to the bounding box of its above-threshold content.

  The image is smoothed with a Gaussian kernel, every pixel whose smoothed
  value exceeds `threshold` is treated as content, and the smoothed image is
  cropped to the smallest axis-aligned rectangle containing all content pixels.

  Args:
    image: (M, N) or (M, N, C) array. For multi-channel input a pixel counts as
      content when any of its first three channels exceeds the threshold, so a
      constant alpha channel does not prevent cropping.
    threshold: Cut-off in the same units as the pixel values. For float images
      scaled to [0, 1] the default of 50 selects nothing, so no crop happens.
    kernel_size: Side length of the square Gaussian kernel.
    sigma: Standard deviation of the Gaussian kernel.

  Returns:
    A tuple (crop, thresh_crop): the cropped smoothed image and its binary
    threshold (0 or 255). When no pixel exceeds the threshold, the whole
    smoothed image is returned uncropped.
  """
  kernel_1d = cv2.getGaussianKernel(kernel_size, sigma)
  kernel = np.outer(kernel_1d, kernel_1d)

  # Smoothing damps isolated bright pixels before thresholding.
  gray = cv2.filter2D(image, -1, kernel)

  # Binary mask of everything brighter than the threshold.
  _, thresh_gray = cv2.threshold(gray, thresh=threshold, maxval=255, type=cv2.THRESH_BINARY)

  content = thresh_gray != 0
  if content.ndim == 3:
    # Collapse the colour channels into a single 2-D mask (alpha is ignored).
    content = content[..., :3].any(axis=-1)

  # Rows / columns that contain at least one content pixel; the first and last
  # of each delimit the bounding box.
  rows = np.flatnonzero(content.any(axis=1))
  cols = np.flatnonzero(content.any(axis=0))

  if rows.size and cols.size:
    crop = gray[rows[0]:rows[-1] + 1, cols[0]:cols[-1] + 1]
  else:
    # Nothing above the threshold: keep the full (smoothed) image.
    crop = gray

  # Thresholded version of the cropped region.
  _, thresh_crop = cv2.threshold(crop, thresh=threshold, maxval=255, type=cv2.THRESH_BINARY)

  return crop, thresh_crop

def crop_and_resize_images(split_path, out_img_size, out_img_dir):
  """Crop and resize every image of one dataset split and save the results.

  For each class directory listed in `class_mappings`, every image file is
  read, cropped with `detect_border_and_crop`, resized to `out_img_size` and
  written to `<out_img_dir>/<split>/<class_name>/<img_name>`. Keeping the
  split and class in the output path preserves the labels and prevents files
  that share a name across classes or splits from overwriting each other.

  Args:
    split_path: Directory of one split; its last path component is used as the
      split name and it must contain one sub-directory per class.
    out_img_size: Target shape handed to `resize`.
    out_img_dir: Root directory for the processed images; created on demand.
  """
  # normpath drops a trailing slash so the split name is never empty.
  split = os.path.basename(os.path.normpath(split_path))
  image_extensions = ('.jpg', '.jpeg', '.png', '.gif')

  for class_name in class_mappings.values():
    class_path = os.path.join(split_path, class_name)
    class_out_dir = os.path.join(out_img_dir, split, class_name)
    os.makedirs(class_out_dir, exist_ok=True)

    for img_name in os.listdir(class_path):
      # Case-insensitive so files such as "IMG.JPG" are not skipped.
      if not img_name.lower().endswith(image_extensions):
        continue
      print(img_name)
      img = plt.imread(os.path.join(class_path, img_name))
      cropped, _ = detect_border_and_crop(img)
      # Resize the cropped image; the crop must not be discarded here.
      # NOTE(review): `resize` is not defined in the visible cells - presumably
      # skimage.transform.resize made available via `from utils import *`.
      resized_img = resize(cropped, out_img_size, anti_aliasing=True)
      plt.imsave(os.path.join(class_out_dir, img_name), resized_img, cmap="gray")


In [None]:
# Target size and destination root for the processed images.
output_img_size = (256, 256)
out_img_dir = "/content/drive/MyDrive/W281/Final Project/Data_Cropped_and_Resized"

# Only the training split is processed by this cell.
crop_and_resize_images(
  split_path=train_path,
  out_img_size=output_img_size,
  out_img_dir=out_img_dir,
)

##### Visualize results

In [None]:
# Display one original (unprocessed) training image from the "normal" class.
img_name = "/content/drive/MyDrive/W281/Final Project/Data/train/normal/n9.jpg"
img = plt.imread(img_name)
plt.imshow(img)

In [None]:
# Display the same file name from a processed-image directory, in grayscale.
# NOTE(review): this reads from ".../Data_Resized/train/normal/", while the
# processing cell above writes under ".../Data_Cropped_and_Resized" - confirm
# which directory is intended.
img_name = "/content/drive/MyDrive/W281/Final Project/Data_Resized/train/normal/n9.jpg"
img = plt.imread(img_name)
plt.imshow(img, cmap="gray")

In [None]:
# Array shape of the image loaded in the previous cell.
img.shape