In [1]:
# Attach Google Drive to this Colab runtime so the paths under
# /content/drive used by the cells below can be read and written.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Image Preprocessing Pipeline

In [2]:
from PIL import Image
import numpy as np
import cv2

from google.colab import drive
# Base directory of the project's code and data files on the mounted Google Drive.
project_root = "/content/drive/MyDrive/CS 766 Project/Project Coding and Data Files"

In [3]:
# input: np.array of original RGB image (0-255 values, or floats already scaled to 0-1)
# return: np.array of size 1000x1000 (by default) after preprocessing complete
def preprocessSteps(original_img, block_size=25, offset=1, out_size=(1000, 1000)):
    """Grayscale, adaptively threshold, resize and normalise a single image.

    Parameters
    ----------
    original_img : np.ndarray
        Image as H x W x C (channels are averaged into one gray channel) or
        H x W (used as the gray channel directly). Integer data is taken to be
        on the 0-255 scale. Floating-point data whose maximum is <= 1 is
        treated as already normalised to [0, 1] and is scaled back to 0-255
        first; without that step the uint8 cast below would turn such an image
        into (almost) all zeros and the threshold result would be meaningless.
    block_size : int, optional
        Neighbourhood size for the adaptive threshold (OpenCV requires an odd
        value greater than 1). 25 is the block size given by the paper.
    offset : int or float, optional
        Constant subtracted from the local mean. The paper does not give it.
        This code has always passed 1 (an earlier comment said 0, which did
        not match the call), so 1 remains the default.
    out_size : tuple of int, optional
        Target size handed to PIL's ``resize`` as (width, height).

    Returns
    -------
    np.ndarray
        float32 array of shape (out_size[1], out_size[0]) with values in [0, 1].
    """
    img = np.asarray(original_img)

    # Undo a previous /255 normalisation so that the uint8 conversion in
    # STEP 2 keeps the image content. Heuristic: a float image with max <= 1.
    if np.issubdtype(img.dtype, np.floating) and img.size > 0 and img.max() <= 1.0:
        img = img.astype(np.float32) * 255.0

    # STEP 1
    # Convert to grayscale using a plain average over the channel axis.
    # Note: Not ideal method to convert to grayscale - should use luminous factors
    # A 2-D input is already single-channel, so it is passed through unchanged.
    grayScale_img_arr = np.mean(img, axis=2) if img.ndim == 3 else img

    # STEP 2
    # Adaptive mean thresholding (the paper does not say mean vs gaussian).
    # cv2.adaptiveThreshold needs an 8-bit single-channel image; clip first so
    # out-of-range values saturate instead of wrapping around in the cast.
    open_cv_img = np.clip(grayScale_img_arr, 0, 255).astype(np.uint8)
    threshold_img_cv2 = cv2.adaptiveThreshold(
        open_cv_img,
        255,
        cv2.ADAPTIVE_THRESH_MEAN_C,
        cv2.THRESH_BINARY,
        block_size,
        offset,
    )
    threshold_img = Image.fromarray(threshold_img_cv2)

    # STEP 3
    # Resize the thresholded image to the requested size (1000x1000 by default).
    resized_img = threshold_img.resize(out_size)

    # STEP 4
    # Pixel rescaling to 0-1 by dividing each pixel by 255.
    # Note: Not ideal normalization method as min/max are not 0-255 in each image
    # Ideal: https://stackoverflow.com/questions/62783984/how-to-normalize-pixel-values-in-an-image-and-save-it
    normalized_img = np.asarray(resized_img).astype('float32') / 255

    return normalized_img

In [4]:
# input: np.array of original RGB image
# return: np.array of size 1000x1000x3 after no-preprocessing - only normalization and resize
def no_preprocessSteps(original_img):
    """Resize an image to 1000x1000 and scale its pixel values by 1/255.

    No grayscale conversion or thresholding is applied, so the channel layout
    of the input is kept (1000x1000x3 for an RGB input). The result is a
    float32 array.
    """
    target_size = (1000, 1000)

    # Round-trip through PIL only for the resize, then back to a numpy array.
    pil_resized = Image.fromarray(original_img).resize(target_size)
    pixels_f32 = np.asarray(pil_resized).astype('float32')

    # Divide by 255 to bring 0-255 pixel values into the 0-1 range.
    return pixels_f32 / 255

Run the preprocessing stage (grayscale, adaptive threshold, resize, normalize) on the selected batches

In [5]:
# Batches to run through preprocessSteps. Uncomment (or append) the file names
# to process; with an empty list the loop below does nothing.
all_filenames = []
#all_filenames.append('Xtest_Batch0.npy')
#all_filenames.append('Xtest_Batch1.npy')
#for i in range(0, 9):
#  nameX = 'Xtrain_Batch' + str(i) + '.npy'
#  all_filenames.append(nameX)

#newFilename = "Xest_Batch0_even.npy"
#all_filenames.append(newFilename)
#newFilename = "Xest_Batch1_even.npy"
#all_filenames.append(newFilename)

# Source folder of the raw batches and the path prefix of the processed output files.
raw_batches_dir = '/content/drive/MyDrive/CS 766 Project/Project Coding and Data Files/Data Batches/Even Class Distribution Datasets/'
processed_prefix = '/content/drive/MyDrive/CS 766 Project/Project Coding and Data Files/Processed_Data_Batches/Even_Class_Distribution_Datasets/Processed_'

for i in range(0, len(all_filenames)):
  url = raw_batches_dir + all_filenames[i]
  # allow_pickle=True lets the file execute arbitrary code while loading;
  # only load batch files that come from a trusted source.
  all_images = np.load(url, allow_pickle=True)
  num_images = len(all_images)
  # preprocessSteps returns floats in [0, 1]. An integer buffer (previously
  # np.int8) would truncate every value below 1 to 0, so store as float16,
  # the same dtype the other processing stages in this notebook use.
  final_result = np.zeros([num_images, 1000, 1000], dtype = np.float16)
  for x in range(0, num_images):
    original_img = all_images[x]
    processed_img = preprocessSteps(original_img)
    final_result[x] = processed_img
  savedURL = processed_prefix + all_filenames[i]
  np.save(savedURL, final_result)


Run the non-preprocessing stage (resize and normalize only, color kept) on the selected batches

In [6]:
# Colour stage: every listed batch is passed image-by-image through
# no_preprocessSteps and the stacked result is saved as float16.
# The append calls are commented out, so the list is empty and the loop is a no-op.
all_filenames = []

newFilename = "Xtrain_Batch8_even.npy"
#all_filenames.append(newFilename)
#newFilename = "Xest_Batch1_even.npy"
#all_filenames.append(newFilename)

# Input folder and output path prefix, spelled out once instead of per iteration.
color_input_dir = '/content/drive/MyDrive/CS 766 Project/Project Coding and Data Files/Data Batches/Even Class Distribution Datasets/Xtrain/'
color_output_prefix = '/content/drive/MyDrive/CS 766 Project/Project Coding and Data Files/Processed_Data_Batches/Color_Even_Class_Distribution_Datasets/Color_Processed_'

for batch_name in all_filenames:
  url = color_input_dir + batch_name
  all_images = np.load(url, allow_pickle=True)
  num_images = len(all_images)

  # One 1000x1000x3 slot per image in the batch.
  final_result = np.zeros([num_images, 1000, 1000, 3], dtype = np.float16)
  for idx, raw_img in enumerate(all_images):
    final_result[idx] = no_preprocessSteps(raw_img)

  savedURL = color_output_prefix + batch_name
  np.save(savedURL, final_result)

Filter the color (non-preprocessed) batches down to classes 0 and 4 only, relabeling class 4 as 1

In [7]:
def keep_classes_0_and_4(all_images, all_labels):
  """Select the samples labelled 0 or 4 and relabel class 4 as 1.

  The class is read from index 1 of each label entry. Samples with any other
  class are dropped. Relabelling is done on a copy of the label entry, so the
  ``all_labels`` array passed in is left unmodified.

  Parameters
  ----------
  all_images : sequence
      Images, in the same order as ``all_labels``.
  all_labels : sequence
      Label entries; each must support ``entry[1]`` and ``entry.copy()``.

  Returns
  -------
  (np.ndarray, np.ndarray)
      The kept images and their (possibly relabelled) label entries.

  Raises
  ------
  ValueError
      If the number of images and labels differ.
  """
  if len(all_images) != len(all_labels):
    raise ValueError(
      "images and labels differ in length: "
      + str(len(all_images)) + " vs " + str(len(all_labels))
    )

  short_images = []
  short_labels = []
  for image, label in zip(all_images, all_labels):
    if label[1] == 0:
      short_images.append(image)
      short_labels.append(label)
    elif label[1] == 4:
      # Copy first: indexing a numpy array returns a view, and writing to
      # it would silently change the caller's label array as well.
      relabelled = label.copy()
      relabelled[1] = 1
      short_images.append(image)
      short_labels.append(relabelled)

  return np.array(short_images), np.array(short_labels)


# Paired lists: data_filenames[i] holds the images for label_filenames[i].
# Both are empty unless the lines below are uncommented, so the loop is a no-op.
data_filenames = []
label_filenames = []

#dataNewFilename = "Color_Processed_Xest_Batch1_even.npy"
#data_filenames.append(dataNewFilename)
#labelNewFilename = "Yest_Batch1_even.npy"
#label_filenames.append(labelNewFilename)

color_batches_dir = '/content/drive/MyDrive/CS 766 Project/Project Coding and Data Files/Processed_Data_Batches/Color_Even_Class_Distribution_Datasets/'
label_batches_dir = '/content/drive/MyDrive/CS 766 Project/Project Coding and Data Files/Data Batches/Even Class Distribution Datasets/Ytest/'
subset_output_prefix = '/content/drive/MyDrive/CS 766 Project/Project Coding and Data Files/Processed_Data_Batches/04_Only_Color_Even_Class_Distribution_Datasets/04_Only_'

for i in range(0, len(data_filenames)):
  url_images = color_batches_dir + data_filenames[i]
  # allow_pickle=True lets the file execute arbitrary code while loading;
  # only load files that come from a trusted source.
  all_images = np.load(url_images, allow_pickle=True)
  url_labels = label_batches_dir + label_filenames[i]
  all_labels = np.load(url_labels, allow_pickle=True)

  short_images, short_labels = keep_classes_0_and_4(all_images, all_labels)

  savedURL_img = subset_output_prefix + data_filenames[i]
  savedURL_label = subset_output_prefix + label_filenames[i]
  np.save(savedURL_img, short_images)
  np.save(savedURL_label, short_labels)


Run the preprocessing stage on the batches that contain only classes 0 and 4

In [8]:
# Input batch (classes 0 and 4 only, colour) and the name its processed
# counterpart is saved under; the two lists are matched position by position.
newFilename = "04_Only_Color_Processed_Xtrain_Batch8_even.npy"
all_filenames = [newFilename]

outname = "04_Only_Processed_Xtrain_Batch8_even.npy"
out_names = [outname]

subset_input_dir = '/content/drive/MyDrive/CS 766 Project/Project Coding and Data Files/Processed_Data_Batches/04_Only_Color_Even_Class_Distribution_Datasets/Xtrain/'
subset_output_dir = '/content/drive/MyDrive/CS 766 Project/Project Coding and Data Files/Processed_Data_Batches/04_Only_Preprocessed_Even_Class_Distribution_Datasets/'

for src_name, dst_name in zip(all_filenames, out_names):
  url = subset_input_dir + src_name
  all_images = np.load(url, allow_pickle=True)
  num_images = len(all_images)

  # NOTE(review): the input file name suggests these are colour arrays that
  # were already scaled to [0, 1]; confirm preprocessSteps is given the value
  # range it expects.
  final_result = np.zeros([num_images, 1000, 1000], dtype = np.float16)
  for idx, original_img in enumerate(all_images):
    final_result[idx] = preprocessSteps(original_img)

  savedURL = subset_output_dir + dst_name
  np.save(savedURL, final_result)