<a href="https://colab.research.google.com/github/yibowang15/Capstone/blob/main/GrayScale_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import cv2
import os
import numpy as np
import zipfile
from io import BytesIO

# Step 1: Convert to grayscale by averaging the three colour channels
def convert_to_grayscale(image_data):
    """Decode an encoded image and convert it to grayscale by channel averaging.

    Args:
        image_data: Raw bytes of an encoded image file (e.g. JPEG or PNG).

    Returns:
        A 2-D ``numpy.uint8`` array holding, for every pixel, the unweighted
        mean of the three colour channels.

    Raises:
        ValueError: If ``image_data`` is empty or cannot be decoded as an image.
    """
    # View the raw bytes as a flat uint8 buffer, which is what imdecode expects.
    image_array = np.frombuffer(image_data, np.uint8)
    if image_array.size == 0:
        raise ValueError("Cannot decode an empty image buffer")

    # Decode into a 3-channel colour image. imdecode reports failure by
    # returning None rather than raising, so check for it explicitly.
    color_image = cv2.imdecode(image_array, cv2.IMREAD_COLOR)
    if color_image is None:
        raise ValueError("Image data could not be decoded as an image")

    # Average the colour channels (the author's approach, not a weighted
    # luminance formula). The float mean is truncated by the uint8 cast.
    gray_image = np.mean(color_image, axis=2).astype(np.uint8)

    return gray_image

# Step 2: adjust the size to 224x224 for VGG 16
def resize_image(image, target_size=(224, 224)):
    """Return ``image`` rescaled to ``target_size`` using ``cv2.resize``.

    Args:
        image: Image array to rescale.
        target_size: Output size handed to ``cv2.resize`` as its ``dsize``
            argument; defaults to 224x224.

    Returns:
        The rescaled image produced by ``cv2.resize``.
    """
    return cv2.resize(image, target_size)

# Step 3: Process every image contained in the zip archive
def process_images_in_zip(zip_path, output_folder):
    """Convert every image in a zip archive to a resized grayscale file.

    Each archive entry whose name ends in ``.jpg``, ``.jpeg`` or ``.png``
    (case-insensitively) is read, converted with ``convert_to_grayscale``,
    resized with ``resize_image`` and written into ``output_folder`` under
    the entry's base name. Progress is printed after every image.

    Args:
        zip_path: Path of the zip archive to read.
        output_folder: Directory that receives the processed images; it is
            created if it does not exist.

    Raises:
        OSError: If a processed image cannot be written to ``output_folder``.
    """
    # exist_ok=True replaces the separate existence check and also tolerates
    # the folder being created between the check and the call.
    os.makedirs(output_folder, exist_ok=True)

    with zipfile.ZipFile(zip_path, 'r') as archive:
        # Compare lower-cased names so entries such as 'IMG_1.JPG' are included.
        image_files = [
            f for f in archive.namelist()
            if f.lower().endswith(('.jpg', '.jpeg', '.png'))
        ]

        total_images = len(image_files)
        print(f"Total images to process: {total_images}")

        for i, image_file in enumerate(image_files, start=1):
            # Read the whole entry into memory as raw encoded bytes.
            image_data = archive.read(image_file)

            gray_image = convert_to_grayscale(image_data)
            resized_gray_image = resize_image(gray_image)

            # Only the base name is kept, so the archive's folder structure is
            # flattened; entries sharing a base name overwrite each other.
            output_image_path = os.path.join(output_folder, os.path.basename(image_file))

            # imwrite returns False on failure instead of raising; surface it
            # so missing outputs are not discovered later.
            if not cv2.imwrite(output_image_path, resized_gray_image):
                raise OSError(f"Failed to write image: {output_image_path}")

            print(f"Processed {i}/{total_images} images ({(i / total_images) * 100:.2f}%)")



In [None]:
# Input archive and output directory intended for process_images_in_zip.
# NOTE(review): both paths sit under /content/drive, presumably a Google Drive
# mount in Colab - confirm the drive is mounted before running.
zip_path = '/content/drive/MyDrive/CapstoneProject/data/train.zip'
output_folder = '/content/drive/MyDrive/CapstoneProject/data/train'


In [None]:

# # Begin processing
# process_images_in_zip(zip_path, output_folder)