In [1]:
# Mount Google Drive into the Colab runtime at /content/drive so the
# dataset paths used below (all under /content/drive) are reachable.
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [2]:
%%capture --no-stderr
import os
import pandas as pd
import seaborn as sns
import numpy as np
import cv2

import warnings
warnings.filterwarnings("ignore")

from sklearn.utils import shuffle
from tqdm import tqdm

In [3]:
# constants
# Root folder of the augmented images on the mounted Drive. It is expected to
# contain one sub-folder per class, the folder name being the class label.
DATASET_DIR = "/content/drive/My Drive/Prostate Cancer Experiments/Dataset/augmented_images"

In [4]:
# Sanity check: list the entries of the dataset root (one folder per class label).
os.listdir(DATASET_DIR)

['4', '0', '1', '5', '3', '2']

In [5]:
# Put the augmented dataset in a dataframe.
# Every sub-directory of DATASET_DIR is treated as one class: each file inside
# it becomes a row whose 'path' is the file path and whose 'label' is the
# directory name (kept as a string).
SEED = 42  # fixed seed so the shuffled row order is identical on every run

images = []
labels = []

# os.listdir returns entries in arbitrary order, so sort both levels to get a
# deterministic pre-shuffle order; together with the seeded shuffle this makes
# the resulting dataframe reproducible across kernel restarts.
for class_name in sorted(os.listdir(DATASET_DIR)):
    class_dir = os.path.join(DATASET_DIR, class_name)
    if not os.path.isdir(class_dir):
        # Ignore stray files sitting next to the class folders.
        continue
    for img in sorted(os.listdir(class_dir)):  # List images in the directory
        img_path = os.path.join(class_dir, img)

        images.append(img_path)
        labels.append(class_name)

# Shuffle paths and labels together (same permutation for both lists)
images, labels = shuffle(images, labels, random_state=SEED)
pd.set_option('display.max_colwidth', None)  # show full paths when displaying df
df = pd.DataFrame({'path': images, 'label': labels})

df.head()

Unnamed: 0,path,label
0,/content/drive/My Drive/Prostate Cancer Experiments/Dataset/augmented_images/2/aug__44_19923.jpeg,2
1,/content/drive/My Drive/Prostate Cancer Experiments/Dataset/augmented_images/3/aug__56_7310391.jpeg,3
2,/content/drive/My Drive/Prostate Cancer Experiments/Dataset/augmented_images/2/aug__41_9729946.jpeg,2
3,/content/drive/My Drive/Prostate Cancer Experiments/Dataset/augmented_images/0/aug__18_33768.jpeg,0
4,/content/drive/My Drive/Prostate Cancer Experiments/Dataset/augmented_images/3/aug__46_5659447.jpeg,3


In [6]:
# Show the path and label stored in the first row (index label 0) as a tuple
df.loc[0, 'path'], df.loc[0, 'label']

('/content/drive/My Drive/Prostate Cancer Experiments/Dataset/augmented_images/2/aug__44_19923.jpeg',
 '2')

In [7]:
# (number of images, number of columns) in the path/label dataframe
df.shape

(4279, 2)

In [8]:
# Number of images per class label, to check how balanced the classes are
df['label'].value_counts()

Unnamed: 0_level_0,count
label,Unnamed: 1_level_1
2,715
3,715
1,715
5,715
4,715
0,704


In [9]:
 # Desired image size for GAN training
IMG_HEIGHT = 256
IMG_WIDTH = 256
CHANNELS = 3

# Preallocate the output directly as float32. `img / 127.5` produces a float64
# array, so collecting those in a list and converting at the end keeps a
# float64 copy of the whole dataset alive next to the final float32 array.
# Writing each image into a preallocated float32 buffer stores exactly the same
# values while only ever holding one float64 image at a time.
all_images = np.empty((len(df), IMG_HEIGHT, IMG_WIDTH, CHANNELS), dtype=np.float32)
all_labels = []
skipped_paths = []  # files OpenCV could not decode

# Iterate over (path, label) pairs; zip over the columns avoids building a
# Series per row the way df.iterrows() does.
for image_path, label in tqdm(zip(df['path'], df['label']),
                              desc="Processing images", total=len(df)):
    # cv2.imread returns None instead of raising when a file cannot be read.
    # NOTE(review): OpenCV loads colour images in BGR channel order; the arrays
    # are saved as-is, so confirm downstream code expects BGR (or convert there).
    img = cv2.imread(image_path)
    if img is None:
        skipped_paths.append(image_path)
        continue

    img = cv2.resize(img, (IMG_WIDTH, IMG_HEIGHT))
    # Normalize image to [-1, 1]; the assignment casts the float64 result to float32.
    # len(all_labels) is the next free slot, keeping images and labels aligned.
    all_images[len(all_labels)] = (img / 127.5) - 1.0
    all_labels.append(label)

# Drop the unused tail of the buffer if some files were skipped.
all_images = all_images[:len(all_labels)]
all_labels = np.array(all_labels)

# Report unreadable files instead of dropping them silently.
if skipped_paths:
    print(f"Skipped {len(skipped_paths)} unreadable image(s), e.g. {skipped_paths[:5]}")

Processing images: 100%|██████████| 4279/4279 [22:32<00:00,  3.16it/s]


In [10]:
# Report the shapes of the image and label arrays built above
for caption, array in (("Train images shape:", all_images),
                       ("Train labels shape:", all_labels)):
    print(caption, array.shape)

Train images shape: (4279, 256, 256, 3)
Train labels shape: (4279,)


In [11]:
# Peek at the first preprocessed image; pixel values were normalized to [-1, 1] above
all_images[0]

array([[[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        ...,
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]],

       [[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        ...,
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]],

       [[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        ...,
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]],

       ...,

       [[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        ...,
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]],

       [[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        ...,
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]],

       [[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        ...,
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]]], dtype=float32)

In [12]:
# Label of the first preprocessed image (a string: the name of its class folder)
all_labels[0]

'2'

In [13]:
# Destination folder on Drive for the preprocessed NumPy arrays
save_dir = "/content/drive/My Drive/Prostate Cancer Experiments/Dataset/processed_data"
os.makedirs(save_dir, exist_ok=True)  # create it if missing; no error if it already exists

# Write the images first, then the labels, each to its own .npy file in save_dir
for file_name, array in (("all_images.npy", all_images),
                         ("all_labels.npy", all_labels)):
    np.save(os.path.join(save_dir, file_name), array)

print(f"Data saved to {save_dir}")

Data saved to /content/drive/My Drive/Prostate Cancer Experiments/Dataset/processed_data
