This notebook preprocesses the training and test image data.
The code is obtained from: vbrodrigues/Concrete-Crack-Classification-Model

In [1]:
print("Importing libraries...")

import cv2  # OpenCV: image reading and resizing
import numpy as np
import os
import random
import h5py  # writes the processed arrays to a gzip-compressed HDF5 file (the resizing itself is done with cv2)

print("Libraries Imported...")

Importing libraries...
Libraries Imported...


In [2]:
# Root folder that contains one sub-folder per entry in `categories`.
data_directory = "ConcreteImages/"  #modify accordingly
# Side length, in pixels, that every image is resized to (img_size x img_size).
img_size = 32
# Sub-folder names; an entry's index in this list is used as its class label
# (so "Positive" -> 0 and "Negative" -> 1).
categories = ["Positive", "Negative"]
# Filled by create_data() with [image_array, class_num] pairs.
data = []

In [3]:
def create_data():
    """Load every image of every category into the global ``data`` list.

    For each name in ``categories`` the files found in
    ``data_directory/<category>`` are read as grayscale images, resized to
    ``img_size`` x ``img_size`` and appended to ``data`` as
    ``[image_array, class_num]``, where ``class_num`` is the position of the
    category in ``categories``.

    Files that OpenCV cannot decode (``cv2.imread`` returns ``None``) are
    reported and skipped instead of crashing inside ``cv2.resize``.

    The function appends to ``data``; it does not clear it first.
    """
    for class_num, category in enumerate(categories):
        path = os.path.join(data_directory, category)

        # os.listdir returns entries in arbitrary order; sort them so the
        # pre-shuffle ordering is the same on every run and every machine.
        for img in sorted(os.listdir(path)):
            img_path = os.path.join(path, img)
            img_array = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)

            # imread signals failure by returning None rather than raising
            # (e.g. non-image files or corrupt images in the folder).
            if img_array is None:
                print("Skipping unreadable file:", img_path)
                continue

            new_array = cv2.resize(img_array, (img_size, img_size))
            data.append([new_array, class_num])

In [4]:
# Populate the global `data` list from the image folders.
# NOTE: create_data() appends to `data`, so running this cell a second time
# without resetting `data` first adds every image again.
print("Creating data...")
create_data()
print("Data successfully created...")

Creating data...
Data successfully created...


In [5]:
print("Shuffling data...")
# Shuffle in place with a dedicated, seeded generator so that the resulting
# sample order is reproducible from run to run and the global `random` state
# is left untouched. Change the seed to obtain a different (but still
# repeatable) ordering.
shuffle_seed = 42
random.Random(shuffle_seed).shuffle(data)
print("Data successfully shuffled...")

Shuffling data...
Data successfully shuffled...


In [6]:
# Containers for the split dataset: X_data receives the image arrays and
# y receives the matching class labels.
X_data = []
y = []

In [7]:
# Split the [image, label] pairs in `data` into X_data (the images) and
# y (the labels). Both lists are rebuilt from scratch here, so re-running
# this cell yields the same result instead of appending every sample again.
X_data = [features for features, _ in data]
y = [label for _, label in data]

print("X and y data successfully created...")

X and y data successfully created...


In [8]:
# Stack the images into one array and add a trailing channel axis, giving the
# 4-D layout (n_images, img_size, img_size, 1) that the original note says
# TensorFlow expects (a single channel because the images are grayscale).
print("Reshaping X data...")
target_shape = (len(X_data), img_size, img_size, 1)
X = np.reshape(np.array(X_data), target_shape)
print("X data successfully reshaped...")

Reshaping X data...
X data successfully reshaped...


In [9]:
print("Saving the data...")
#modify address accordingly
# Write images and labels as two gzip-compressed datasets in one HDF5 file.
# The `with` block guarantees the file is flushed and closed even if one of
# the create_dataset calls raises, so no half-open handle is left behind.
with h5py.File("ConcreteImages/concrete_crack_image_data.h5", "w") as hf:
    hf.create_dataset("X_concrete", data = X, compression = "gzip")
    hf.create_dataset("y_concrete", data = y, compression = "gzip")
print("Data successfully saved")

Saving the data...
Data successfully saved
