In [4]:
import cv2
import h5py
import numpy as np
import os
from IPython.display import clear_output
import random

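Read the high-resolution (HR) DIV2K images and cut them into 128x128 px chunks. Each chunk is downscaled to 64x64 px and then upscaled back to 128x128 px with bicubic interpolation (`cv2.INTER_CUBIC`).
The original 128 px chunks are the ground truth (labels); the upscaled 128 px chunks are fed into the network as inputs.
Note: the code below currently works on 32x32 px chunks with downscale factors of 3 and 4, not the 128 px / factor-2 setup described above.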

In [6]:
def read_data(path,import_all=True,import_num=0):
    '''
    Load the "images" and "labels" datasets from an .h5 file.

    path: location of the .h5 file (the layout written by save_datasets).
    import_all: when True (default) both datasets are loaded in full.
    import_num: only used when import_all is False; the number of leading
        entries to load from each dataset. Negative values are treated as 0.

    Returns an (images, labels) tuple of numpy arrays.
    Raises KeyError if either dataset is missing from the file.
    '''
    with h5py.File(path,'r') as file:
        # Index instead of .get(): a missing dataset should fail loudly rather
        # than silently turn into np.array(None).
        image_set = file['images']
        label_set = file['labels']
        if import_all:
            images = np.array(image_set)
            labels = np.array(label_set)
        else:
            # Slice before converting so only the requested entries are read.
            count = max(0, int(import_num))
            images = np.array(image_set[:count])
            labels = np.array(label_set[:count])
    return images, labels

def save_datasets(images,labels,path):
    '''Write images and labels to the .h5 file at path (opened in 'w' mode) as datasets "images" and "labels".'''
    named_arrays = (("images", images), ("labels", labels))
    with h5py.File(path,'w') as h5_file:
        for dataset_name, array in named_arrays:
            h5_file.create_dataset(dataset_name, data=array)

def process_for_labels(image,scale=2):
    '''
    Build a (lowres, highres) pair from one image chunk.

    image: array whose first two axes are height and width (prep_data passes a
        normalized float32 RGB chunk).
    scale: downscale factor applied to both height and width.

    The chunk is shrunk by `scale` with cv2.resize (default interpolation) and
    then resized back to its original height and width with bicubic
    interpolation, so lowres always has the same spatial size as highres.

    Returns (lowres, highres); highres is the input array itself, not a copy.
    '''
    highres = image
    height, width = highres.shape[:2]
    # cv2.resize takes its target size as (width, height); keep at least one pixel per side.
    small_size = (max(1, int(width/scale)), max(1, int(height/scale)))
    lowres = cv2.resize(highres, small_size)
    lowres = cv2.resize(lowres, (width, height), interpolation=cv2.INTER_CUBIC)
    return lowres, highres

def _save_batch(images, labels, directory, prefix, batch_index):
    '''Stack one batch of lowres/highres chunks and write it to <directory>/<prefix>_<NNNN>.h5.'''
    batch_path = directory + "/" + prefix + "_" + str(batch_index).zfill(4) + ".h5"
    save_datasets(np.array(images), np.array(labels), batch_path)


def prep_data(data_path=r"F:\div2k\DIV2K_train_HR_32",image_size=32,output_root="D:/TestData",batch_size=4096):
    '''
    Turn a folder of image chunks into batched train/test .h5 files.

    For each downscale factor in (3, 4) every chunk is read, converted from BGR
    to RGB, scaled to float32 in [0, 1] and passed through process_for_labels.
    Roughly one chunk in ten goes to the test set, the rest to the train set.
    Batches are written to
        <output_root>/train-x<scale>/train_<NNNN>.h5
        <output_root>/test-x<scale>/test_<NNNN>.h5
    with the lowres chunks as "images" and the highres chunks as "labels".

    data_path: folder containing the chunk image files.
    image_size: side length of the square 3-channel chunks to accept; files that
        cannot be read or have any other shape are skipped. Defaults to 32, the
        chunk size written by split_into_chunks.
    output_root: folder under which the per-scale train/test folders are
        created (missing parent folders are created too).
    batch_size: number of chunks per .h5 file; a final partial batch is written
        only if it is non-empty.
    '''
    image_paths = [os.path.join(data_path, name) for name in os.listdir(data_path)]
    # Shuffle once so consecutive chunks of one source image are spread over batches.
    random.shuffle(image_paths)
    expected_shape = (image_size, image_size, 3)

    for scale in range(3,5):
        train_dir = output_root + "/train-x" + str(scale)
        test_dir = output_root + "/test-x" + str(scale)
        os.makedirs(train_dir, exist_ok=True)
        os.makedirs(test_dir, exist_ok=True)

        images, labels = [], []            # train: lowres inputs / highres targets
        test_images, test_labels = [], []  # test:  lowres inputs / highres targets
        train_batch_counter = 0
        test_batch_counter = 0
        skipped = 0

        for image_path in image_paths:
            try:
                image = cv2.imread(image_path, cv2.IMREAD_COLOR)
                # cv2.imread signals an unreadable file by returning None.
                if image is None or image.shape != expected_shape:
                    skipped += 1
                    continue
                image = cv2.cvtColor(image,cv2.COLOR_BGR2RGB)
                image = image.astype('float32')
                image /= 255.
                image, label = process_for_labels(image,scale=scale)
            except Exception as err:
                # Stay best-effort (one bad file must not abort the run) but report it.
                print("skipping " + image_path + ": " + repr(err))
                skipped += 1
                continue

            if random.randint(0,9) == 0: #roughly 0.9:0.1 train:test split
                test_images.append(image)
                test_labels.append(label)
            else:
                images.append(image)
                labels.append(label)

            if len(images) >= batch_size:
                _save_batch(images, labels, train_dir, "train", train_batch_counter)
                train_batch_counter += 1
                images, labels = [], []
            if len(test_images) >= batch_size:
                _save_batch(test_images, test_labels, test_dir, "test", test_batch_counter)
                test_batch_counter += 1
                test_images, test_labels = [], []

        # Write whatever is left over; an empty remainder would only produce an empty file.
        if images:
            _save_batch(images, labels, train_dir, "train", train_batch_counter)
        if test_images:
            _save_batch(test_images, test_labels, test_dir, "test", test_batch_counter)
        if skipped:
            print("x" + str(scale) + ": skipped " + str(skipped) + " unreadable or wrongly sized files")
        print("it is done")
    
def split_into_chunks(data_path="F:\\div2k\\DIV2K_train_HR\\",output_path="F:\\div2k\\DIV2K_train_HR_32\\",chunk_size=32):
    '''
    Cut every image in data_path into square chunks and save them as PNG files.

    data_path: folder containing the source images.
    output_path: folder the chunks are written to; created if missing.
    chunk_size: side length of each chunk in pixels (default 32).

    Each image is tiled row by row into non-overlapping chunk_size x chunk_size
    pieces; leftover pixels on the right and bottom edges that do not fill a
    whole chunk are dropped. Chunks are named with a 7-digit running counter
    (0000000.png, 0000001.png, ...) that advances only after a successful write.
    Files that cv2.imread cannot decode are skipped.
    '''
    os.makedirs(output_path, exist_ok=True)
    counter = 0
    failed_writes = 0
    for name in os.listdir(data_path):
        image = cv2.imread(os.path.join(data_path, name))
        if image is None:
            # cv2.imread returns None for files it cannot decode.
            continue
        height, width = image.shape[:2]
        # Axis 0 of the array is rows (height), axis 1 is columns (width).
        for row in range(height//chunk_size):
            for col in range(width//chunk_size):
                cropped_image = image[row*chunk_size:(row+1)*chunk_size, col*chunk_size:(col+1)*chunk_size]
                new_img_path = os.path.join(output_path, str(counter).zfill(7) + ".png")
                if cv2.imwrite(new_img_path,cropped_image):
                    counter += 1
                else:
                    failed_writes += 1
    if failed_writes:
        print("failed to write " + str(failed_writes) + " chunks to " + output_path)

In [7]:
if __name__=="__main__":
    # Entry point of the notebook. Both pipeline stages are left disabled here;
    # uncomment them to run, in this order:
    #   1. split_into_chunks() - cut the DIV2K images into 32px chunk files
    #   2. prep_data()         - build the train/test .h5 batches from those chunks
    #split_into_chunks()
    #prep_data()
    pass

it is done
