In [1]:
import numpy as np
import h5py
import os
from keras.utils import np_utils
from random import shuffle
import cv2
import time

Using TensorFlow backend.


In [10]:
import matplotlib.pyplot as plt 

In [2]:
class DataSet:
    """Pairs a collection of file paths with the label they share."""

    def __init__(self, paths, label):
        """Store the given paths and label on the instance unchanged."""
        self.paths, self.label = paths, label

    def GetDataSet(self):
        """Return the stored data as a ``(paths, label)`` tuple."""
        stored = (self.paths, self.label)
        return stored

In [3]:
def DataLoad(path):
    """Collect the image file paths under ``path``, grouped by label.

    The expected layout is ``path/<label>/<augmentation>/<image file>``.
    Entries that are not directories at the label or augmentation level
    (for example the ``data set.hdf5`` file that ``MakeHDF5`` writes into the
    same root folder) are skipped, so the function keeps working when it is
    re-run on a folder that already contains generated output.

    Args:
        path: Root folder that holds one sub-folder per label.

    Returns:
        A list with one ``DataSet`` per label folder. Each holds the file
        paths found below that folder (joined with '/') and the one-hot
        vector produced by ``np_utils.to_categorical`` for that label.

    Raises:
        KeyError: If a label folder name is not one of the known labels.
    """
    intLabel = {'BlindFlange':0, 'Cross':1, 'Elbow 90':2, 'Elbow non 90':3, 'Flange':4, 'Flange WN':5,
         'Olet':6, 'OrificeFlange':7, 'Pipe':8, 'Reducer CONC':9, 'Reducer ECC':10, 
         'Reducer Insert':11, 'Safety Valve':12, 'Strainer':13, 'Tee':14, 'Tee RED':15, 'Valve':16,
         'Wye':17}
    numClasses = len(intLabel)
    allDataPath = []
    # Sorted so the result does not depend on the OS directory listing order.
    for label in sorted(os.listdir(path)):  # label folders
        labelPath = path + '/' + label
        if not os.path.isdir(labelPath):
            # Stray files in the root (e.g. a previously generated hdf5 file).
            continue
        if label not in intLabel:
            raise KeyError("Unknown label folder '{0}' in {1}".format(label, path))
        paths = []
        for augmentation in sorted(os.listdir(labelPath)):  # augmentation folders
            augPath = labelPath + '/' + augmentation
            if not os.path.isdir(augPath):
                continue
            for fileName in sorted(os.listdir(augPath)):  # image file names
                paths.append(augPath + '/' + fileName)
        oneHot = np_utils.to_categorical(intLabel[label], numClasses)
        allDataPath.append(DataSet(paths, oneHot))
    return allDataPath

In [4]:
def DivideData(allDataPath, rate, seed=None):
    """Split every label's file paths into a training and a test portion.

    For each data set the paths are shuffled and the first
    ``int(len(paths) * rate)`` go to the training split, the rest to the test
    split. The shuffle is done on a copy, so the lists held by the objects in
    ``allDataPath`` are left untouched.

    Args:
        allDataPath: Iterable of objects whose ``GetDataSet()`` returns
            ``(paths, label)``.
        rate: Fraction of each label's paths assigned to training; must lie
            in ``[0, 1]``.
        seed: Optional seed for a reproducible split. When ``None`` the
            module-level ``random.shuffle`` (global random state) is used.

    Returns:
        ``(trainX, trainY, testX, testY)`` where the X values are lists of
        paths and the Y values are numpy arrays with one label row per path.

    Raises:
        ValueError: If ``rate`` is outside ``[0, 1]``.
    """
    # Outside [0, 1] the slice lengths and the repeated-label counts below
    # would disagree, giving X and Y of different lengths.
    if not 0 <= rate <= 1:
        raise ValueError("rate must be between 0 and 1, got {0!r}".format(rate))

    if seed is None:
        shuffleInPlace = shuffle
    else:
        from random import Random
        shuffleInPlace = Random(seed).shuffle

    trainX = []
    trainY = []
    testX = []
    testY = []

    for data in allDataPath:
        paths, label = data.GetDataSet()
        images = list(paths)  # copy: do not reorder the caller's list
        shuffleInPlace(images)
        divider = int(len(images)*rate)
        trainX.extend(images[:divider])
        trainY.extend([label]*divider)
        testX.extend(images[divider:])
        testY.extend([label]*(len(images)-divider))

    trainY = np.array(trainY)
    testY = np.array(testY)

    return trainX, trainY, testX, testY

In [5]:
def _WriteImages(dataset, imagePaths, offset, total):
    """Read each image as grayscale, scale it to [0, 1] and store it in ``dataset``.

    Args:
        dataset: Target with item assignment (an open HDF5 dataset) that takes
            one ``(227, 227, 1)`` float32 array per index.
        imagePaths: Image file paths; entry ``i`` is written to ``dataset[i]``.
        offset: Number of images already processed, used for the progress line.
        total: Total number of images, used for the progress line.

    Raises:
        IOError: If an image file cannot be read.
    """
    for i, imagePath in enumerate(imagePaths):
        image = cv2.imread(imagePath, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise IOError("Could not read image: {0}".format(imagePath))
        image = image.astype('float32') / 255
        # Add the trailing channel axis; fails if the image is not 227x227.
        dataset[i] = np.reshape(image, [227,227,1])
        cur = offset + i + 1
        print("Progress {0:}/{1:}, {2:2.1%}".format(cur, total, cur/total), end="\r")


def MakeHDF5(path, trainX, trainY, testX, testY):
    """Write the train/test images and labels to ``<path>/data set.hdf5``.

    Four float32 datasets are created: ``trainX`` and ``testX`` with shape
    ``(N, 227, 227, 1)`` and ``trainY`` and ``testY`` with the shapes of the
    given label arrays. Images are loaded one at a time so the whole set never
    has to fit in memory. The elapsed time is printed at the end.

    Args:
        path: Folder in which the HDF5 file is created (overwritten if present).
        trainX: List of training image paths.
        trainY: 2-D numpy array of training labels, one row per training image.
        testX: List of test image paths.
        testY: 2-D numpy array of test labels, one row per test image.

    Raises:
        IOError: If an image file cannot be read.
    """
    startTime = time.time()

    fileName = path + '/' + 'data set.hdf5'
    with h5py.File(fileName, 'w') as f:
        trainX_set = f.create_dataset('trainX', (trainY.shape[0], 227, 227, 1), dtype='float32')
        trainY_set = f.create_dataset('trainY', (trainY.shape[0], trainY.shape[1]), dtype='float32')
        testX_set = f.create_dataset('testX', (testY.shape[0], 227, 227, 1), dtype='float32')
        testY_set = f.create_dataset('testY', (testY.shape[0], testY.shape[1]), dtype='float32')

        maximum = trainY.shape[0] + testY.shape[0]

        _WriteImages(trainX_set, trainX, 0, maximum)
        _WriteImages(testX_set, testX, trainY.shape[0], maximum)

        # Write through the dataset handles. A plain `trainY_set = trainY`
        # would only rebind the local name and never store the labels.
        trainY_set[...] = trainY
        testY_set[...] = testY

    e = time.time() - startTime
    print("")
    print('hdf5 파일 생성 경과 시간: %02d:%02d:%02d'%(e // 3600, e % 3600 // 60, e % 60))

In [6]:
# Root folder of the augmented images; DataLoad expects
# <root>/<label>/<augmentation>/<image file> below it, and MakeHDF5 writes
# 'data set.hdf5' into this same folder.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
path = "C:/Users/m/data/augmentation_r2"

In [7]:
# Gather the image file paths of every label folder (one DataSet per label).
allDataPath = DataLoad(path)

In [8]:
# Split each label's paths: the first 70% after shuffling go to training, the rest to test.
trainX, trainY, testX, testY = DivideData(allDataPath, 0.7)

In [9]:
# Build '<path>/data set.hdf5' from the split; loads every image, so this is the slow step.
MakeHDF5(path, trainX, trainY, testX, testY)

Progress 335016/335016, 100.0%
데이터 로드 경과 시간: 00:16:04


In [12]:
# Sanity check: show the first training image path.
trainX[0]

'C:/Users/m/data/augmentation_r2/BlindFlange/FlipA/BlindFlange_62_2.0_FlipA.png'