In [1]:
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
import numpy as np
import pandas as pd
import cv2
import os
import h5py
import pickle

In [2]:
train_labels = os.listdir('train_data/')
train_labels

['Bluebell',
 'Buttercup',
 'Coltsfoot',
 'Cowslip',
 'Crocus',
 'Daffodil',
 'Daisy',
 'Dandelion',
 'Fritillary',
 'Iris',
 'Lily Valley',
 'Pansy',
 'Snowdrop',
 'Sunflower',
 'Tigerlily',
 'Tulip',
 'Windflower']

In [3]:
train_labels.sort()

In [4]:
X = []
y = []
for label in train_labels:
    dir = os.path.join('train_data', label)
    current_label = label
    print('Current Folder: ' + current_label)
    for x in range(1, 81):
        print('Current Image: ' + str(x))
        file = os.path.join(dir, str(x) + '.jpg')
        image = cv2.imread(file)
        image = cv2.resize(image, (500, 500))
        image_hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        hist = cv2.calcHist([image], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])
        cv2.normalize(hist, hist)
        histogram_features = hist.flatten()
        image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        hue_features = cv2.HuMoments(cv2.moments(image_gray)).flatten()
        current_features = np.hstack([histogram_features, hue_features])
        X.append(current_features)
        y.append(current_label)

Current Folder: Bluebell
Current Image: 1
Current Image: 2
Current Image: 3
Current Image: 4
Current Image: 5
Current Image: 6
Current Image: 7
Current Image: 8
Current Image: 9
Current Image: 10
Current Image: 11
Current Image: 12
Current Image: 13
Current Image: 14
Current Image: 15
Current Image: 16
Current Image: 17
Current Image: 18
Current Image: 19
Current Image: 20
Current Image: 21
Current Image: 22
Current Image: 23
Current Image: 24
Current Image: 25
Current Image: 26
Current Image: 27
Current Image: 28
Current Image: 29
Current Image: 30
Current Image: 31
Current Image: 32
Current Image: 33
Current Image: 34
Current Image: 35
Current Image: 36
Current Image: 37
Current Image: 38
Current Image: 39
Current Image: 40
Current Image: 41
Current Image: 42
Current Image: 43
Current Image: 44
Current Image: 45
Current Image: 46
Current Image: 47
Current Image: 48
Current Image: 49
Current Image: 50
Current Image: 51
Current Image: 52
Current Image: 53
Current Image: 54
Current Imag

In [5]:
np.shape(X)

(1360, 519)

In [6]:
np.shape(y)

(1360,)

In [7]:
le = LabelEncoder()
y = le.fit_transform(y)

In [8]:
mms=MinMaxScaler()
X = mms.fit_transform(X)

In [9]:
X

array([[1.97624573e-02, 5.78024168e-04, 0.00000000e+00, ...,
        3.86393234e-18, 5.20710681e-02, 1.98687454e-17],
       [1.74216708e-04, 0.00000000e+00, 0.00000000e+00, ...,
        3.86393622e-18, 5.20754831e-02, 1.98687346e-17],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        3.86393611e-18, 5.20756714e-02, 1.98687525e-17],
       ...,
       [8.78464639e-02, 4.14349259e-03, 0.00000000e+00, ...,
        3.86466489e-18, 5.24989973e-02, 1.98683835e-17],
       [4.34821308e-02, 1.58172996e-02, 1.84748745e-04, ...,
        3.86392312e-18, 5.21474623e-02, 1.98687803e-17],
       [2.38287393e-01, 7.91747487e-02, 7.36974103e-04, ...,
        3.86392990e-18, 5.20687142e-02, 1.98687461e-17]])

In [15]:
os.makedirs('data', exist_ok=True)
h5_X = h5py.File('data/X.h5', 'w')
h5_X.create_dataset('features', data=np.array(X))

<HDF5 dataset "features": shape (1360, 519), type "<f8">

In [16]:
h5_y = h5py.File('data/y.h5', 'w')
h5_y.create_dataset('targets', data=np.array(y))

<HDF5 dataset "targets": shape (1360,), type "<i8">

In [17]:
h5_X.close()
h5_y.close()

In [18]:
with open ('data/le.h5', 'wb') as f:
    pickle.dump(le, f)

In [19]:
with open ('data/mms.h5', 'wb') as f:
    pickle.dump(mms, f)