### 1. Dataset

![Dataset](assets/dataset.png)

In [15]:
import os
import random
import shutil

def _copy_files(names, src_dir, dest_dir):
    """Create `dest_dir` if needed and copy every file in `names` from `src_dir` into it."""
    os.makedirs(dest_dir, exist_ok=True)
    for name in names:
        shutil.copyfile(os.path.join(src_dir, name), os.path.join(dest_dir, name))


def tvt_split(path, train=0.7, test=0.3, validation=0.0, class_subdirs=True, seed=None):
    """Split a dataset directory into train / validation / test copies.

    The files are copied (not moved) into sibling directories named
    ``<path>_train``, ``<path>_validation`` and ``<path>_test``. Any of these
    directories that already exists is deleted first, so every call produces
    a fresh split.

    Args:
        path: Source dataset directory.
        train: Fraction of each class that goes to the train split. The count
            is truncated with ``int()``.
        test: Accepted for interface compatibility but not used: the test
            split always receives whatever is left after the train and
            validation parts have been taken.
        validation: Fraction of each class that goes to the validation split
            (truncated with ``int()``). The validation directory is only
            created for classes that actually receive validation files.
        class_subdirs: If true, every sub-directory of ``path`` is treated as
            one class and split independently, and the class layout is
            reproduced in the output directories. If false, the files lying
            directly in ``path`` are split.
        seed: Optional seed for a private random generator, making the split
            reproducible. With ``None`` the global ``random`` state is used.

    Returns:
        None. The result is the directory tree written to disk.
    """
    split_dirs = {part: '%s_%s' % (path, part) for part in ('train', 'validation', 'test')}

    # Remove the results of a previous run so stale files never leak into a new split.
    for split_dir in split_dirs.values():
        if os.path.isdir(split_dir):
            shutil.rmtree(split_dir)

    if class_subdirs:
        # Only real directories are classes; stray files next to them
        # (e.g. .DS_Store) used to crash the os.listdir() call below.
        subdirs = sorted(
            name for name in os.listdir(path)
            if os.path.isdir(os.path.join(path, name))
        )
    else:
        subdirs = ['']

    rng = random if seed is None else random.Random(seed)

    for subdir in subdirs:
        src_dir = os.path.join(path, subdir)

        # Only regular files can be handed to shutil.copyfile(); nested
        # directories are skipped. os.listdir() order is arbitrary, so sort
        # first to make a seeded shuffle reproducible.
        files = sorted(
            name for name in os.listdir(src_dir)
            if os.path.isfile(os.path.join(src_dir, name))
        )
        rng.shuffle(files)

        train_end = int(len(files) * train)
        validation_end = train_end + int(len(files) * validation)

        _copy_files(files[:train_end], src_dir,
                    os.path.join(split_dirs['train'], subdir))

        validation_files = files[train_end:validation_end]
        if validation_files:
            _copy_files(validation_files, src_dir,
                        os.path.join(split_dirs['validation'], subdir))

        # Everything that is left over forms the test split.
        _copy_files(files[validation_end:], src_dir,
                    os.path.join(split_dirs['test'], subdir))
            
# Split data/SET_A with the function's defaults: 70% of every class is copied
# to data/SET_A_train and the remainder to data/SET_A_test.
tvt_split(path='data/SET_A')

In [30]:
import pandas as pd
import re

# Names of the metadata tables, each stored as `data/<name>.txt`.
ds_meta_files = ['bounding_boxes', 'classes', 'hierarchy', 'image_class_labels', 'images', 'photographers', 'sizes']
# Maps table name -> DataFrame with integer column labels (no header row).
ds_meta = {}

for ds_meta_file in ds_meta_files:
    with open('data/%s.txt' % ds_meta_file, 'r') as f:
        content = f.read()

    # Turn the FIRST space of every line into '|' so the line can be parsed
    # as a two-column, pipe-separated record. Any further spaces stay inside
    # the second column.
    # TODO: also split the remaining space-separated fields into their own
    # columns (presumably needed for tables with more than two fields).
    # The pattern is a raw string: '\ ' and '\Z' are invalid escape sequences
    # in a normal string literal and trigger warnings on current Python.
    content = re.sub(r' (.*)(\n|\Z)', r'|\1\2', content, flags=re.M)

    csv_path = 'data/%s.csv' % ds_meta_file
    with open(csv_path, 'w') as f:
        f.write(content)

    ds_meta[ds_meta_file] = pd.read_csv(csv_path, header=None, sep='|')

In [28]:
# Print how many entries each metadata table holds.
for table_name, table in ds_meta.items():
    # count() yields the non-null entries per column; column label 0 is the first one.
    n_entries = table.count()[0]
    print('Quantity of %s: %s' % (table_name, n_entries))

Quantity of bounding_boxes: 48562
Quantity of classes: 1011
Quantity of hierarchy: 1010
Quantity of image_class_labels: 48562
Quantity of images: 48562
Quantity of photographers: 48562
Quantity of sizes: 48562


### 2. Keras

In [1]:
import os

from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Activation, Dropout, Flatten, Dense

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [4]:
batch_size = 16

# NOTE(review): this trains on the complete data/SET_A directory, although
# tvt_split() above also writes data/SET_A_train and data/SET_A_test.
# Confirm whether the train split should be used here instead.
train_dir = 'data/SET_A'

# One class per sub-directory. Stray files are ignored and the list is sorted
# so the class -> index mapping is the same on every run.
train_labels = sorted(
    name for name in os.listdir(train_dir)
    if os.path.isdir(os.path.join(train_dir, name))
)

# Rescale pixels to [0, 1] and apply light augmentation.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(150, 150),
    batch_size=batch_size,
    classes=train_labels)

# Small fully connected baseline on the flattened 150x150 RGB image.
model = Sequential()
model.add(Flatten(input_shape=(150, 150, 3)))
# Without a non-linearity the stacked Dense layers collapse into one linear map.
model.add(Dense(64, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(len(train_labels)))
# categorical_crossentropy expects a probability distribution over mutually
# exclusive classes, i.e. softmax rather than independent sigmoids.
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# One epoch covers the images the generator actually found, instead of a
# hard-coded image count.
steps_per_epoch = max(1, train_generator.samples // batch_size)

model.fit_generator(
        train_generator,
        steps_per_epoch=steps_per_epoch,
        epochs=50)

Found 3010 images belonging to 50 classes.
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x1166fcda0>

### X. References

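* Keras blog, "Building powerful image classification models using very little data": https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html
* BSIF (Binarized Statistical Image Features): http://www.ee.oulu.fi/~jkannala/bsif/bsif.pdf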