## 第1回 FR Frontier ：ファッション画像における洋服の「色」分類

* [コンテスト詳細 ビッグデータ活用ならオプトDSL DeepAnalytics](https://deepanalytics.jp/compe/36)

## preprocess
OpenCV をインストール

    conda install opencv

In [1]:
import numpy as np
import pandas as pd
import cv2

In [2]:
import os, sys
# Anchor every project-relative path at the kernel's current working directory.
HOME_DIR = current_dir = os.getcwd()

# Directories located under the project root.
DATA_GIVEN_DIR = HOME_DIR + "/data/given/"
DATA_MYDATA_DIR = HOME_DIR + "/data/processed/"
DATA_TRANS_DIR = HOME_DIR + "/data/transparent/"

# The "resized" image directory points at a fixed absolute location; the
# project-relative alternative is kept below for reference.
#DATA_RESIZED_DIR = HOME_DIR+"/data/resized/"
DATA_RESIZED_DIR = "/input/"

In [None]:
# Number of pixel columns / rows scanned by trans_img (square region).
width = height = 667

### Transparent

In [None]:
# Create the output tree for the transparency pass.
# os.makedirs raises when the target already exists, so each call is guarded
# to keep this cell re-runnable (Restart Kernel -> Run All).
for _subdir in ("", "train/", "test/"):
    _target = DATA_TRANS_DIR + _subdir
    if not os.path.isdir(_target):
        os.makedirs(_target)

In [None]:
from PIL import Image
def trans_img(src, dst, width, height):
    """Copy the non-white pixels of an image onto a blank canvas and save it as JPEG.

    A fully transparent black RGBA canvas of the source's size is created and
    every pixel inside the scanned region that is not pure white
    (255, 255, 255) is copied onto it with full opacity. Pure-white pixels and
    anything outside the scanned region stay untouched on the canvas.

    JPEG has no alpha channel, so the canvas is converted to RGB right before
    saving; untouched pixels therefore end up black in the written file.
    No resizing is performed.

    Args:
        src: Path of the image to read.
        dst: Path the JPEG result is written to.
        width: Number of pixel columns to scan, starting at x = 0. Clamped to
            the real image width.
        height: Number of pixel rows to scan, starting at y = 0. Clamped to
            the real image height.
    """
    # Normalise to RGB so getpixel always yields a 3-tuple (grayscale or
    # palette sources would otherwise return a bare int / other layouts),
    # and close the underlying file as soon as the pixels are decoded.
    with Image.open(src, 'r') as opened:
        img = opened.convert('RGB')

    # Canvas of the same size, initially fully transparent black.
    trans = Image.new('RGBA', img.size, (0, 0, 0, 0))

    # Never index past the real image bounds.
    scan_w = min(width, img.size[0])
    scan_h = min(height, img.size[1])

    for x in range(scan_w):
        for y in range(scan_h):
            pixel = img.getpixel((x, y))

            # Pure white is skipped.
            if pixel == (255, 255, 255):
                continue

            # Anything else is copied onto the canvas, fully opaque.
            trans.putpixel((x, y), pixel + (255,))

    # An RGBA image cannot be written as JPEG, so drop the alpha channel first.
    trans.convert('RGB').save(dst, 'JPEG', optimize=True)

In [None]:
# Takes far too long unless the images have been resized beforehand!!
# Run the transparency pass over every training image (indices 0..12398).
for i in range(12399):
    name = 'train_%i.jpg' % i
    src = DATA_GIVEN_DIR + 'train/' + name
    dst = DATA_TRANS_DIR + "train/" + name
    trans_img(src, dst, width, height)

In [None]:
# Run the transparency pass over every test image (indices 0..9800).
for i in range(9801):
    name = 'test_%i.jpg' % i
    src = DATA_GIVEN_DIR + 'test/' + name
    dst = DATA_TRANS_DIR + "test/" + name
    trans_img(src, dst, width, height)

### Load Data

In [3]:
# Build one (256, 3) per-channel intensity histogram for every training image.
data = []
for i in range(0, 12399):
    src = DATA_RESIZED_DIR + 'train/' + 'train_%i.jpg'%i
    img = cv2.imread(src)
    # cv2.imread reports a missing or undecodable file by returning None
    # instead of raising; fail here with the offending path rather than with
    # an opaque error inside calcHist.
    if img is None:
        raise IOError("cannot read image: %s" % src)

    # OpenCV decodes images in BGR order, so the columns below are
    # channel 0 (blue), channel 1 (green), channel 2 (red). The column order
    # is the same one used everywhere else in this notebook.
    channel_hists = [
        cv2.calcHist([img], [channel], None, [256], [0, 256])
        for channel in range(3)
    ]

    hist = np.concatenate(channel_hists, axis=1)
    data.append(hist)

In [4]:
# Stack the per-image histograms into a single array and show its shape.
dataX = np.asarray(data)
dataX.shape

(12399, 256, 3)

### train/valid/test 作成

In [5]:
# The master file is tab-separated, so the tab is passed as the field
# separator. (`decimal` is the decimal-point character and must not be a tab.)
train_master = pd.read_table('/input/train_master.tsv', sep='\t')
dataY = np.array(train_master['category_id'])
# One-hot encode the labels as float32 for categorical cross-entropy.
# NOTE(review): column k presumably corresponds to the k-th smallest
# category_id -- confirm before mapping predicted indices back to ids.
dataY = np.array(pd.get_dummies(dataY).astype('float32'))
dataY.shape

(12399, 24)

In [6]:
# Hold-out split by position: the first 10000 rows train, the rest validate.
trainX, validX = np.split(dataX, [10000])
trainY, validY = np.split(dataY, [10000])
trainX.shape, validX.shape, trainY.shape, validY.shape

((10000, 256, 3), (2399, 256, 3), (10000, 24), (2399, 24))

## training

In [7]:
import keras
from keras.models import Sequential, Model
from keras.layers import Flatten, Dense, Dropout, Conv2D, BatchNormalization, MaxPooling2D
from keras.optimizers import Adam, Nadam
from keras.preprocessing.image import ImageDataGenerator

from keras.layers import Input, Merge
from keras.layers.merge import Concatenate
from keras.layers import Embedding, Conv1D, MaxPooling1D, SpatialDropout1D

Using TensorFlow backend.


In [8]:
# Mini-batch size shared by every fit / predict call below.
batch_size = 64

In [9]:
# Feature extractor over the (256, 3) histogram input: three parallel Conv1D
# branches with kernel widths 3, 4 and 5 whose flattened outputs are
# concatenated into one feature vector.
graph_in = Input(shape=(256, 3))
convs = []
for fsz in (3, 4, 5):
    # Layers of one branch, applied in this order.
    branch_layers = (
        Conv1D(64, fsz, padding='same', activation="relu"),
        MaxPooling1D(),
        BatchNormalization(axis=1),
        Flatten(),
    )
    x = graph_in
    for layer in branch_layers:
        x = layer(x)
    convs.append(x)

out = Concatenate()(convs)
graph = Model(graph_in, out)

In [10]:
# Full classifier: input normalisation and dropout, the multi-width conv
# feature extractor, then a small dense head with a 24-way softmax.
conv2 = Sequential()

# Input regularisation.
conv2.add(BatchNormalization(axis=1, input_shape=(256, 3)))
conv2.add(SpatialDropout1D(0.2))
conv2.add(Dropout(0.2))

# Convolutional feature extractor defined above.
conv2.add(graph)

# Dense classification head.
conv2.add(Dropout(0.5))
conv2.add(Dense(100, activation="relu"))
conv2.add(BatchNormalization(axis=1))
conv2.add(Dropout(0.7))
conv2.add(Dense(24, activation='softmax'))

In [11]:
# First training stage: Nadam with its default settings.
conv2.compile(
    optimizer=Nadam(),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [12]:
# Print the layer-by-layer summary (output shapes and parameter counts).
conv2.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization_4 (Batch (None, 256, 3)            1024      
_________________________________________________________________
spatial_dropout1d_1 (Spatial (None, 256, 3)            0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 256, 3)            0         
_________________________________________________________________
model_1 (Model)              (None, 24576)             4032      
_________________________________________________________________
dropout_2 (Dropout)          (None, 24576)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 100)               2457700   
_________________________________________________________________
batch_normalization_5 (Batch (None, 100)               400       
__________

In [None]:
# Stage 1: 50 epochs, validating on the held-out rows after each epoch.
conv2.fit(
    trainX,
    trainY,
    batch_size=batch_size,
    epochs=50,
    validation_data=(validX, validY),
)

Train on 10000 samples, validate on 2399 samples
Epoch 1/50


In [None]:
# Switch the optimizer to Adam with its default settings. The model is
# re-compiled, not rebuilt.
conv2.compile(
    optimizer=Adam(),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Stage 2: 5 more epochs with the Adam optimizer.
conv2.fit(
    trainX,
    trainY,
    batch_size=batch_size,
    epochs=5,
    validation_data=(validX, validY),
)

In [None]:
# Stage 3: re-compile with Adam(0.01) and train for 30 epochs.
conv2.compile(
    optimizer=Adam(0.01),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
conv2.fit(
    trainX,
    trainY,
    batch_size=batch_size,
    epochs=30,
    validation_data=(validX, validY),
)

In [None]:
# Stage 4: re-compile with Adam(0.0001) and train for 20 epochs.
conv2.compile(
    optimizer=Adam(0.0001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
conv2.fit(
    trainX,
    trainY,
    batch_size=batch_size,
    epochs=20,
    validation_data=(validX, validY),
)

In [None]:
# conv2.save_weights('2.h5')

In [None]:
# conv2.load_weights('1.h5')

## Predict

In [None]:
# Build one (256, 3) per-channel intensity histogram for every test image.
data = []
for i in range(0, 9801):
    src = DATA_RESIZED_DIR + 'test/' + 'test_%i.jpg'%i
    img = cv2.imread(src)
    # cv2.imread reports a missing or undecodable file by returning None
    # instead of raising; fail here with the offending path rather than with
    # an opaque error inside calcHist.
    if img is None:
        raise IOError("cannot read image: %s" % src)

    # OpenCV decodes images in BGR order, so the columns below are
    # channel 0 (blue), channel 1 (green), channel 2 (red) -- the same column
    # order the training features use.
    channel_hists = [
        cv2.calcHist([img], [channel], None, [256], [0, 256])
        for channel in range(3)
    ]

    hist = np.concatenate(channel_hists, axis=1)
    data.append(hist)

In [None]:
# Stack the per-image test histograms into a single array and show its shape.
testX = np.asarray(data)
testX.shape

In [None]:
# Run the trained model over the test features; the final layer is a 24-way
# softmax, so each row holds one score per class.
test_features = conv2.predict(testX, batch_size=batch_size)

In [None]:
# Take the index of the highest-scoring class for each test image.
# NOTE(review): this is the one-hot column index; it equals category_id only
# if the ids are the contiguous integers 0..23 -- confirm.
test_labels = test_features.argmax(axis=1)
test_labels[:5]
#test_features[:5]
#array([20, 14, 17, 20, 23])

In [None]:
# File names of the test images, in the same order the features were loaded.
filenames = ["test_%i.jpg" % i for i in range(0, 9801)]

In [None]:
# Two-column submission table: image file name and predicted label.
submission = pd.DataFrame(
    {"Image": filenames, "Label": test_labels},
    columns=["Image", "Label"],
)

In [None]:
# Write the submission as CSV with neither an index column nor a header row.
submission_file_name = 'submission_cnn3.csv'
#np.savetxt(submission_file_name, subm, delimiter=',')
submission.to_csv(
    submission_file_name,
    header=False,
    index=False,
)

In [None]:
# Render a clickable link to the written submission file in the notebook.
from IPython.display import FileLink
FileLink(submission_file_name)