# シングルネットワーク用のnotebook
精度を求めるために基本的にはデュアルのネットワークを利用すれば良いが、比較のために用意している。

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import random
from numba import jit
from tqdm import tqdm
import os
%matplotlib inline

In [2]:
# import os
# os.environ['CUDA_VISIBLE_DEVICES']='0'
# import keras

In [3]:
from keras.applications.vgg16 import VGG16
from keras.preprocessing.image import ImageDataGenerator

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [4]:
from keras.utils import plot_model
from keras.models import Model
from keras.layers import Input,Layer,Lambda
from keras.layers import Flatten,BatchNormalization
from keras.layers import Dense,Dropout
from keras.layers.convolutional import Conv2D
from keras.layers.pooling import MaxPooling2D
from keras import backend as K

In [5]:
# Input image geometry: height and width in pixels, plus colour channels.
imheight, imwidth, channels = 128, 128, 3
# ALPHA and BETA are handed to the triplet-loss layers as `alpha` and `beta`.
ALPHA, BETA = 0.1, 0.05
# dense_num sizes the final Dense layer of the embedding network;
# vec_length is used in the output paths when models/history are saved.
dense_num = vec_length = 512

In [6]:
from keras.applications.vgg16 import VGG16
# include_top=False: the fully connected (Dense) head of VGG16 is not needed,
# only the ImageNet-pretrained convolutional part is used.
# The shape is given as (height, width, channels) so that it agrees with the
# anchor/positive/negative Input layers defined further down in this notebook.
base_model = VGG16(
    include_top=False,
    weights='imagenet',
    input_tensor=Input(shape=(imheight, imwidth, channels)),
    input_shape=None,
)

Instructions for updating:
Colocations handled automatically by placer.


In [7]:
# Mark the first 15 layers of the VGG16 base as non-trainable; every layer
# after them keeps its default trainable flag.
for index, layer in enumerate(base_model.layers):
    if index < 15:
        layer.trainable = False

In [8]:
def create_embNet():
    """Build an embedding network on top of the module-level ``base_model``.

    The base model's output is passed through two 4x4, 'same'-padded ReLU
    convolutions with 32 filters each, flattened, projected by a ReLU Dense
    layer of ``dense_num`` units and finally L2-normalised along axis 1.

    Returns:
        A Keras ``Model`` taking ``base_model.input`` and producing the
        normalised embedding tensor.
    """
    features = base_model.output
    # Two identical convolution stages stacked on the base model's feature map.
    for _ in range(2):
        features = Conv2D(32, (4, 4), padding='same', activation='relu')(features)

    flat_features = Flatten()(features)
    embedding = Dense(dense_num, activation='relu')(flat_features)

    # Kept as a lambda (not a named function) so the Lambda layer is
    # serialised the same way as before when the model is saved.
    unit_embedding = Lambda(
        lambda tensor: K.l2_normalize(tensor, axis=1),
        name='norm_layer',
    )(embedding)
    return Model(inputs=[base_model.input], outputs=unit_embedding)

In [9]:
# The triplet network consumes three images of identical shape:
# an anchor, a positive example and a negative example.
triplet_input_shape = (imheight, imwidth, channels)
a_in = Input(shape=triplet_input_shape, name='anchor_input')
p_in = Input(shape=triplet_input_shape, name='positive_input')
n_in = Input(shape=triplet_input_shape, name='negative_input')

In [10]:
# Created at module level because, once training is done, these networks are
# reused on their own as vector encoders.
con_embNet, shop_embNet = create_embNet(), create_embNet()

In [11]:
# The anchor is embedded by shop_embNet; the positive and the negative are
# both embedded by con_embNet.
# NOTE(review): get_triplets() builds each triplet as
# [consumer image, shop image, shop image of another id] and the training loop
# feeds them to (a_in, p_in, n_in) in that order, so the anchor passed through
# shop_embNet appears to be a consumer image while con_embNet receives shop
# images -- confirm that this naming is what the retrieval code expects.
a_emb, p_emb, n_emb = shop_embNet(a_in), con_embNet(p_in), con_embNet(n_in)

In [12]:
class TripletLossLayer(Layer):
    """Keras layer that computes a triplet loss and registers it via ``add_loss``.

    With ``d`` the squared Euclidean distance over the last axis, the loss is
    ``sum(max(d(a, p) - d(a, n) + alpha, 0))``, summed over axis 0.
    """

    def __init__(self, alpha, **kwargs):
        """Store the margin ``alpha``; all other kwargs are passed to ``Layer``."""
        self.alpha = alpha
        super(TripletLossLayer, self).__init__(**kwargs)

    def triplet_loss(self, inputs):
        """Return the summed hinge loss for ``inputs = (anchor, positive, negative)``."""
        anchor, positive, negative = inputs
        positive_dist = K.sum(K.square(anchor - positive), axis=-1)
        negative_dist = K.sum(K.square(anchor - negative), axis=-1)
        hinge = K.maximum(positive_dist - negative_dist + self.alpha, 0)
        return K.sum(hinge, axis=0)

    def call(self, inputs):
        """Compute the loss, attach it to the layer and return it as the output."""
        total = self.triplet_loss(inputs)
        self.add_loss(total)
        return total

    def get_config(self):
        """Return the base ``Layer`` config extended with ``alpha``."""
        merged = dict(super(TripletLossLayer, self).get_config())
        merged['alpha'] = self.alpha
        return merged

In [13]:
class ImprovedTripletLossLayer(Layer):
    """Triplet loss with an additional penalty on the anchor-positive distance.

    With ``d`` the squared Euclidean distance over the last axis, the loss is
    ``sum(max(d(a, p) - d(a, n) + alpha, 0) + max(d(a, p) - beta, 0))``,
    summed over axis 0. The loss is registered on the layer via ``add_loss``.
    """

    def __init__(self, alpha, beta, **kwargs):
        """Store ``alpha`` and ``beta``; all other kwargs are passed to ``Layer``."""
        self.alpha = alpha
        self.beta = beta
        super(ImprovedTripletLossLayer, self).__init__(**kwargs)

    def triplet_loss(self, inputs):
        """Return the summed loss for ``inputs = (anchor, positive, negative)``."""
        a, p, n = inputs
        p_dist = K.sum(K.square(a - p), axis=-1)
        n_dist = K.sum(K.square(a - n), axis=-1)
        # Relative term: the positive must be closer than the negative by alpha.
        relative_term = K.maximum(p_dist - n_dist + self.alpha, 0)
        # Absolute term: the anchor-positive distance itself should stay below beta.
        absolute_term = K.maximum(p_dist - self.beta, 0)
        return K.sum(relative_term + absolute_term, axis=0)

    def call(self, inputs):
        """Compute the loss, attach it to the layer and return it as the output."""
        loss = self.triplet_loss(inputs)
        self.add_loss(loss)
        return loss

    def get_config(self):
        """Return the base ``Layer`` config extended with ``alpha`` and ``beta``.

        ``super`` must name this class: naming ``TripletLossLayer`` raises
        ``TypeError`` because this class does not derive from it. ``beta`` is
        included because ``__init__`` requires it when the layer is rebuilt
        from its config.
        """
        config = {'alpha': self.alpha, 'beta': self.beta}
        base_config = super(ImprovedTripletLossLayer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

- triplet lossの選択を忘れずに

In [14]:
# Loss layer fed with the anchor, positive and negative embedding vectors.
# To train with the plain triplet loss instead, use the commented-out line.
# triplet_loss_layer = TripletLossLayer(alpha=ALPHA,name='triplet_loss_layer')([a_emb, p_emb, n_emb])
loss_layer = ImprovedTripletLossLayer(alpha=ALPHA, beta=BETA, name='triplet_loss_layer')
triplet_loss_layer = loss_layer([a_emb, p_emb, n_emb])

# Trainable model taking anchor, positive and negative images.
tripletNet = Model(inputs=[a_in, p_in, n_in], outputs=triplet_loss_layer)

# Compile with loss=None: the loss is the one the layer registers via add_loss().
tripletNet.compile(optimizer='adam', loss=None)

### データの用意

In [15]:
# Dataset root; every entry directly below it is treated as one item id.
BASE_PATH = './dataset/T_Shirt_all/'
ids = os.listdir(BASE_PATH)
ids.sort()

In [16]:
# Display the first id (notebook inspection only).
ids[0]

'id_00000001'

In [17]:
# Display how many ids were found (notebook inspection only).
len(ids)

6155

### tripletのパスの組を返す関数

In [18]:
import itertools
import random
def get_triplets(ids,BASE_PATH):
    """Build [anchor, positive, negative] image-path triplets for every id.

    For each id, every file whose path contains 'comsumer' is paired with every
    file of the same id whose path contains 'shop'. Each pair is completed with
    a randomly chosen 'shop' file from a randomly chosen different id.

    Args:
        ids: list of id directory names located under ``BASE_PATH``.
        BASE_PATH: dataset root as a string; paths are built by plain string
            concatenation, so it has to end with a path separator.

    Returns:
        A list of ``[consumer_path, shop_path, negative_shop_path]`` lists.

    Raises:
        IndexError: from ``random.choice`` when an id has pairs but there is no
            other id to draw from, or the drawn id has no 'shop' file.
    """
    triplets = []
    for id_ in tqdm(ids):
        id_dir = BASE_PATH + id_ + '/'
        files = sorted([id_dir + x for x in os.listdir(BASE_PATH + id_)])
        # `files` is already sorted, so the filtered lists are sorted as well.
        # 'comsumer' is presumably the spelling used by the dataset's file
        # names -- do not "correct" it without checking the files on disk.
        con = [x for x in files if 'comsumer' in x]
        shop = [x for x in files if 'shop' in x]
        # The candidate negative ids depend only on id_, so build the list once
        # per id instead of once per (consumer, shop) pair.
        neg_ids = [x for x in ids if x != id_]
        for con_file, shop_file in itertools.product(con, shop):
            neg_id = random.choice(neg_ids)
            neg_dir = BASE_PATH + neg_id + '/'
            neg_file = random.choice(
                [neg_dir + x for x in os.listdir(BASE_PATH + neg_id) if 'shop' in x]
            )
            triplets.append([con_file, shop_file, neg_file])
    return triplets

### id単位でtrainとtestを分割する
- 元々np.random.choice()でやっていたが、ブートストラップサンプリングだったのでダメ
- train_test_splitを利用

In [19]:
# sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed in
# 0.20; train_test_split now lives in sklearn.model_selection. Fall back to the
# old module only for installations that predate model_selection.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split
# from sklearn.model_selection import ShuffleSplit



In [20]:
# Keep random_state fixed so that the same train/test split of ids is produced on every run.
train_ids,test_ids=train_test_split(ids,test_size=0.33,random_state=0)
# Drop the unsplit list; the cells below work with train_ids / test_ids.
del ids

In [21]:
# Display the first five training ids (notebook inspection only).
train_ids[:5]

['id_00016780', 'id_00007427', 'id_00029554', 'id_00020254', 'id_00018517']

In [22]:
from PIL import Image
def _load_rgb_array(path, size):
    """Load one image as an RGB array resized to ``size`` and divided by 255."""
    # The context manager closes the underlying file as soon as the pixel data
    # has been converted, instead of leaving that to garbage collection.
    with Image.open(path) as img:
        return np.array(img.convert('RGB').resize(size)) / 255.


def get_np_triplets(triplet_PATHs, size=(128, 128)):
    """Load path triplets into one NumPy array of scaled RGB images.

    Args:
        triplet_PATHs: iterable of ``[anchor_path, positive_path, negative_path]``.
        size: ``(width, height)`` passed to ``Image.resize``; the default keeps
            the previous fixed 128x128 behaviour.

    Returns:
        ``np.array`` built from one ``[anchor, positive, negative]`` array per
        triplet; each image is the resized RGB pixel data divided by 255.
    """
    triplets = []
    for triplet in tqdm(triplet_PATHs):
        anc_img = _load_rgb_array(triplet[0], size)
        pos_img = _load_rgb_array(triplet[1], size)
        neg_img = _load_rgb_array(triplet[2], size)
        triplets.append(np.array([anc_img, pos_img, neg_img]))
    return np.array(triplets)

test_idsはretrival.ipynbで参照するのでpickleとして保存

In [23]:
import pickle
# Use a context manager so the file is flushed and closed right after the dump;
# a handle that is never closed may leave the pickle incomplete on disk.
with open('./pickle/test_ids.pickle', 'wb') as f:
    pickle.dump(test_ids, f)

## 学習

In [24]:
# Number of iterations of the training loop below (each one calls fit with epochs=1).
epochs = 120
# Output directory for this run; the ALPHA and BETA values are embedded in its name.
model_dir ='./model/T_Shirt/Single/a{}b{}'.format(ALPHA,BETA)

各エポックでtestデータを用いてN-top accを出したい→けど結果としてはいらない？
- epochの外でtrain,testに分割するパターンなので常にtestのidは同じ
- 5epochに一度tripletを更新する
- model.fitはepochs=1で行う

In [None]:
model_history = []

# Everything this loop writes goes below <model_dir>/<vec_length>/. Create the
# directory up front so a missing folder does not abort the run after the
# first (expensive) epoch has already been trained.
output_dir = model_dir + '/{}'.format(vec_length)
if not os.path.isdir(output_dir):
    os.makedirs(output_dir)

for epoch in range(epochs):
    print('epoch %s'% epoch)
    # Re-sample the triplets (new random negatives) every 5 epochs; in between,
    # the same arrays are reused.
    if epoch % 5 == 0:
        # Release the previous arrays before building the next set to keep
        # peak memory down.
        if epoch != 0: del triplets
        triplets_train_PATHs = get_triplets(train_ids,BASE_PATH)
        triplets = get_np_triplets(triplets_train_PATHs)
        del triplets_train_PATHs
    # No targets are passed: the loss comes from the layer's add_loss().
    hist = tripletNet.fit([triplets[:,0],triplets[:,1],triplets[:,2]], epochs=1, batch_size=50)
    model_history.append(hist.history)
    # Checkpoint the accumulated history after every epoch; `with` makes sure
    # each file is flushed and closed instead of leaking one handle per epoch.
    with open(model_dir+'/{}/history{}.txt'.format(vec_length,epoch),'wb') as f:
        pickle.dump(model_history, f)
    # Save both embedding networks every 5 epochs.
    if (epoch+1) % 5 == 0:
        shop_embNet.save(model_dir+'/{}/shop_e{}.h5'.format(vec_length,epoch))
        con_embNet.save(model_dir+'/{}/con_e{}.h5'.format(vec_length,epoch))

# Save the complete training history.
with open(model_dir+'/{}/history.txt'.format(vec_length),'wb') as f:
    pickle.dump(model_history, f)

  1%|          | 28/4123 [00:00<00:18, 220.07it/s]

epoch 0


100%|██████████| 4123/4123 [00:21<00:00, 194.82it/s]
100%|██████████| 38378/38378 [04:08<00:00, 154.60it/s]


Instructions for updating:
Use tf.cast instead.
Epoch 1/1
epoch 1
Epoch 1/1
epoch 2
Epoch 1/1
epoch 3
Epoch 1/1
epoch 4
Epoch 1/1


  1%|          | 21/4123 [00:00<00:21, 193.60it/s]

epoch 5


100%|██████████| 4123/4123 [00:27<00:00, 151.98it/s]
100%|██████████| 38378/38378 [03:30<00:00, 197.20it/s]


Epoch 1/1
epoch 6
Epoch 1/1
epoch 7
Epoch 1/1
epoch 8
Epoch 1/1
epoch 9
Epoch 1/1


  1%|          | 28/4123 [00:00<00:19, 214.23it/s]

epoch 10


100%|██████████| 4123/4123 [00:21<00:00, 191.59it/s]
100%|██████████| 38378/38378 [03:25<00:00, 187.10it/s]


Epoch 1/1
epoch 11
Epoch 1/1
epoch 12
Epoch 1/1
epoch 13
Epoch 1/1
epoch 14
Epoch 1/1


  1%|          | 27/4123 [00:00<00:16, 246.43it/s]

epoch 15


100%|██████████| 4123/4123 [00:22<00:00, 186.42it/s]
 67%|██████▋   | 25522/38378 [02:30<01:09, 183.88it/s]

In [42]:
# for epoch in range(epochs):
#     print('epoch %s'% epoch)
#     # PATHの組みを取得
#     triplets_train_PATHs = get_triplets(train_ids,BASE_PATH)
#     # np配列に変換
#     triplets = get_np_triplets(triplets_train_PATHs)
#     # fit
#     tripletNet.fit([triplets[:,0],triplets[:,1],triplets[:,2]], epochs=1, batch_size=50) # using batch_size is better
#     # 使い終わったので削除
#     del triplets
#     if (epoch+1) % 5 == 0:
# #         tripletNet.save('./model/T_Shirt/Single/tripletNetmodel_e{}.h5'.format(epoch))
#         # 5epochごとにmodelを保存
#         shop_embNet.save('./model/T_Shirt/Single/a{}/{}/shop_emb_e{}.h5'.format(ALPHA,vec_length,epoch))
#         con_embNet.save('./model/T_Shirt/Single/a{}/{}/con_emb_e{}.h5'.format(ALPHA,vec_length,epoch))