# Dualモデル学習用
- 後でリファクタリング

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import random
from numba import jit
from tqdm import tqdm

%matplotlib inline

現状GPUメモリに乗りきらずCPUで回している<br>
VGGの出力をあらかじめpickleとして保存することで回避可能か？

## Model Definition

In [2]:
# import os
# os.environ['CUDA_VISIBLE_DEVICES']='0'
import keras

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [3]:
from keras.applications.vgg16 import VGG16
from keras.preprocessing.image import ImageDataGenerator

In [4]:
from keras.utils import plot_model
from keras.models import Model
from keras.layers import Input,Layer,Lambda
from keras.layers import Flatten,BatchNormalization
from keras.layers import Dense,Dropout
from keras.layers import concatenate
from keras.layers.convolutional import Conv2D
from keras.layers.pooling import MaxPooling2D
from keras import backend as K

In [5]:
import tensorflow as tf
# Let TensorFlow grow its GPU memory allocation on demand instead of
# reserving the whole device up front.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
_session = tf.Session(config=config)
tf.keras.backend.set_session(_session)
# The networks in this notebook are built with standalone Keras (`import keras`),
# whose backend `K` tracks its session separately from `tf.keras`; register the
# same session there as well so that `allow_growth` applies to the training run.
K.set_session(_session)

#### ハイパーパラメータ
- `alpha > beta`を満たす必要がある(元論文)
- dense_numで特徴が潰れないように
    - dualだから100にしてた(Singleを200次元でやったので100+100で200になるから平等性)
    - 200+200だと多すぎる？

In [6]:
# Input image geometry (pixels) and number of colour channels.
imheight, imwidth, channels = 128, 128, 3

# Margins passed to the triplet-loss layers as `alpha` and `beta`.
# (A previous experiment used ALPHA = 0.2.)
ALPHA, BETA = 0.1, 0.05

# Width of the Dense embedding produced by each branch; the two branches are
# concatenated, so the final vector is twice as long.
dense_num = 150
vec_length = 2 * dense_num

#### VGG16読み込み

In [7]:
from keras.applications.vgg16 import VGG16
# include_top=False => load VGG16 without its Dense classifier head (not needed here).
# ImageNet weights are used; the input tensor is created explicitly so its shape is fixed.
# NOTE(review): the shape is written as (imwidth, imheight, channels) while the triplet
# inputs further down use (imheight, imwidth, channels). Harmless while both are 128,
# but worth aligning before using non-square images.
base_model = VGG16(include_top=False, weights='imagenet', input_tensor=Input(shape=(imwidth, imheight, channels)), input_shape=None) 

Instructions for updating:
Colocations handled automatically by placer.


In [8]:
# Freeze the first 15 layers of the VGG16 backbone so only the remaining ones are fine-tuned.
# NOTE(review): presumably this covers everything up to the end of block4 — confirm
# against base_model.summary().
for layer in base_model.layers[:15]:
    layer.trainable=False

#### 各ルートのCNNを共通で定義


In [9]:
# Shallow network for colour-oriented features: two conv + max-pool stages,
# a Dense projection to `dense_num` dimensions, then L2 normalisation.
# Open question from the author: should this be built inside create_embNet instead?
# (As written, this single model instance is reused by every create_embNet() call.)
# NOTE(review): input shape is (imwidth, imheight, channels); the triplet inputs
# use (imheight, imwidth, channels) — equivalent only while the image is square.
inputs = Input(shape=(imwidth, imheight, channels))
conv1 = Conv2D(32, (4,4) , padding='same', activation='relu')(inputs)
pool1 = MaxPooling2D(pool_size=(2,2), strides=None, padding='valid')(conv1)
conv2 = Conv2D(32, (4,4) , padding='same', activation='relu')(pool1)
pool2 = MaxPooling2D(pool_size=(2,2), strides=None, padding='valid')(conv2)
flatten = Flatten()(pool2) 
dense_layer = Dense(dense_num, activation='relu')(flatten)
# Normalise each embedding to unit L2 length along the feature axis.
norm_layer = Lambda(lambda  x: K.l2_normalize(x, axis=1), name='norm_layer1')(dense_layer)
shallow_model=Model(inputs=inputs,outputs=norm_layer) 

In [10]:
def create_embNet():
    """Build a two-input embedding network.

    The deep branch stacks two conv layers, a Dense projection to
    ``dense_num`` units and L2 normalisation on top of ``base_model.output``.
    The shallow branch feeds a second input through the module-level
    ``shallow_model``. Both embeddings are concatenated into the output.

    Every call creates new head layers, but ``base_model`` and
    ``shallow_model`` are the same objects each time, so they are shared
    between all networks returned by this function.

    Returns:
        A Keras ``Model`` taking ``[base_model.input, shallow_inputs]`` and
        returning the concatenated embedding.
    """
    # Created first so that Keras instantiates layers in the same order as before.
    shallow_inputs = Input(shape=(imwidth, imheight, channels))

    # Deep branch: head on top of the VGG16 feature map.
    features = Conv2D(32, (4,4) , padding='same', activation='relu')(base_model.output)
    features = Conv2D(32, (4,4) , padding='same', activation='relu')(features)
    features = Flatten()(features)
    features = Dense(dense_num, activation='relu')(features)
    deep_embedding = Lambda(lambda  x: K.l2_normalize(x, axis=1), name='norm_layer')(features)

    # Shallow branch: run the second input through the shared shallow model.
    shallow_embedding = shallow_model(shallow_inputs)

    out = concatenate([deep_embedding, shallow_embedding])
    # Single-branch alternative kept for reference:
    #     return Model(inputs=[base_model.input], outputs=deep_embedding)
    return Model(inputs=[base_model.input,shallow_inputs],outputs=out)

In [11]:
# Define the three triplet inputs (anchor / positive / negative).
# They are declared here rather than inside create_embNet because defining the
# Input there would not let the three inputs be separated explicitly.
a_in = Input(shape = (imheight, imwidth, channels), name='anchor_input')
p_in = Input(shape = (imheight, imwidth, channels), name='positive_input')
n_in = Input(shape = (imheight, imwidth, channels), name='negative_input')

In [12]:
# NOTE(review): `sa_in` is not referenced anywhere else in the visible notebook —
# presumably a leftover from an earlier design; confirm before removing.
sa_in = Input(shape = (imheight, imwidth, channels), name='sanchor_input')

In [13]:
# Defined at module level because the trained networks are reused afterwards
# as standalone encoders that turn images into vectors.
con_embNet = create_embNet()
shop_embNet = create_embNet()

In [14]:
# Each embedding network takes two inputs (deep branch, shallow branch);
# the same image tensor is fed to both.
# NOTE(review): get_triplets() builds triplets as (consumer, shop, negative shop),
# so the anchor passed to `shop_embNet` is a consumer image and the images passed
# to `con_embNet` are shop images. Confirm that this naming is intended before
# relying on the saved model names.
a_emb = shop_embNet([a_in,a_in])
p_emb = con_embNet([p_in,p_in])
n_emb = con_embNet([n_in,n_in])

### Triplet loss
- 実行時コメントアウトはずす

In [15]:
class TripletLossLayer(Layer):
    """Keras layer that computes the standard triplet loss and registers it.

    The layer takes ``[anchor, positive, negative]`` embeddings, computes
    ``sum(max(D(a,p) - D(a,n) + alpha, 0))`` over the batch (``D`` being the
    squared Euclidean distance) and adds it to the model via ``add_loss``.
    """

    def __init__(self, alpha, **kwargs):
        # Margin between the positive and negative distances.
        self.alpha = alpha
        super().__init__(**kwargs)

    def triplet_loss(self, inputs):
        """Return the batch-summed hinge loss for an (a, p, n) embedding triple."""
        anchor, positive, negative = inputs
        d_pos = K.sum(K.square(anchor - positive), axis=-1)
        d_neg = K.sum(K.square(anchor - negative), axis=-1)
        hinge = K.maximum(d_pos - d_neg + self.alpha, 0)
        return K.sum(hinge, axis=0)

    def call(self, inputs):
        """Compute the loss, attach it to the layer and return it as the output."""
        value = self.triplet_loss(inputs)
        self.add_loss(value)
        return value

    def get_config(self):
        """Return the base layer config extended with ``alpha``."""
        merged = dict(super().get_config())
        merged['alpha'] = self.alpha
        return merged

In [16]:
# Layer that computes the triplet loss from anchor, positive and negative embedding vectors
triplet_loss_layer = TripletLossLayer(alpha=ALPHA, name='triplet_loss_layer')([a_emb, p_emb, n_emb])

# Model that can be trained with anchor, positive negative images
tripletNet = Model([a_in, p_in, n_in], triplet_loss_layer)
# loss=None: the loss is supplied by the layer itself through add_loss().
# NOTE: `tripletNet` is reassigned by the "Improved Triplet Loss" cells further down
# when those are executed as well.
tripletNet.compile(loss=None, optimizer='adam')

### Improved Triplet Loss
- `Loss = [D(a,p) - D(a,n) + ALPHA]_+ + [D(a,p) - BETA]_+`（`[x]_+ = max(x, 0)`）
- Positiveを短くする方向に制御する

https://qiita.com/tancoro/items/35d0925de74f21bfff14#improved-triplet-loss

<img src="./readme_imgs/improved.PNG" width=50% align=left><br>



In [15]:
class TripletLossLayer(Layer):
    """Keras layer that computes the *improved* triplet loss and registers it.

    For ``[anchor, positive, negative]`` embeddings the loss is

        sum( max(D(a,p) - D(a,n) + alpha, 0) + max(D(a,p) - beta, 0) )

    summed over the batch, where ``D`` is the squared Euclidean distance.
    The first term keeps the a-p / a-n distance gap at least ``alpha``; the
    second term suppresses in-class (positive) variance by penalising
    anchor-positive distances above ``beta``.

    Args:
        alpha: margin of the relative (triplet) term.
        beta: threshold of the absolute anchor-positive term.
        **kwargs: forwarded to ``keras.layers.Layer``.
    """

    def __init__(self, alpha, beta, **kwargs):
        self.alpha = alpha
        self.beta = beta
        super(TripletLossLayer, self).__init__(**kwargs)

    def triplet_loss(self, inputs):
        """Return the batch-summed improved triplet loss for ``(a, p, n)``."""
        a, p, n = inputs
        p_dist = K.sum(K.square(a-p), axis=-1)
        n_dist = K.sum(K.square(a-n), axis=-1)
        relative_term = K.maximum((p_dist - n_dist + self.alpha), 0)
        absolute_term = K.maximum((p_dist - self.beta), 0)
        return K.sum(relative_term + absolute_term, axis=0)

    def call(self, inputs):
        """Compute the loss, attach it via ``add_loss`` and return it as the output."""
        loss = self.triplet_loss(inputs)
        self.add_loss(loss)
        return loss

    def get_config(self):
        """Return the layer config including both ``alpha`` and ``beta``.

        ``beta`` must be part of the config: ``__init__`` requires it, so a
        config without it cannot be used to re-create the layer when a saved
        model is loaded.
        """
        config = {'alpha': self.alpha, 'beta': self.beta}
        base_config = super(TripletLossLayer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

In [16]:
# Layer that computes the improved triplet loss (margins ALPHA and BETA) from the
# anchor, positive and negative embedding vectors
triplet_loss_layer = TripletLossLayer(alpha=ALPHA, beta=BETA, name='triplet_loss_layer')([a_emb, p_emb, n_emb])

# Model that can be trained with anchor, positive and negative images.
# It is compiled in a later cell.
tripletNet = Model([a_in, p_in, n_in], triplet_loss_layer)

In [17]:
# from IPython.display import SVG
# from keras.utils.vis_utils import model_to_dot

# SVG(model_to_dot(tripletNet, show_shapes=True, show_layer_names=False).create(prog='dot', format='svg'))

In [18]:
# Multi-GPU parallelisation (currently disabled)
# from keras.utils import multi_gpu_model
# tripletNet2 = multi_gpu_model(tripletNet,gpus=2)
# Compile the model; loss=None because the loss layer registers its loss via add_loss().
tripletNet.compile(loss=None, optimizer='adam')

### データの用意

In [19]:
import os

# Dataset root: one sub-directory per item id.
BASE_PATH = './dataset/T_Shirt_all/'
# All item ids, in lexicographic order.
ids = sorted(os.listdir(BASE_PATH))

In [20]:
# Sanity check: show the first id found under BASE_PATH.
ids[0]

'id_00000001'

In [21]:
# Number of item ids in the dataset.
len(ids)

6155

### tripletのパスの組を返す関数

In [22]:
import itertools
import random

# @jit
def get_triplets(ids,BASE_PATH):
    """Build (consumer, shop, negative-shop) image-path triplets.

    For every id, each (consumer image, shop image) pair of that id is
    combined with one randomly drawn shop image from a randomly drawn
    *different* id. Negatives are re-drawn on every call, so repeated calls
    yield different triplets.

    Args:
        ids: iterable of id directory names located under ``BASE_PATH``.
        BASE_PATH: dataset root; must end with a path separator because
            paths are built by plain string concatenation.

    Returns:
        A list of ``[consumer_path, shop_path, negative_shop_path]`` lists.

    Raises:
        IndexError: from ``random.choice`` if there is no other id to draw a
            negative from, or the drawn negative id has no shop image.
    """
    triplets = []
    # Shop images per negative id, listed at most once per call instead of
    # once per triplet.
    shop_files_by_id = {}

    # Open question from the author: should the ids be visited in random order?
    for id_ in tqdm(ids):
        id_dir = BASE_PATH + id_ + '/'
        names = sorted(os.listdir(id_dir))
        # Match on the file name only, so a BASE_PATH that happens to contain
        # 'shop' or 'comsumer' cannot make every file match.
        # ('comsumer' is presumably the dataset's own spelling — verify.)
        con = [id_dir + name for name in names if 'comsumer' in name]
        shop = [id_dir + name for name in names if 'shop' in name]

        # Invariant for this id; hoisted out of the per-pair loop.
        neg_candidates = [x for x in ids if x != id_]

        for con_file, shop_file in itertools.product(con, shop):
            neg_id = random.choice(neg_candidates)
            if neg_id not in shop_files_by_id:
                neg_dir = BASE_PATH + neg_id + '/'
                shop_files_by_id[neg_id] = [
                    neg_dir + name for name in os.listdir(neg_dir) if 'shop' in name
                ]
            neg_file = random.choice(shop_files_by_id[neg_id])
            triplets.append([con_file, shop_file, neg_file])

    return triplets

### id単位でtrainとtestを分割する
- 元々np.random.choice()でやっていたが、ブートストラップサンプリングだったのでダメ
- train_test_splitを利用

In [23]:
# `sklearn.cross_validation` was removed in scikit-learn 0.20; prefer the
# `model_selection` module and fall back only on very old installations.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split
# from sklearn.model_selection import ShuffleSplit



In [24]:
# Keep random_state fixed so the train/test split is reproducible.
train_ids,test_ids=train_test_split(ids,test_size=0.33,random_state=0)
# `ids` is not used from here on, so drop it.
del ids

In [25]:
# Sanity check: show the first few training ids after the shuffle-split.
train_ids[:5]

['id_00016780', 'id_00007427', 'id_00029554', 'id_00020254', 'id_00018517']

### Triplet作成用関数
- エポックごとにTripletの組み合わせをランダムに変更するためpickle保存ができない
- エポックごとに変えるのは普通？

In [26]:
from PIL import Image
# @jit
def get_np_triplets(triplet_PATHs, size=(128, 128), dtype=np.float64):
    """Load image-path triplets into a single normalised NumPy array.

    Each path is opened, converted to RGB, resized to ``size`` and scaled
    to the range [0, 1] by dividing by 255.

    Args:
        triplet_PATHs: sequence of ``(anchor, positive, negative)`` path
            triples; only the first three entries of each item are read.
        size: ``(width, height)`` passed to ``PIL.Image.resize``. Defaults to
            the previously hard-coded ``(128, 128)``.
        dtype: dtype of the returned array. The default ``np.float64``
            reproduces the previous behaviour; ``np.float32`` halves the
            memory footprint (NOTE(review): presumably sufficient for
            training — confirm).

    Returns:
        Array of shape ``(len(triplet_PATHs), 3, height, width, 3)``; an empty
        1-D array if ``triplet_PATHs`` is empty.
    """
    def _load(path):
        # Close the underlying file deterministically instead of relying on
        # garbage collection while thousands of images are being read.
        with Image.open(path) as img:
            resized = img.convert('RGB').resize(size)
        return np.asarray(resized, dtype=dtype) / 255.

    triplets = []
    for triplet in tqdm(triplet_PATHs):
        anc_img = _load(triplet[0])
        pos_img = _load(triplet[1])
        neg_img = _load(triplet[2])
        triplets.append(np.array([anc_img, pos_img, neg_img]))

    return np.array(triplets)

test_idsはretrival.ipynbで参照するのでpickleとして保存

In [27]:
import pickle
# Persist the held-out ids (pickle format, despite the .txt extension) for the
# retrieval notebook. The context manager makes sure the file is flushed and closed.
with open('test_ids.txt', 'wb') as f:
    pickle.dump(test_ids, f)

# 学習

### 仕様について
- 各エポックでtestデータを用いてN-top accを出したい
    - 学習過程のデータとしては有意義だが時間がかかる
    - 最終的なN-topしか載せていない論文がほとんど
- epochの外でtrain,testに分割するパターンなので常にtestのidは同じ
- train_tripletのnegativeが毎回ランダムになるので偏らないメリットがある
- model.fitはepochs=1で行う

### やるべき
- epochごとにlossをファイル出力

In [28]:
# Number of passes of the outer training loop (each pass calls fit with epochs=1).
epochs = 120

In [29]:
# Per-epoch Keras history dicts, in training order.
model_history = []
for epoch in range(epochs):
    print('epoch %s'% epoch)
    # Re-draw the triplets (and therefore the random negatives) every 5 epochs.
    if epoch % 5 == 0:
        # Release the previous image array before building the next one.
        if epoch != 0: del triplets
        # Get the triplet path combinations.
        triplets_train_PATHs = get_triplets(train_ids,BASE_PATH)
        # Convert them to a NumPy array of images.
        triplets = get_np_triplets(triplets_train_PATHs)
        # The paths are no longer needed.
        del triplets_train_PATHs
    # Fit for a single epoch on (anchor, positive, negative) image batches.
    hist = tripletNet.fit([triplets[:,0],triplets[:,1],triplets[:,2]], epochs=1, batch_size=50) # using batch_size is better
    model_history.append(hist.history)
    # Optional checkpointing, currently disabled: save both encoders every 5 epochs.
#     if (epoch+1) % 5 == 0:
#         shop_embNet.save('./model/T_Shirt/improved_tripletloss/a{}b{}/{}/shop_e{}.h5'.format(ALPHA,BETA,vec_length,epoch))
#         con_embNet.save('./model/T_Shirt/improved_tripletloss/a{}b{}/{}/con_e{}.h5'.format(ALPHA,BETA,vec_length,epoch))
#         shop_embNet.save('./model/T_Shirt/tripletloss/a{}/{}/shop_e{}.h5'.format(ALPHA,vec_length,epoch))
#         con_embNet.save('./model/T_Shirt/tripletloss/a{}/{}/con_e{}.h5'.format(ALPHA,vec_length,epoch))

# Save the training history (pickle format).
history_path = './model/T_Shirt/improved_tripletloss/a{}b{}/{}/history.txt'.format(ALPHA,BETA,vec_length)
# Create the target directory if needed so a long run is not lost to a
# FileNotFoundError at the very end.
os.makedirs(os.path.dirname(history_path), exist_ok=True)
with open(history_path,'wb') as f:
    pickle.dump(model_history, f)

  0%|          | 19/4123 [00:00<00:34, 117.63it/s]

epoch 0


100%|██████████| 4123/4123 [00:24<00:00, 169.93it/s]
100%|██████████| 38378/38378 [04:03<00:00, 157.77it/s]


Instructions for updating:
Use tf.cast instead.
Epoch 1/1
epoch 1
Epoch 1/1
epoch 2
Epoch 1/1
epoch 3
Epoch 1/1
epoch 4
Epoch 1/1
epoch 5


100%|██████████| 4123/4123 [00:30<00:00, 136.52it/s]
100%|██████████| 38378/38378 [04:17<00:00, 148.98it/s]


Epoch 1/1
epoch 6
Epoch 1/1
epoch 7
Epoch 1/1
epoch 8
Epoch 1/1
epoch 9
Epoch 1/1
epoch 10


100%|██████████| 4123/4123 [00:29<00:00, 139.07it/s]
100%|██████████| 38378/38378 [04:00<00:00, 159.51it/s]


Epoch 1/1
epoch 11
Epoch 1/1
epoch 12
Epoch 1/1
epoch 13
Epoch 1/1
epoch 14
Epoch 1/1


  0%|          | 0/4123 [00:00<?, ?it/s]

epoch 15


100%|██████████| 4123/4123 [00:22<00:00, 184.66it/s]
100%|██████████| 38378/38378 [03:23<00:00, 188.18it/s]


Epoch 1/1
epoch 16
Epoch 1/1
epoch 17
Epoch 1/1
epoch 18
Epoch 1/1
epoch 19
Epoch 1/1
epoch 20


100%|██████████| 4123/4123 [00:19<00:00, 207.92it/s]
100%|██████████| 38378/38378 [03:24<00:00, 191.84it/s]


Epoch 1/1
epoch 21
Epoch 1/1
epoch 22
Epoch 1/1
epoch 23
Epoch 1/1
epoch 24
Epoch 1/1
epoch 25


100%|██████████| 4123/4123 [00:21<00:00, 187.54it/s]
100%|██████████| 38378/38378 [03:28<00:00, 183.81it/s]


Epoch 1/1
epoch 26
Epoch 1/1
epoch 27
Epoch 1/1
epoch 28
Epoch 1/1
epoch 29
Epoch 1/1


  0%|          | 0/4123 [00:00<?, ?it/s]

epoch 30


100%|██████████| 4123/4123 [00:22<00:00, 184.71it/s]
100%|██████████| 38378/38378 [03:28<00:00, 184.02it/s]


Epoch 1/1
epoch 31
Epoch 1/1
epoch 32
Epoch 1/1
epoch 33
Epoch 1/1
epoch 34
Epoch 1/1
epoch 35


100%|██████████| 4123/4123 [00:21<00:00, 194.39it/s]
100%|██████████| 38378/38378 [03:25<00:00, 186.52it/s]


Epoch 1/1
epoch 36
Epoch 1/1
epoch 37
Epoch 1/1
epoch 38
Epoch 1/1
epoch 39
Epoch 1/1


  0%|          | 0/4123 [00:00<?, ?it/s]

epoch 40


100%|██████████| 4123/4123 [00:23<00:00, 176.62it/s]
100%|██████████| 38378/38378 [03:29<00:00, 182.87it/s]


Epoch 1/1
epoch 41
Epoch 1/1
epoch 42
Epoch 1/1
epoch 43
Epoch 1/1
epoch 44
Epoch 1/1


  0%|          | 0/4123 [00:00<?, ?it/s]

epoch 45


100%|██████████| 4123/4123 [00:22<00:00, 184.47it/s]
100%|██████████| 38378/38378 [03:12<00:00, 199.32it/s]


Epoch 1/1
epoch 46
Epoch 1/1
epoch 47
Epoch 1/1
epoch 48
Epoch 1/1
epoch 49
Epoch 1/1


  0%|          | 0/4123 [00:00<?, ?it/s]

epoch 50


100%|██████████| 4123/4123 [00:22<00:00, 181.64it/s]
100%|██████████| 38378/38378 [03:22<00:00, 189.39it/s]


Epoch 1/1
epoch 51
Epoch 1/1
epoch 52
Epoch 1/1
epoch 53
Epoch 1/1
epoch 54
Epoch 1/1


  0%|          | 0/4123 [00:00<?, ?it/s]

epoch 55


100%|██████████| 4123/4123 [00:19<00:00, 208.99it/s]
100%|██████████| 38378/38378 [03:06<00:00, 205.89it/s]


Epoch 1/1
epoch 56
Epoch 1/1
epoch 57
Epoch 1/1
epoch 58
Epoch 1/1
epoch 59
Epoch 1/1


  0%|          | 0/4123 [00:00<?, ?it/s]

epoch 60


100%|██████████| 4123/4123 [00:20<00:00, 201.69it/s]
100%|██████████| 38378/38378 [03:36<00:00, 177.43it/s]


Epoch 1/1
epoch 61
Epoch 1/1
epoch 62
Epoch 1/1
epoch 63
Epoch 1/1
epoch 64
Epoch 1/1
epoch 65


100%|██████████| 4123/4123 [00:23<00:00, 175.88it/s]
100%|██████████| 38378/38378 [03:33<00:00, 170.57it/s]


Epoch 1/1
epoch 66
Epoch 1/1
epoch 67
Epoch 1/1
epoch 68
Epoch 1/1
epoch 69
Epoch 1/1
epoch 70


100%|██████████| 4123/4123 [00:22<00:00, 181.56it/s]
100%|██████████| 38378/38378 [03:40<00:00, 173.97it/s]


Epoch 1/1
epoch 71
Epoch 1/1
epoch 72
Epoch 1/1
epoch 73
Epoch 1/1
epoch 74
Epoch 1/1
epoch 75


100%|██████████| 4123/4123 [00:23<00:00, 177.38it/s]
100%|██████████| 38378/38378 [03:32<00:00, 180.29it/s]


Epoch 1/1
epoch 76
Epoch 1/1
epoch 77
Epoch 1/1
epoch 78
Epoch 1/1
epoch 79
Epoch 1/1
epoch 80

  0%|          | 0/4123 [00:00<?, ?it/s]




100%|██████████| 4123/4123 [00:20<00:00, 204.23it/s]
100%|██████████| 38378/38378 [03:40<00:00, 174.41it/s]


Epoch 1/1
epoch 81
Epoch 1/1
epoch 82
Epoch 1/1
epoch 83
Epoch 1/1
epoch 84
Epoch 1/1


  0%|          | 0/4123 [00:00<?, ?it/s]

epoch 85


100%|██████████| 4123/4123 [00:22<00:00, 186.83it/s]
100%|██████████| 38378/38378 [04:01<00:00, 159.06it/s]


Epoch 1/1
epoch 86
Epoch 1/1
epoch 87
Epoch 1/1
epoch 88
Epoch 1/1
epoch 89
Epoch 1/1
epoch 90


100%|██████████| 4123/4123 [00:21<00:00, 194.94it/s]
100%|██████████| 38378/38378 [04:00<00:00, 159.80it/s]


Epoch 1/1
epoch 91
Epoch 1/1
epoch 92
Epoch 1/1
epoch 93
Epoch 1/1
epoch 94
Epoch 1/1
epoch 95


100%|██████████| 4123/4123 [00:22<00:00, 183.45it/s]
100%|██████████| 38378/38378 [03:56<00:00, 162.24it/s]


Epoch 1/1
epoch 96
Epoch 1/1
epoch 97
Epoch 1/1
epoch 98
Epoch 1/1
epoch 99
Epoch 1/1


  0%|          | 0/4123 [00:00<?, ?it/s]

epoch 100


100%|██████████| 4123/4123 [00:19<00:00, 207.28it/s]
100%|██████████| 38378/38378 [03:58<00:00, 160.71it/s]


Epoch 1/1
epoch 101
Epoch 1/1
epoch 102
Epoch 1/1
epoch 103
Epoch 1/1
epoch 104
Epoch 1/1


  0%|          | 0/4123 [00:00<?, ?it/s]

epoch 105


100%|██████████| 4123/4123 [00:21<00:00, 189.15it/s]
100%|██████████| 38378/38378 [03:58<00:00, 160.60it/s]


Epoch 1/1
epoch 106
Epoch 1/1
epoch 107
Epoch 1/1
epoch 108
Epoch 1/1
epoch 109
Epoch 1/1
epoch 110


100%|██████████| 4123/4123 [00:22<00:00, 186.51it/s]
100%|██████████| 38378/38378 [04:07<00:00, 154.87it/s]


Epoch 1/1
epoch 111
Epoch 1/1
epoch 112
Epoch 1/1
epoch 113
Epoch 1/1
epoch 114
Epoch 1/1


  0%|          | 0/4123 [00:00<?, ?it/s]

epoch 115


100%|██████████| 4123/4123 [00:22<00:00, 184.23it/s]
100%|██████████| 38378/38378 [03:56<00:00, 162.16it/s]


Epoch 1/1
epoch 116
Epoch 1/1
epoch 117
Epoch 1/1
epoch 118
Epoch 1/1
epoch 119
Epoch 1/1


In [30]:
# Sanity check: one history entry is expected per epoch.
len(model_history)

120

In [31]:
# Save the training history (pickle format); the context manager guarantees the
# file is flushed and closed.
with open('./model/T_Shirt/improved_tripletloss/a{}b{}/{}/history.txt'.format(ALPHA,BETA,vec_length),'wb') as f:
    pickle.dump(model_history, f)

# updates
- ver 0.1
    - 学習できることを確認
    - 1epochごとにモデルを保存できるように変更
- ver 0.2
    - embNetを並列化
- ver 0.3(2019/6/30)
    - ~~Triplet lossにshop画像間の距離を考慮するため内積を加える~~
- 2019/10/11
    - Improved Triplet lossを導入
- 2019/11/17
    - Improved Triplet lossを修正
- 2019/11/22
    - vec_length=512

In [None]:
# Visual sanity check: build triplets for a handful of training ids and plot
# each one as anchor / positive / negative.
aaa = get_triplets(train_ids[:5],BASE_PATH)
print(aaa)
bbb = get_np_triplets(aaa)

for b in bbb:
    fig = plt.figure(figsize=(8,4))

    # One subplot per triplet member, each labelled with its actual role
    # (previously all three panels were titled 'anchor').
    for idx, title in enumerate(('anchor', 'positive', 'negative')):
        plt.subplot(1,3,idx+1)
        # Hide tick labels and tick marks; only the image matters here.
        plt.tick_params(labelbottom=False, labelleft=False, labelright=False, labeltop=False)
        plt.tick_params(color='white')
        plt.imshow(b[idx])
        plt.title(title)
    