# デュアルネットワーク学習用のnotebook
https://stats.stackexchange.com/questions/248511/purpose-of-l2-normalization-for-triplet-network

## ライブラリのインポート

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import random
from numba import jit
from tqdm import tqdm
import os
import pickle
%matplotlib inline

場合によってはGPUの指定が必要かもしれない

In [2]:
# import os
# os.environ['CUDA_VISIBLE_DEVICES']='0'

keras関連

In [3]:
import keras
from keras.applications.vgg16 import VGG16
from keras.preprocessing.image import ImageDataGenerator

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [4]:
from keras.utils import plot_model
from keras.models import Model
from keras.layers import Input,Layer,Lambda
from keras.layers import Flatten,BatchNormalization
from keras.layers import Dense,Dropout
from keras.layers import concatenate
from keras.layers.convolutional import Conv2D
from keras.layers.pooling import MaxPooling2D
from keras import backend as K

In [5]:
import tensorflow as tf
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
tf.keras.backend.set_session(tf.Session(config=config))

## ハイパーパラメータ設定
- 保存先のディレクトリやファイル名に影響するため慎重に。
- Improved Triplet Lossにおいては、`alpha > beta`を満たす必要がある(元論文)

### 入力画像の情報

In [6]:
imheight = 128
imwidth = 128
channels = 3
category = 'T_Shirt'

### 出力次元
- Dualなネットワークではconcatするため出力次元はdense_num*2

In [7]:
deep_dense_num = 350
shallow_dense_num = 150
vec_length= 500

### Triplet Loss設定
- `ALPHA=D(a,p)とD(a,n)の相対距離を抑制`, `BETA=D(a,p)の絶対距離を制御`

In [8]:
ALPHA=0.1
BETA=0.05

## VGG16の読み込み

In [9]:
from keras.applications.vgg16 import VGG16
#include_top=false => Dense不要
base_model = VGG16(include_top=False, weights='imagenet', input_tensor=Input(shape=(imwidth, imheight, channels)), input_shape=None) 

Instructions for updating:
Colocations handled automatically by placer.


学習しないように重みを固定

In [10]:
for layer in base_model.layers[:15]:
    layer.trainable=False

## ネットワーク構造を定義

- 浅いネットワーク(shallow_model)を作成

元々(32,(4,4))だったが，奇数フィルタの方がいいらしい

In [11]:
inputs = Input(shape=(imwidth, imheight, channels))
conv1 = Conv2D(32, (4,4) , padding='same', activation='relu')(inputs)
pool1 = MaxPooling2D(pool_size=(2,2), strides=None, padding='valid')(conv1)
conv2 = Conv2D(32, (4,4) , padding='same', activation='relu')(pool1)
pool2 = MaxPooling2D(pool_size=(2,2), strides=None, padding='valid')(conv2)
flatten = Flatten()(pool2) 
dense_layer = Dense(shallow_dense_num, activation='relu')(flatten)
norm_layer = Lambda(lambda  x: K.l2_normalize(x, axis=1), name='norm_layer_shallow')(dense_layer)
shallow_model=Model(inputs=inputs,outputs=norm_layer) 

- shallow_modelと、VGGを通したdeepなモデルと結合しモデル全体を作成する関数

In [12]:
def create_embNet():
    shallow_inputs = Input(shape=(imwidth, imheight, channels))
    x = base_model.output
    conv1 = Conv2D(filters=32, kernel_size=(3,3) , padding='same', activation='relu')(x)
    conv2 = Conv2D(filters=32, kernel_size=(3,3) , padding='same', activation='relu')(conv1)
    flatten = Flatten()(conv2) 
    dense_layer = Dense(deep_dense_num, activation='relu')(flatten)
    norm_layer = Lambda(lambda  x: K.l2_normalize(x, axis=1), name='norm_layer_deep')(dense_layer)
    # inputに対してshallow_modelのoutputも用意
    x1 = norm_layer
    x2 = shallow_model(shallow_inputs)
    concat_out = concatenate([norm_layer,x2])
    dense_linear = Dense(units=vec_length, activation='linear')(concat_out)
#     l2norm_out = Lambda(lambda  x: K.l2_normalize(x, axis=1), name='norm_layer_whole')(concat_out)
#     main_out =  Dense(units=vec_length, activation='relu')(l2norm_out)
    return Model(inputs=[base_model.input,shallow_inputs],outputs=dense_linear)

- inputを定義する
- create embNet()の中でInputを定義すると明示的に3つの入力が分けられない

In [13]:
# define three Inputs
a_in = Input(shape = (imheight, imwidth, channels), name='anchor_input')
p_in = Input(shape = (imheight, imwidth, channels), name='positive_input')
n_in = Input(shape = (imheight, imwidth, channels), name='negative_input')

### これは不明

In [14]:
sa_in = Input(shape = (imheight, imwidth, channels), name='sanchor_input')

- **後に埋め込み用のモデルとして利用するため**ベクトル化までの部分を別で定義しておく

In [15]:
con_embNet = create_embNet()
shop_embNet = create_embNet()

In [16]:
# shallow_model.summary()

- 埋め込み用のベクトルもあらかじめ用意する

In [17]:
# con_embNet.summary()

In [18]:
a_emb = shop_embNet([a_in,a_in])
p_emb = con_embNet([p_in,p_in])
n_emb = con_embNet([n_in,n_in])

## Triplet Loss
- 通常のTriplet Lossを用いる場合はこちらを使う。
- `Loss=max[D(a,p)-D(a-n)+margin,0] where D(A,B)=||A-B||_2^2`

In [19]:
class TripletLossLayer(Layer):
    def __init__(self, alpha, **kwargs):
        self.alpha = alpha
        super(TripletLossLayer, self).__init__(**kwargs)
    
    def triplet_loss(self, inputs):
        a, p, n = inputs
        p_dist = K.sum(K.square(a-p), axis=-1)
        n_dist = K.sum(K.square(a-n), axis=-1)
        return K.sum(K.maximum(p_dist - n_dist + self.alpha, 0), axis=0)
    
    def call(self, inputs):
        loss = self.triplet_loss(inputs)
        self.add_loss(loss)
        return loss
    
    def get_config(self):
        config = {'alpha': self.alpha}
        base_config = super(TripletLossLayer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

### Lossレイヤの定義とモデルのコンパイル

In [20]:
# Layer that computes the triplet loss from anchor, positive and negative embedding vectors
triplet_loss_layer = TripletLossLayer(alpha=ALPHA, name='triplet_loss_layer')([a_emb, p_emb, n_emb])

# Model that can be trained with anchor, positive negative images
tripletNet = Model([a_in, p_in, n_in], triplet_loss_layer)
tripletNet.compile(loss=None, optimizer='adam')

In [21]:
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot

# SVG(model_to_dot(con_embNet).create(prog='dot', format='svg'))

## Improved Triplet Lossの場合

### Improved Triplet Loss
- `Loss=[D(a,p)-D(a,n)+ALPHA]+[D(a,p)-BETA]`
- Positiveを短くする方向に制御する

https://qiita.com/tancoro/items/35d0925de74f21bfff14#improved-triplet-loss

<img src="./readme_imgs/improved.PNG" width=30% align=left><br>

- Improved Triplet Loss用にレイヤを改変

In [22]:
class TripletLossLayer(Layer):
    def __init__(self, alpha, beta, **kwargs):
        self.alpha = alpha
        self.beta = beta
        super(TripletLossLayer, self).__init__(**kwargs)

    def triplet_loss(self, inputs):
        a, p, n = inputs
        p_dist = K.sum(K.square(a-p), axis=-1)
        n_dist = K.sum(K.square(a-n), axis=-1)
        pn_dist = K.sum(K.square(p-n), axis=-1)
        return K.sum(K.maximum((p_dist - n_dist + self.alpha), 0) + K.maximum((p_dist - self.beta), 0), axis=0)
    
    def call(self, inputs):
        loss = self.triplet_loss(inputs)
        self.add_loss(loss)
        return loss
    
    def get_config(self):
        config = {'alpha': self.alpha}
        base_config = super(TripletLossLayer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

### モデルの定義とコンパイル
- ハイパーパラメータとしてBETAが増えていることに注意

In [23]:
# Layer that computes the triplet loss from anchor, positive and negative embedding vectors
triplet_loss_layer = TripletLossLayer(alpha=ALPHA, beta=BETA, name='triplet_loss_layer')([a_emb, p_emb, n_emb])

# Model that can be trained with anchor, positive negative images
tripletNet = Model([a_in, p_in, n_in], triplet_loss_layer)
tripletNet.compile(loss=None, optimizer='adam')

## データの用意

- `T_Shirt_all/`(クロップ済み画像が商品id別に保存されたディレクトリ)

In [24]:
BASE_PATH = './dataset/T_Shirt_all/'
category = 'T_Shirt'
ids = sorted([x for x in os.listdir(BASE_PATH)])

### TripletのPATHを返す関数
- 入力:`ids=商品id群`, `BASE_PATH=商品群ディレクトリへのPATH`
- `[consumer_ancのパス, shop_posのパス, shop_negのパス]`のような組を返す

In [25]:
import itertools
import random

def get_triplets(ids,BASE_PATH):
    triplets=[]
    for id_ in tqdm(ids):
        files = sorted([BASE_PATH+id_+'/'+x for x in os.listdir(BASE_PATH+id_)])
        con = sorted([x for x in files if 'comsumer' in x])
        shop = sorted([x for x in files if 'shop' in x ])
        combs = list(itertools.product(tuple(con),tuple(shop)))
        for comb in combs:
            comb = list(comb)
            neg_id = random.choice([x for x in ids if x != id_])
            neg_file = random.choice([BASE_PATH+neg_id+'/'+x for x in os.listdir(BASE_PATH+neg_id) if 'shop' in x])
            comb.append(neg_file)
            triplets.append(comb)
    return triplets

### 商品idの単位でTrain/Testを分割する
- Seed値(random_state)は固定
- random.choice()はブートストラップサンプリングのため、`train_test_split()`を利用
- idsオブジェクトは以降使わないためここで削除

In [26]:
from sklearn.cross_validation import train_test_split
train_ids,test_ids=train_test_split(ids,test_size=0.33,random_state=0)
del ids



In [27]:
len(train_ids)

4123

In [28]:
length = 0
for id_ in train_ids:
    length += len(os.listdir(BASE_PATH+id_))
length

26291

In [29]:
# train_ids

- テスト時に参照するためTestデータの情報をpickleで保存

In [30]:
f = open('./pickle/{}/test_ids.pickle'.format(category), 'wb')
pickle.dump(test_ids, f)

### Triplet作成用関数
- エポックごとにTripletの組み合わせをランダムに変更するためpickle保存ができない
- エポックごとに変えるのは普通なのか怪しい
- **Tripletを200個くらい予め作成しpickle保存しておけば今後回すのが楽になるのでは。**

In [31]:
from PIL import Image
def get_np_triplets(triplet_PATHs):
    triplets = []
    for triplet in tqdm(triplet_PATHs):
        anc_img = Image.open(triplet[0]).convert('RGB')
        pos_img = Image.open(triplet[1]).convert('RGB')
        neg_img = Image.open(triplet[2]).convert('RGB')

        anc_img = np.array(anc_img.resize((128,128)))/255. #resize to (128,128,3)
        pos_img = np.array(pos_img.resize((128,128)))/255.    
        neg_img = np.array(neg_img.resize((128,128)))/255.    

        tri = [anc_img,pos_img,neg_img]
        triplets.append(np.array(tri))

    triplets = np.array(triplets)
    return triplets

## 学習する

- 各エポックでtestデータを用いて`N-top acc`を出したい
- epochの外でtrain,testに分割するパターンなので常にtestのidは同じ
- `train_triplet`のnegativeが毎回ランダムになるので偏らないメリットがある->**pickle保存済みのtripletを用いれば学習を効率化できる**
- `model.fit()`は`epochs=1`で行う
- `model_history=[]`に各エポックにおけるメトリクス(loss/accuracy等)をappendすることで後から推移を確認できる。

In [32]:
model_dir = './model/{}/Dual_improved/a{}b{}'.format(category,ALPHA,BETA)
model_dir

'./model/T_Shirt/Dual_improved/a0.1b0.05'

In [33]:
vec_length

500

In [34]:
epochs = 100

In [35]:
model_history = []
for epoch in range(epochs):
    print('epoch %s'% epoch)
    if epoch % 5 == 0:
        if epoch != 0: del triplets
        triplets_train_PATHs = get_triplets(train_ids,BASE_PATH)
        triplets = get_np_triplets(triplets_train_PATHs)
        del triplets_train_PATHs
    # fit
    hist = tripletNet.fit([triplets[:,0],triplets[:,1],triplets[:,2]], epochs=1, batch_size=50)
    model_history.append(hist.history)
    f = open(model_dir+'/{}/history{}.txt'.format(vec_length,epoch),'wb')
    pickle.dump(model_history, f)
    # 使い終わったので削除
##    del triplets
    if (epoch+1) % 5 == 0:
        shop_embNet.save(model_dir+'/{}/shop_e{}.h5'.format(vec_length,epoch))
        con_embNet.save(model_dir+'/{}/con_e{}.h5'.format(vec_length,epoch))
        
# 学習のhistoryを保存
f = open(model_dir+'/{}/history.txt'.format(vec_length),'wb')
pickle.dump(model_history, f)

  0%|          | 19/4123 [00:00<00:22, 185.17it/s]

epoch 0


100%|██████████| 4123/4123 [00:31<00:00, 129.54it/s]
 14%|█▎        | 5187/38378 [00:43<03:28, 159.53it/s]

TypeError: unsupported operand type(s) for /: 'Image' and 'float'

 14%|█▎        | 5187/38378 [01:00<03:28, 159.53it/s]