In [1]:
import os
import sys
import time
import sklearn
import PIL.Image

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from pprint import pprint

import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
# Default every figure to a (12, 12) size with axis grid lines switched off.
mpl.rcParams['figure.figsize'] = (12, 12)
mpl.rcParams['axes.grid'] = False

# Record the interpreter and library versions used for this run.
print(sys.version_info)
# Each library is listed exactly once (TensorFlow used to be printed twice).
for module in (tf, mpl, np, pd, sklearn, keras):
    print(module.__name__, module.__version__)

sys.version_info(major=3, minor=6, micro=7, releaselevel='final', serial=0)
tensorflow 2.4.0
matplotlib 3.3.3
numpy 1.19.5
pandas 1.1.5
sklearn 0.24.0
tensorflow 2.4.0
tensorflow.keras 2.4.0


In [7]:
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

# Build a TF1-compat session configuration object.
config = ConfigProto()
# Ask TensorFlow to grow its GPU memory allocation on demand instead of
# reserving it all up front.
config.gpu_options.allow_growth = True
# Create the session with this configuration; `session` keeps a module-level
# reference to it for the rest of the notebook.
session = InteractiveSession(config=config)

### Read image datasets

In [9]:
# Category names of the dataset; this list is later passed as `classes`
# to the data generators.
class_names = [
    'airplane',
    'automobile',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
]

# Every dataset file lives under a single root directory.
dataset_root = './datasets/cifar-10/'
train_label_file = dataset_root + 'trainLabels.csv'
test_csv_file = dataset_root + 'sampleSubmission.csv'
train_folder = dataset_root + 'train/'
test_folder = dataset_root + 'test/'

def parse_csv_file(filepath, folder):
    """Parse a label CSV into a list of ``(image_path, label)`` tuples.

    The first line of the file is treated as a header and skipped. Every
    other non-blank line must hold exactly two comma-separated fields: an
    image id and a label string. Blank lines (for example a trailing empty
    line at the end of the file) are ignored, and an empty file yields an
    empty list.

    Args:
        filepath: Path of the CSV file to read.
        folder: Directory that is joined in front of ``<image_id>.png``.

    Returns:
        A list of ``(image_full_path, label_str)`` tuples in file order.

    Raises:
        ValueError: If a non-blank data line does not split into exactly
            two comma-separated fields.
    """
    results = []
    with open(filepath, 'r') as f:
        # Skip the header row; the default keeps an empty file from raising.
        next(f, None)
        # Iterate lazily instead of loading the whole file with readlines().
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                # Blank lines would otherwise fail the two-field unpacking.
                continue
            image_id, label_str = line.split(',')
            image_full_path = os.path.join(folder, image_id + '.png')
            results.append((image_full_path, label_str))
    return results

# Turn both CSV files into lists of (image path, label) pairs.
train_labels_info = parse_csv_file(train_label_file, train_folder)
test_csv_info = parse_csv_file(test_csv_file, test_folder)

# Preview the first five entries of each list, then report the totals.
pprint(train_labels_info[:5])
pprint(test_csv_info[:5])
print(f"Number of training examples: {len(train_labels_info)}")
print(f"Number of test examples: {len(test_csv_info)}")

[('./datasets/cifar-10/train/1.png', 'frog'),
 ('./datasets/cifar-10/train/2.png', 'truck'),
 ('./datasets/cifar-10/train/3.png', 'truck'),
 ('./datasets/cifar-10/train/4.png', 'deer'),
 ('./datasets/cifar-10/train/5.png', 'automobile')]
[('./datasets/cifar-10/test/1.png', 'cat'),
 ('./datasets/cifar-10/test/2.png', 'cat'),
 ('./datasets/cifar-10/test/3.png', 'cat'),
 ('./datasets/cifar-10/test/4.png', 'cat'),
 ('./datasets/cifar-10/test/5.png', 'cat')]
Number of training examples: 50000
Number of test examples: 300000


In [13]:
# The first `train_split` labelled examples are used for training; the rest
# of the labelled list is held out for validation.
train_split = 45000
# Column names that the data generators read via `x_col` / `y_col`.
df_columns = ['filepath', 'class']

# Passing `columns=` at construction (instead of assigning `.columns`
# afterwards) also works when a split turns out to be empty.
train_df = pd.DataFrame(train_labels_info[:train_split], columns=df_columns)
valid_df = pd.DataFrame(train_labels_info[train_split:], columns=df_columns)
test_df = pd.DataFrame(test_csv_info, columns=df_columns)

print(train_df.head())
print(valid_df.head())
print(test_df.head())

                          filepath       class
0  ./datasets/cifar-10/train/1.png        frog
1  ./datasets/cifar-10/train/2.png       truck
2  ./datasets/cifar-10/train/3.png       truck
3  ./datasets/cifar-10/train/4.png        deer
4  ./datasets/cifar-10/train/5.png  automobile
                              filepath       class
0  ./datasets/cifar-10/train/45001.png       horse
1  ./datasets/cifar-10/train/45002.png  automobile
2  ./datasets/cifar-10/train/45003.png        deer
3  ./datasets/cifar-10/train/45004.png  automobile
4  ./datasets/cifar-10/train/45005.png    airplane
                         filepath class
0  ./datasets/cifar-10/test/1.png   cat
1  ./datasets/cifar-10/test/2.png   cat
2  ./datasets/cifar-10/test/3.png   cat
3  ./datasets/cifar-10/test/4.png   cat
4  ./datasets/cifar-10/test/5.png   cat


### Construct ImageDataGenerator from dataframe

In [26]:
# Meta configuration: image geometry, batch size and number of categories.
height, width, channels = 227, 227, 3
batch_size = 32
num_classes = 10

# Options shared by the training and validation flows. Images are read from
# the paths stored in the dataframe (not from a class-per-directory layout).
flow_options = dict(
    directory='./',                 # paths in the dataframe are relative to this directory
    x_col='filepath',               # dataframe column holding the image paths
    y_col='class',                  # dataframe column holding the labels
    classes=class_names,
    target_size=(height, width),    # every image is resized to these dimensions
    batch_size=batch_size,
    seed=7,                         # random seed for shuffling and transformations
    class_mode='sparse',
)

# Training images: rescaled by 1/255 and randomly augmented.
augmentation_options = {
    'rescale': 1./255,
    'rotation_range': 40,
    'width_shift_range': 0.2,
    'height_shift_range': 0.2,
    'shear_range': 0.2,
    'zoom_range': 0.2,
    'horizontal_flip': True,
    'fill_mode': 'wrap',
}
train_datagen = keras.preprocessing.image.ImageDataGenerator(**augmentation_options)
train_generator = train_datagen.flow_from_dataframe(
    train_df, shuffle=True, **flow_options)

# Validation images: only rescaled, and served in a fixed (unshuffled) order.
valid_datagen = keras.preprocessing.image.ImageDataGenerator(rescale=1./255)
valid_generator = valid_datagen.flow_from_dataframe(
    valid_df, shuffle=False, **flow_options)

# Number of images each generator found.
train_num, valid_num = train_generator.samples, valid_generator.samples
print(train_num, valid_num)

Found 45000 validated image filenames belonging to 10 classes.
Found 5000 validated image filenames belonging to 10 classes.
45000 5000


In [27]:
# Pull a single batch from the training generator to check the image and
# label shapes. The builtin next() replaces the single-iteration loop and
# the generator-specific `.next()` method.
x, y = next(train_generator)
print(x.shape, y.shape)
print(y)

(32, 227, 227, 3) (32,)
[2. 1. 4. 4. 4. 4. 6. 5. 2. 8. 4. 6. 6. 3. 7. 1. 7. 2. 8. 8. 3. 0. 5. 3.
 9. 1. 4. 5. 6. 7. 9. 2.]


### Build models

In [28]:
# Using AlexNet (slightly modified) structure: five convolutional layers with
# three max-pooling stages, followed by two 4096-unit dense layers with
# dropout and a softmax classifier.
model = keras.models.Sequential([
    # The generators yield images as (height, width, channels), so the input
    # shape is declared in that order.
    keras.layers.Conv2D(filters=96, kernel_size=11, strides=(4, 4), padding="valid",
                        activation="selu", input_shape=[height, width, channels]),
    keras.layers.MaxPool2D(pool_size=(3, 3), strides=(2, 2)),
    keras.layers.Conv2D(filters=256, kernel_size=5, strides=(1, 1), padding="same",
                        activation="selu"),
    keras.layers.MaxPool2D(pool_size=(3, 3), strides=(2, 2)),
    keras.layers.Conv2D(filters=384, kernel_size=3, strides=(1, 1), padding="same",
                        activation="selu"),
    keras.layers.Conv2D(filters=384, kernel_size=3, strides=(1, 1), padding="same",
                        activation="selu"),
    keras.layers.Conv2D(filters=256, kernel_size=3, strides=(1, 1), padding="same",
                        activation="selu"),
    keras.layers.MaxPool2D(pool_size=(3, 3), strides=(2, 2)),
    keras.layers.Flatten(),
    keras.layers.Dense(4096, activation="selu"),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(4096, activation="selu"),
    keras.layers.Dropout(0.5),
    # One output unit per category (previously hard-coded to 1000 units,
    # although the labels only take `num_classes` distinct values).
    keras.layers.Dense(num_classes, activation="softmax"),
])

# Labels are integer class indices (class_mode='sparse' in the generators),
# hence the sparse variant of categorical cross-entropy.
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_15 (Conv2D)           (None, 55, 55, 96)        34944     
_________________________________________________________________
max_pooling2d_9 (MaxPooling2 (None, 27, 27, 96)        0         
_________________________________________________________________
conv2d_16 (Conv2D)           (None, 27, 27, 256)       614656    
_________________________________________________________________
max_pooling2d_10 (MaxPooling (None, 13, 13, 256)       0         
_________________________________________________________________
conv2d_17 (Conv2D)           (None, 13, 13, 384)       885120    
_________________________________________________________________
conv2d_18 (Conv2D)           (None, 13, 13, 384)       1327488   
_________________________________________________________________
conv2d_19 (Conv2D)           (None, 13, 13, 256)      

### Triplet loss

In [30]:
from DeepHash.distance.tfversion import distance

In [31]:
def triplet_loss(self, anchor, pos, neg, margin):
    """Hinge-style triplet loss, averaged along axis 0.

    Computes ``max(d(anchor, pos) - d(anchor, neg) + margin, 0)``, where
    ``d`` is DeepHash's ``distance`` called with ``pair=False`` and
    ``dist_type="euclidean2"``, and reduces the result with a mean along
    axis 0. Histograms of both distances and of their difference are
    recorded through ``tf.summary.histogram``.

    Args:
        self: Unused in the body. NOTE(review): this is a module-level
            function written with a method-style signature, so callers
            still have to pass something in this position.
        anchor: Anchor embeddings.
        pos: Embeddings expected to be close to ``anchor``.
        neg: Embeddings expected to be far from ``anchor``.
        margin: Value added to ``pos_dist - neg_dist`` before clamping at 0.

    Returns:
        The clamped loss averaged along axis 0.
        NOTE(review): presumably a scalar if ``distance`` returns one value
        per example; confirm against DeepHash.distance.
    """
    # `tf.variable_scope` is not exposed in the TF 2.x top-level namespace
    # (it raises AttributeError). No variables are created in this block, so
    # a plain name scope is enough.
    with tf.name_scope('triplet_loss'):
        pos_dist = distance(anchor, pos, pair=False, dist_type="euclidean2")
        neg_dist = distance(anchor, neg, pair=False, dist_type="euclidean2")
        # Loss is zero once the negative is farther than the positive by at
        # least `margin`.
        basic_loss = tf.maximum(pos_dist - neg_dist + margin, 0.0)
        loss = tf.reduce_mean(basic_loss, 0)

        tf.summary.histogram('pos_dist', pos_dist)
        tf.summary.histogram('neg_dist', neg_dist)
        tf.summary.histogram('pos_dist - neg_dist', pos_dist - neg_dist)

    return loss

In [32]:
def quantization_loss(self, z, h):
    """Mean squared reconstruction error between ``z`` and ``h @ self.C``.

    The squared difference is summed over the last axis and the resulting
    values are then averaged.

    Args:
        self: Object providing the matrix ``C`` that ``h`` is multiplied with.
        z: Tensor to be approximated.
        h: Tensor that is matrix-multiplied with ``self.C``.

    Returns:
        The averaged sum-of-squares error.
    """
    with tf.name_scope('quantization_loss'):
        reconstruction = tf.matmul(h, self.C)
        squared_error = tf.square(z - reconstruction)
        # Sum over the last axis first, then average what remains.
        summed_error = tf.reduce_sum(squared_error, -1)
        q_loss = tf.reduce_mean(summed_error)
    return q_loss

In [None]:
def apply_quantization_loss(self, global_step):
    anchor, pos, neg = tf.split(self.img_last_layer, 3, axis=0)
    triplet_loss = triplet_loss(anchor, pos, neg, self.triplet_margin)
    cq_loss = quantization_loss(self.img_last_layer, self.b_img)
    self.loss = triplet_loss + cq_loss * self.cq_lambda
    
    lr = tf.train.exponential_decay(self.learning_rate,
                                    global_step,
                                    self.decay_step,
                                    self.decay_factor,
                                    staircase=True)
    opt = tf.train.MomentumOptimizer(learning_rate=self.lr, momentum=0.9)
    grads_and_vars = opt.compute_gradients(self.loss, self.train_layers+self.train_last_layer)
    fcgrad, _ = grads_and_vars[-2]
    fbgrad, _ = grads_and_vars[-1]
    
    tf.summary.scalar('loss', self.loss)
    tf.summary.scalar('triplet_loss', triplet_loss)
    tf.summary.scalar('cq_loss', cq_loss)
    tf.summary.scalar('lr', self.lr)
    self.merged = tf.summary.merge_all()