## SphereFace Network
[SphereFace: Deep Hypersphere Embedding for Face Recognition](http://wyliu.com/papers/LiuCVPR17v3.pdf)

Based on the [tensorflow-sphereface](https://github.com/hujun100/tensorflow-sphereface) implementation.

### Structure: four convolution units
- Conv1.x
- Conv2.x
- Conv3.x
- Conv4.x

### Split the network into different parts
- Conv with strides
- Conv with residual units

### Caffe implementation
Use [Netscope](http://ethereon.github.io/netscope/#/editor) to visualize the Caffe network defined in the prototxt file.

**Note**: Each Conv layer is followed by a PReLU layer.

In [1]:
import tensorflow as tf
from tensorflow.python.framework import ops
import numpy as np
# Clear the default graph before any ops are defined; presumably so that
# re-running the notebook does not pile duplicate ops/variables into it.
tf.reset_default_graph()

### 1.prelu

In [2]:
def prelu(x, name = 'prelu'):
    """Apply a parametric ReLU to `x`.

    A learnable slope `alpha` is created under the variable scope `name`,
    with one entry per element of the last dimension of `x`. It is
    initialised to 0.25 and regularised with the module-level
    `l2_regularizer`.

    Args:
        x: input tensor; its last dimension must be statically known.
        name: variable scope under which `alpha` is created.

    Returns:
        A tensor equal to `x` where `x >= 0` and `alpha * x` elsewhere.
    """
    num_channels = x.get_shape()[-1]
    with tf.variable_scope(name):
        alphas = tf.get_variable(
            'alpha',
            num_channels,
            initializer=tf.constant_initializer(0.25),
            regularizer=l2_regularizer,
            dtype=tf.float32)
    positive_part = tf.nn.relu(x)
    # (x - |x|) / 2 is x for negative inputs and 0 otherwise, i.e. min(x, 0).
    clipped_negative = (x - abs(x)) * 0.5
    negative_part = tf.multiply(alphas, clipped_negative)
    return positive_part + negative_part

### 2.conv with strides

In [3]:
def first_conv(input, num_output, name):
    """Stride-2 3x3 convolution followed by PReLU.

    This is the layer that opens each ConvN.x unit: with 'same' padding and
    a stride of 2 it halves (rounding up) both spatial dimensions.

    Args:
        input: input feature map tensor.
        num_output: number of output channels.
        name: variable scope for the layer; also passed on as the PReLU
            scope name, so the slope variable lives under `name/name`.

    Returns:
        The activated, downsampled feature map.
    """
    with tf.variable_scope(name):
        strided = tf.layers.conv2d(
            input,
            num_output,
            kernel_size=[3, 3],
            strides=(2, 2),
            padding='same',
            kernel_initializer=xavier,
            bias_initializer=tf.zeros_initializer(),
            kernel_regularizer=l2_regularizer,
            bias_regularizer=l2_regularizer)
        return prelu(strided, name=name)

### 3.conv with residual units

In [4]:
def block(input, name, num_output):
    """Residual unit: two bias-free 3x3 conv + PReLU layers plus a skip add.

    Args:
        input: input feature map; its channel count must equal `num_output`
            so that it can be added to the branch output.
        name: variable scope for the unit.
        num_output: number of channels produced by each convolution.

    Returns:
        `input + F(input)`, where F is the two-layer conv/PReLU branch.
    """
    def conv3x3(tensor):
        # Stride-1, 'same'-padded conv: spatial size is preserved.
        return tf.layers.conv2d(
            tensor,
            num_output,
            kernel_size=[3, 3],
            strides=[1, 1],
            padding='same',
            kernel_initializer=tf.random_normal_initializer(stddev=0.01),
            use_bias=False,
            kernel_regularizer=l2_regularizer)

    with tf.variable_scope(name):
        # NOTE(review): the PReLU scopes are built from the literal string
        # 'name' (giving 'name1' / 'name2'), not from the `name` argument.
        # This looks like a typo for `name + '1'`, but it is kept as-is
        # because changing it would rename the variables and break any
        # existing checkpoints.
        residual = prelu(conv3x3(input), name='name' + '1')
        residual = prelu(conv3x3(residual), name='name' + '2')
        return tf.add(input, residual)

### 4.infer

In [5]:
# Shared L2 regularizer (scale 1.0) referenced by prelu, first_conv, block and infer.
l2_regularizer= tf.contrib.layers.l2_regularizer(1.0)
# Xavier initializer used for the stride-2 conv kernels and the embedding dense layer.
xavier = tf.contrib.layers.xavier_initializer_conv2d() 
def get_shape(tensor):
    """Return the shape of `tensor` as a list, preferring static sizes.

    Each entry is the Python int from the static shape when that dimension
    is known; otherwise it is the matching scalar tensor taken from
    `tf.shape(tensor)`.

    Args:
        tensor: a tensor of known rank.

    Returns:
        A list with one entry (int or scalar tensor) per dimension.
    """
    known_sizes = tensor.shape.as_list()
    runtime_sizes = tf.unstack(tf.shape(tensor))
    return [
        runtime if known is None else known
        for known, runtime in zip(known_sizes, runtime_sizes)
    ]
def infer(input,embedding_size=512):
    """Build the SphereFace backbone and return the face embedding.

    The network has four units (conv1_ .. conv4_). Each unit starts with a
    stride-2 convolution (`first_conv`) followed by one or more residual
    units (`block`). The final feature map is flattened and projected to
    `embedding_size` dimensions by a dense layer.

    Args:
        input: batch of images, NHWC.
        embedding_size: width of the output embedding.

    Returns:
        A `[batch, embedding_size]` tensor.
    """
    # (outer scope, stride-2 conv name, channels, residual unit names)
    stages = (
        ('conv1_', 'conv1', 64, ('conv1_23',)),
        ('conv2_', 'conv2', 128, ('conv2_23', 'conv2_45')),
        ('conv3_', 'conv3', 256,
         ('conv3_23', 'conv3_45', 'conv3_67', 'conv3_89')),
        ('conv4_', 'conv4', 512, ('conv4_23',)),
    )

    network = input
    for scope, conv_name, channels, residual_names in stages:
        with tf.variable_scope(scope):
            network = first_conv(network, channels, name=conv_name)
            for residual_name in residual_names:
                network = block(network, residual_name, channels)

    with tf.variable_scope('feature'):
        dims = get_shape(network)
        print(dims)
        # Flatten everything except the batch dimension before the dense layer.
        flattened = tf.reshape(network, [dims[0], np.prod(dims[1:])])
        feature = tf.layers.dense(
            flattened,
            embedding_size,
            kernel_regularizer=l2_regularizer,
            kernel_initializer=xavier)
    return feature

In [17]:
# Smoke test: build the network on one random 112x96 RGB image and dump the
# graph so it can be inspected in TensorBoard.
tf.reset_default_graph()
image = tf.random_normal([1,112,96,3])
# Alternative fixed (non-resampled) input:
#image = tf.constant(np.random.normal(size=[1,112,96,3]),dtype=tf.float32)
feature = infer(image)
# Call-form print behaves the same on Python 2 and Python 3.
print(feature.get_shape())
# Close the writer explicitly so the graph event is flushed to disk instead of
# waiting on the writer's background flush.
summary_writer = tf.summary.FileWriter('sphereface_network',tf.get_default_graph())
summary_writer.close()

[1, 7, 6, 512]
(1, 512)


<tensorflow.python.summary.writer.writer.FileWriter at 0x7ff213e7b790>

In [18]:
# Toy regression head used only to check that gradients flow through the
# backbone: map the embedding to a scalar and push it towards 1.
pred = tf.layers.dense(feature,1)
# Call-form print behaves the same on Python 2 and Python 3.
print(pred.get_shape())
loss = tf.nn.l2_loss(pred-1)
# NOTE: despite its name, `optimizer` holds the training op returned by minimize().
optimizer = tf.train.GradientDescentOptimizer(0.0001).minimize(loss)

(1, 1)


In [19]:
# Run 500 SGD steps on the toy loss and print it every 20 steps. `image` is a
# random_normal op, so each step sees a freshly sampled input.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # range/print() instead of xrange/print statement: works on Python 2 and 3.
    for i in range(500):
        loss_np,_ = sess.run([loss,optimizer])
        if i % 20 ==0:
            print(loss_np)

0.384229
0.277823
0.0985079
0.126074
0.0673081
0.0297425
0.00743761
0.0312544
0.00901861
0.00035515
0.00304774
0.000781628
0.000343543
3.64557e-06
0.00109801
0.00045575
8.61884e-05
1.42084e-05
1.65151e-05
0.00298974
0.000394176
0.000196685
0.00237238
0.00044498
0.000510066
