In [24]:
import tensorflow as tf
import pandas as pd
import numpy as np

In [25]:
mnist_train = pd.read_csv('Data/digit-recognizer/train.csv').astype(np.float32)
mnist_test = pd.read_csv('Data/digit-recognizer/test.csv').astype(np.float32)

In [26]:
y_train = mnist_train[['label']][:30000]
x_train = mnist_train[['pixel' + str(idx) for idx in range(784)]][:30000]

y_dev = mnist_train[['label']][30000:42000]
x_dev = mnist_train[['pixel' + str(idx) for idx in range(784)]][30000:42000]

x_test = mnist_test[['pixel' + str(idx) for idx in range(784)]]




In [4]:
x_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28000 entries, 0 to 27999
Columns: 784 entries, pixel0 to pixel783
dtypes: float32(784)
memory usage: 83.7 MB


In [28]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_dev = scaler.transform(x_dev)
x_test = scaler.transform(x_test)


print(x_train.shape, y_train.shape, x_dev.shape, y_dev.shape, x_test.shape)

(30000, 784) (30000, 1) (12000, 784) (12000, 1) (28000, 784)


In [29]:
EPSILON = 1e-3

In [39]:
tf.compat.v1.disable_eager_execution()

def DNN_BN(x, weights, beta, scale, activation_function = None):
    wx = tf.matmul(x, weights)
    mean, var = tf.nn.moments(x=wx, axes=[0])
    bn = tf.nn.batch_normalization(wx, mean, var, beta, scale, EPSILON)
    if not activation_function:
        return bn
    else:
        return activation_function(bn)
    
def DNN(x, weights, biases, activation_function = None):
    wx = tf.matmul(x, weights)
    score = wx + biases
    if not activation_function:
        return score
    else:
        return activation_function(score)
    
def weight_variable(shape):
    initial = tf.random.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)
def scale_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

def conv2d(x, W):
      return tf.nn.conv2d(input=x, filters=W, strides=[1, 1, 1, 1], padding='SAME')
def max_pool_2x2(x):
      return tf.nn.max_pool2d(input=x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    
def get_center_loss(features, labels):
    with tf.compat.v1.variable_scope('center', reuse=True):
        centers = tf.compat.v1.get_variable('centers')
    
    len_features = features.get_shape()[1]
    labels = tf.reshape(labels, [-1])

    centers_batch = tf.gather(centers, labels)

    loss = tf.reduce_sum(input_tensor=(features - centers_batch) ** 2, axis=[1])
 
    return loss

def update_centers(features, labels, alpha):
    with tf.compat.v1.variable_scope('center', reuse=True):
        centers = tf.compat.v1.get_variable('centers')
    
    labels = tf.reshape(labels, [-1])
    centers_batch = tf.gather(centers, labels)
    
    diff = centers_batch - features

    unique_label, unique_idx, unique_count = tf.unique_with_counts(labels)
    appear_times = tf.gather(unique_count, unique_idx)
    appear_times = tf.reshape(appear_times, [-1, 1])

    diff = diff / tf.cast((1 + appear_times), tf.float32)
    diff = alpha * diff

    centers = tf.compat.v1.scatter_sub(centers,labels, diff)
    
    return centers


In [40]:

xs = tf.compat.v1.placeholder(tf.float32, [None, 784])
ys = tf.compat.v1.placeholder(tf.int64, [None, 1])

ys_one_hot = tf.one_hot(ys, 10)
keep_prob = tf.compat.v1.placeholder(tf.float32)

with tf.compat.v1.variable_scope('center', reuse=tf.compat.v1.AUTO_REUSE):
    centers = tf.compat.v1.get_variable('centers', [10, 1024], dtype=tf.float32,\
                                        initializer=tf.compat.v1.constant_initializer(0), trainable=False)
#------CNN1-------#
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])

x_image = tf.reshape(xs, [-1,28,28,1])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)

#-------CNN2-------#
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])

h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

#-------DNN------#
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
s_fc1 = scale_variable([1024])

h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])

#h_fc1 = DNN(h_pool2_flat, W_fc1, b_fc1, tf.nn.relu)
h_fc1 = DNN_BN(h_pool2_flat, W_fc1, b_fc1, 1, activation_function = None)

center_loss = get_center_loss(h_fc1, ys)

update_centers = update_centers(h_fc1, ys, 0.5)

h_fc1_drop = tf.nn.dropout(h_fc1, 1 - (keep_prob))

#-------DNN2-----#
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])

y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2

#----------------#
softmax_loss = tf.nn.softmax_cross_entropy_with_logits(labels=tf.stop_gradient(ys_one_hot), logits=y_conv)

loss = tf.reduce_mean(input_tensor=softmax_loss + 0.1 * center_loss) 

train_op = tf.compat.v1.train.AdamOptimizer(1e-4).minimize(loss)

result = tf.argmax(input=y_conv,axis=1)

ground_truth = tf.reshape(ys, [-1])

correct_prediction = tf.equal(result, ground_truth)

accuracy = tf.reduce_mean(input_tensor=tf.cast(correct_prediction, tf.float32))

init = tf.compat.v1.global_variables_initializer()

with tf.compat.v1.Session() as sess:
    sess.run(init)

    for i in range(3):
        print('LOSS, Softmax_loss, Center_loss', sess.run([loss, softmax_loss, tf.reduce_mean(center_loss)], feed_dict = {xs: x_train, ys: y_train.values, keep_prob:1.0}))
        print('ACC@TRAIN:', sess.run(accuracy, feed_dict = {xs: x_train, ys: y_train.values, keep_prob:1.0}))
        print('ACC@DEV:', sess.run(accuracy, feed_dict = {xs: x_dev, ys: y_dev.values, keep_prob:1.0}))
        j = 0  
        while j < 30000:       
            _, cen = sess.run([train_op, update_centers], feed_dict = {xs: x_train[j:j+1000], ys: y_train[j:j+1000].values, keep_prob:1.0})
            j += 1000  
            
    pd.DataFrame({"ImageId": range(1, len(x_test) + 1), "Label": sess.run(result, feed_dict = {xs: x_test, keep_prob:1.0})}).to_csv('Data/digit-recognizer/CNN.csv', index=False)


LOSS, Softmax_loss, Center_loss [107.79014, array([2.5941334 , 0.5708584 , 3.5006802 , ..., 4.659817  , 0.43560472,
       1.9124224 ], dtype=float32), 1034.1765]
ACC@TRAIN: 0.104
ACC@DEV: 0.09133333
LOSS, Softmax_loss, Center_loss [76.74749, array([0.17514414, 0.00534059, 0.12958196, ..., 1.6332753 , 1.24791   ,
       0.36571258], dtype=float32), 760.2356]
ACC@TRAIN: 0.78833336
ACC@DEV: 0.7765833
LOSS, Softmax_loss, Center_loss [60.708534, array([0.11759663, 0.00240727, 0.08563621, ..., 0.7809577 , 1.5245128 ,
       0.09837157], dtype=float32), 601.8905]
ACC@TRAIN: 0.8494
ACC@DEV: 0.84575


In [23]:
tf.compat.v1.disable_eager_execution()

indices = tf.constant([[4], [3], [1], [7]])
updates = tf.constant([9, 10, 11, 12])
tensor = tf.ones([8], dtype=tf.int32)
tensor = tf.compat.v1.scatter_sub(tensor,indices, updates)
with tf.compat.v1.Session() as sess:
    sess.run(init)
    sess.run(tensor)

AttributeError: 'Tensor' object has no attribute '_lazy_read'