# Classifier

In [None]:
#%matplotlib notebook
import os
import matplotlib.pyplot as plt
import numpy as np
from JSAnimation import IPython_display
from matplotlib import animation
import matplotlib.patches as mpatches
import cv2
import scipy.misc
import tensorflow as tf
import tensorflow.contrib.layers as c_layers
import seaborn as sns
import pandas as pd
import glob
from scipy import misc
from keras.preprocessing.image import ImageDataGenerator

### Data Loading

In [None]:
# Directory names of the 18 label combinations used for training. Each name is the
# textual form of a 4-element label vector (first entry 0-4, the other three 0/1).
# NOTE(review): the class-index groups hard-coded in a later cell assume that Keras
# assigns class indices in this list order - confirm before reordering or adding entries.
all_classes = ['[0, 0, 0, 0]','[0, 0, 0, 1]','[0, 0, 1, 0]','[0, 1, 0, 0]','[0, 1, 1, 0]','[1, 0, 0, 0]',
               '[1, 0, 1, 0]','[2, 0, 0, 0]','[2, 0, 0, 1]','[2, 0, 1, 0]','[2, 1, 0, 0]',#'[2, 1, 1, 0]','[2,0,1,1]'
               '[3, 0, 0, 0]','[3, 0, 0, 1]','[3, 0, 1, 0]','[3, 1, 0, 0]','[4, 0, 0, 0]','[4, 0, 0, 1]',
               '[4, 0, 1, 0]']
# One generator serves both subsets: 10% of the images are reserved for validation and
# pixel values are multiplied by 1/255.
datagen = ImageDataGenerator(validation_split=0.1,rescale=1./255)
# Both iterators read the same directory, yield shuffled batches of 256 images resized to
# 168x168 and (class_mode='sparse') one integer class index per image.
train_it = datagen.flow_from_directory('./Results/TowerTraining/Classifier/Sorted/', class_mode='sparse',
                                       batch_size=256,shuffle=True,subset="training",target_size=(168,168),classes=all_classes)
val_it = datagen.flow_from_directory('./Results/TowerTraining/Classifier/Sorted/', class_mode='sparse',
                                       batch_size=256,shuffle=True,subset="validation",target_size=(168,168),classes=all_classes)

# realLabel[k] is the 4-element integer vector parsed from the k-th class name yielded by
# class_indices: the surrounding brackets are stripped and the ', '-separated ints parsed.
# NOTE(review): this relies on class_indices iterating in index order - confirm.
realLabel = []
for c,v in train_it.class_indices.items():
    c_ext = np.fromstring(c[1:-1], dtype=int, sep=', ')
    realLabel.append(c_ext)

def getRealLabel(labelBatch,RL):
    """Translate a batch of class indices into their label entries.

    Every element of ``labelBatch`` is cast to ``int`` and used as an index
    into ``RL``; the looked-up entries are returned as a list in batch order.
    """
    return [RL[int(class_idx)] for class_idx in labelBatch]

In [None]:
# For each of the four output branches, the class indices (positions in the class list
# handed to the data iterator) that belong to each value of that branch.
classes = [[[0,1,2,3,4],[5,6],[7,8,9,10],[11,12,13,14],[15,16,17]],[[0,1,2,5,6,7,8,9,11,12,13,15,16,17],[3,4,10,14]],
           [[0,1,3,5,7,8,10,11,12,14,15,16],[2,4,6,9,13,17]],[[0,2,3,4,5,6,7,9,10,11,13,14,15,17],[1,8,12,16]]]
# num_exp[branch][value] = number of training samples whose class index is in that group.
num_exp = [
    [
        sum(np.count_nonzero(train_it.classes == class_idx) for class_idx in group)
        for group in branch_groups
    ]
    for branch_groups in classes
]
print('Number of samples: '+str(num_exp))
# Sanity check: every branch partitions the same samples, so the totals must agree.
branch_totals = [np.sum(counts) for counts in num_exp]
if all(total == branch_totals[0] for total in branch_totals[1:]):
    print('all good')

In [None]:
# Balanced per-branch class weights: total_samples / (n_values * samples_per_value),
# so rarer values of a branch receive proportionally larger weights.
print('Class Weights: ')
class_weights = []
for branch_counts in num_exp:
    counts = np.array(branch_counts)
    branch_weights = counts.sum() / (counts.size * counts)
    class_weights.append(branch_weights)
    print(np.round(branch_weights,2))


In [None]:
# Display the class-name -> class-index pairs held by the training iterator.
train_it.class_indices.items()

In [None]:
'''@misc{guest2017gini,
  author = "Olivia Guest",
  title = "Using the Gini Coefficient to Evaluate Deep Neural Network Layer Representations",
  year = "2017",
  howpublished = "Blog post",
  url = "http://neuroplausible.com/gini"
}'''
def gini(array):
    """Calculate the Gini coefficient of a numpy array.

    The input is flattened, so every element is treated equally regardless of
    the original shape. A small epsilon (1e-7) is added to every value so that
    the denominator cannot be exactly zero for all-zero input.

    Args:
        array: numeric array (or array-like) of non-negative values. Integer
            and boolean inputs are promoted to float64; floating-point inputs
            keep their dtype. Negative values are not handled here; take the
            absolute value or shift the data before calling.

    Returns:
        The Gini coefficient as a float scalar: close to 0 when all values are
        equal, approaching 1 when a single element holds all the mass. An
        empty input yields ``nan``.
    """
    # based on bottom eq: http://www.statsdirect.com/help/content/image/stat0206_wmf.gif
    # from: http://www.statsdirect.com/help/default.htm#nonparametric_methods/gini.htm
    # flatten() always returns a copy, so the caller's data is never modified below.
    values = np.asarray(array).flatten()
    if not np.issubdtype(values.dtype, np.floating):
        # The in-place epsilon shift cannot be cast back into an integer/bool buffer.
        values = values.astype(np.float64)
    values += 0.0000001  # values cannot be 0
    values = np.sort(values)  # the formula requires ascending order
    n = values.shape[0]  # number of array elements
    index = np.arange(1, n + 1)  # 1-based rank of each sorted element
    return (np.sum((2 * index - n - 1) * values)) / (n * np.sum(values))


In [None]:
# Load one recorded run: the stored visual observations and the hand-made labels
# (columns 'Label' and 'Secondary label' are read from the CSV in this file).
path = "./Results/TowerTraining/Recordings/Standard/3999_16.100/"
obs = np.load(path+"visobs.npy")
hand_l = pd.read_csv(path+'HandLabels.csv')
# NOTE: this zero array is overwritten by the very next statement and never used.
label_test = np.zeros(obs.shape[0])
label_test = np.array(hand_l['Label'])
def addLabel(currL,ToAdd):
    """Apply one hand-label code to a 4-slot label vector, in place.

    Codes <= 1 leave the vector untouched. Codes 2-6 overwrite slot 0
    (2 -> 4, 3 -> 2, 4 -> 3, 5 -> 4, 6 -> 1); codes 7, 8 and 9 set slot 1, 2
    and 3 to 1 respectively. Any other code is printed and otherwise ignored.

    Returns:
        The same ``currL`` object that was passed in.
    """
    if ToAdd<=1:
        return currL
    # hand-label code -> (slot to overwrite, value to store)
    slot_updates = {
        2: (0, 4),
        3: (0, 2),
        4: (0, 3),
        5: (0, 4),
        6: (0, 1),
        7: (1, 1),
        8: (2, 1),
        9: (3, 1),
    }
    update = slot_updates.get(ToAdd)
    if update is None:
        # Unknown code: report it and leave the vector as it is.
        print(ToAdd)
    else:
        slot, value = update
        currL[slot] = value
    return currL

def formatLabels(labels1,labels2):
    """Combine primary and secondary hand-label codes into 4-slot label vectors.

    For every position, a fresh ``[0, 0, 0, 0]`` vector is updated first with
    the code from ``labels1`` and then with the code at the same position in
    ``labels2`` (both via ``addLabel``). The position of every ``labels1``
    entry equal to 0 is printed.

    Returns:
        ``np.array`` holding one 4-element row per entry of ``labels1``.
    """
    rows = []
    for position, primary in enumerate(labels1):
        if primary == 0:
            print(position)
        vector = addLabel([0,0,0,0], primary)
        vector = addLabel(vector, labels2[position])
        rows.append(vector)
    return np.array(rows)

# One 4-element label vector per row of the hand-label CSV, built from both label columns.
fLabels = formatLabels(np.array(hand_l['Label']),np.array(hand_l['Secondary label']))

## Object Classification
### Network

In [None]:
# Clear the default graph before the network is (re)built in this cell.
tf.reset_default_graph()

def swish(input_activation):
    """Swish activation function. For more info: https://arxiv.org/abs/1710.05941"""
    # swish(x) = x * sigmoid(x)
    return tf.multiply(input_activation, tf.nn.sigmoid(input_activation))

# Input image height/width; must match the target_size used by the data iterators.
o_size_h = 168
o_size_w = 168
# NOTE: vec_obs_size and h_size_vec are not referenced in the visible part of this file.
vec_obs_size = 8
# Number and width of the dense layers that follow the convolutions.
num_layers = 2
h_size = 256
h_size_vec = 256

# Batch of RGB images and, per image, one integer class id for each of the four branches.
visual_in = tf.placeholder(shape=[None, o_size_h, o_size_w, 3], dtype=tf.float32,name="visual_observation_0")
labels = tf.placeholder(shape=[None,4], dtype=tf.int64,name="labels")

def create_vector_observation_encoder(observation_input, h_size, activation, num_layers, scope,reuse):
    """Stack ``num_layers`` dense layers on top of ``observation_input``.

    All layers are created inside ``tf.variable_scope(scope)``, are named
    ``hidden_0``, ``hidden_1``, ..., have ``h_size`` units, use ``activation``
    and a variance-scaling kernel initializer with factor 1.0. ``reuse`` is
    forwarded to every layer.

    Returns:
        Output tensor of the last dense layer (the unchanged input when
        ``num_layers`` is 0).
    """
    with tf.variable_scope(scope):
        layer_out = observation_input
        for layer_idx in range(num_layers):
            layer_out = tf.layers.dense(
                layer_out,
                h_size,
                activation=activation,
                reuse=reuse,
                name="hidden_{}".format(layer_idx),
                kernel_initializer=c_layers.variance_scaling_initializer(1.0),
            )
    return layer_out

def create_visual_observation_encoder(image_input, h_size, activation, num_layers, scope,reuse):
    """Encode an image batch with two convolutions followed by dense layers.

    ``h_size``, ``activation`` and ``num_layers`` only configure the dense part
    (they are forwarded to ``create_vector_observation_encoder``); both
    convolutions always use ELU. ``reuse`` is passed to every layer.

    Returns:
        Output tensor of the dense encoder applied to the flattened conv features.
    """
    with tf.variable_scope(scope):
        # 16 filters of 8x8 with stride 4, then 32 filters of 4x4 with stride 2.
        conv1 = tf.layers.conv2d(image_input, 16, kernel_size=[8, 8], strides=[4, 4],activation=tf.nn.elu, reuse=reuse, name="conv_1")
        conv2 = tf.layers.conv2d(conv1, 32, kernel_size=[4, 4], strides=[2, 2],activation=tf.nn.elu, reuse=reuse, name="conv_2")
        hidden_vis = c_layers.flatten(conv2)

    # NOTE: the helper opens tf.variable_scope(scope) again inside this scope, so `scope`
    # is applied a second time underneath '<scope>/flat_encoding' for the dense layers.
    # Changing this would presumably rename the variables stored in existing checkpoints.
    with tf.variable_scope(scope + '/' + 'flat_encoding'):
        hidden_flat = create_vector_observation_encoder(hidden_vis, h_size, activation,num_layers, scope, reuse)
    return hidden_flat

def create_discrete_action_masking_layer(all_logits, action_size):
        """
        Splits the concatenated branch outputs, normalizes each branch and samples from it.
        Despite the name, no action mask is applied in this function.
        :param all_logits: The concatenated unnormalized outputs for all branches
        :param action_size: A list containing the number of possible values for each branch
        :return: A 3-tuple of
            - the sampled index per branch, concatenated along axis 1 (one column per branch),
            - the per-branch log probabilities concatenated along axis 1,
            - the list of per-branch log-probability tensors
        """
        # Column offsets at which each branch starts/ends inside all_logits.
        action_idx = [0] + list(np.cumsum(action_size))
        branches_logits = [all_logits[:, action_idx[i]:action_idx[i + 1]] for i in range(len(action_size))]#split in action branches (size=action_size)
        # Softmax per branch plus a small epsilon, then renormalized so each row sums to 1 again.
        raw_probs = [tf.nn.softmax(branches_logits[k]) + 1.0e-10 for k in range(len(action_size))]
        normalized_probs = [
            tf.divide(raw_probs[k], tf.reduce_sum(raw_probs[k], axis=1, keepdims=True))
            for k in range(len(action_size))]
        #normalized_probs = raw_probs
        # One sample per row and branch is drawn from the branch distribution (this is a
        # random draw, not an argmax).
        output = tf.concat([tf.multinomial(tf.log(normalized_probs[k]), 1) for k in range(len(action_size))], axis=1)#sample outputs from log probdist
        #output = tf.concat([tf.multinomial(tf.nn.log_softmax(raw_probs[k]), 1) for k in range(len(action_size))], axis=1)#sample outputs from log probdist

        # Log probabilities per branch; the epsilon guards against log(0).
        log_probs = [tf.log(normalized_probs[k] + 1.0e-10) for k in range(len(action_size))]
        log_probs_flat = tf.concat(log_probs, axis=1)
        return output, log_probs_flat, log_probs

# Only one visual encoder is built; the list/concat structure would allow several.
visual_encoders = []

encoded_visual = create_visual_observation_encoder(visual_in,h_size,swish,num_layers,"main_graph_0_encoder0", False)
visual_encoders.append(encoded_visual)
hidden = tf.concat(visual_encoders, axis=1)

# Number of possible values per output branch (must match the label vector layout).
class_size = [5,2,2,2]
policy_branches = []
for size in class_size:
    # One bias-free dense head per branch. NOTE: the ReLU keeps these outputs non-negative
    # before they are passed on as unnormalized scores.
    policy_branches.append(tf.layers.dense(hidden, size, activation=tf.nn.relu, use_bias=False,kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01)))

# Despite the name, this holds the raw (pre-softmax) head outputs of all branches side by side.
all_log_probs = tf.concat([branch for branch in policy_branches], axis=1, name="action_probs")

output, normalized_logits_flat, norm_logits = create_discrete_action_masking_layer(all_log_probs, class_size)

output = tf.identity(output)
# Original author's note: has nan in places where prob is negative because it is log(probs)
normalized_logits = tf.identity(normalized_logits_flat, name='action')

# Element-wise match between the label vectors and the predictions. NOTE: `output` is
# sampled from the branch distributions, so this accuracy is that of a random draw.
comparison = tf.equal(labels, output)

# Fraction of matching entries over all samples and all four branches.
accuracy = tf.reduce_mean(tf.cast(comparison, dtype=tf.float32))

# Per-sample cross entropy for each of the four branches (D, K, O, P). The tensors passed
# as `logits` are the per-branch log probabilities returned by the masking layer.
cross_entropiesD = tf.nn.sparse_softmax_cross_entropy_with_logits(logits = norm_logits[0], labels = labels[:,0])
cross_entropiesK = tf.nn.sparse_softmax_cross_entropy_with_logits(logits = norm_logits[1], labels = labels[:,1])
cross_entropiesO = tf.nn.sparse_softmax_cross_entropy_with_logits(logits = norm_logits[2], labels = labels[:,2])
cross_entropiesP = tf.nn.sparse_softmax_cross_entropy_with_logits(logits = norm_logits[3], labels = labels[:,3])
print(cross_entropiesD)

# Class weights computed in an earlier cell, one weight per value of each branch.
class_weightsD = tf.constant(class_weights[0],dtype='float32')
class_weightsK = tf.constant(class_weights[1],dtype='float32')
class_weightsO = tf.constant(class_weights[2],dtype='float32')
class_weightsP = tf.constant(class_weights[3],dtype='float32')
print(class_weightsD)

# Per-sample weight = class weight of that sample's true value in the branch
# (the one-hot row selects exactly one entry of the weight vector).
weightsD = tf.reduce_sum(class_weightsD * tf.one_hot(labels[:,0],5), axis=1)
weightsK = tf.reduce_sum(class_weightsK * tf.one_hot(labels[:,1],2), axis=1)
weightsO = tf.reduce_sum(class_weightsO * tf.one_hot(labels[:,2],2), axis=1)
weightsP = tf.reduce_sum(class_weightsP * tf.one_hot(labels[:,3],2), axis=1)
print(weightsD)
scaled_errorD = cross_entropiesD * weightsD
scaled_errorK = cross_entropiesK * weightsK
scaled_errorO = cross_entropiesO * weightsO
scaled_errorP = cross_entropiesP * weightsP

# The four per-sample vectors are passed as a list; reducing over axis=1 presumably
# averages over the batch and leaves one value per branch - TODO confirm shapes.
cross_entropies = tf.reduce_mean([cross_entropiesD,cross_entropiesK,cross_entropiesO,cross_entropiesP],axis=1)
print(cross_entropies)
scaled_error = tf.reduce_mean([scaled_errorD,scaled_errorK,scaled_errorO,scaled_errorP],axis=1)
# Calculate the mean cross entropy for the mini-batch
mean_cross_entropy = tf.reduce_mean(cross_entropies)
mean_scaled_error = tf.reduce_mean(scaled_error)

optimizer = tf.train.AdamOptimizer(0.0001)

# Gradient tensors kept for inspection (w.r.t. the variables and w.r.t. `hidden`).
# The training op below is built with minimize() and does not consume `gradients`.
gradients = optimizer.compute_gradients(mean_scaled_error)
hidden_grad = tf.gradients(mean_scaled_error,hidden)

# Training minimizes the class-weighted error, not the plain cross entropy.
training_step = optimizer.minimize(mean_scaled_error)

saver = tf.train.Saver()

In [None]:
# Train on the true labels. Each epoch: evaluate one validation batch and the recorded
# test run, checkpoint on a new best validation accuracy, then train on 128 batches.
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

# Histories with one entry per epoch; the first three hold the list of per-batch values.
Allentropies = []
Allaccuracies = []
Allscaled_errs = []
AllValaccuracies = []
AllGinis = []
LastSave = 0

print('            Entropy    Error   Accuracy')
epochs = 300
for i in range(epochs):
    count = 0
    # Validation metrics are measured on a single batch drawn from the validation iterator.
    xVal,yVal = val_it.next()
    yVal = getRealLabel(yVal,realLabel)
    Valentro, Valsc_err, Valaccu,Valcomp,Valout,ValProbs = sess.run([mean_cross_entropy, mean_scaled_error,accuracy, comparison,output,normalized_logits_flat], feed_dict = {visual_in: xVal, labels: yVal})
    if i==0:
        print('Validation:  '+str(Valentro)+ ' - '+str(Valsc_err)+' - '+str(Valaccu))
        # Seed the history so the np.max() check below has a value to compare against.
        # NOTE: together with the regular append further down, the epoch-0 accuracy is
        # stored twice.
        AllValaccuracies.append(Valaccu)
    else:
        # entropies / scaled_errs / accuracies still hold the previous epoch's batches.
        print('Training: '+str(np.mean(entropies))+' - '+str(np.mean(scaled_errs))+' - '+str(np.mean(accuracies))+
          '    Validation:  '+str(Valentro)+ ' - '+str(Valsc_err)+' - '+str(Valaccu))

    Testentro, Testsc_err, Testaccu,TestEnc = sess.run([mean_cross_entropy, mean_scaled_error,accuracy, encoded_visual], feed_dict = {visual_in: obs, labels: fLabels})
    # Sparsity of the encoder activations on the test recording (computed once per epoch).
    test_gini = gini(np.abs(TestEnc))
    print('Test:  '+str(Testentro)+ ' - '+str(Testsc_err)+' - '+str(Testaccu)+ ' - Gini: ' + str(np.round(test_gini,3)))
    AllGinis.append(test_gini)

    if Valaccu >np.max(AllValaccuracies):
        saver.save(sess, "./Results/TowerTraining/Classifier/Model_lr0001_scaled/model.ckpt")
        print('saved model')
        # The weights saved now are the result of training through epoch i-1.
        LastSave = i-1

    AllValaccuracies.append(Valaccu)

    # Per-epoch accumulators. They must be created once per epoch, outside the batch
    # loop: resetting them for every batch would leave only the last batch's values in
    # the printed "Training" means and in the saved histories.
    entropies = []
    accuracies = []
    scaled_errs = []
    for batchX, batchy in train_it:
        batchy = getRealLabel(batchy,realLabel)
        entro, sc_err, accu,comp, _ = sess.run([mean_cross_entropy, mean_scaled_error,accuracy, comparison,training_step], feed_dict = {visual_in: batchX, labels: batchy})
        entropies.append(entro)
        accuracies.append(accu)
        scaled_errs.append(sc_err)
        # The iterator never stops on its own; end the epoch after 128 batches.
        if count>126:
            Allentropies.append(entropies)
            Allaccuracies.append(accuracies)
            Allscaled_errs.append(scaled_errs)
            break
        count = count+1
    

In [None]:
# Write each training history to its own .npy file in the model directory.
for _history_file, _history in (
    ("./Results/TowerTraining/Classifier/Model_lr0001_scaled/Accs.npy", Allaccuracies),
    ("./Results/TowerTraining/Classifier/Model_lr0001_scaled/Entro.npy", Allentropies),
    ("./Results/TowerTraining/Classifier/Model_lr0001_scaled/Err.npy", Allscaled_errs),
    ("./Results/TowerTraining/Classifier/Model_lr0001_scaled/ValAcc.npy", AllValaccuracies),
):
    np.save(_history_file, _history)

## Train with Noisy Labels

In [None]:
def getNoisyLabel(labelBatch,RL,noise_prob):
    """Translate class indices into label entries, corrupting some at random.

    With probability ``noise_prob`` a class index is replaced by one drawn
    uniformly from ``0 .. len(RL) - 1`` before the lookup. The replacement may
    coincide with the original index, so the effective corruption rate is
    slightly below ``noise_prob``.

    Args:
        labelBatch: iterable of class indices (anything ``int()`` accepts).
        RL: sequence mapping a class index to its label entry.
        noise_prob: probability in [0, 1] of re-drawing a label.

    Returns:
        List with one entry of ``RL`` per element of ``labelBatch``.
    """
    newLB = []
    for label in labelBatch:
        if np.random.uniform() < noise_prob:
            # randint gives every class the same probability; rounding a uniform float
            # would give the first and last class only half the weight of the others.
            label = np.random.randint(len(RL))
        newLB.append(RL[int(label)])
    return newLB
# Quick visual check: draw one training batch and print, for its first ten samples, the
# class indices, then the noisy label vectors (20% re-draw probability), then the true ones.
_,yTrain = train_it.next()
print(yTrain[:10])
yTrainR = getRealLabel(yTrain[:10],realLabel)
yTrainN = getNoisyLabel(yTrain[:10],realLabel,0.2)

print(yTrainN)
print(yTrainR)

In [None]:
# Train with corrupted labels (10% re-draw probability per sample). Validation and test
# are still evaluated against the true labels. No checkpoint is written in this run.
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

# Histories with one entry per epoch; the per-batch lists are appended once per epoch.
Allentropies = []
Allaccuracies = []
AllValaccuracies = []
Allscaled_errs = []
AllGinis = []
LastSave = 0

epochs = 30
for i in range(epochs):
    count = 0
    # Validation metrics are measured on a single batch drawn from the validation iterator.
    xVal,yVal = val_it.next()
    yVal = getRealLabel(yVal,realLabel)
    Valentro, Valsc_err, Valaccu,Valcomp = sess.run([mean_cross_entropy, mean_scaled_error,accuracy, comparison], feed_dict = {visual_in: xVal, labels: yVal})
    if i==0:
        print('Validation:  '+str(Valentro)+ ' - '+str(Valsc_err)+' - '+str(Valaccu))
        # NOTE: together with the regular append further down, the epoch-0 accuracy is
        # stored twice.
        AllValaccuracies.append(Valaccu)
    else:
        # entropies / scaled_errs / accuracies still hold the previous epoch's batches.
        print('Training: '+str(np.mean(entropies))+' - '+str(np.mean(scaled_errs))+' - '+str(np.mean(accuracies))+
          '    Validation:  '+str(Valentro)+ ' - '+str(Valsc_err)+' - '+str(Valaccu))

    Testentro, Testsc_err, Testaccu,TestEnc = sess.run([mean_cross_entropy, mean_scaled_error,accuracy, encoded_visual], feed_dict = {visual_in: obs, labels: fLabels})
    # Sparsity of the encoder activations on the test recording (computed once per epoch).
    test_gini = gini(np.abs(TestEnc))
    print('Test:  '+str(Testentro)+ ' - '+str(Testsc_err)+' - '+str(Testaccu)+ ' - Gini: ' + str(np.round(test_gini,3)))
    AllGinis.append(test_gini)
    #if Valaccu >np.max(AllValaccuracies):
    #    saver.save(sess, "./Results/TowerTraining/Classifier/Model_lr0001/model.ckpt")
    #    print('saved model')
    #    LastSave = i-1
    AllValaccuracies.append(Valaccu)

    # Per-epoch accumulators. They must be created once per epoch, outside the batch
    # loop: resetting them for every batch would leave only the last batch's values in
    # the printed "Training" means and in the collected histories.
    entropies = []
    accuracies = []
    scaled_errs = []
    for batchX, batchy in train_it:
        batchy = getNoisyLabel(batchy,realLabel,0.1)
        entro, sc_err, accu,comp, _ = sess.run([mean_cross_entropy, mean_scaled_error,accuracy, comparison,training_step], feed_dict = {visual_in: batchX, labels: batchy})
        entropies.append(entro)
        accuracies.append(accu)
        scaled_errs.append(sc_err)
        # The iterator never stops on its own; end the epoch after 128 batches.
        if count>126:
            Allentropies.append(entropies)
            Allaccuracies.append(accuracies)
            Allscaled_errs.append(scaled_errs)
            break
        count = count+1
    

In [None]:
# Open a new session and load the checkpoint written by the clean-label training run.
# The initializer is run first; the restore then overwrites the saved variables.
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
saver.restore(sess, "./Results/TowerTraining/Classifier/Model_lr0001_scaled/model.ckpt")
# Fetch encoder activations, sampled predictions, per-branch log probabilities, raw head
# outputs, variable gradients and the gradient w.r.t. `hidden` for the recorded run.
# NOTE(review): the frames are fed as obs*255 here, whereas the training cells feed `obs`
# unscaled into the same placeholder - confirm which input range the model expects.
act,out,probs,probsO,grad,HG = sess.run([hidden,output,norm_logits,all_log_probs,gradients,hidden_grad], feed_dict = {visual_in: obs*255,labels: fLabels})

In [None]:
# Fraction of matching label entries, counted over all samples and all four branches.
print('After Training: '+str(np.sum(out==fLabels)/(fLabels.shape[0]*4)))

In [None]:
# Store the sampled predictions and the encoder activations next to the recording they
# were computed from.
np.save(path+'ClassOut.npy',out)
np.save(path+'ClassAct.npy',act)