In [3]:
import tensorflow as tf
from __future__ import print_function
import numpy as np


## A1 Q1
Euclidean distance function, vectorized    

In [4]:
def euclideanDistance(x, z):
    '''
    We vectorized the operation with matrix operations
    the ||x - z ||^2 is = Sum(x - z)^2 = Sum from 1 to D
    of (x^2 - 2xz + z^2)
    
    x^2 = x^T times x
    z^2 = z^T times z
    we can find the sum of each row by using the reduce sum function
    
    sum of -2xz 
    we can use matrix multiplication, x times z^T to obtain xz,
    then multiply by -2
    '''
    n1 = x.shape[0]
    n2 = z.shape[0]    
    
    x_squared = tf.square(x)
    z_squared = tf.square(z) #tf.matmul(z ,tf.transpose(z))
    #print("x_squared.eval()")
    #print(x_squared.eval())
    x_2_sum = tf.reduce_sum(x_squared, 1)
    z_2_sum = tf.reduce_sum(z_squared, 1)
    
    x_2_sum = tf.transpose(x_2_sum)
    z_2_sum = tf.transpose(z_2_sum)
    #x_2_sum = tf.reshape(x_2_sum, [-1, 1])
    #z_2_sum = tf.reshape(z_2_sum, [-1, 1])
    #print("x_2_sum.eval()")
    #print(x_2_sum.eval())
    
    xz = tf.matmul(x, tf.transpose(z))
    #print(xz.eval())
    minus_2xz = tf.scalar_mul(-2, xz)
    #print(minus_2xz.eval())
 
    x_2_tile =tf.tile(tf.expand_dims(x_2_sum, 1), [1, n2])
    z_2_tile = tf.tile(tf.expand_dims(z_2_sum ,1), [1, n1])
    z_2_tile_T = tf.transpose(z_2_tile)
    #print(x_2_tile.eval())
    #print(z_2_tile_T.eval())
    
    result = x_2_tile + minus_2xz +z_2_tile_T
    
    return result

"""VERIFIED"""

def testi():
    A = tf.constant([[1, 1], [2,2], [3, 3], [4,4]])
    B = tf.constant([[1, 1], [2, 2],[3,3]])
    res_mine = euclideanDistance(A, B)
    
    print("----my func----")
    print(res_mine.eval())
    print("---diff square---")
    res_lib = PairwiseDistances(A, B)
    print(res_lib.eval())
    

## A1 Q2
### part 1

| x1^T | x2^T | .... |
top k closest in x1^T indices are the neighbours


yˆ(x∗) = YTr∗, where r∗ = [r1,...,rN],rn = 1/k
0, otherwise.




In [5]:
#given starter code
import numpy as np
import matplotlib.pyplot as plt

# my code part1 responsibility
def nearestIndices(_dist_mat, _k):
    # with tf.Session() as sess:
    #_dist_mat = tf.constant([ [4, 9], [16, 25] ], tf.int32)
    print("our k value", _k)
    #print("dist mat shape", _dist_mat.shape)
    nearest_k_data, nearest_k_indices = tf.nn.top_k(tf.negative(_dist_mat), _k)
       
    #print(type(indices_arr))
    return nearest_k_data, nearest_k_indices


#unit testing
#dist_mat = tf.constant([ 4, 9, 16, 25 ], tf.int32)
#topk = nearestIndices(dist_mat, 2)
#responsibility(topk, 2, 4)



In [152]:
def pairDist(_data, _data1):
    #TODO: what if no library function
    data_t = tf.convert_to_tensor(_data)
    data1_t = tf.convert_to_tensor(_data1)
    dist = euclideanDistance(data_t, data1_t)
    return dist
  

def knn(_trainData, _trainTarget, _new_data, _k):
    '''
    KNN using responsibility
    '''
    #nearest indices
    dist_mat = pairDist( _new_data, _trainData )    
    #dist_mat = tf.convert_to_tensor(dist_mat)
    nearest_k, nearest_k_idx = nearestIndices(dist_mat, _k)
    predict_res = tf.reduce_mean(tf.gather(_trainTarget, nearest_k_idx), 1)
    #print("predict", predict_res.eval())
    return predict_res


def knnVote(_trainData, _trainTarget, _new_data, _k):
    '''
    KNN using majority vote
    '''
    #nearest indices
    dist_mat = pairDist( _new_data, _trainData )    
    nearest_k, nearest_k_idx = nearestIndices(dist_mat, _k)
    neighbours = tf.gather(_trainTarget, nearest_k_idx)
    print("neibours index before", neighbours)
    
    s1 = neighbours.shape[0]
    n_unstack = tf.unstack(neighbours, axis = 1)
    #print("n_unstack", n_unstack)
    print("neighbours after reshape", neighbours)

    nearest_k_y, idx, votes = [], [], []
    predict_res = []
    for i in n_unstack:
        y, i, v = tf.unique_with_counts(i) 
        nearest_k_y.append(y)
        idx.append(i)
        votes.append(v)
        predict_res.append(tf.argmax(v))
        
    #print("predict", predict_res.eval())
    return predict_res

  

#pairDist(testData) 
#pred_result = knn(testData, testTarget, _k = 3)

In [153]:
def plotResult(_trainData, _trainTarget):
    X = np.linspace(0.0, 11.0, num = 100)[:, np.newaxis]
    xTensor = tf.stack(X)
    
    num_neighbour_list = [1, 3, 5, 50]
    for j in num_neighbour_list:
        print("plot result")
        dist_mat = pairDist( X, _trainData ) 
        nearest_k, nearest_k_idx = nearestIndices(dist_mat, j)
        predict_res = tf.reduce_mean(tf.gather(_trainTarget, nearest_k_idx), 1)

        plt.figure()#(j+100)
        plt.scatter(sessMain.run(_trainData), sessMain.run(_trainTarget))
        plt.title("K = " + str(j))
        plt.plot(sessMain.run(xTensor), sessMain.run(predict_res))
        
        fileName = str("KNN") + str(j) + str("trainingGraph.png")
        plt.savefig(fileName)


In [154]:
def testKValuesKNN(trainData, trainTarget, testData, testTarget):
    num_neighbour_list = [1, 3, 5, 50]
    loss_list = []

    for j in num_neighbour_list:
        print("j = ", j)
        y_hat = knn(trainData, trainTarget, testData,  j)
        #_y_hat = tf.transpose(_y_hat)
        #mse_mat = euclideanDistance(_y_hat, _y) 
      
        mse_mat = tf.square(tf.subtract(y_hat, testTarget))
        loss = tf.reduce_mean(mse_mat)/2.0
        print("neighbout = " ,j)
        loss_list.append(loss.eval())
    return loss_list
        

In [155]:
def q1():
    np.random.seed(521)
    Data = np.linspace(1.0 , 10.0 , num =100) [:, np. newaxis]
    Target = np.sin( Data ) + 0.1 * np.power( Data , 2) \
         + 0.5 * np.random.randn(100 , 1)
    randIdx = np.arange(100)
    np.random.shuffle(randIdx)
    trainData, trainTarget  = Data[randIdx[:80]], Target[randIdx[:80]]
    validData, validTarget = Data[randIdx[80:90]], Target[randIdx[80:90]]
    testData, testTarget = Data[randIdx[90:100]], Target[randIdx[90:100]]

    # convert numpy array to tensors
    trainData = tf.stack(trainData)
    trainTarget = tf.stack(trainTarget)
    testData = tf.stack(testData)
    testTarget = tf.stack(testTarget)
    validData = tf.stack(validData)
    validtarget = tf.stack(validTarget)
    
    plotResult(trainData, trainTarget)
    
    loss_test = testKValuesKNN(trainData, trainTarget, testData, testTarget)
    print("loss_test", loss_test)
    
    loss_train = testKValuesKNN(trainData, trainTarget, trainData, trainTarget)
    print("loss_train" , loss_train)
    
    loss_valid = testKValuesKNN(trainData, trainTarget, validData, validTarget)
    print("loss_valid", loss_valid)

## Loss on KNN

|k | 1 | 3 | 5| 50|
|------|------|-----|-----|
|test | 0.12799977712101845 | 0.14242504248546536 | 0.18633105926605592 | 0.70693467047889302 |
|train | 0.0 | 0.10825207710580038 | 0.12183845521874122 | 1.2477892734500411 |
|valid | 0.28807977607463453 | 0.30897640431136369 | 0.31043863052707066 | 1.2230445257949047 |

In [156]:
def data_segmentation(data_path, target_path, task):
    # task = 0 >> select the name ID targets for face recognition task
    # task = 1 >> select the gender ID targets for gender recognition task data = np.load(data_path)/255
    data = np.load(data_path)/255
    data = np.reshape(data, [-1, 32*32])
    target = np.load(target_path)
    np.random.seed(45689)
    rnd_idx = np.arange(np.shape(data)[0])
    np.random.shuffle(rnd_idx)
    trBatch = int(0.8*len(rnd_idx))
    validBatch = int(0.1*len(rnd_idx))
    trainData, validData, testData = data[rnd_idx[1:trBatch],:], \
                                   data[rnd_idx[trBatch+1:trBatch + validBatch],:],\
                                   data[rnd_idx[trBatch + validBatch+1:-1],:]
    trainTarget, validTarget, testTarget = target[rnd_idx[1:trBatch], task], \
                              target[rnd_idx[trBatch+1:trBatch + validBatch], task],\
                              target[rnd_idx[trBatch + validBatch + 1:-1], task]
    return trainData, validData, testData, trainTarget, validTarget, testTarget

def rgb2gray(image):
    return dot(image[...,:3], [0.299, 0.587, 0.114])

In [157]:
def q2():
    data_path = '/Users/vikuo/Documents/GitHub/ece521/assi/A1/data/data.npy'
    target_path = '/Users/vikuo/Documents/GitHub/ece521/assi/A1/data/target.npy'
    trainData, validData, testData, trainTarget, validTarget, testTarget = data_segmentation(data_path, target_path, 0)
    
    # convert numpy array to tensors
    trainData = tf.stack(trainData)
    trainTarget = tf.stack(trainTarget)
    testData = tf.stack(testData)
    testTarget = tf.stack(testTarget)
    validData = tf.stack(validData)
    validtarget = tf.stack(validTarget)
    
    loss_train = testQ2KNN(trainData, trainTarget, trainData, trainTarget)
    print("loss train", loss_train)
    
    loss_test = testQ2KNN(trainData, trainTarget, testData, testTarget)
    print("loss test", loss_test)
    
    loss_valid = testQ2KNN(trainData, trainTarget, testData, testTarget)
    print("loss valid", loss_valid)

In [158]:
def testQ2KNN(trainData, trainTarget, testData, testTarget):
    num_neighbour_list = [1, 5, 10, 25, 50, 100, 200]
    loss_list = []
    for j in num_neighbour_list:
        print("j = ", j)
        y_hat = knnVote(trainData, trainTarget, testData,  j)
        testTarget = tf.cast(testTarget, tf.int64)
        print(testTarget)
        mse_mat = tf.square(tf.subtract(y_hat, testTarget))
        loss = tf.reduce_mean(mse_mat)#/2.0
        print("div by 2!!!", loss)
        print("neighbout = " ,j)
        loss_list.append(loss.eval())
    return loss_list


In [159]:
from scipy import spatial as sp
from sklearn import metrics as skm


if __name__ == "__main__":
    
    init = tf.global_variables_initializer()
    sessMain = tf.InteractiveSession()
    sessMain.run(init)
    #q1()

    q2()
    
    
    
    



        

j =  1
our k value 1
neibours index before Tensor("Gather_23:0", shape=(747, 1), dtype=uint8)
neighbours after reshape Tensor("Gather_23:0", shape=(747, 1), dtype=uint8)
Tensor("Cast_5:0", shape=(747,), dtype=int64)
div by 2!!! Tensor("Mean_3:0", shape=(), dtype=int64)
neighbout =  1
j =  5
our k value 5
neibours index before Tensor("Gather_24:0", shape=(747, 5), dtype=uint8)
neighbours after reshape Tensor("Gather_24:0", shape=(747, 5), dtype=uint8)
Tensor("Cast_5:0", shape=(747,), dtype=int64)


ValueError: Dimensions must be equal, but are 5 and 747 for 'Sub_9' (op: 'Sub') with input shapes: [5], [747].