# Home assignment 3

You should work on the assignement in groups of 2 participants. 

Upload your solution as a jupyter notebook to L2P by 17th of July 23:59h. (The deadline is strict)

Do not forget to specify the names of all contributing students in the jupyter notebook.

You should add comments to your code where necessary and print the relevant results.

# Network representation
Given a network obtain representations for the nodes in the jazz network (you can download it from 'http://konect.uni-koblenz.de/networks/arenas-jazz'). The representations are to be obtained the following way. 

1. Let $Z_u$ and $Z_v$ are the representations of nodes $u$ and $v$. At each step of stochastic gradient descent (SGD) you should randomly select a pair of nodes and minimize the loss function - 

   $(Z_u^T Z_v - A_{u,v})^2$
   
2. Obtain another representation of the nodes in the network using the same procedure as in 1 but this time with the loss function as - 

   $(Z_u^T Z_v - A_{u,v})^2 + (Z_u^T Z_v - A_{u,v}^2)^2$
   
3. From these two representations obtain the 5-nearest neighbors of node '0'. The distance between two nodes can be measured as the euclidean distance between the representations of the two nodes.


  
Hints: Calculate the gradient for the loss function and update the representaion vectors using SGD. You can keep the learning rate as 0.001 and the number of iterations as 5000

In [11]:
import re
import os
import numpy as np
import networkx as nx

learning_rate = 0.001
iterations = 5000
# constructing graph
G = nx.Graph()
graph_file = './out.arenas-jazz'
if os.path.exists(graph_file):
    with open(graph_file) as fs:
        try:
            for line in fs:
                if re.match('%.*', line): #filtering comments out
                    continue
                else:
                    u, v = line.strip().split()
                    G.add_edge(u, v)
        except:
            print('open ' + graph_file +' failed.')
else:
    print(graph_file + ' is not a correct path.')

adj_mat = nx.adjacency_matrix(G)
adj_mat = nx.to_numpy_matrix(G)
nodes_num = np.shape(adj_mat)[0] # The number of nodes
A = adj_mat/(np.sum(adj_mat, 1).reshape(nodes_num, 1)) # Dividing adj matrix by row sum
# Assuming these features are considered.
# name, sex, age, nationality, height, weight, role
representation_dim = 7
representations_mat = np.random.rand(representation_dim, nodes_num)

# node: representation of a node
# dataset: representations of all nodes
# num: the number of nearest node
def nearest_neighbors(node, dataset, num):
    distances = []
    for i_node in dataset:
        distances.append(np.linalg.norm(node - i_node))
    
    distances.sort()
    return distances[:num]
     
def loss_basic(R_mat, A_mat):
    loss = 0
    nodes_num = np.shape(R_mat)[1]
    Z_u = np.random.rand(representation_dim, 1)
    Z_v = np.random.rand(representation_dim, 1)
    for i in range(nodes_num):
        Z_u = R_mat[:, i] # column vector
        for j in range(nodes_num - i):
            Z_v = R_mat[:,j] # column vector
            # $(Z_u^T Z_v - A_{u,v})^2$
            if i != j:
                loss += 2*(np.square(np.dot(Z_u, np.transpose(Z_v)) - A_mat[i,j]))
            else:
                loss += np.square(np.dot(Z_u, np.transpose(Z_v)) - A_mat[i,j])
            
    return loss

def loss_advance(R_mat, A_mat):
    loss = 0
    nodes_num = np.shape(R_mat)[1]
    Z_u = np.random.rand(representation_dim, 1)
    Z_v = np.random.rand(representation_dim, 1)
    for i in range(nodes_num):
        Z_u = R_mat[i]
        for j in range(nodes_num - i):
            Z_v = R_mat[j]
            #$(Z_u^T Z_v - A_{u,v})^2 + (Z_u^T Z_v - A_{u,v}^2)^2$
            Z_uv = np.dot(np.transpose(Z_u), Z_v)
            if i != j:
                loss += 2*(np.square(Z_uv - A_mat[i,j]) + np.square(Z_uv - np.dot(A_mat[i,j], A_mat[i,j])))
            else:
                loss += np.square(Z_uv - A_mat[i,j]) + np.square(Z_uv - np.dot(A_mat[i,j], A_mat[i,j]))
    return loss
        
# R_mat: representation matrix
# A_mat: adjacency matrix
# l_rate: learning rate
# m: the number of examples of R_mat, here we are stochastically using 1 example to update gradient.
# iterations: the number of iterations
# loss_type: 0 - first loss, 1 - second loss
def stochatis_gradient_descent(R_mat, A_mat, l_rate, m, iterations, loss_type):
    #x_trans = x.transpose()
    loss = 0
    cost = 0
    for i in range(0, iterations):
        #print("Iteration %d | Cost: %f" % (i, cost))
        # avg gradient
        R_mat_trans = np.transpose(R_mat)
        random_index = np.random.randint(0, nodes_num, size=2)

        #loss = loss_basic(R_mat, A_mat, samples)
        Z_u = R_mat[:,random_index[0]]
        Z_v = R_mat[:,random_index[1]]
        # 1. loss
        if loss_type == 0:
            loss += 2*(np.square(np.dot(Z_u, np.transpose(Z_v)) - A_mat[random_index[0],random_index[1]]))
        # 2. loss
        elif loss_type == 1:
            Z_uv = np.dot(np.transpose(Z_u), Z_v)
            loss += 2*(np.square(Z_uv - A_mat[random_index[0],random_index[1]]) + np.square(Z_uv - np.dot(A_mat[random_index[0],random_index[1]], A_mat[random_index[0],random_index[1]])))
        else:
            print('Loss type is unknown.')
        # avg cost
        # cost = np.sum(loss ** 2) / (2 * m)
        cost += loss ** 2 / (2 * m)
        
        representation_trans = R_mat_trans[random_index]
        gradient = np.dot(representation_trans, loss) / m
        # update weight
        R_mat[:,random_index] = R_mat[:,random_index] - l_rate * np.transpose(gradient)
       
    return R_mat

first_result = stochatis_gradient_descent(representations_mat, A, learning_rate, 1, iterations, 0)
second_result = stochatis_gradient_descent(representations_mat, A, learning_rate, 1, iterations, 1)
print(nearest_neighbors(first_result[0], first_result, 5))
print(nearest_neighbors(second_result[0], second_result, 5))


[0.0, 1.021209130207161e-15, 1.0704289710950333e-15, 1.1312910362709394e-15, 5.813425945625108e-15]
[0.0, 1.021209130207161e-15, 1.0704289710950333e-15, 1.1312910362709394e-15, 5.813425945625108e-15]
