# Social Link Prediction
## April 12th, 2022
### Overview: A series of functions is built to predict the links most likely to form between two people within a group.

In [3]:
import numpy as np
from numpy import linalg as nla
from scipy import linalg as la
from scipy.sparse import csgraph as csg

### The following functions will be created:
- index: calculates the index of a matrix
- is_drazin: boolean function that determines whether one matrix is the Drazin inverse of another
- drazin_inverse: calculates the Drazin inverse of a matrix
- laplacian: calculates the Laplacian of a graph from its adjacency matrix
- effective_resistance: calculates the effective resistance between nodes in a graph

Note: The Drazin inverse is a suitable alternative to the ordinary inverse when a matrix is singular.

In [31]:
def index(A, tol=1e-5):
    """Compute the index of the matrix A.

    The index of a matrix is the smallest nonnegative integer k such that
    rank(A^(k+1)) = rank(A^k).

    Parameters:
        A ((n,n) ndarray): An nxn matrix.
        tol (float): Not used by the computation; kept in the signature so
            that callers passing it keep working.

    Returns:
        k (int): The index of A.
    """
    n = len(A)

    # A is nonsingular exactly when it has full rank, and then its index is 0.
    # A rank test is used instead of comparing det(A) with 0 because the
    # determinant scales like c**n: a perfectly conditioned matrix with small
    # entries (e.g. 1e-3 * I) has a tiny determinant without being singular.
    rank_k = np.linalg.matrix_rank(A)
    if rank_k == n:
        return 0

    Ak = A.copy()
    for k in range(1, n + 1):
        # A^(k+1) is formed once and reused as A^k on the next pass
        A_next = np.dot(A, Ak)
        rank_next = np.linalg.matrix_rank(A_next)

        # the ranks have stabilized, so k is the index
        if rank_k == rank_next:
            return k

        Ak, rank_k = A_next, rank_next

    # In exact arithmetic the index never exceeds n; this is only reached if
    # the numerically computed ranks fail to stabilize.
    return n + 1

In [5]:
def is_drazin(A, Ad, k):
    """Check whether Ad satisfies the defining properties of the Drazin
    inverse of A.

    Parameters:
        A ((n,n) ndarray): An nxn matrix.
        Ad ((n,n) ndarray): A candidate for the Drazin inverse of A.
        k (int): The index of A.

    Returns:
        (bool) True if Ad is the Drazin inverse of A, False otherwise.
    """
    # Property 1: A and Ad commute.
    if not np.allclose(A @ Ad, Ad @ A):
        return False

    # Property 2: A^(k+1) Ad = A^k.
    if not np.allclose(nla.matrix_power(A, k + 1) @ Ad, nla.matrix_power(A, k)):
        return False

    # Property 3: Ad A Ad = Ad.
    return np.allclose(Ad @ A @ Ad, Ad)

### Displaying the above functions' behavior

In [7]:
# A singular 4x4 matrix (its last row is zero) ...
A_ = np.array([[1, 3, 0, 0],
               [0, 1, 3, 0],
               [0, 0, 1, 3],
               [0, 0, 0, 0]])
# ... and a candidate for its Drazin inverse
Ad_ = np.array([[1, -3, 9, 81],
                [0, 1, -3, -18],
                [0, 0, 1, 3],
                [0, 0, 0, 0]])
# verify the candidate, using the computed index of A_
is_drazin(A_, Ad_, index(A_))

True

In [8]:
# B_ is nilpotent (B_ @ B_ @ B_ is the zero matrix), so the candidate
# Drazin inverse being checked is the 3x3 zero matrix
B_ = np.array([[1, 1, 3],
               [5, 2, 6],
               [-2, -1, -3]])
Bd_ = np.zeros((3, 3))
is_drazin(B_, Bd_, index(B_))

True

## Creating a function to return the Drazin inverse of a matrix

In [9]:
def drazin_inverse(A, tol=1e-4):
    """Compute the Drazin inverse of A.

    Two sorted Schur decompositions are used: one that moves the nonzero
    eigenvalues to the top-left and one that moves the (numerically) zero
    eigenvalues to the top-left. The leading Schur vectors of each give a
    basis that block-diagonalizes A into an invertible part and a nilpotent
    part; inverting only the invertible part yields the Drazin inverse.

    Parameters:
        A ((n,n) ndarray): An nxn matrix.
        tol (float): Eigenvalues whose modulus is at most tol are treated
            as zero.

    Returns:
       ((n,n) ndarray) The Drazin inverse of A.
    """
    n = A.shape[0]

    # Sorting predicates for the Schur decomposition. For a real matrix the
    # real Schur form hands the callback the real and imaginary parts as two
    # separate arguments; for complex input it hands over a single complex
    # number. Accepting an optional second argument covers both cases and
    # makes sure the full modulus is tested, so a purely imaginary eigenvalue
    # is not mistaken for zero.
    def is_nonzero(re, im=0.0):
        return abs(re + 1j * im) > tol

    def is_zero(re, im=0.0):
        return abs(re + 1j * im) <= tol

    # k1 is the number of nonzero eigenvalues sorted to the top-left
    _, Q1, k1 = la.schur(A, sort=is_nonzero)
    _, Q2, _ = la.schur(A, sort=is_zero)

    # Basis: first k1 columns span the invariant subspace of the nonzero
    # eigenvalues, the remaining n-k1 columns that of the zero eigenvalues.
    U = np.hstack((Q1[:, :k1], Q2[:, :n - k1]))
    U_inv = la.inv(U)

    # In this basis the leading k1 x k1 block of A is its invertible part
    V = U_inv @ A @ U
    Z = np.zeros((n, n))

    # Invert the invertible block only; the nilpotent block is mapped to zero.
    # When k1 == 0 every eigenvalue is zero and the Drazin inverse is zero.
    if k1 != 0:
        Z[:k1, :k1] = la.inv(V[:k1, :k1])

    return U @ Z @ U_inv

In [30]:
# Compute the Drazin inverse of A_ numerically and verify it with is_drazin
A__d = drazin_inverse(A_)
is_drazin(A_,A__d,index(A_))

True

### Helper function which calculates the Laplacian of a matrix

In [14]:
def laplacian(A):
    """Compute the Laplacian matrix of the graph G that has adjacency matrix A.

    The Laplacian is L = D - A, where D is the diagonal matrix whose jth
    diagonal entry is the sum of the jth row of A.

    Parameters:
        A ((N,N) ndarray): The adjacency matrix of an undirected graph G.
            Any array-like (e.g. a nested list) is accepted.

    Returns:
        L ((N,N) ndarray): The Laplacian matrix of G, as a float array.
    """
    A = np.asarray(A)

    # Row sums computed in one vectorized call; summing as float keeps the
    # result a float array even when A holds integers.
    degrees = A.sum(axis=1, dtype=float)

    return np.diag(degrees) - A

## The following function calculates the effective resistance between nodes in a graph, which requires the Drazin inverse. This will be used to find likely-to-occur links between nodes. 

In [16]:
def effective_resistance(A):
    """Build the matrix of pairwise effective resistances of a graph.

    Parameters:
        A ((n,n) ndarray): The adjacency matrix of an undirected graph.

    Returns:
        ((n,n) ndarray) The matrix where the ijth entry is the effective
        resistance from node i to node j.
    """
    graph_laplacian = csg.laplacian(A)
    size = len(A)
    eye = np.identity(size)
    resistances = np.zeros(A.shape)

    for node in range(size):
        # work on a copy of the Laplacian whose row `node` is swapped for
        # the matching row of the identity
        modified = graph_laplacian.copy()
        modified[node] = eye[node]

        # the diagonal of the Drazin inverse becomes row `node` of the result
        resistances[node] = np.diag(drazin_inverse(modified))

    # each row carries a 1 in its own position; removing the identity makes
    # the resistance from a node to itself 0
    return resistances - eye

### Display of effective resistance for different matrices

In [17]:
# A1: path graph on four nodes (0-1-2-3)
A1 = np.array([[0, 1, 0, 0],
               [1, 0, 1, 0],
               [0, 1, 0, 1],
               [0, 0, 1, 0]])
# A2: two nodes joined by an edge of weight 3
A2 = np.array([[0, 3],
               [3, 0]])
# A3: triangle, every pair of the three nodes is connected
A3 = np.array([[0, 1, 1],
               [1, 0, 1],
               [1, 1, 0]])

In [18]:
# effective resistances for the four-node graph A1
effective_resistance(A1)

array([[0., 1., 2., 3.],
       [1., 0., 1., 2.],
       [2., 1., 0., 1.],
       [3., 2., 1., 0.]])

In [14]:
# effective resistances for the two-node graph A2 (single edge of weight 3)
effective_resistance(A2)

array([[0.        , 0.33333333],
       [0.33333333, 0.        ]])

In [19]:
# effective resistances for the three-node graph A3
effective_resistance(A3)

array([[0.        , 0.66666667, 0.66666667],
       [0.66666667, 0.        , 0.66666667],
       [0.66666667, 0.66666667, 0.        ]])

## The file social_network.csv contains strings of name pairs representing connections between two people.

## Creation of the LinkPredictor class, which predicts which links are likely to occur next based on effective resistance.
- Constructor: reads the social_network file, creates an adjacency matrix from the name pairs, and calculates the effective resistance matrix.
- predict_link: predicts the link most likely to be made next
- add_link: adds a link between the given nodes and updates the adjacency and effective resistance matrices

In [23]:
class LinkPredictor:
    """Predict links between nodes of a network."""

    # Instance attributes set by the constructor:
    #   names_list : list of [name1, name2] pairs read from the file
    #   index_dict : maps a name to its row/column index
    #   name_dict  : maps a row/column index back to its name
    #   A          : adjacency matrix of the network
    #   R          : effective resistance matrix of the network

    def __init__(self, filename='social_network.csv'):
        """Create the effective resistance matrix by constructing
        an adjacency matrix.

        Parameters:
            filename (str): The name of a file containing graph data, one
                comma-separated pair of names per line.
        """
        pairs = self._read_pairs(filename)

        #index_dict maps a name to an index; name_dict maps an index to a name
        index_dict = dict()
        name_dict = dict()

        #names are numbered in order of first appearance
        for pair in pairs:
            for name in (pair[0], pair[1]):
                if name not in index_dict:
                    position = len(index_dict)
                    index_dict[name] = position
                    name_dict[position] = name

        #creating adjacency matrix; a repeated pair increases the edge weight
        size = len(index_dict)
        A = np.zeros((size, size))
        for pair in pairs:
            ind1 = index_dict[pair[0]]
            ind2 = index_dict[pair[1]]
            A[ind1, ind2] += 1
            A[ind2, ind1] += 1

        #saving effective resistance matrix
        self.R = effective_resistance(A)

        #saving the index, name dictionaries, name list, and adjacency matrix
        self.names_list = pairs
        self.index_dict = index_dict
        self.name_dict = name_dict
        self.A = A

    @staticmethod
    def _read_pairs(filename):
        """Read the comma-separated fields of every non-blank line of a file.

        Parameters:
            filename (str): The name of the file to read.

        Returns:
            (list) One list of fields per non-blank line.
        """
        with open(filename) as readfile:
            lines = readfile.read().split('\n')

        #blank lines are skipped, so the last pair is kept whether or not
        #the file ends with a newline
        return [line.split(',') for line in lines if line.strip()]

    def predict_link(self, node=None):
        """Predict the next link, either for the whole graph or for a
        particular node.

        Parameters:
            node (str): The name of a node in the network.

        Returns:
            node1, node2 (str): The names of the next nodes to be linked.
                Returned if node is None.
            node1 (str): The name of the next node to be linked to 'node'.
                Returned if node is not None.

        Raises:
            ValueError: If node is not in the graph.
        """
        #checking in the graph before doing any work
        if node is not None and node not in self.index_dict:
            raise ValueError("Node not in graph")

        #pairs that are already linked, and every node paired with itself,
        #are excluded by giving them infinite resistance
        excluded = (self.A + np.identity(self.A.shape[0])) > 0
        R_mute = np.array(self.R, dtype=float)
        R_mute[excluded] = np.inf

        #if generally looking for next two to link
        if node is None:
            #indices of the pair with the smallest remaining resistance
            index_pair = np.unravel_index(R_mute.argmin(), R_mute.shape)
            return (self.name_dict[index_pair[0]], self.name_dict[index_pair[1]])

        #if looking for someone to make a link for: smallest resistance
        #within that person's row
        index1 = self.index_dict[node]
        index2 = np.argmin(R_mute[index1])
        return self.name_dict[index2]

    def add_link(self, node1, node2):
        """Add a link to the graph between node 1 and node 2 by updating the
        adjacency matrix and the effective resistance matrix.

        Parameters:
            node1 (str): The name of a node in the network.
            node2 (str): The name of a node in the network.

        Raises:
            ValueError: If either node1 or node2 is not in the graph.
        """
        #checking if nodes are in the graph
        if node1 not in self.index_dict or node2 not in self.index_dict:
            raise ValueError('nodes need to be in the graph')

        #getting the indices of the two nodes
        index1 = self.index_dict[node1]
        index2 = self.index_dict[node2]

        #creating the link between the nodes
        self.A[index1, index2] = 1
        self.A[index2, index1] = 1

        #the resistances depend on the whole graph, so they are recomputed
        #from the updated adjacency matrix
        self.R = effective_resistance(self.A)

### Displaying the file

In [21]:
# Read the raw file and split it into lines
with open('social_network.csv') as readfile:
    ff = readfile.read().split('\n')
# Split every line on commas, in place
for i in range(len(ff)):
    ff[i] = ff[i].split(',')
# Display the resulting list of lists
ff

[['Piers', 'Abigail'],
 ['Piers', 'Oliver'],
 ['Abigail', 'Oliver'],
 ['Piers', 'Stephanie'],
 ['Abigail', 'Stephanie'],
 ['Oliver', 'Stephanie'],
 ['Piers', 'Carol'],
 ['Piers', 'Melanie'],
 ['Piers', 'Stephen'],
 ['Carol', 'Stephen'],
 ['Melanie', 'Stephen'],
 ['Piers', 'Sally'],
 ['Abigail', 'Sally'],
 ['Oliver', 'Sally'],
 ['Stephanie', 'Sally'],
 ['Piers', 'Penelope'],
 ['Oliver', 'Penelope'],
 ['Oliver', 'Alan'],
 ['Piers', 'Trevor'],
 ['Carol', 'Trevor'],
 ['Melanie', 'Trevor'],
 ['Piers', 'Jake'],
 ['Piers', 'Mary'],
 ['Stephanie', 'Mary'],
 ['Piers', 'Anna'],
 ['Abigail', 'Anna'],
 ['Oliver', 'Anna'],
 ['Stephanie', 'Anna'],
 ['Melanie', 'Connor'],
 ['Stephen', 'Connor'],
 ['Piers', 'John'],
 ['Abigail', 'John'],
 ['Piers', 'Eric'],
 ['Abigail', 'Eric'],
 ['Piers', 'Paul'],
 ['Abigail', 'Paul'],
 ['Colin', 'Jane'],
 ['Jake', 'Jane'],
 ['Oliver', 'Thomas'],
 ['Colin', 'Thomas'],
 ['Jake', 'Thomas'],
 ['Oliver', 'Christopher'],
 ['Colin', 'Charles'],
 ['Brandon', 'Charles'],
 ['

### Creating object and displaying different attributes and functions

In [25]:
# build the predictor with the default filename argument
LLP = LinkPredictor()
# adjacency matrix
LLP.A

array([[0., 1., 1., ..., 0., 0., 0.],
       [1., 0., 1., ..., 0., 0., 0.],
       [1., 1., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 1., 1., 0.]])

In [26]:
# effective resistance matrix stored by the constructor
LLP.R

array([[0.        , 0.18998381, 0.19809478, ..., 0.72006729, 0.72006729,
        0.2333001 ],
       [0.18998381, 0.        , 0.22975655, ..., 0.76301021, 0.76301021,
        0.27562492],
       [0.19809478, 0.22975655, 0.        , ..., 0.69073556, 0.69073556,
        0.20953544],
       ...,
       [0.72006729, 0.76301021, 0.69073556, ..., 0.        , 1.        ,
        0.53555306],
       [0.72006729, 0.76301021, 0.69073556, ..., 1.        , 0.        ,
        0.53555306],
       [0.2333001 , 0.27562492, 0.20953544, ..., 0.53555306, 0.53555306,
        0.        ]])

### The LLP predicts a link between Emily and Oliver. After adding a connection between them, the next predicted link is between Emily and Piers.

In [27]:
# no node given: predict the next link over the whole graph
LLP.predict_link()

('Emily', 'Oliver')

In [28]:
# link Emily and Oliver in the predictor's adjacency matrix
LLP.add_link('Emily', 'Oliver')

In [29]:
# predict again now that Emily and Oliver are marked as linked
LLP.predict_link()

('Emily', 'Piers')