In [1]:
from abc import ABC
from abc import abstractmethod
import networkx as nx
import numpy as np
import progressbar
import random
import math

In [2]:
class LinkPrediction(ABC):
    def __init__(self, graph):
        self.graph = graph
        self.N = len(graph)
    
    def neighbors(self, v):
        neighbors_list = self.graph.neighbors(v)
        return list(neighbors_list)

    @abstractmethod
    def fit(self):
        raise NotImplementedError("Fit must be implemented")

In [3]:
class CommonNeighbors(LinkPrediction):
    def fit(self):
        scores = {}
        
        for u, v in nx.non_edges(self.graph):
            u_neighbors = set(self.neighbors(u))
            v_neighbors = set(self.neighbors(v))
            scores[(u, v)] = len(u_neighbors.intersection(v_neighbors))

        return scores

In [4]:
class Jaccard(LinkPrediction):
    def fit(self):
        scores = {}

        for u, v in nx.non_edges(self.graph):
            u_neighbors = set(self.neighbors(u))
            v_neighbors = set(self.neighbors(v))

            intersection = len(u_neighbors.intersection(v_neighbors))
            union = len(u_neighbors.union(v_neighbors))
            
            if union > 0:
                scores[(u, v)] = intersection / union
            else:
                scores[(u, v)] = 0.0

        return scores

In [5]:
class AdamicAdar(LinkPrediction):
    def fit(self):
        scores = {}

        for u, v in nx.non_edges(self.graph):
            u_neighbors = set(self.neighbors(u))
            v_neighbors = set(self.neighbors(v))
            common_neighbors = u_neighbors.intersection(v_neighbors)

            score = 0.0
            for w in common_neighbors:
                degree_w = len(list(self.neighbors(w)))
                if degree_w > 1:
                    score += 1 / math.log(degree_w)

            scores[(u, v)] = score

        return scores

In [6]:
def remove_random_edges(graph, fraction):
    edges = list(graph.edges())
    removed_edges = set(random.sample(edges, int(fraction * len(edges))))
    temp_graph = graph.copy()
    temp_graph.remove_edges_from(removed_edges)
    return temp_graph, removed_edges

def get_link_scores(predictor):
    scores = predictor.fit()
    return sorted(scores.items(), key=lambda x: x[1], reverse=True)

def evaluate(scores, removed_edges, k):
    top_pairs = set(pair for pair, _ in scores[:k])
    correct = len(removed_edges.intersection(top_pairs))
    precision = correct / k
    recall = correct / len(removed_edges)

    return precision, recall

In [7]:
# Read data
data_path = "../Data/fb100"

caltech = nx.read_gml(f"{data_path}/Caltech36.gml")
mit = nx.read_gml(f"{data_path}/MIT8.gml")
john_hopkins = nx.read_gml(f"{data_path}/Johns Hopkins55.gml")

In [8]:
fractions = [0.05, 0.1, 0.15, 0.2]
ks = [50, 100, 200, 300, 400]

In [13]:
def experiment(predictor, removed_edges): 
    scores = get_link_scores(predictor)
    for k in ks:
        precision, recall = evaluate(scores, removed_edges, k)

        print(f"k: {k}")
        print(f"precision: {precision}")
        print(f"recall: {recall}")

def common_neighbors_experiment(graph):
    for fraction in fractions:
        reduced_graph, removed_edges = remove_random_edges(graph, fraction)
        predictor = CommonNeighbors(reduced_graph)
        experiment(predictor, removed_edges)
        print("")

def jaccard_experiment(graph):
    for fraction in fractions:
        reduced_graph, removed_edges = remove_random_edges(graph, fraction)
        predictor = Jaccard(reduced_graph)
        experiment(predictor, removed_edges)
        print("")

def adamic_ada_experiment(graph):
    for fraction in fractions:
        reduced_graph, removed_edges = remove_random_edges(graph, fraction)
        predictor = AdamicAdar(reduced_graph)
        experiment(predictor, removed_edges)
        print("")

In [14]:
common_neighbors_experiment(caltech)

k: 50
precision: 0.16
recall: 0.009615384615384616
k: 100
precision: 0.15
recall: 0.018028846153846152
k: 200
precision: 0.11
recall: 0.026442307692307692
k: 300
precision: 0.1
recall: 0.036057692307692304
k: 400
precision: 0.1075
recall: 0.051682692307692304

k: 50
precision: 0.32
recall: 0.00960960960960961
k: 100
precision: 0.23
recall: 0.013813813813813814
k: 200
precision: 0.24
recall: 0.02882882882882883
k: 300
precision: 0.21
recall: 0.03783783783783784
k: 400
precision: 0.185
recall: 0.044444444444444446

k: 50
precision: 0.26
recall: 0.005204163330664532
k: 100
precision: 0.33
recall: 0.01321056845476381
k: 200
precision: 0.31
recall: 0.024819855884707767
k: 300
precision: 0.2866666666666667
recall: 0.0344275420336269
k: 400
precision: 0.2875
recall: 0.04603682946357086

k: 50
precision: 0.38
recall: 0.005703992794956469
k: 100
precision: 0.31
recall: 0.009306514560192134
k: 200
precision: 0.29
recall: 0.01741218853197238
k: 300
precision: 0.2833333333333333
recall: 0.02551786

In [15]:
jaccard_experiment(caltech)

k: 50
precision: 0.22
recall: 0.013221153846153846
k: 100
precision: 0.16
recall: 0.019230769230769232
k: 200
precision: 0.14
recall: 0.03365384615384615
k: 300
precision: 0.10333333333333333
recall: 0.037259615384615384
k: 400
precision: 0.1075
recall: 0.051682692307692304

k: 50
precision: 0.14
recall: 0.004204204204204204
k: 100
precision: 0.18
recall: 0.010810810810810811
k: 200
precision: 0.155
recall: 0.018618618618618618
k: 300
precision: 0.14666666666666667
recall: 0.026426426426426425
k: 400
precision: 0.1375
recall: 0.03303303303303303

k: 50
precision: 0.16
recall: 0.0032025620496397116
k: 100
precision: 0.23
recall: 0.009207365892714172
k: 200
precision: 0.22
recall: 0.017614091273018415
k: 300
precision: 0.22
recall: 0.02642113690952762
k: 400
precision: 0.215
recall: 0.0344275420336269

k: 50
precision: 0.2
recall: 0.003002101471029721
k: 100
precision: 0.21
recall: 0.006304413089162414
k: 200
precision: 0.235
recall: 0.014109876913839688
k: 300
precision: 0.2466666666666

In [16]:
adamic_ada_experiment(caltech)

k: 50
precision: 0.16
recall: 0.009615384615384616
k: 100
precision: 0.17
recall: 0.020432692307692308
k: 200
precision: 0.15
recall: 0.036057692307692304
k: 300
precision: 0.12666666666666668
recall: 0.04567307692307692
k: 400
precision: 0.1075
recall: 0.051682692307692304

k: 50
precision: 0.34
recall: 0.01021021021021021
k: 100
precision: 0.28
recall: 0.016816816816816817
k: 200
precision: 0.28
recall: 0.033633633633633635
k: 300
precision: 0.25666666666666665
recall: 0.04624624624624624
k: 400
precision: 0.2325
recall: 0.055855855855855854

k: 50
precision: 0.32
recall: 0.006405124099279423
k: 100
precision: 0.31
recall: 0.012409927942353884
k: 200
precision: 0.27
recall: 0.021617293835068056
k: 300
precision: 0.2866666666666667
recall: 0.0344275420336269
k: 400
precision: 0.2575
recall: 0.04123298638911129

k: 50
precision: 0.34
recall: 0.005103572500750525
k: 100
precision: 0.39
recall: 0.011708195737015911
k: 200
precision: 0.335
recall: 0.02011407985589913
k: 300
precision: 0.3