In [1]:
from abc import ABC
from abc import abstractmethod
import networkx as nx
import numpy as np
import progressbar
import random
import math

In [2]:
class LinkPrediction(ABC):
    def __init__(self, graph):
        self.graph = graph
        self.N = len(graph)
    
    def neighbors(self, v):
        neighbors_list = self.graph.neighbors(v)
        return list(neighbors_list)

    @abstractmethod
    def fit(self):
        raise NotImplementedError("Fit must be implemented")

In [3]:
class CommonNeighbors(LinkPrediction):
    def fit(self):
        scores = {}
        
        for u, v in nx.non_edges(self.graph):
            u_neighbors = set(self.neighbors(u))
            v_neighbors = set(self.neighbors(v))
            scores[(u, v)] = len(u_neighbors.intersection(v_neighbors))

        return scores

In [4]:
class Jaccard(LinkPrediction):
    def fit(self):
        scores = {}

        for u, v in nx.non_edges(self.graph):
            u_neighbors = set(self.neighbors(u))
            v_neighbors = set(self.neighbors(v))

            intersection = len(u_neighbors.intersection(v_neighbors))
            union = len(u_neighbors.union(v_neighbors))
            
            if union > 0:
                scores[(u, v)] = intersection / union
            else:
                scores[(u, v)] = 0.0

        return scores

In [5]:
class AdamicAdar(LinkPrediction):
    def fit(self):
        scores = {}

        for u, v in nx.non_edges(self.graph):
            u_neighbors = set(self.neighbors(u))
            v_neighbors = set(self.neighbors(v))
            common_neighbors = u_neighbors.intersection(v_neighbors)

            score = 0.0
            for w in common_neighbors:
                degree_w = len(list(self.neighbors(w)))
                if degree_w > 1:
                    score += 1 / math.log(degree_w)

            scores[(u, v)] = score

        return scores

In [6]:
def remove_random_edges(graph, fraction):
    edges = list(graph.edges())
    removed_edges = set(random.sample(edges, int(fraction * len(edges))))
    temp_graph = graph.copy()
    temp_graph.remove_edges_from(removed_edges)
    return temp_graph, removed_edges

def get_link_scores(predictor):
    scores = predictor.fit()
    return sorted(scores.items(), key=lambda x: x[1], reverse=True)

def evaluate(scores, removed_edges, k):
    top_pairs = set(pair for pair, _ in scores[:k])
    correct = len(removed_edges.intersection(top_pairs))
    precision = correct / k
    recall = correct / len(removed_edges)

    return precision, recall

In [7]:
fractions = [0.05, 0.1, 0.15, 0.2]
ks = [50, 100, 200, 300, 400]

In [8]:
def experiment(predictor, fraction, removed_edges): 
    scores = get_link_scores(predictor)
    for k in ks:
        precision, recall = evaluate(scores, removed_edges, k)

        print(f"Results for predictor {type(predictor).__name__} with fraction = {fraction} and k = {k}: precision = {precision}, recall {recall}")

def common_neighbors_experiment(graph):
    for fraction in fractions:
        reduced_graph, removed_edges = remove_random_edges(graph, fraction)
        predictor = CommonNeighbors(reduced_graph)
        experiment(predictor, fraction, removed_edges)

def jaccard_experiment(graph):
    for fraction in fractions:
        reduced_graph, removed_edges = remove_random_edges(graph, fraction)
        predictor = Jaccard(reduced_graph)
        experiment(predictor, fraction, removed_edges)

def adamic_ada_experiment(graph):
    for fraction in fractions:
        reduced_graph, removed_edges = remove_random_edges(graph, fraction)
        predictor = AdamicAdar(reduced_graph)
        experiment(predictor, fraction, removed_edges)

In [9]:
# Read data
data_path = "../Data/fb100"

In [10]:
princeton = nx.read_gml(f"{data_path}/Princeton12.gml")

In [11]:
common_neighbors_experiment(princeton)


Results for predictor CommonNeighbors with fraction = 0.05 and k = 50: precision = 0.38, recall 0.0012955134324287468
Results for predictor CommonNeighbors with fraction = 0.05 and k = 100: precision = 0.37, recall 0.0025228419473612438
Results for predictor CommonNeighbors with fraction = 0.05 and k = 200: precision = 0.28, recall 0.0038183553797899905
Results for predictor CommonNeighbors with fraction = 0.05 and k = 300: precision = 0.24666666666666667, recall 0.0050456838947224875
Results for predictor CommonNeighbors with fraction = 0.05 and k = 400: precision = 0.2325, recall 0.006341197327151234
Results for predictor CommonNeighbors with fraction = 0.1 and k = 50: precision = 0.44, recall 0.0007500340924587481
Results for predictor CommonNeighbors with fraction = 0.1 and k = 100: precision = 0.35, recall 0.0011932360561843721
Results for predictor CommonNeighbors with fraction = 0.1 and k = 200: precision = 0.29, recall 0.0019773626073912452
Results for predictor CommonNeighbors

In [12]:
jaccard_experiment(princeton)

Results for predictor Jaccard with fraction = 0.05 and k = 50: precision = 0.1, recall 0.00034092458748124913
Results for predictor Jaccard with fraction = 0.05 and k = 100: precision = 0.14, recall 0.0009545888449474976
Results for predictor Jaccard with fraction = 0.05 and k = 200: precision = 0.105, recall 0.0014318832674212465
Results for predictor Jaccard with fraction = 0.05 and k = 300: precision = 0.12, recall 0.002454657029864994
Results for predictor Jaccard with fraction = 0.05 and k = 400: precision = 0.145, recall 0.0039547252147824905
Results for predictor Jaccard with fraction = 0.1 and k = 50: precision = 0.1, recall 0.00017046229374062457
Results for predictor Jaccard with fraction = 0.1 and k = 100: precision = 0.12, recall 0.000409109504977499
Results for predictor Jaccard with fraction = 0.1 and k = 200: precision = 0.135, recall 0.0009204963861993728
Results for predictor Jaccard with fraction = 0.1 and k = 300: precision = 0.12, recall 0.001227328514932497
Results

In [13]:
adamic_ada_experiment(princeton)

Results for predictor AdamicAdar with fraction = 0.05 and k = 50: precision = 0.26, recall 0.0008864039274512478
Results for predictor AdamicAdar with fraction = 0.05 and k = 100: precision = 0.28, recall 0.0019091776898949953
Results for predictor AdamicAdar with fraction = 0.05 and k = 200: precision = 0.25, recall 0.0034092458748124915
Results for predictor AdamicAdar with fraction = 0.05 and k = 300: precision = 0.24, recall 0.004909314059729988
Results for predictor AdamicAdar with fraction = 0.05 and k = 400: precision = 0.24, recall 0.006545752079639984
Results for predictor AdamicAdar with fraction = 0.1 and k = 50: precision = 0.34, recall 0.0005795717987181235
Results for predictor AdamicAdar with fraction = 0.1 and k = 100: precision = 0.35, recall 0.0011932360561843721
Results for predictor AdamicAdar with fraction = 0.1 and k = 200: precision = 0.34, recall 0.002318287194872494
Results for predictor AdamicAdar with fraction = 0.1 and k = 300: precision = 0.33, recall 0.003

In [14]:
caltech = nx.read_gml(f"{data_path}/Caltech36.gml")

In [15]:
common_neighbors_experiment(caltech)

Results for predictor CommonNeighbors with fraction = 0.05 and k = 50: precision = 0.2, recall 0.01201923076923077
Results for predictor CommonNeighbors with fraction = 0.05 and k = 100: precision = 0.2, recall 0.02403846153846154
Results for predictor CommonNeighbors with fraction = 0.05 and k = 200: precision = 0.115, recall 0.027644230769230768
Results for predictor CommonNeighbors with fraction = 0.05 and k = 300: precision = 0.10666666666666667, recall 0.038461538461538464
Results for predictor CommonNeighbors with fraction = 0.05 and k = 400: precision = 0.0975, recall 0.046875
Results for predictor CommonNeighbors with fraction = 0.1 and k = 50: precision = 0.3, recall 0.009009009009009009
Results for predictor CommonNeighbors with fraction = 0.1 and k = 100: precision = 0.26, recall 0.015615615615615615
Results for predictor CommonNeighbors with fraction = 0.1 and k = 200: precision = 0.255, recall 0.03063063063063063
Results for predictor CommonNeighbors with fraction = 0.1 an

In [16]:
jaccard_experiment(caltech)

Results for predictor Jaccard with fraction = 0.05 and k = 50: precision = 0.16, recall 0.009615384615384616
Results for predictor Jaccard with fraction = 0.05 and k = 100: precision = 0.15, recall 0.018028846153846152
Results for predictor Jaccard with fraction = 0.05 and k = 200: precision = 0.105, recall 0.025240384615384616
Results for predictor Jaccard with fraction = 0.05 and k = 300: precision = 0.10333333333333333, recall 0.037259615384615384
Results for predictor Jaccard with fraction = 0.05 and k = 400: precision = 0.1025, recall 0.04927884615384615
Results for predictor Jaccard with fraction = 0.1 and k = 50: precision = 0.18, recall 0.005405405405405406
Results for predictor Jaccard with fraction = 0.1 and k = 100: precision = 0.23, recall 0.013813813813813814
Results for predictor Jaccard with fraction = 0.1 and k = 200: precision = 0.17, recall 0.02042042042042042
Results for predictor Jaccard with fraction = 0.1 and k = 300: precision = 0.17, recall 0.03063063063063063
R

In [17]:
adamic_ada_experiment(caltech)

Results for predictor AdamicAdar with fraction = 0.05 and k = 50: precision = 0.28, recall 0.016826923076923076
Results for predictor AdamicAdar with fraction = 0.05 and k = 100: precision = 0.26, recall 0.03125
Results for predictor AdamicAdar with fraction = 0.05 and k = 200: precision = 0.19, recall 0.04567307692307692
Results for predictor AdamicAdar with fraction = 0.05 and k = 300: precision = 0.15333333333333332, recall 0.055288461538461536
Results for predictor AdamicAdar with fraction = 0.05 and k = 400: precision = 0.135, recall 0.06490384615384616
Results for predictor AdamicAdar with fraction = 0.1 and k = 50: precision = 0.38, recall 0.011411411411411412
Results for predictor AdamicAdar with fraction = 0.1 and k = 100: precision = 0.33, recall 0.01981981981981982
Results for predictor AdamicAdar with fraction = 0.1 and k = 200: precision = 0.26, recall 0.03123123123123123
Results for predictor AdamicAdar with fraction = 0.1 and k = 300: precision = 0.25666666666666665, rec