In [18]:
from abc import ABC
from abc import abstractmethod
import networkx as nx
import numpy as np
import progressbar
import random

In [5]:
class LinkPrediction(ABC):
    def __init__(self, graph):
        self.graph = graph
        self.N = len(graph)
    
    def neighbors(self, v):
        neighbors_list = self.graph.neighbors(v)
        return list(neighbors_list)

    @abstractmethod
    def fit(self):
        raise NotImplementedError("Fit must be implemented")

In [22]:
class CommonNeighbors(LinkPrediction):
    def __init__(self, graph):
        super(CommonNeighbors, self).__init__(graph)

    def fit(self):
        scores = {}
        nodes = list(self.graph.nodes())

        for u in nodes:
            u_neighbors = set(self.neighbors(u))
            for v in nodes:
                if u >= v or self.graph.has_edge(u, v):
                    continue
                v_neighbors = set(self.neighbors(v))
                scores[(u, v)] = len(u_neighbors.intersection(v_neighbors))

        return scores

In [24]:
def remove_random_edges(graph, fraction):
    edges = list(graph.edges())
    removed_edges = set(random.sample(edges, int(fraction * len(edges))))
    return graph.remove_edges_from(removed_edges), removed_edges

def get_link_scores(predictor):
    scores = predictor.fit()
    return sorted(scores.items(), key=lambda x: x[1], reverse=True)

def evaluate(scores, removed_edges, k):
    top_pairs = set(pair for pair, _ in scores[:k])
    correct = len(removed_edges.intersection(top_pairs))
    precision = correct / k
    recall = correct / len(removed_edges)

    return precision, recall

In [15]:
# Read data
data_path = "../Data/fb100"

caltech = nx.read_gml(f"{data_path}/Caltech36.gml")
mit = nx.read_gml(f"{data_path}/MIT8.gml")
john_hopkins = nx.read_gml(f"{data_path}/Johns Hopkins55.gml")

In [16]:
fractions = [0.05, 0.1, 0.15, 0.2]
ks = [50, 100, 200, 300, 400]

In [25]:
for fraction in fractions:
    print(f"fraction: {fraction}")
    for k in ks:
        reduced_graph, removed_edges = remove_random_edges(caltech, fraction)
        predictor = CommonNeighbors(caltech)
        scores = get_link_scores(predictor)
        precision, recall = evaluate(scores, removed_edges, k)

        print(f"k: {k}")
        print(f"precision: {precision}")
        print(f"recall: {recall}")
    print("")

fraction: 0.05
k: 50
precision: 0.16
recall: 0.011204481792717087
k: 100
precision: 0.09
recall: 0.01327433628318584
k: 200
precision: 0.115
recall: 0.03571428571428571
k: 300
precision: 0.07333333333333333
recall: 0.03594771241830065
k: 400
precision: 0.065
recall: 0.04475043029259897

fraction: 0.1
k: 50
precision: 0.16
recall: 0.007239819004524887
k: 100
precision: 0.11
recall: 0.011066398390342052
k: 200
precision: 0.075
recall: 0.01675977653631285
k: 300
precision: 0.09333333333333334
recall: 0.034782608695652174
k: 400
precision: 0.0525
recall: 0.028965517241379312

fraction: 0.15
k: 50
precision: 0.08
recall: 0.0040858018386108275
k: 100
precision: 0.04
recall: 0.004807692307692308
k: 200
precision: 0.025
recall: 0.007072135785007072
k: 300
precision: 0.023333333333333334
recall: 0.011647254575707155
k: 400
precision: 0.0275
recall: 0.021526418786692758

fraction: 0.2
k: 50
precision: 0.02
recall: 0.0017271157167530224
k: 100
precision: 0.02
recall: 0.004310344827586207
k: 200
p