In [1]:
import networkx as nx
import numpy as np
import random
from scipy.sparse import diags

In [2]:
def label_propagation(G, labels, max_iter=1000, tol=1e-6): 
    node_to_idx = {node: idx for idx, node in enumerate(G.nodes())}
    idx_to_node = {idx: node for node, idx in node_to_idx.items()}
    
    A = nx.adjacency_matrix(G)
    row_sum = np.array(A.sum(axis=1)).flatten()
    row_sum[row_sum == 0] = 1
    
    Dinv = diags(1.0 / row_sum)
    P = Dinv @ A 
    
    n_nodes = len(G.nodes)
    unique_labels = sorted(set(labels.values()))
    label_to_index = {label: idx for idx, label in enumerate(unique_labels)}
    Y = np.zeros((n_nodes, len(unique_labels)))
    
    for node, label in labels.items():
        if label is not None:
            Y[node_to_idx[node], label_to_index[label]] = 1
    
    for _ in range(max_iter):
        Y_prev = Y.copy()
        Y = P.dot(Y)
        for node in labels:
            if labels[node] is not None:
                Y[node_to_idx[node], :] = Y_prev[node_to_idx[node], :]
        if np.linalg.norm(Y - Y_prev) < tol:
            break
    
    index_to_label = {idx: label for label, idx in label_to_index.items()}
    predicted_labels = {}
    for idx in range(n_nodes):
        node = idx_to_node[idx]
        if Y[idx].sum() > 0: 
            predicted_labels[node] = index_to_label[np.argmax(Y[idx, :])]
    
    return predicted_labels

def randomly_remove_attributes(node_attrs, remove_fraction):
    known_labels = {node: attr for node, attr in node_attrs.items() if attr is not None}
    
    total_to_remove = int(len(known_labels) * remove_fraction)
    removed_nodes = random.sample(list(known_labels.keys()), total_to_remove)
    incomplete_attrs = node_attrs.copy()
    for node in removed_nodes:
        incomplete_attrs[node] = None
    return incomplete_attrs, {node: attr for node, attr in node_attrs.items() if node not in removed_nodes and attr is not None}

def evaluate_label_propagation(G, node_attrs, remove_fractions):
    results = {}
    for fraction in remove_fractions:
        incomplete_attrs, labeled_attrs = randomly_remove_attributes(node_attrs, fraction)
            
        predicted_labels = label_propagation(G, labeled_attrs)
        
        missing_nodes = [node for node in incomplete_attrs if incomplete_attrs[node] is None]
        true_labels = [node_attrs[node] for node in missing_nodes if node in node_attrs and node_attrs[node] is not None]
        recovered_labels = [predicted_labels.get(node) for node in missing_nodes if node in predicted_labels]
        
        if true_labels and recovered_labels: 
            accuracy = sum(1 for y, y_pred in zip(true_labels, recovered_labels) if y == y_pred) / len(true_labels)
            mae = sum(abs(float(y) - float(y_pred)) for y, y_pred in zip(true_labels, recovered_labels)) / len(true_labels)
            results[f"{fraction * 100}% Removed"] = {"accuracy": accuracy, "mae": mae}
            
    
    return results

In [3]:
data_path = "../Data/fb100"
duke = nx.read_gml(f"{data_path}/Duke14.gml")
attributes_to_evaluate = ['dorm', 'major_index', 'gender']
remove_fractions = [0.1, 0.2, 0.3]

for attr_name in attributes_to_evaluate:
    attr_data = {node: duke.nodes[node][attr_name] 
                 for node in duke.nodes 
                 if duke.nodes[node].get(attr_name) is not None}
    
    results = evaluate_label_propagation(duke, attr_data, remove_fractions)
    
    print(f"Results for attribute: {attr_name}")
    for fraction, result in results.items():
        print(f"{fraction}: Accuracy = {result['accuracy']:.4f}, Mean Square Error: = {result['mae']}")
    print()

Results for attribute: dorm
10.0% Removed: Accuracy = 0.4914, Mean Square Error: = 62.8685540950455
20.0% Removed: Accuracy = 0.4891, Mean Square Error: = 63.79282465891865
30.0% Removed: Accuracy = 0.4815, Mean Square Error: = 64.95181940700809

Results for attribute: major_index
10.0% Removed: Accuracy = 0.2710, Mean Square Error: = 37.43073811931244
20.0% Removed: Accuracy = 0.2779, Mean Square Error: = 36.24355735219808
30.0% Removed: Accuracy = 0.1402, Mean Square Error: = 43.72270889487871

Results for attribute: gender
10.0% Removed: Accuracy = 0.6208, Mean Square Error: = 0.42163801820020225
20.0% Removed: Accuracy = 0.6139, Mean Square Error: = 0.4305204648812532
30.0% Removed: Accuracy = 0.6119, Mean Square Error: = 0.42890835579514824

