# Task B: Node2Vec on Superhero Graph

1. Imports and Setup

In [2]:
import os
import networkx as nx
import pandas as pd
import numpy as np
from node2vec import Node2Vec
from sklearn.decomposition import PCA
import plotly.express as px

2. Graph Construction

In [3]:
edges = [
    ('CAPTAIN AMERICA', 'IRON MAN'), ('CAPTAIN AMERICA', 'HAWKEYE'), ('CAPTAIN AMERICA', 'BLACK WIDOW'),
    ('CAPTAIN AMERICA', 'THOR'), ('CAPTAIN AMERICA', 'HULK'), ('CAPTAIN AMERICA', 'SPIDER-MAN'),
    ('IRON MAN', 'WAR MACHINE'), ('IRON MAN', 'BLACK WIDOW'), ('IRON MAN', 'THOR'), ('IRON MAN', 'HULK'),
    ('IRON MAN', 'SPIDER-MAN'), ('BLACK WIDOW', 'HAWKEYE'), ('BLACK WIDOW', 'HULK'), ('THOR', 'HULK'),
    ('WOLVERINE', 'BEAST'), ('WOLVERINE', 'COLOSSUS'), ('WOLVERINE', 'CYCLOPS'), ('WOLVERINE', 'ROGUE'),
    ('WOLVERINE', 'STORM'), ('WOLVERINE', 'JEAN GREY'), ('WOLVERINE', 'PROFESSOR X'),
    ('CYCLOPS', 'JEAN GREY'), ('CYCLOPS', 'PROFESSOR X'), ('JEAN GREY', 'PROFESSOR X'), ('JEAN GREY', 'STORM'),
    ('ROGUE', 'ICEMAN'),
    ('DAREDEVIL', 'ELEKTRA'), ('DAREDEVIL', 'PUNISHER'), ('LUKE CAGE', 'JESSICA JONES'),
    ('LUKE CAGE', 'IRON FIST'), ('JESSICA JONES', 'IRON FIST'),
    ('STAR-LORD', 'GAMORA'), ('STAR-LORD', 'DRAX'), ('STAR-LORD', 'ROCKET RACCOON'), ('STAR-LORD', 'GROOT'),
    ('GAMORA', 'DRAX'), ('GAMORA', 'ROCKET RACCOON'), ('ROCKET RACCOON', 'GROOT'), ('NEBULA', 'GAMORA'),
    ('MR. FANTASTIC', 'INVISIBLE WOMAN'), ('MR. FANTASTIC', 'THING'), ('MR. FANTASTIC', 'HUMAN TORCH'),
    ('INVISIBLE WOMAN', 'THING'), ('INVISIBLE WOMAN', 'HUMAN TORCH'), ('THING', 'HUMAN TORCH'),
    ('BLACK PANTHER', 'STORM'),
    ('PROFESSOR X', 'CAPTAIN AMERICA'), ('SPIDER-MAN', 'DAREDEVIL'), ('HULK', 'ROCKET RACCOON'),
    ('THING', 'HULK'), ('STORM', 'THOR')
]

graph = nx.Graph()
graph.add_edges_from(edges)

print(f"Graph constructed successfully with {graph.number_of_nodes()} nodes and {graph.number_of_edges()} edges.")

Graph constructed successfully with 34 nodes and 51 edges.


3. Train Node2Vec Model

In [4]:
node2vec = Node2Vec(
    graph,
    dimensions=128,
    walk_length=100,
    num_walks=10,
    workers=os.cpu_count()
)

model = node2vec.fit(window=5, min_count=1, sg=1)
print("Node2Vec model trained successfully.")

Computing transition probabilities:   0%|          | 0/34 [00:00<?, ?it/s]

Node2Vec model trained successfully.


4. Task B.a: Query for Similar Nodes

In [5]:
query_node = 'HULK'

try:
    top_10_similar = model.wv.most_similar(query_node, topn=10)
    print(f"\nTop 10 superheroes most similar to '{query_node}':")
    for i, (name, score) in enumerate(top_10_similar, 1):
        print(f"{i}. {name} (Similarity: {score:.4f})")
except KeyError:
    print(f"ERROR: Query node '{query_node}' not found in the model's vocabulary.")



Top 10 superheroes most similar to 'HULK':
1. THOR (Similarity: 0.9135)
2. CAPTAIN AMERICA (Similarity: 0.9087)
3. BLACK WIDOW (Similarity: 0.9006)
4. WAR MACHINE (Similarity: 0.8980)
5. IRON MAN (Similarity: 0.8827)
6. HAWKEYE (Similarity: 0.8818)
7. ROCKET RACCOON (Similarity: 0.8027)
8. THING (Similarity: 0.7970)
9. GROOT (Similarity: 0.7780)
10. GAMORA (Similarity: 0.7478)


5. Task B.b: Visualize Embeddings with PCA

In [6]:
nodes = model.wv.index_to_key
embeddings = model.wv.vectors

pca = PCA(n_components=2)
embeddings_2d = pca.fit_transform(embeddings)

df = pd.DataFrame({
    'name': nodes,
    'x': embeddings_2d[:, 0],
    'y': embeddings_2d[:, 1]
})

fig = px.scatter(
    df,
    x='x',
    y='y',
    text='name',
    title='2D PCA Visualization of Superhero Node Embeddings'
)
fig.update_traces(textposition='top center')
fig.update_layout(height=800, xaxis_title="Principal Component 1", yaxis_title="Principal Component 2")

print("\nGenerating interactive plot")
fig.show()


Generating interactive plot
