In [1]:
import os
import matplotlib.pyplot as plt
import pandas as pd
import torch
import numpy as np
import networkx as nx
import dgl
from cycler import cycler
from umap import UMAP
from scipy.sparse import csr_matrix
from sklearn.decomposition import TruncatedSVD
from graph_env.environ import GraphEnv
from graph_env.agent import DQN, train, eval, noise_decay


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Start from matplotlib's defaults, then apply the notebook-wide plot style
# (grey axes background, white grid, fixed colour cycle, Arial font).
plt.rcdefaults()
plt.rcParams.update({
    'axes.facecolor': 'lightgrey',
    'axes.grid': True,
    'grid.color': 'w',
    'axes.prop_cycle': cycler(color='bgrcmyk'),
    'font.family': 'arial',
})

In [None]:
# ---------------------------------------------------------------------------
# Configuration: every tunable value read by the cells below.
# ---------------------------------------------------------------------------

# --- Graph / environment (n_components feeds TruncatedSVD, the rest GraphEnv) ---
n_components = 25
num_communities = 10
n_iter_no_change = 10
max_iter = 100
min_change = 0.003  # other values tried: 0.002, 0.0025, 0.005

# --- Agent (DQN constructor arguments, evaluation budget, checkpoint path) ---
hidden_dim = 400
eta = 1e-4  # presumably the optimiser learning rate -- confirm in graph_env.agent
mem_size = 2048
num_eval_episodes = 10
save_path = './model_citeseer_3'

# --- Training (all forwarded to train()) ---
burn_in = 512
train_steps = 9
update_freq = 3
batch_size = 16
gamma = 0.99  # presumably the discount factor -- confirm in graph_env.agent
tau = 0.05  # presumably the target-network update rate -- confirm in graph_env.agent
exploration_rate = 0.999
exploration_decay = 1e-5
num_episodes = 2000


In [4]:
# Disabled scratch cell: previews the exploration schedule by applying
# noise_decay once per episode and plotting the resulting trace.
# NOTE(review): as written, re-enabling this rebinds the configuration name
# `gamma` to exploration_decay and then `del`s it, so the later
# train(..., gamma=gamma) call would fail with NameError. Rename the local
# (e.g. to `decay`) before uncommenting.
# expl = float(exploration_rate)
# n_eps = int(num_episodes)
# gamma = float(exploration_decay)
# trace = []
# for episode in range(n_eps):
#     expl = noise_decay(expl, episode, gamma)
#     trace.append(expl)
# plt.plot(trace);
# del expl, n_eps, gamma, trace, episode

In [5]:
# Load the Citeseer dataset and take its first graph.
graph = dgl.data.CiteseerGraphDataset()[0]

# Keep the original node features in sparse form; they are reused later for
# the UMAP layout.
full_features = csr_matrix(graph.ndata['feat'])

# Replace the node features with an n_components-dimensional truncated SVD
# projection. random_state is fixed (same seed as the UMAP reducer further
# down) because TruncatedSVD's default solver is randomised: without a seed
# the features change on every kernel restart, so an agent restored from
# save_path would be fed inputs that differ from the ones it was trained on.
# The trailing .to(...) casts the result to the dtype/device of the original
# feature tensor.
graph.ndata['feat'] = torch.tensor(
    TruncatedSVD(n_components=n_components, random_state=42).fit_transform(full_features)
).to(graph.ndata['feat'])

  NumNodes: 3327
  NumEdges: 9228
  NumFeats: 3703
  NumClasses: 6
  NumTrainingSamples: 120
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.


In [6]:
# Build the environment on the reduced-feature graph using the
# environment settings from the configuration cell.
env = GraphEnv(
    graph,
    num_communities=num_communities,
    n_iter_no_change=n_iter_no_change,
    max_iter=max_iter,
    min_change=min_change,
)
# The adapter is what the agent is constructed from; it is given the
# SVD-reduced node features.
adapter = env.get_adapter(features=graph.ndata['feat'])

In [7]:
# Instantiate the DQN agent on top of the environment adapter.
agent = DQN(adapter, hidden_dim=hidden_dim, eta=eta, mem_size=mem_size)

---
### Training

In [None]:
# Pre-training reference: run the untrained agent for num_eval_episodes and
# show the mean modularity of the partitions it produces.
# Note: `eval` is graph_env.agent.eval (imported at the top), not the builtin.
preds, eval_history = eval(
    env,
    agent,
    num_episodes=num_eval_episodes,
    verbose=False,
)
np.mean([env.compute_modularity(partition) for partition in preds])

In [None]:
# Train the agent with the settings from the configuration cell and keep the
# returned history for the plots below.
train_history = train(
    env=env, agent=agent,
    # replay / update schedule
    burn_in=burn_in, train_steps=train_steps,
    update_freq=update_freq, batch_size=batch_size,
    # learning-target settings
    gamma=gamma, tau=tau,
    # exploration schedule
    exploration_rate=exploration_rate, exploration_decay=exploration_decay,
    # run length, logging, checkpoint location
    num_episodes=num_episodes, verbose=True, save_path=save_path,
)

In [None]:
# Optional shortcut: read a previously saved training history instead of
# re-running the training cell.
# NOTE(review): assumes a 'history.csv' file exists under save_path -- confirm
# that train() writes it there before relying on this.
# train_history = pd.read_csv(os.path.join(save_path, 'history.csv'))

In [None]:
# Mean reward over the steps of each training episode.
# (Use .sum() and the label 'Total Reward per Episode' to plot totals instead.)
reward_per_episode = train_history.groupby('episode')['reward'].mean()
ax = reward_per_episode.plot()
ax.set_ylabel('Average Reward per Episode')
ax.set_xlabel('Episode')
ax.figure.tight_layout()
plt.show()

In [None]:
# Disabled: plot of the per-episode mean of the 'exploration' column.
# NOTE(review): assumes train_history has an 'exploration' column -- confirm
# before enabling.
# train_history.groupby('episode')['exploration'].mean().plot();
# plt.ylabel('Exploration Rate per Episode')
# plt.xlabel('Episode')
# plt.tight_layout()
# plt.show()

In [None]:
# Episode length: number of history rows recorded for each episode.
steps_per_episode = train_history.groupby('episode').size()
ax = steps_per_episode.plot()
ax.set_ylabel('Steps per Episode')
ax.set_xlabel('Episode')
ax.figure.tight_layout()
plt.show()

In [None]:
# Modularity recorded on the last row of each episode (positional last value,
# so a trailing NaN would be kept rather than skipped).
final_modularity = train_history.groupby('episode')['modularity'].apply(
    lambda values: values.iloc[-1]
)
ax = final_modularity.plot()
ax.set_ylabel('Modularity per Episode')
ax.set_xlabel('Episode')
ax.figure.tight_layout()
plt.show()

---
### Evaluation

In [None]:
# Load the agent from save_path, the same location that was handed to train().
# NOTE(review): presumably this restores weights written during training;
# which checkpoint (best or last) is stored there is not visible from this
# notebook -- confirm in graph_env.agent.
agent.load(save_path)

In [None]:
# Evaluate the restored agent with the same episode budget
# (num_eval_episodes) as the pre-training evaluation, so the two mean
# modularities are computed over the same number of runs. The previous
# hard-coded 10 would silently diverge if the configuration were changed.
# Note: `eval` is graph_env.agent.eval (imported at the top), not the builtin.
preds, eval_history = eval(env, agent, num_episodes=num_eval_episodes, verbose=False)
np.mean([env.compute_modularity(p) for p in preds])

In [None]:
# Reference point: treat the dataset's own node labels as a partition and
# score it with the environment's modularity. `lab` is reused by the
# label-coloured network plot further down.
lab = graph.ndata['label'].numpy()
env.compute_modularity(lab)

In [None]:
# Keep the evaluation partition with the highest modularity (first one wins
# on ties) and display its score.
pred = max(preds, key=env.compute_modularity)
env.compute_modularity(pred)

In [None]:
# Graph Laplacian L = D - A, where D is the diagonal matrix of adjacency
# row sums. Built densely, then stored as a sparse matrix.
dense_adjacency = graph.adj().to_dense().numpy()
laplacian_matrix = csr_matrix(
    np.diag(dense_adjacency.sum(axis=1)) - dense_adjacency
)

# SVD encoding of the Laplacian with as many components as the node features
# have columns (only referenced by a commented-out UMAP variant).
encoding_width = graph.ndata['feat'].shape[1]
laplacian_encoded = TruncatedSVD(n_components=encoding_width).fit_transform(laplacian_matrix)

# Drop the temporaries so only the Laplacian and its encoding remain.
del dense_adjacency, encoding_width

In [None]:
# 2-D layout for plotting: UMAP of the raw node features, nudged by a
# down-weighted (1/10) UMAP of the graph Laplacian. The same reducer object is
# fitted twice, once per input.
reducer = UMAP(n_components=2, n_neighbors=25, random_state=42)
feature_layout = reducer.fit_transform(full_features)
structure_layout = reducer.fit_transform(laplacian_matrix)
embedding = feature_layout + structure_layout / 10
# Alternatives that were tried:
# embedding = reducer.fit_transform(laplacian_matrix)
# embedding = reducer.fit_transform(np.hstack([graph.ndata['feat'].numpy(), laplacian_encoded]))

In [None]:
# Scatter the 2-D layout, one plot call (and hence one colour from the
# current cycle) per predicted community.
for community in np.unique(pred):
    members = embedding[pred == community]
    plt.plot(members[:, 0], members[:, 1], '.')
plt.show()

In [None]:
# Draw the graph with nodes placed at their 2-D embedding and coloured by
# predicted community.
pos = {i: embedding[i] for i in range(graph.num_nodes())}  # node id -> (x, y)
colors = 'bgrcmy'
G = nx.Graph(graph.to_networkx())
plt.rcdefaults()  # drop the grey notebook style for the network drawing
fig, ax = plt.subplots()
nx.draw_networkx_edges(G, pos=pos, alpha=0.1, ax=ax)
for i, p in enumerate(np.unique(pred)):
    # The palette has 6 entries, but the environment is configured with
    # num_communities = 10, so a prediction may contain more communities than
    # colours. Cycle through the palette instead of indexing past its end
    # (which raised IndexError); colours repeat beyond the sixth community.
    nx.draw_networkx_nodes(
        G.subgraph(np.where(pred == p)[0]),
        pos=pos,
        ax=ax,
        node_size=10,
        node_color=colors[i % len(colors)],
        alpha=0.55,
    )

In [None]:
# Same drawing as for the predictions, but coloured by the dataset labels.
# Reuses the `pos` layout built in the prediction plot cell.
colors = 'byrcmg'
G = nx.Graph(graph.to_networkx())
plt.rcdefaults()  # plain matplotlib style for the network drawing
fig, ax = plt.subplots()
nx.draw_networkx_edges(G, pos=pos, ax=ax, alpha=0.1)
for i, label in enumerate(np.unique(lab)):
    label_nodes = np.where(lab == label)[0]
    nx.draw_networkx_nodes(
        G.subgraph(label_nodes),
        pos=pos,
        ax=ax,
        node_size=10,
        node_color=colors[i],
        alpha=0.55,
    )

---
### Baseline

In [None]:
# NetworkX view of the DGL graph, used as input to the Louvain baseline.
# NOTE(review): the plotting cells wrap this in nx.Graph(...), this cell does
# not, so Louvain runs on whatever graph type DGL returns -- confirm that is
# intended.
G = graph.to_networkx()

In [None]:
# Louvain baseline: one community id per node.
# The seed is fixed (same value as the UMAP reducer) because Louvain is
# randomised; unseeded, the baseline partition and its modularity change on
# every run.
communities = nx.community.louvain_communities(G, resolution=0.0001, seed=42)

louvain = np.zeros(graph.num_nodes(), dtype=int)
for community_id, comm in enumerate(communities):
    louvain[list(comm)] = community_id

# Index of the last community (number of communities - 1). Set explicitly
# rather than left over from the loop; the next cell reads it as idx + 1.
idx = len(communities) - 1
idx

In [None]:
# Score the Louvain partition with a GraphEnv built on the same graph, so the
# baseline goes through the same compute_modularity call as the agent's
# partitions.
# idx is the last community index from the Louvain cell, hence idx + 1
# communities.
env2 = GraphEnv(env.graph, num_communities=idx+1)
env2.compute_modularity(louvain)

---