In [None]:
# Install StellarGraph (with its "demos" extras) only when running on Google Colab.
# The version is pinned to 1.2.1, the same version string the next cell validates.
import sys
if 'google.colab' in sys.modules:
  %pip install -q stellargraph[demos]==1.2.1

In [None]:
# verify that we're using the correct version of StellarGraph for this notebook
import stellargraph as sg

try:
    sg.utils.validate_notebook_version("1.2.1")
except AttributeError:
    # An AttributeError here is treated as "the installed StellarGraph does not
    # provide this helper", and is reported as a version mismatch instead.
    # `from None` hides the original AttributeError from the traceback.
    raise ValueError(
        f"This notebook requires StellarGraph version 1.2.1, but a different version {sg.__version__} is installed.  Please see <https://github.com/stellargraph/stellargraph/issues/1172>."
    ) from None

In [None]:
from stellargraph import StellarGraph
import stellargraph as sg
from stellargraph.data import EdgeSplitter
from stellargraph.mapper import FullBatchNodeGenerator, FullBatchLinkGenerator
from stellargraph.layer import GCN, LinkEmbedding

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn import preprocessing, feature_extraction, model_selection

from stellargraph import globalvar
from stellargraph import datasets
from IPython.display import display, HTML
%matplotlib inline

In [None]:
import collections
import numpy as np
import pandas as pd
import pickle
from random import uniform as random_uniform
from scipy.spatial import distance
from scipy.stats import spearmanr
from sklearn.preprocessing import StandardScaler

<table><tr><td>Run the latest release of this notebook:</td><td><a href="https://mybinder.org/v2/gh/stellargraph/stellargraph/master?urlpath=lab/tree/demos/basics/loading-pandas.ipynb" alt="Open In Binder" target="_parent"><img src="https://mybinder.org/badge_logo.svg"/></a></td><td><a href="https://colab.research.google.com/github/stellargraph/stellargraph/blob/master/demos/basics/loading-pandas.ipynb" alt="Open In Colab" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg"/></a></td></tr></table>

In [None]:
# Mount Google Drive at /content/drive; the pickled inputs read by later cells
# live under '/content/drive/My Drive/Colab Notebooks/'.
# NOTE: `google.colab` is provided by the Colab runtime, so this cell is Colab-only.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Map each venue category name to its integer index (0-9), in the listed order.
cate_to_idx = {
    category: position
    for position, category in enumerate(
        (
            'Nightlife Spot',
            'Food',
            'Travel & Transport',
            'Professional & Other Places',
            'Shop & Service',
            'Residence',
            'Outdoors & Recreation',
            'Arts & Entertainment',
            'College & University',
            'Event',
        )
    )
}

In [None]:
# Dataset selectors: these strings are spliced into the file names of the
# pickles loaded below and into the file name of the saved graph.
city = 'chicago'
period = 'midday'

In [None]:
# Adjacency matrix for the selected city and period. Later cells iterate it row
# by row and index each row by column label to read an edge weight.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
adj_matrix = pd.read_pickle('/content/drive/My Drive/Colab Notebooks/' + city + '_' + period + '_adj_matrix_business.pkl')

In [None]:
# Per-node feature table for the selected city. Its index supplies the node ids
# (see `postal_set`) and the table itself becomes the node data of the graph.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
cate_features = pd.read_pickle('/content/drive/My Drive/Colab Notebooks/cate_features_' + city + '.pkl')

In [None]:
# Ids of all nodes that have features, as a set for fast membership tests
# while the edge list is built.
postal_set = set(cate_features.index)

In [None]:
# Standardise each feature column with scikit-learn's StandardScaler (default settings).
# The result is kept as a separate array; the next cell writes it back into
# `cate_features` column by column.
scaler = StandardScaler()
cate_features_array = scaler.fit_transform(cate_features)

In [None]:
# Write the standardised values back into the feature table, one category
# column at a time.
# NOTE(review): column `idx` of the scaled array is assumed to belong to
# category `cate`, i.e. the table's columns are ordered as in `cate_to_idx` —
# TODO confirm against the pickled table.
for cate, idx in cate_to_idx.items():
  cate_features[cate] = cate_features_array[:, idx]

In [None]:
# Alias, not a copy: `postal_nodes` and `cate_features` refer to the same object.
postal_nodes = cate_features

In [None]:
# Display the adjacency matrix for a visual sanity check.
adj_matrix

In [None]:
# Edges to keep for the selected city. The next cell tests
# `(out_postal, in_postal)` tuples for membership in this object; its container
# type is not visible here.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
all_selected_edges = pd.read_pickle('/content/drive/My Drive/Colab Notebooks/' + city + '_all_selected_edges.pkl')

In [None]:
# Build the edge list and the per-edge weights.
#
# A pair (out_postal, in_postal) is kept only when both endpoints have node
# features (membership in `postal_set`) and the pair is in
# `all_selected_edges`. Its weight is read from the adjacency matrix at row
# `out_postal`, column `in_postal`; the weights are used as training labels later.
out_postals = []
in_postals = []
postal_pair_to_weight = {}

# Iterating a set follows hash order, which for str elements changes between
# kernel sessions. Sorting once keeps the edge order reproducible, along with
# everything derived from it (the saved graph, `edge_ids`, positional checks).
ordered_postals = sorted(postal_set)

for out_postal, record in adj_matrix.iterrows():
  if out_postal not in postal_set:
    continue
  for in_postal in ordered_postals:
    if (out_postal, in_postal) not in all_selected_edges:
      continue
    weight = record[in_postal]
    out_postals.append(out_postal)
    in_postals.append(in_postal)
    postal_pair_to_weight[(out_postal, in_postal)] = weight

In [None]:
# Sanity check: print the sizes of the three edge containers, then require that
# the number of collected edges equals the number of selected edges.
# Presumably the entries of `all_selected_edges` are unique pairs, in which case
# this means no selected edge was dropped — TODO confirm.
print(len(out_postals), len(in_postals), len(postal_pair_to_weight))
assert len(postal_pair_to_weight) == len(all_selected_edges)

In [None]:
# postal_pair_to_weight[('60637', '60655')]

In [None]:
# Edge table with one row per kept pair: "source" is the out-node, "target" the in-node.
postal_edges = pd.DataFrame(dict(source=out_postals, target=in_postals))
postal_edges

In [None]:
# Build the graph with a single node type "corner" (data: `postal_nodes`) and a
# single edge type "line" (data: `postal_edges`), then print its summary.
# NOTE(review): `StellarGraph` (as opposed to `StellarDiGraph`) looks like the
# undirected variant — confirm that is intended for these out -> in pairs.
G = StellarGraph(
    {"corner": postal_nodes}, {"line": postal_edges}
)
print(G.info())

In [None]:
# Persist the graph to Drive, named after the selected city and period.
# The `with` block closes the file even if pickling raises, so no handle leaks
# and no half-open file is left behind.
with open('/content/drive/My Drive/Colab Notebooks/G_' + city + '_' + period + '.pkl', "wb") as f:
  pickle.dump(G, f)

In [None]:
# Split the pair -> weight mapping into two aligned arrays:
# `edge_ids` holds one [source, target] row per pair, `labels` the matching weight.
edge_ids = np.asarray(
    [[source, target] for source, target in postal_pair_to_weight]
)
labels = np.asarray(list(postal_pair_to_weight.values()))

In [None]:
# Full-batch link generator over the graph, using the "gcn" method, and a flow
# that pairs every [source, target] row in `edge_ids` with its weight in `labels`.
train_gen = FullBatchLinkGenerator(G, method="gcn")
train_flow = train_gen.flow(edge_ids, labels)

In [None]:
# GCN stack: `num_layers` layers of width 32, each followed by a ReLU, no dropout.
num_layers = 4
gcn = GCN(
    generator=train_gen,
    layer_sizes=[32 for _ in range(num_layers)],
    activations=["relu" for _ in range(num_layers)],
    dropout=0.0,
)

In [None]:
# Input and output tensors of the GCN; the Keras models below are assembled from them.
x_inp, x_out = gcn.in_out_tensors()

In [None]:
# Link embedding layer applied to the GCN output, configured with the "concat"
# method and a ReLU activation; the shape is shown as a quick check.
before_prediction = LinkEmbedding(activation="relu", method="concat")(x_out)
before_prediction.shape

In [None]:
# Layer-normalise the link embeddings before the output layer; the shape is
# shown as a quick check.
norm_layer = layers.LayerNormalization()
norm_before_prediction = norm_layer(before_prediction)
norm_before_prediction.shape

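Add the output layer: a single-unit dense layer with linear activation that maps each normalised link embedding to one predicted edge weight. Keras `Layer` reference: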

https://www.tensorflow.org/api_docs/python/tf/keras/layers/Layer

In [None]:
# Single linear unit on top of the normalised link embedding: one scalar per link.
prediction = layers.Dense(units=1, activation="linear")(norm_before_prediction)
prediction.shape

In [None]:
# Flatten everything after the batch axis so the output lines up with the labels.
prediction = layers.Reshape((-1,))(prediction)
prediction.shape

TODO: choose a suitable optimizer for this regression task; Adagrad is used below as a starting point. Available Keras optimizers:

https://keras.io/api/optimizers/

In [None]:
# End-to-end regression model: GCN inputs -> predicted weight per link.
model = keras.Model(inputs=x_inp, outputs=prediction)

# Mean squared error is both the training loss and the reported metric.
model.compile(
    optimizer=keras.optimizers.Adagrad(learning_rate=0.02),
    loss=keras.losses.MeanSquaredError(),
    metrics=[keras.metrics.MeanSquaredError()]
)

In [None]:
# Baseline: evaluate the untrained model on the training flow, so that the
# effect of training can be judged against it.
init_train_metrics = model.evaluate(train_flow)

print("\nTrain Set Metrics of the initial (untrained) model:")
# `metrics_names` and the values returned by `evaluate` are paired up here by position.
for name, val in zip(model.metrics_names, init_train_metrics):
    print("\t{}: {:0.4f}".format(name, val))

In [None]:
# Train on the single training flow; no validation data is passed.
# NOTE: 40000 epochs makes this the expensive cell of the notebook; lower
# `epochs` for a quick smoke run.
epochs = 40000
history = model.fit(
    train_flow, epochs=epochs, verbose='auto', shuffle=False
)

In [None]:
# Plot the metrics recorded in `history` during training.
sg.utils.plot_history(history)

In [None]:
# Predict a weight for every edge in the graph, in the order given by `G.edges()`.
all_predictions = model.predict(train_gen.flow(G.edges()))

In [None]:
# Display the raw predictions.
all_predictions

In [None]:
# Length of the first entry of the predictions.
# assumes the leading axis is a batch axis of size 1, so this is the number of edges — TODO confirm
len(all_predictions[0])

In [None]:
# Sub-model sharing the same inputs but ending at the link embedding,
# used to inspect the features that feed the normalisation layer.
before_prediction_model = keras.Model(inputs=x_inp, outputs=before_prediction)

In [None]:
# Link embeddings for every edge in the graph, in the order given by `G.edges()`.
before_prediction_features = before_prediction_model.predict(train_gen.flow(G.edges()))

In [None]:
# Shape check before the leading axis is dropped in the next cell.
before_prediction_features.shape

In [None]:
# Drop the leading axis by taking its first entry.
# NOTE: this rebinds the same name, so the cell is not idempotent — re-running
# it without re-running the predict cell strips one more axis.
before_prediction_features = before_prediction_features[0]
before_prediction_features.shape

In [None]:
# Spot check: are the embeddings at positions 1 and 100 identical?
# The positions refer to the edge order of `G.edges()`.
np.array_equiv(before_prediction_features[1], before_prediction_features[100])

In [None]:
# Sub-model sharing the same inputs but ending at the raw GCN output,
# i.e. before the link embedding layer is applied.
x_out_model = keras.Model(inputs=x_inp, outputs=x_out)

In [None]:
# GCN outputs for every edge in the graph, in the order given by `G.edges()`.
x_out_features = x_out_model.predict(train_gen.flow(G.edges()))

In [None]:
# Drop the leading axis by taking its first entry.
# NOTE: this rebinds the same name, so the cell is not idempotent — re-running
# it without re-running the predict cell strips one more axis.
x_out_features = x_out_features[0]
x_out_features.shape