In [1]:
# Data preparation
# We create a dataset consisting of multiple random graphs and their associated data features.

using GraphNeuralNetworks, Graphs, Flux, CUDA, Statistics, MLUtils
using Flux: DataLoader

# Collect 1000 random graphs produced by rand_graph(10, 40); each carries a
# 16×10 Float32 node-feature matrix `x` and a scalar Float32 target `y`.
all_graphs = GNNGraph[
    rand_graph(10, 40;
               ndata = (; x = randn(Float32, 16, 10)),  # input node features
               gdata = (; y = randn(Float32)))          # regression target
    for _ in 1:1000
]

In [2]:
#Model building
#We concisely define our model as a GNNChain containing two graph convolutional layers. 
#If CUDA is available, our model will live on the gpu.

# Choose the GPU mover when CUDA is usable; otherwise stay on the CPU.
device = if CUDA.functional()
    Flux.gpu
else
    Flux.cpu
end;

# GCNConv -> BatchNorm -> ReLU -> GCNConv (with ReLU) -> mean pooling -> linear readout.
model = device(
    GNNChain(
        GCNConv(16 => 64),
        BatchNorm(64),            # batch normalization on node features (nodes dimension is batch dimension)
        x -> relu.(x),
        GCNConv(64 => 64, relu),
        GlobalPool(mean),         # aggregate node-wise features into graph-wise features
        Dense(64, 1),
    ),
)

# Optimiser state for Adam (learning rate 1f-4) attached to `model`.
opt = Flux.setup(Adam(1f-4), model)

(layers = ((weight = [32mLeaf(Adam(0.0001, (0.9, 0.999), 1.0e-8), [39m(Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0], Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0], (0.9, 0.999))[32m)[39m, bias = [32mLeaf(Adam(0.0001, (0.9, 0.999), 1.0e-8), [39m(Float32[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], Float32[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], (0.9, 0.999))[32m)[39m, σ = (), add_self_loops = (), use_edge_weight = ()), (λ = (), β = [32mLeaf(Adam(0.0001, (0.9, 0.999), 1.0e-8), [39m(Float32[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], Float32[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], (0.9, 0.999))[32m)[39m, γ = [32mLeaf(Adam(0.0001, (0.9, 0.999), 1.0e-8

In [3]:
#Training
#The  Flux training pipeline is used for our dataset. 
#We use Flux's DataLoader to iterate over mini-batches of graphs that are glued together into a 
#single GNNGraph using the Flux.batch method. 

In [4]:
# Keep 80% of the graphs for training and the remainder for testing.
train_graphs, test_graphs = MLUtils.splitobs(all_graphs, at=0.8)

# With collate=true every mini-batch of 32 graphs is delivered as one batched graph.
train_loader = DataLoader(train_graphs; batchsize=32, shuffle=true, collate=true)
test_loader = DataLoader(test_graphs; batchsize=32, shuffle=false, collate=true)

# Mean squared error between the model output (flattened to a vector) and the targets `g.y`.
function loss(model, g::GNNGraph)
    prediction = vec(model(g, g.x))
    return mean((prediction - g.y) .^ 2)
end

# Mean of the per-batch losses over every batch yielded by `loader`.
loss(model, loader) = mean(loss(model, device(graph_batch)) for graph_batch in loader)

for epoch in 1:100
    for graph_batch in train_loader
        graph_batch = device(graph_batch)
        # Differentiate the batch loss with respect to the model parameters.
        grads = gradient(model) do m
            loss(m, graph_batch)
        end
        Flux.update!(opt, model, grads[1])
    end

    # Report the loss on both splits once per epoch.
    @info (; epoch, train_loss=loss(model, train_loader), test_loss=loss(model, test_loader))
end

┌ Info: (epoch = 1, train_loss = 1.0213763f0, test_loss = 1.1829649f0)
└ @ Main c:\Users\victo\OneDrive\Documents\Desktop_New\Dissertation_Project_Parts\eSGC_Code_Parts\jl_notebook_cell_df34fa98e69747e1a8f8a730347b8e2f_W3sZmlsZQ==.jl:19
┌ Info: (epoch = 2, train_loss = 1.0450398f0, test_loss = 1.2269471f0)
└ @ Main c:\Users\victo\OneDrive\Documents\Desktop_New\Dissertation_Project_Parts\eSGC_Code_Parts\jl_notebook_cell_df34fa98e69747e1a8f8a730347b8e2f_W3sZmlsZQ==.jl:19
┌ Info: (epoch = 3, train_loss = 1.0291452f0, test_loss = 1.2219772f0)
└ @ Main c:\Users\victo\OneDrive\Documents\Desktop_New\Dissertation_Project_Parts\eSGC_Code_Parts\jl_notebook_cell_df34fa98e69747e1a8f8a730347b8e2f_W3sZmlsZQ==.jl:19
┌ Info: (epoch = 4, train_loss = 1.0114286f0, test_loss = 1.2103567f0)
└ @ Main c:\Users\victo\OneDrive\Documents\Desktop_New\Dissertation_Project_Parts\eSGC_Code_Parts\jl_notebook_cell_df34fa98e69747e1a8f8a730347b8e2f_W3sZmlsZQ==.jl:19
┌ Info: (epoch = 5, train_loss = 0.9969785f0, test_l

In [5]:
# We now derive the heterogeneity of the eSCG using a synthetic dataset.

# Heterogeneous graphs (also called heterographs) are graphs where each node has a type,
# which we denote with symbols such as :user and :location.
# Relations such as :rate or :like can connect nodes of different types.
# We call a triple (source_node_type, relation_type, target_node_type)
# the type of an edge, e.g. (:user, :rate, :location).

# We use the GNNHeteroGraph type from GraphNeuralNetworks.jl to implement these features.

# Start from an empty heterograph: it has no node types and no relations yet.
g = GNNHeteroGraph()


GNNHeteroGraph:
  num_nodes: Dict()
  num_edges: Dict()

In [6]:
# We define the heterogeneity using the following synthetic data:
# two relations, (:user, :like, :actor) and (:user, :rate, :location), that share the :user source type.
vic = GNNHeteroGraph((:user, :like, :actor) => ([1,2,2,3], [1,3,2,9]),
                          (:user, :rate, :location) => ([1,1,2,3], [7,13,5,7]))

GNNHeteroGraph:
  num_nodes: Dict(:actor => 9, :location => 13, :user => 3)
  num_edges: Dict((:user, :like, :actor) => 4, (:user, :rate, :location) => 4)

In [7]:
# We decided to rebuild `vic` with only the (:user, :rate, :location) relation.
vic = GNNHeteroGraph((:user, :rate, :location) => ([1,1,2,3], [7,13,5,7]))

GNNHeteroGraph:
  num_nodes: Dict(:location => 13, :user => 3)
  num_edges: Dict((:user, :rate, :location) => 4)

In [8]:
# We can add new relations as follows using add_edges.
# The new relation is added to the existing graph `vic` (not to the empty graph `g`),
# so the (:user, :rate, :location) relation is kept alongside (:user, :like, :actor).

vic = add_edges(vic, (:user, :like, :actor) => ([1,2,3,3,3], [3,5,1,9,4]))


GNNHeteroGraph:
  num_nodes: Dict(:actor => 9, :location => 13, :user => 3)
  num_edges: Dict((:user, :like, :actor) => 5, (:user, :rate, :location) => 4)

In [9]:
# Let's call add_edges once more. This call starts from the empty graph `g`,
# so the result contains only the (:user, :like, :actor) relation.
vic = add_edges(g, (:user, :like, :actor) => ([1,5,4,3,3], [3,6,1,8,4]))

GNNHeteroGraph:
  num_nodes: Dict(:actor => 8, :user => 5)
  num_edges: Dict((:user, :like, :actor) => 5)

In [10]:
# Now generate a random bipartite heterograph:
# 10 nodes of type :A, 15 nodes of type :B, and 20 edges in each direction.

vic = rand_bipartite_heterograph((10, 15), 20)

GNNHeteroGraph:
  num_nodes: Dict(:A => 10, :B => 15)
  num_edges: Dict((:A, :to, :B) => 20, (:B, :to, :A) => 20)

In [11]:
# Let us perform simple queries on the heterographs.
# The graph is bound to `g` because the cells below that use `g` need the
# (:user, :rate, :location) relation, which an empty GNNHeteroGraph() does not have.
g = GNNHeteroGraph((:user, :rate, :location) => ([1,1,2,3], [7,13,5,7]))

GNNHeteroGraph:
  num_nodes: Dict(:location => 13, :user => 3)
  num_edges: Dict((:user, :rate, :location) => 4)

In [12]:
vic.num_nodes

Dict{Symbol, Int64} with 2 entries:
  :A => 10
  :B => 15

In [13]:
vic.num_edges

Dict{Tuple{Symbol, Symbol, Symbol}, Int64} with 2 entries:
  (:A, :to, :B) => 20
  (:B, :to, :A) => 20

In [19]:
# source and target node for a given relation
edge_index(g, (:user, :rate, :location))

([1, 1, 2, 3], [7, 13, 5, 7])

In [15]:
# node types
vic.ntypes

2-element Vector{Symbol}:
 :A
 :B

In [16]:
# edge types
vic.etypes

2-element Vector{Tuple{Symbol, Symbol, Symbol}}:
 (:A, :to, :B)
 (:B, :to, :A)

In [17]:
# `vic` is the random bipartite graph: its node types are :A (10 nodes) and :B,
# so node features are attached to :A rather than to a non-existent :user type.
vic[:A].x = rand(Float32, 64, 10);

In [18]:
# node features for the 3 :user nodes of `g`
g[:user].x = rand(Float32, 64, 3);

In [20]:
# node features for the 13 :location nodes of `g`
g[:location].z = rand(Float32, 64, 13);

In [23]:
# edge features for the 4 edges of the (:user, :rate, :location) relation
g[:user, :rate, :location].e = rand(Float32, 64, 4);

In [24]:
g

GNNHeteroGraph:
  num_nodes: Dict(:location => 13, :user => 3)
  num_edges: Dict((:user, :rate, :location) => 4)
  ndata:
	:location  =>  DataStore(z = [64×13 Matrix{Float32}])
	:user  =>  DataStore(x = [64×3 Matrix{Float32}])
  edata:
	(:user, :rate, :location)  =>  DataStore(e = [64×4 Matrix{Float32}])

In [22]:
# Heterographs can be batched together: 32 random bipartite graphs are merged
# into a single GNNHeteroGraph whose node and edge counts are the per-graph sums.
gs = [rand_bipartite_heterograph((5, 10), 20) for _ in 1:32];
Flux.batch(gs)

GNNHeteroGraph:
  num_nodes: Dict(:A => 160, :B => 320)
  num_edges: Dict((:A, :to, :B) => 640, (:B, :to, :A) => 640)
  num_graphs: 32

In [21]:
# Batching is performed automatically by the DataLoader iterator when `collate=true`.
using Flux: DataLoader

# 320 random bipartite heterographs, each with a 3×5 Float32 feature matrix on the :A nodes.
data = map(1:320) do _
    rand_bipartite_heterograph((5, 10), 20;
                               ndata = Dict(:A => rand(Float32, 3, 5)))
end;

train_loader = DataLoader(data; batchsize=16, shuffle=true, collate=true)

# Each collated batch merges 16 graphs, hence 16 * 5 = 80 nodes of type :A.
for batched in train_loader
    @assert batched.num_graphs == 16
    @assert batched.num_nodes[:A] == 80
    @assert size(batched.ndata[:A].x) == (3, 80)
    # ...
end

In [25]:
# Heterogeneous graphs
# Heterogeneous graphs are constructed as follows:
#GNNHeteroGraph(data; [ndata, edata, gdata, num_nodes])
#GNNHeteroGraph(pairs...; [ndata, edata, gdata, num_nodes])


#Constructor Arguments definition

#data: A dictionary or an iterable object that maps (source_type, edge_type, target_type) triples to (source, target) index vectors (or to (source, target, weight) if also edge weights are present).
#pairs: Passing multiple relations as pairs is equivalent to passing data=Dict(pairs...).
#ndata: Node features. A dictionary of arrays or named tuple of arrays. The size of the last dimension of each array must be given by g.num_nodes.
#edata: Edge features. A dictionary of arrays or named tuple of arrays. Default nothing. The size of the last dimension of each array must be given by g.num_edges. Default nothing.
#gdata: Graph features. An array or named tuple of arrays whose last dimension has size num_graphs. Default nothing.
#num_nodes: The number of nodes for each type. If not specified, inferred from data. Default nothing.

In [26]:
using GraphNeuralNetworks

# Number of nodes of each type.
nA, nB = 10, 20;
num_nodes = Dict(:A => nA, :B => nB);

# Random (source, target) index vectors: 20 edges from :A to :B and 30 edges from :B to :A.
edges1 = (rand(1:nA, 20), rand(1:nB, 20))
edges2 = (rand(1:nB, 30), rand(1:nA, 30))

# One pair per relation, keyed by the (source_type, edge_type, target_type) triple.
data = ((:A, :rel1, :B) => edges1, (:B, :rel2, :A) => edges2);

hg = GNNHeteroGraph(data; num_nodes = num_nodes)
hg.num_edges

# Let's add some node features: a named tuple of arrays for :A and a single array for :B.
ndata = Dict(:A => (x = rand(2, nA), y = rand(3, num_nodes[:A])),
             :B => rand(10, nB));

hg = GNNHeteroGraph(data; num_nodes = num_nodes, ndata = ndata)

# Access the `x` features of the nodes of type :A.
hg.ndata[:A].x

2×10 Matrix{Float64}:
 0.241043  0.108609  0.203528  0.794064  …  0.276927  0.440398  0.0284367
 0.633331  0.379275  0.18035   0.635819     0.247961  0.491059  0.260467

In [27]:
# HeteroGraphConv 
# Heterogeneous Graph Convolutions
# Heterogeneous graph convolutions are implemented in the type HeteroGraphConv. 
#HeteroGraphConv relies on standard graph convolutional layers to perform message passing on the different relations.

# It is defined as the following

#HeteroGraphConv(itr; aggr = +)
#HeteroGraphConv(pairs...; aggr = +)

#A convolutional layer for heterogeneous graphs.

    #The itr argument is an iterator of pairs of the form edge_t => layer, where edge_t is a 3-tuple of the form (src_node_type, edge_type, dst_node_type), and layer is a convolutional layer for homogeneous graphs.
    
    #Each convolution is applied to the corresponding relation. Since a node type can be involved in multiple relations, the single convolution outputs have to be aggregated using the aggr function. The default is to sum the outputs.


#The forward arguments are defined as follows:

    #gvic::GNNHeteroGraph: The input graph.
    #x::Union{NamedTuple,Dict}: The input node features. The keys are node types and the values are node feature tensors.

   

In [28]:
# We now apply the layer to a synthetic dataset: a random bipartite heterograph
# with 10 nodes of type :A, 15 nodes of type :B and 20 edges in each direction.
vic = rand_bipartite_heterograph((10, 15), 20)

GNNHeteroGraph:
  num_nodes: Dict(:A => 10, :B => 15)
  num_edges: Dict((:A, :to, :B) => 20, (:B, :to, :A) => 20)

In [29]:
# Input node features: one 64-row Float32 matrix per node type, with one column per node.
x = (A = rand(Float32, 64, 10), B = rand(Float32, 64, 15));

In [30]:
# One GraphConv (64 => 32, relu) for each of the two relations of the bipartite graph.
layer = HeteroGraphConv((:A, :to, :B) => GraphConv(64 => 32, relu),
                               (:B, :to, :A) => GraphConv(64 => 32, relu));

In [31]:
y = layer(vic, x); # output is a named tuple

In [32]:
# Check the output shapes: 32 features for each of the 10 :A nodes and 15 :B nodes.
size(y.A) == (32, 10) && size(y.B) == (32, 15)

true

In [33]:
# We proceed to implement the temporal nature of graphs.
#https://arxiv.org/pdf/2101.00414
#https://arxiv.org/pdf/1006.5169

#A temporal graph is a time-varying graph with time-varying features; several methods are available to manipulate it.

#It is presented by 
#TemporalSnapshotsGNNGraph(snapshots::AbstractVector{<:GNNGraph})

#TemporalSnapshotsGNNGraph can store the feature array associated to the graph itself as a DataStore object, and it uses the DataStore objects of each snapshot for the node and edge features. 
#The features can be passed at construction time or added later.

#The constructor argument is the sequence of graphs over time:
#snapshots: a vector of snapshots, where each snapshot must have the same number of nodes.

In [34]:
using GraphNeuralNetworks

In [35]:
# 40 snapshots, each generated independently with rand_graph(10, 20).
snapshots = [rand_graph(10,20) for i in 1:40];

In [36]:
# Wrap the vector of snapshots into a single temporal graph.
vic1 = TemporalSnapshotsGNNGraph(snapshots)

TemporalSnapshotsGNNGraph:
  num_nodes: [10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10]
  num_edges: [20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20]
  num_snapshots: 40

In [37]:
# Attach a feature to the temporal graph as a whole; it is stored under `tgdata`.
vic1.tgdata.x = rand(4); # add temporal graph feature

In [38]:
# Display the graph again: `tgdata` now lists the feature `x`.
vic1

TemporalSnapshotsGNNGraph:
  num_nodes: [10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10]
  num_edges: [20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20]
  num_snapshots: 40
  tgdata:
	x = 4-element Vector{Float64}

In [39]:
# NOTE(review): this `snapshots` vector is not used by the add_snapshot call below,
# which operates on `vic1`.
snapshots = [rand_graph(10,20), rand_graph(10,14), rand_graph(10,22)];

In [40]:
# Let's add a new snapshot at time t=40
new_vic1 = add_snapshot(vic1, 40, rand_graph(10, 16)) # insert a new snapshot at time 40; the former snapshot 40 moves to position 41

TemporalSnapshotsGNNGraph:
  num_nodes: [10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10]
  num_edges: [20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 16, 20]
  num_snapshots: 41
  tgdata:
	x = 4-element Vector{Float64}

In [41]:
using GraphNeuralNetworks

# Three snapshots over the same 10 nodes, with 20, 14 and 22 edges respectively.
snapshots = [rand_graph(10,20), rand_graph(10,14), rand_graph(10,22)];

In [42]:
# Build a second temporal graph from these three snapshots.
vic2 = TemporalSnapshotsGNNGraph(snapshots)

TemporalSnapshotsGNNGraph:
  num_nodes: [10, 10, 10]
  num_edges: [20, 14, 22]
  num_snapshots: 3

In [43]:
# The result (bound to new_vic2) keeps the snapshots with 20 and 22 edges; the 14-edge one is dropped.
new_vic2 = remove_snapshot(vic2, 2) # remove snapshot at time 2

TemporalSnapshotsGNNGraph:
  num_nodes: [10, 10]
  num_edges: [20, 22]
  num_snapshots: 2

In [44]:
#Random generators on TemporalSnapshotsGNNGraph 

#rand_temporal_radius_graph(number_nodes::Int, 
                           #number_snapshots::Int,
                           #speed::AbstractFloat,
                           #r::AbstractFloat;
                           #self_loops = false,
                           #dir = :in,
                           #kws...)

In [45]:
#Create a random temporal graph given number_nodes nodes and number_snapshots snapshots. 
#First, the positions of the nodes are randomly generated in the unit square. 
#Two nodes are connected if their distance is less than a given radius r. 
#    Each following snapshot is obtained by applying the same construction to new positions obtained as follows. 
#    For each snapshot, the new positions of the points are determined by applying random independent displacement 
#    vectors to the previous positions. The direction of the displacement is chosen uniformly at random 
#   and its length is chosen uniformly in [0, speed]. Then the connections are recomputed. 
#    If a point happens to move outside the boundary, its position is updated as if it had bounced off the boundary.


#Definitions

#number_nodes: The number of nodes of each snapshot.
#number_snapshots: The number of snapshots.
#speed: The speed to update the nodes.
#r: The radius of connection.
#self_loops: If true, consider the node itself among its neighbors, in which case the graph will contain self-loops.
#dir: The direction of the edges. If dir=:in edges go from the neighbors to the central node. If dir=:out we have the opposite direction.
#kws: Further keyword arguments will be passed to the GNNGraph constructor of each snapshot.

In [46]:
# Let's start: 10 nodes, 5 snapshots, speed 0.1 and connection radius 1.5.
n, snaps, s, r = 10, 5, 0.1, 1.5;

In [47]:
# r = 1.5 is larger than the greatest possible distance in the unit square (√2 ≈ 1.41),
# so every pair of nodes is connected in every snapshot.
vic3 = rand_temporal_radius_graph(n,snaps,s,r) # complete graph at each snapshot

TemporalSnapshotsGNNGraph:
  num_nodes: [10, 10, 10, 10, 10]
  num_edges: [90, 90, 90, 90, 90]
  num_snapshots: 5

In [48]:
#Lets implement random temporal hyperbolic graph
#rand_temporal_hyperbolic_graph(number_nodes::Int, 
 #                              number_snapshots::Int;
 #                              α::Real,
 #                             R::Real,
 #                            speed::Real,
 #                           ζ::Real=1,
 #                            self_loop = false,
 #                            kws...)



 #Create a random temporal graph given number_nodes nodes and number_snapshots snapshots. 
 #First, the positions of the nodes are generated with a quasi-uniform distribution (depending on the parameter α)
 # in hyperbolic space within a disk of radius R. Two nodes are connected if their hyperbolic distance is less than R. 
 #Each following snapshot is created in order to keep the same initial distribution.


# Definitions:


#number_nodes: The number of nodes of each snapshot.
#number_snapshots: The number of snapshots.
#α: The parameter that controls the position of the points. If α=ζ, the points are uniformly distributed on the disk of radius R. If α>ζ, the points are more concentrated in the center of the disk. If α<ζ, the points are more concentrated at the boundary of the disk.
#R: The radius of the disk and of connection.
#speed: The speed to update the nodes.
#ζ: The parameter that controls the curvature of the disk.
#self_loops: If true, consider the node itself among its neighbors, in which case the graph will contain self-loops.
#kws: Further keyword arguments will be passed to the GNNGraph constructor of each snapshot.

In [49]:
# Parameters for the hyperbolic generator: 10 nodes, 5 snapshots, disk radius R = 4.0, speed 0.1.
# Here α = ζ = 1.0, the case described above as points uniformly distributed on the disk.
n, snaps, α, R, speed, ζ = 10, 5, 1.0, 4.0, 0.1, 1.0;

In [50]:
# α, R, speed and ζ are passed as keyword arguments taken from the variables of the same name.
vic4 = rand_temporal_hyperbolic_graph(n, snaps; α, R, speed, ζ)

TemporalSnapshotsGNNGraph:
  num_nodes: [10, 10, 10, 10, 10]
  num_edges: [22, 20, 16, 16, 18]
  num_snapshots: 5