<a href="https://colab.research.google.com/github/vladmark/graph-convolutional-networks-/blob/main/GCN%20and%20GAT%20on%20protein%20databases.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 0. Prerequisites

In [None]:
# Mount Google Drive inside the Colab runtime; `basepath` (defined further down) points into this mount.
from google.colab import drive
drive.mount('/content/drive/')

In [None]:
! pip install torchviz
import numpy as np
import pandas as pd
import string
import json
import torch.nn as nn
import torch
from sklearn import preprocessing
import torch.nn.functional as F
import torchviz

In [None]:
# Select the compute device and seed PyTorch's RNG (seed 1) so weight initialisation is repeatable.
use_cuda = torch.cuda.is_available()
torch.manual_seed(1)
device = torch.device("cuda" if use_cuda else "cpu")
# Extra options only populated when CUDA is available — presumably meant to be passed to a DataLoader (TODO confirm usage).
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

**Basepath to project folder in drive**

In [None]:
# Folder on the mounted Drive that holds the ppi-* input files and receives saved models / loss histories.
basepath='/content/drive/My Drive/Proiect Deep/'

# 1. Data

In [None]:
def get_data_from_file(filename):
    """Load and return the JSON document stored at ``filename``.

    Args:
        filename: path of a JSON file readable in text mode.

    Returns:
        The decoded JSON content (dict, list, ...).

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the content is not valid JSON.
    """
    # The context manager closes the handle even when json.load raises.
    with open(filename, "r") as f:
        return json.load(f)

In [None]:
!ls "$basepath"

From https://github.com/williamleif/GraphSAGE

As input, at minimum the code requires that a --train_prefix option is specified which specifies the following data files:

* train_prefix-G.json -- A networkx-specified json file describing the input graph. Nodes have 'val' and 'test' attributes specifying if they are a part of the validation and test sets, respectively.
* train_prefix-id_map.json -- A json-stored dictionary mapping the graph node ids to consecutive integers.
* train_prefix-class_map.json -- A json-stored dictionary mapping the graph node ids to classes.
* train_prefix-feats.npy [optional] --- A numpy-stored array of node features; ordering given by id_map.json. Can be omitted and only identity features will be used.
* train_prefix-walks.txt [optional] --- A text file specifying random walk co-occurrences (one pair per line) (*only for unsupervised version of graphsage)



## Processing data and statistics

In [None]:
# Load the four PPI input files (GraphSAGE format, described in the markdown above) from `basepath`.
class_map=get_data_from_file(basepath+"ppi-class_map.json")  # node id (string key) -> label list
feats=np.load(basepath+"ppi-feats.npy")                       # node feature array, one row per node
G=get_data_from_file(basepath+"ppi-G.json")                   # graph description (nodes, links, flags)
id_map=get_data_from_file(basepath+"ppi-id_map.json")         # node id (string key) -> row index into feats

### Id map

In [None]:
# View over the (node id, index) pairs of id_map; note that this prints the whole mapping.
id_map_list=id_map.items()
print(id_map_list)

For this dataset `id_map` is not useful: node ids are already consecutive integers.

### The graph

In [None]:
# Inspect the top-level keys of the loaded graph JSON.
print(f'graph big structure keys: {G.keys()}')

In [None]:
# Unpack the top-level entries of the graph JSON into separate names used by later cells.
graph=G['graph']
links=G['links']
nodes=G['nodes']
directed=G['directed']
multigraph=G['multigraph']
print(f'graph: {graph}')
print(f'graph keys: {graph.keys()}')
print(f'is it multigraph?: {multigraph}')
print(f'is it directed graph?: {directed}')

In [None]:
# Check which container types the node and link collections use.
print(f'nodes datatype: {type(nodes)}')
print(f'links datatype: {type(links)}')

In [None]:
# Peek at a few node / link records and report the collection sizes.
print(f'first 5 nodes look like: {nodes[:5]}')
print(f'first 5 links look like: {links[:5]}')
print(f'we have {len(nodes)} nodes')
print(f'we have {len(links)} links')

### Classes

In [None]:
# (node id, label list) pairs in class_map's own iteration order.
class_map_list=list(class_map.items())

In [None]:
# Label coverage and per-class statistics.
print(f'{len(class_map_list)} nodes have labeled classes ({"{:.0%}".format(len(class_map_list)/len(nodes))} of all nodes)')
# Number of classes = length of the first node's label list.
no_classes=len(class_map_list[0][1])
print(f'we have {no_classes} classes')
# Mean over nodes (dim 0) of the stacked label lists -> one value per class.
class_freq=torch.mean(torch.FloatTensor(np.array([class_map_list[i][-1] for i in range(len(class_map_list))])) , dim=0)
print(f'class frequencies are: {class_freq} (for check - of len {len(class_freq)})')
print(f'average of class frequencies is: {torch.mean(class_freq)}')
print(f'std of class frequencies is: {torch.std(class_freq)}')

### Features

In [None]:
# Feature dimensionality, one sample row (index 869), and per-feature column means with their mean / std.
print(f'we have {feats.shape[-1]} features')
print(f'a feature sample: {feats[869]}')
print(f'features have frequency: {np.mean(feats, axis=0)}')
print(f'average of feature frequencies: {np.mean(np.mean(feats, axis=0))}')
print(f'std of feature frequencies: {np.std(np.mean(feats, axis=0))}')

In [None]:
# Row indexes whose feature vector is entirely zero.
# Testing "no non-zero entry" (instead of "mean == 0") avoids false positives when positive and
# negative entries cancel out, and the vectorised form replaces a Python loop over every row.
zero_feats_row_indexes=np.flatnonzero(~np.asarray(feats).any(axis=1)).tolist()

In [None]:
# NOTE: despite its name, this collects the ids whose label vector has a NON-zero mean (condition is `!= 0`).
# It is consumed below to find nodes with all-zero features but non-zero labels.
zero_classes_ids=[idx for (idx, idxlabels) in class_map.items() if np.mean(np.array(idxlabels)) != 0]

In [None]:
# class_map keys are strings; convert to int so they compare equal to feature row indexes.
zero_classes_ids=[int(el) for el in zero_classes_ids]
# Membership tests against a set are O(1); testing against the list made this comprehension quadratic.
_zero_classes_id_set=set(zero_classes_ids)
zero_feats_non_zero_classes=[i for i in zero_feats_row_indexes if i in _zero_classes_id_set]

In [None]:
# Share of nodes with an all-zero feature row, and share of those that nevertheless carry labels.
print(f'{"{:.2%}".format(len(zero_feats_row_indexes)/feats.shape[0])} of all nodes have all features 0')
print(f'{"{:.2%}".format(len(zero_feats_non_zero_classes)/feats.shape[0])} of all nodes have all features 0 but output classes non-zero')

In [None]:
# From here on `feats` is a float32 torch tensor instead of the numpy array loaded above (the name is rebound).
feats=torch.FloatTensor(feats)

## Train, val, test **ids** set construction

In [None]:
# Partition node ids using the 'val' / 'test' flags stored on every node record:
# a node is a training node when both flags are False.
train_ids=[node['id'] for node in nodes if node['test']==False and node['val']==False]
val_ids=[node['id'] for node in nodes if node['val']==True]
test_ids=[node['id'] for node in nodes if node['test']==True]
# Sanity check: the three splits together must account for every node
# (message, in Romanian: "train, validation and test sets do not add up to the whole dataset").
assert len(nodes) == len(train_ids)+len(val_ids)+len(test_ids), "Seturile de train, validare, test nu dau tot datasetul"

In [None]:
# Relative size of each split, as a percentage of all nodes.
print(f'train is {"{:.00%}".format(len(train_ids)/len(nodes))} percent of dataset')
print(f'validation is {"{:.00%}".format(len(val_ids)/len(nodes))} percent of dataset')
print(f'test is {"{:.00%}".format(len(test_ids)/len(nodes))} percent of dataset')

In [None]:
# Absolute size of each split (train, validation, test).
print(len(train_ids))
print(len(val_ids))
print(len(test_ids))

## Link dict construction (a usable dictionary for GCN)

Construct a dict which has as
* **node ids** as **keys**
* **adjacent nodes** (as a **set**) as **values**

In [None]:
def construct_adjacency_dict(links, directed, always_self_loop=False ):
  """Build a ``{node_id: set_of_adjacent_node_ids}`` dictionary from a list of links.

  Args:
    links: iterable of mappings, each holding a 'source' and a 'target' node id.
    directed: when falsy, every link is also recorded in the reverse direction
      (target -> source); when truthy only source -> target is recorded.
    always_self_loop: when True, both endpoints of every link are added to their
      own adjacency set.

  Returns:
    dict mapping a node id to the set of node ids adjacent to it. For a directed
    graph without self loops, a node that only ever appears as a target gets no key.
  """
  adj={}
  for link in links:
    source=link['source']
    target=link['target']
    # setdefault creates the entry on first sight, covering all four "already known?" cases at once.
    adj.setdefault(source, set()).add(target)
    if not directed:
      # The reverse edge belongs to undirected graphs only.
      adj.setdefault(target, set()).add(source)
    if always_self_loop:
      adj[source].add(source)
      # In a directed graph the target may not have an entry yet.
      adj.setdefault(target, set()).add(target)
  return adj

In [None]:
# Build the adjacency dict with a self loop on every linked node, then check that every node appears as a key
# (assert message, in Romanian: "we have nodes that are not connected to anything").
adj=construct_adjacency_dict(links, directed, always_self_loop=True)
assert len(adj.keys())==len(nodes), "Avem noduri neconectate la nimic"

In [None]:
# Show one adjacency entry; `adj_dict` is a second name for the same dict object (not a copy).
print(adj[5])
adj_dict=adj
print(type(adj[5]))

## Getting connected components

In [None]:
def connected_components(neighbors):
    """Yield the connected components of a graph, one ``set`` of node ids at a time.

    Args:
        neighbors: mapping ``node -> iterable of adjacent nodes``. Every node that
            appears as a neighbour must also be a key of the mapping.

    Yields:
        set: the nodes of one connected component. Components come out in the
        order in which their first node appears in ``neighbors``.

    Raises:
        KeyError: if a neighbour is not itself a key of ``neighbors``.

    Each component is fully explored before it is yielded, so the result does not
    depend on how the caller consumes it (``list(connected_components(g))`` is safe).
    """
    seen = set()
    for start in neighbors:
        if start in seen:
            continue
        seen.add(start)
        component = {start}
        frontier = [start]
        while frontier:
            node = frontier.pop()
            for neighbor in neighbors[node]:
                if neighbor not in seen:
                    seen.add(neighbor)
                    component.add(neighbor)
                    frontier.append(neighbor)
        yield component
# Materialise every connected component of the graph as a set of node ids.
connected_comps=[set(component) for component in connected_components(adj_dict)]
print(len(connected_comps))

### Component adjacency matrices

In [None]:
# Per connected component, build:
#   * node id -> local index and local index -> node id mappings,
#   * the feature rows and the label rows of its nodes, in local-index order.
# adj_matrices_comps=[]
feats_comps=[]
classes_comps=[]
node_to_index_mappings=[]
index_to_node_mappings=[]
for comp in connected_comps:
  comp=list(comp)  # fixes an ordering; position in this list is the node's local index
  node_to_index_mappings.append({node: position for position, node in enumerate(comp)})
  index_to_node_mappings.append(dict(enumerate(comp)))
  feats_comps.append(feats[comp])
  comp_labels=[class_map[str(node)] for node in comp]
  classes_comps.append(torch.tensor(comp_labels) )
  # adj_matrices_comps.append(torch.tensor([ [1 if i in [reordering[neighbor] for neighbor in adj_dict[node]] else 0 for i in range(len(comp))] for node in comp]) )

In [None]:
# import pickle
# with open(basepath+"adj_matrices_comps", "wb") as file: 
#   pickle.dump(adj_matrices_comps, file)

Load component adj matrix

In [None]:
# Load the pickled per-component adjacency matrices from Drive.
# NOTE(review): pickle.load can execute arbitrary code; only load files you produced yourself.
import pickle
with open(basepath+"adj_matrices_comps", "rb") as file: 
  loaded_adj_matrices_comps=pickle.load(file)

In [None]:
# Second name for the loaded list (same object, no copy or dtype conversion).
adj_matrices_comps=loaded_adj_matrices_comps

In [None]:
# adj_matrices_comps=[matrix.type(torch.FloatTensor) for matrix in loaded_adj_matrices_comps]

In [None]:
# for adj_matrix in adj_matrices_comps:
#   adj_matrix[adj_matrix==0]=float('-inf')

In [None]:
# Number of per-component adjacency matrices that were loaded.
print(len(adj_matrices_comps))

In [None]:
# Average size of the components that contain more than one node.
print(torch.mean(torch.Tensor([len(component) for component in connected_comps if len(component)>1])))

In [None]:
# Number of components that consist of a single node.
print(torch.sum(torch.tensor([len(component)==1 for component in connected_comps])))

In [None]:
# Show the adjacency matrix of the first component.
print(adj_matrices_comps[0])

### Separate each component into train val test

In [None]:
# For every connected component, record which of its local indexes belong to the
# train / val / test split, together with the matching label rows, and count how
# many components contribute at least one node to each split.

# Build each split's id set once; rebuilding it for every component is loop-invariant work.
_train_id_set=set(train_ids)
_val_id_set=set(val_ids)
_test_id_set=set(test_ids)

def _select_component_split(component_id, split_id_set):
  """Return (local indexes, label rows) of the nodes of one component that are in split_id_set.

  Both results are empty lists when the component has no node in the split.
  """
  component_split_ids=set(connected_comps[component_id]) & split_id_set
  if not component_split_ids:
    return [], []
  node_to_index=node_to_index_mappings[component_id]
  indexes=[node_to_index[node_id] for node_id in component_split_ids]
  return indexes, classes_comps[component_id][indexes]

components_train_indexes = []
components_val_indexes = []
components_test_indexes = []
components_classes_train = []
components_classes_val = []
components_classes_test = []
no_train_comps=0
no_val_comps=0
no_test_comps=0
for component_id in range(len(connected_comps)):
  component_train_indexes, comp_classes_train=_select_component_split(component_id, _train_id_set)
  no_train_comps+=bool(component_train_indexes)
  components_train_indexes.append(component_train_indexes)
  components_classes_train.append(comp_classes_train)

  component_val_indexes, comp_classes_val=_select_component_split(component_id, _val_id_set)
  no_val_comps+=bool(component_val_indexes)
  components_val_indexes.append(component_val_indexes)
  components_classes_val.append(comp_classes_val)

  component_test_indexes, comp_classes_test=_select_component_split(component_id, _test_id_set)
  no_test_comps+=bool(component_test_indexes)
  components_test_indexes.append(component_test_indexes)
  components_classes_test.append(comp_classes_test)


In [None]:
class ComponentsSeparatedData():
  """Plain container bundling the per-component train / val / test split data.

  Args:
    components_train_indexes, components_val_indexes, components_test_indexes:
      one entry per connected component with the local indexes in that split.
    components_classes_train, components_classes_val, components_classes_test:
      one entry per connected component with the label rows for that split.
    no_train_comps, no_val_comps, no_test_comps: number of components that
      contribute at least one node to the respective split.

  List arguments that are omitted (or passed as None) default to a fresh empty
  list per instance, so instances never share state through a default argument.
  Lists that are passed in are stored as-is (not copied).
  """
  def __init__(self, components_train_indexes = None, components_val_indexes = None, components_test_indexes = None,
                components_classes_train = None, components_classes_val = None, components_classes_test = None, no_train_comps=0, no_val_comps=0, no_test_comps=0):
    self.components_train_indexes=components_train_indexes if components_train_indexes is not None else []
    self.components_val_indexes=components_val_indexes if components_val_indexes is not None else []
    self.components_test_indexes=components_test_indexes if components_test_indexes is not None else []
    self.components_classes_train=components_classes_train if components_classes_train is not None else []
    self.components_classes_val=components_classes_val if components_classes_val is not None else []
    self.components_classes_test=components_classes_test if components_classes_test is not None else []
    self.no_train_comps=no_train_comps
    self.no_val_comps=no_val_comps
    self.no_test_comps=no_test_comps

# Bundle the per-component split lists and counters computed above into one object.
components_separated_data=ComponentsSeparatedData(components_train_indexes, components_val_indexes, components_test_indexes,
                components_classes_train, components_classes_val, components_classes_test, no_train_comps, no_val_comps, no_test_comps)

## Construct (sparse) adjacency matrix (from the previously constructed dict)

In [None]:
def construct_sparse_adjacency_matrix(adj_dict: dict, indeces=None):
  """Build a sparse (COO) adjacency matrix with a 1 for every (node, neighbor) pair.

  Args:
    adj_dict: mapping ``node id -> set of neighbor ids``; node ids are used directly
      as row / column positions, and the matrix is ``len(adj_dict)`` x ``len(adj_dict)``.
    indeces: optional precomputed LongTensor of shape ``2 x number_of_pairs``
      (first row = node index, second row = neighbor index). When None, it is
      derived from ``adj_dict``.

  Returns:
    A sparse COO tensor of dtype int16 holding 1 at every listed position
    (repeated positions are left uncoalesced, i.e. they add up).

  The index tensor is assembled from a Python list in a single call; growing it
  with ``torch.cat`` one column at a time re-copied the whole tensor on every
  step and was what made this function take tens of minutes on the PPI graph.
  """
  if indeces is None:
    print('We have to construct indeces vector. Brace yourself, it will take a while!')
    pairs=[(node, neighbor) for node, neighbors in adj_dict.items() for neighbor in neighbors]
    # reshape(-1, 2) keeps the shape well defined (0 x 2) even when there are no pairs
    indeces=torch.tensor(pairs, dtype=torch.long).reshape(-1, 2).t().contiguous()
  # values are created directly in the target dtype instead of being float ones
  values=torch.ones(indeces.shape[1], dtype=torch.short)
  no_nodes=len(adj_dict)
  adj_sparse_matrix=torch.sparse_coo_tensor(indeces, values, torch.Size([no_nodes, no_nodes]))
  return adj_sparse_matrix


### Working with indeces as they are

In [None]:
# Load the previously saved 2 x N index tensor and show columns 600..699 as a sample.
indeces_loaded=torch.load(basepath+'sparse_adjacency_matrix_indeces.pt')
print(indeces_loaded[:, 600:700])

In [None]:
# Passing the precomputed indexes skips the index construction inside the function.
adj_matrix=construct_sparse_adjacency_matrix(adj_dict, indeces_loaded)

It is very interesting that only *some* nodes appear as neighbors of themselves in the initial graph links. I have decided to keep these self-links as they are, both because it makes the implementation easier and because it might be meaningful that only some nodes have links to themselves in the initial graph.

The adjacency matrix therefore has **at least 1** on the main diagonal, and **it has 2 in the diagonal positions where a self-link already exists in the initial graph**.

In [None]:
#construct adjacency matrix that has all values on diagonal 1
adj_matrix=adj_matrix+torch.eye(adj_matrix.shape[0]).to_sparse()

### Reprocessing indeces so that they contain all diagonal

Perhaps it doesn't make that much sense after all. Let's painfully add 1 to the diagonal where it's missing.

In [None]:
# Boolean mask over the index columns: True where row index == column index (a diagonal entry).
equal=((indeces_loaded[0]-indeces_loaded[1])==0)
# Keep the first-row entries of the diagonal columns; these nodes already have a diagonal
# entry and will be excluded from the set of nodes that still need one.
# Boolean-mask indexing replaces a Python loop over every column; list(...) keeps the
# result a list of 0-dim tensors, as before.
already_added_nodes=list(indeces_loaded[0][equal])

In [None]:
# Convert the tensor elements to plain ints and deduplicate them.
already_added_nodes=set([int(node) for node in already_added_nodes])
print(already_added_nodes)

In [None]:
# How many nodes already have a diagonal entry, and which nodes still lack one.
print(len(already_added_nodes))
print(set(adj_dict.keys()) - already_added_nodes)

In [None]:
# Append one (node, node) column for every node that has no diagonal entry yet.
# All missing columns are built first and concatenated once; calling torch.cat inside
# the loop re-copied the whole index tensor for every node.
_missing_diag_nodes=list(set(adj_dict.keys()) - already_added_nodes)
if _missing_diag_nodes:
  _diag_idx=torch.LongTensor(_missing_diag_nodes)
  indeces_loaded=torch.cat( (indeces_loaded, torch.stack((_diag_idx, _diag_idx), dim=0)), dim=1)

In [None]:
# Persist the extended index tensor so the cells above need not be re-run.
torch.save(indeces_loaded, basepath+'sparse_adjacency_matrix_indeces_diag_eye.pt')

### Load full diagonal indeces and construct adj matrix

In [None]:
# Reload the index tensor that includes a diagonal entry for every node.
indeces_loaded=torch.load(basepath+'sparse_adjacency_matrix_indeces_diag_eye.pt')

In [None]:
# Rebuild the sparse adjacency matrix from the full-diagonal indexes (replaces the earlier adj_matrix).
adj_matrix=construct_sparse_adjacency_matrix(adj_dict, indeces_loaded)

## More data processing

In [None]:
# Convert the sparse matrix to float so it can be multiplied with the float feature tensor later on.
adj_matrix=adj_matrix.type(torch.sparse.FloatTensor)

### Clean the adjacency matrix of its diagonal

Right now adj matrix has diagonal full of ones. If I want diag 0, run this.

In [None]:
# Subtract a sparse identity matrix: every diagonal entry drops by 1.
# Not idempotent — running this cell twice subtracts twice.
adj_matrix=adj_matrix-torch.eye(adj_matrix.shape[0]).to_sparse()

### Get the matrix to normalise adjacency matrix

Because both the adjacency matrix and the normalisation matrix have to be sparse (they are too big to store densely), I'll pass them both to the forward function and take advantage of the associativity of matrix multiplication.

In [None]:
# Sum of the adjacency entries along dim 0 (one value per column), as a dense vector.
# NOTE(review): equals the per-row sum only if the matrix is symmetric — confirm for this graph.
sums_adj_matrix=torch.sparse.sum(adj_matrix, dim=0).to_dense()

In [None]:
# Replace zero sums by 1 so the inversion below never divides by zero.
sums_adj_matrix[sums_adj_matrix==0.]=1

In [None]:
# 2 x N index tensor addressing the main diagonal: column k is (k, k).
_diag_positions=torch.LongTensor(range(adj_matrix.shape[0]))
norm_indeces=torch.stack((_diag_positions, _diag_positions), dim=0)
print(norm_indeces.shape)

In [None]:
# Sparse diagonal matrix holding the (zero-guarded) sums ...
norm_uninversed_matrix=torch.sparse.FloatTensor(norm_indeces, sums_adj_matrix, adj_matrix.shape)
# ... raised to the power -1; presumably applied to the stored diagonal values only,
# giving 1/sum on the diagonal (the assert in the next cell checks that no NaN appears).
norm_matrix=norm_uninversed_matrix**-1

In [None]:
# Sanity check: multiplying the features by the normalisation matrix must not produce NaN.
assert not torch.any(torch.isnan(torch.mm(norm_matrix, feats))), "Nan values"

# 2. A simple multi-layer perceptron model

### Model definition

In [None]:
class SimpleNN(nn.Module):
  """Fully connected baseline: in_features -> 1000 -> 500 -> 125 -> out_classes.

  ReLU follows each of the first three layers; the last layer returns raw
  scores (no sigmoid is applied inside the model).
  """
  def __init__(self, in_features, out_classes):
    super().__init__()
    self.lin1=nn.Linear(in_features, 1000)
    self.lin2=nn.Linear(1000, 500)
    self.lin3=nn.Linear(500, 125)
    self.lin4=nn.Linear(125, out_classes)
  def forward(self, x):
    """Return the un-activated output scores for the input batch x."""
    hidden=x
    for layer in (self.lin1, self.lin2, self.lin3):
      hidden=F.relu(layer(hidden))
    return self.lin4(hidden)

### Loaders

In [None]:
def make_batch_loader_simple_nn(ids_selector: list, id_map: dict, features: np.array, class_map: dict, batch_size: int, shuffle, device):
  """Build a DataLoader over the features and labels of the selected nodes.

  Args:
    ids_selector: list of node ids that belong to the wanted dataset.
    id_map: maps ``str(node_id)`` to the node's row in ``features``.
    features: 2-D feature matrix; a numpy array or a torch tensor of any
      numeric dtype (it is converted to float32).
    class_map: maps ``str(node_id)`` to the node's label list.
    batch_size: number of samples per batch (the last batch may be smaller).
    shuffle: forwarded to the DataLoader.
    device: device on which both tensors are placed before wrapping them.

  Returns:
    torch.utils.data.DataLoader yielding ``(features, labels)`` float32 batches.
  """
  from torch.utils.data import DataLoader, TensorDataset
  rows=[id_map[str(idx)] for idx in ids_selector]
  # as_tensor accepts ndarrays and tensors alike; the legacy FloatTensor constructor does not convert dtypes of tensors
  out_feats=torch.as_tensor(features, dtype=torch.float32)[rows].to(device)
  out_classes=torch.tensor([class_map[str(idx)] for idx in ids_selector], dtype=torch.float32).to(device)
  dataset = TensorDataset(out_feats, out_classes)
  data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, drop_last=False)
  return data_loader

In [None]:
# One loader per split, 300 samples per batch.
# NOTE(review): the validation and test loaders are also created with shuffle=True.
train_loader=make_batch_loader_simple_nn(ids_selector=train_ids, id_map=id_map, features=feats, class_map=class_map, batch_size=300, shuffle=True, device=device)
val_loader=make_batch_loader_simple_nn(ids_selector=val_ids, id_map=id_map, features=feats, class_map=class_map, batch_size=300, shuffle=True, device=device)
test_loader=make_batch_loader_simple_nn(ids_selector=test_ids, id_map=id_map, features=feats, class_map=class_map, batch_size=300, shuffle=True, device=device)

### Instancing

In [None]:
# Model dimensions: feature count from the feature matrix, class count from node '0' in class_map.
no_features=feats.shape[-1]
no_classes=len(class_map['0'])
print(no_features, no_classes)
print(f'no samples: {feats.shape[0]}')

From http://hagan.okstate.edu/NNDesign.pdf#page=469

$$N_h = \frac{N_s}{\alpha \, (N_i + N_o)}$$

Nh = number of hidden neurons.
Ni = number of input neurons.
No = number of output neurons.
Ns = number of samples in training data set.
α = an arbitrary scaling factor usually 2-10.

Here, $N_i + N_o = 50 + 121 = 171$ and $N_s = 56944$.

In [None]:
# Rule-of-thumb hidden-neuron count from the formula above, with alpha = 2.
nh=56944/(2*(50+121))
print(nh)

In [None]:
# Fresh, randomly initialised baseline model.
simple_nn=SimpleNN(no_features, no_classes)

In [None]:
# Note: 10e-5 is 1e-4 (not 1e-5).
optim=torch.optim.Adam(simple_nn.parameters(), lr=10e-5)

### Training

In [None]:
def train_epoch_simple_nn(model: nn.Module, optim, train_loader: torch.utils.data.DataLoader, device, epoch, print_every=20):
  """Run one optimisation pass over train_loader and return the mean batch loss.

  Args:
    model: network mapping a feature batch to raw scores; moved to ``device``.
    optim: optimizer built over ``model``'s parameters.
    train_loader: iterable of ``(features, labels)`` batches.
    device: device used for the model and for every batch.
    epoch: epoch number; kept for interface compatibility (currently unused).
    print_every: kept for interface compatibility (currently unused).

  Returns:
    float: MultiLabelSoftMarginLoss averaged over the batches, or 0.0 when the
    loader yields no batch.
  """
  model=model.to(device)
  loss_func=torch.nn.MultiLabelSoftMarginLoss()
  total_epoch_loss=0.
  num_batches=0
  for batch, batch_labels in train_loader:
    # no-op when the loader already serves tensors on `device`
    batch=batch.to(device)
    batch_labels=batch_labels.to(device)
    optim.zero_grad()
    loss=loss_func(model(batch), batch_labels)
    loss.backward()
    with torch.no_grad():
      optim.step()
    total_epoch_loss+=loss.item()
    num_batches+=1
  if num_batches==0:
    # nothing was trained on; avoid dividing by zero
    return 0.
  return total_epoch_loss/num_batches

In [None]:
# Whole-split feature / label tensors used for per-epoch evaluation.
# They are placed on `device` because the training function moves the model there;
# leaving them on the CPU makes the evaluation calls fail whenever CUDA is in use.
train_feats=torch.FloatTensor(feats[[id_map[str(idx)] for idx in train_ids]]).to(device)
train_classes=torch.FloatTensor([class_map[str(idx)] for idx in train_ids]).to(device)
val_feats=torch.FloatTensor(feats[[id_map[str(idx)] for idx in val_ids]]).to(device)
val_classes=torch.FloatTensor([class_map[str(idx)] for idx in val_ids]).to(device)
test_feats=torch.FloatTensor(feats[[id_map[str(idx)] for idx in test_ids]]).to(device)
test_classes=torch.FloatTensor([class_map[str(idx)] for idx in test_ids]).to(device)

In [None]:
# Training length, and how often (in epochs) the model and loss histories are written to disk.
no_epochs=1000
save_every=100

In [None]:
# Per-epoch loss histories; re-running this cell discards what was recorded so far.
train_every_epoch_loss=[]
val_every_epoch_loss=[]
test_every_epoch_loss=[]

In [None]:
# Suffix appended to every file saved by the training loop (matches the hidden sizes 1000/500/125 of SimpleNN).
save_identifier='_1000_500_125'

In [None]:
import pickle

# Train the baseline for `no_epochs` epochs, recording train / val / test loss after every epoch
# in the *_every_epoch_loss lists (the ones that are plotted afterwards), and checkpointing the
# model and the loss histories every `save_every` epochs.
# lossfun=torch.nn.BCEWithLogitsLoss()
lossfun=torch.nn.MultiLabelSoftMarginLoss()
for e in range(no_epochs):
  epoch_train_loss=train_epoch_simple_nn(simple_nn, optim, train_loader, device, e, print_every=200)
  print(f'epoch {e}, train loss: {epoch_train_loss}')
  train_every_epoch_loss.append(epoch_train_loss)
  with torch.no_grad():
    # evaluate on the same device the model was moved to by the training function
    epoch_loss=lossfun(simple_nn(val_feats.to(device)), val_classes.to(device))
    val_every_epoch_loss.append(epoch_loss.item())
    epoch_loss=lossfun(simple_nn(test_feats.to(device)), test_classes.to(device))
    test_every_epoch_loss.append(epoch_loss.item())
  if (e%save_every==0):
    torch.save(simple_nn.state_dict(), basepath+'simple_nn'+save_identifier)
    print(f'epoch loss: {epoch_train_loss}')
    with open(basepath+'simple_nn_train_losses'+save_identifier+'.data', 'wb') as filehandle:
      pickle.dump(train_every_epoch_loss, filehandle)
    with open(basepath+'simple_nn_val_losses'+save_identifier+'.data', 'wb') as filehandle:
      pickle.dump(val_every_epoch_loss, filehandle)
    with open(basepath+'simple_nn_test_losses'+save_identifier+'.data', 'wb') as filehandle:
      pickle.dump(test_every_epoch_loss, filehandle)

### Loss plot and debugging

In [None]:

# Plot the three per-epoch loss histories against the 1-based epoch number.
import matplotlib.pyplot as plt 
plt.plot(range(1, len(train_every_epoch_loss)+1), train_every_epoch_loss, label='Train loss', color='red')
plt.plot(range(1, len(val_every_epoch_loss)+1), val_every_epoch_loss, label='Val loss', color='blue')
plt.plot(range(1, len(test_every_epoch_loss)+1), test_every_epoch_loss, label='Test loss', color='green')
plt.legend()
plt.show()

In [None]:
# Training losses of the last 100 recorded epochs.
print(train_every_epoch_loss[-100:])

In [None]:
# For each split: threshold sigmoid(outputs) at 0.5 (via round) and average the absolute
# difference to the labels, i.e. the fraction of (node, class) entries predicted wrongly,
# assuming the labels are 0/1.
# NOTE(review): the model and the *_feats tensors must live on the same device for these calls.
with torch.no_grad():
  out_train=simple_nn(train_feats)
  print(out_train)
  loss_train=torch.mean(torch.abs(torch.round(torch.sigmoid(out_train))-train_classes))
  print(loss_train)
  out_val=simple_nn(val_feats)
  loss_val=torch.mean(torch.abs(torch.round(torch.sigmoid(out_val))-val_classes))
  print(loss_val)
  out_test=simple_nn(test_feats)
  loss_test=torch.mean(torch.abs(torch.round(torch.sigmoid(out_test))-test_classes))
  print(loss_test)

### **On return**

In [None]:
# Restore saved weights into simple_nn.
# NOTE(review): this file name differs from the one written by the training loop
# ('simple_nn'+save_identifier); confirm the checkpoint matches the current SimpleNN layer sizes.
simple_nn.load_state_dict(torch.load(basepath+'simple_nn_2000_400'))

In [None]:
# Restore previously pickled loss histories into the lists used by the plotting cell.
# NOTE(review): these '...losses2.data' names differ from the ones written by the training loop — confirm they are the intended files.
import pickle
with open(basepath+'simple_nn_train_losses2.data', 'rb') as filehandle:
  train_every_epoch_loss=pickle.load(filehandle)
with open(basepath+'simple_nn_val_losses2.data', 'rb') as filehandle:
  val_every_epoch_loss=pickle.load(filehandle)
with open(basepath+'simple_nn_test_losses2.data', 'rb') as filehandle:
  test_every_epoch_loss=pickle.load(filehandle)

# 3. Simple variant of GCN (mean aggregator) with no adjacency matrix


In [None]:
class GCN(nn.Module):
  """Level-wise aggregate-then-project network driven by an external recursive traversal.

  Holds one bias-free linear layer per level (the first maps in_features_size to
  embedding_size, the others embedding_size to embedding_size) plus a linear
  classifier on top of the final embedding.
  """
  def __init__(self, neighbors_depth, in_features_size, embedding_size, no_classes, aggregation_func=lambda x: torch.mean(x, dim=0)):
    super().__init__()
    self.depth=neighbors_depth-1
    self.agg=aggregation_func
    self.emb_size=embedding_size
    self.linears=nn.ModuleList([
      nn.Linear(in_features_size if level==0 else embedding_size, embedding_size, bias=False)
      for level in range(neighbors_depth)
    ])
    self.classifier=nn.Linear(embedding_size, no_classes)
  def forward(self, x, level):
    """Aggregate the rows of x, project them with the layer of `level`; ReLU on all but the last level."""
    projected=self.linears[level](self.agg(x))
    return projected if level==self.depth else F.relu(projected)
  def classify(self, embedded):
    """Map an embedding to raw class scores (no activation)."""
    return self.classifier(embedded)

## Useless now

In [None]:
def get_neighbors(adj_dict, starting_node_id, depth, level=0):
  """Return the nested neighbor-id lists reachable from a node within `depth` hops.

  The node itself is always counted among its neighbors. Above the last level the
  result is a list holding one nested result per neighbor; at the last level
  (level == depth-1) it is the flat neighbor list; beyond that it is empty.
  Every call prints the node and its neighbor list.
  """
  neighbors=list(adj_dict[starting_node_id])
  if starting_node_id not in neighbors:
    neighbors.append(starting_node_id)
  print(f'node {starting_node_id} with neighbors {neighbors}')
  if level<depth-1:
    return [get_neighbors(adj_dict, neighbor , depth, level=level+1) for neighbor in neighbors]
  if level<depth:
    return neighbors
  return []
# Demo: nested neighbor lists of node 11 down to depth 3.
a=get_neighbors(adj, 11, 3)
print(len(a))
print(a)

In [None]:
def feed(model: GCN, adj_dict: dict, id_map: dict, starting_node_id: int, feats: torch.Tensor, depth: int, level=0):
  """Recursively embed a node by walking its neighborhood down to `depth`.

  At the deepest level (level == depth) the base features of the node's
  neighbors (the node itself included) are fed to the model. Above it, every
  neighbor is embedded recursively, the results are stacked along a new first
  dimension and fed to the model. The model is always called with
  ``level=depth-level``.

  Returns:
    (level_output, level_output.grad); None when level > depth.
  """
  neighbors=list(adj_dict[starting_node_id])
  if starting_node_id not in neighbors:
    neighbors.append(starting_node_id)
  if level>depth:
    return None
  if level==depth:
    # deepest level: work on the raw features
    feature_rows=[id_map[str(idx)] for idx in neighbors]
    level_output=model(torch.FloatTensor(feats[feature_rows]), level=depth-level)
    return level_output, level_output.grad
  neighbor_reprs=[]
  for neighbor in neighbors:
    feeded, grad=feed(model, adj_dict, id_map, neighbor, feats, depth, level=level+1)
    print(f'feeded has grad {feeded.grad}; returned grad is {grad}')
    neighbor_reprs.append(feeded)
  # one row per neighbor representation
  level_neighbors_repres=torch.stack(neighbor_reprs, dim=0)
  level_output=model(level_neighbors_repres, level=depth-level)
  return level_output, level_output.grad

## Useful

In [None]:
class Node():
  """A node together with its recursively expanded neighborhood tree.

  neighbors_ids always contains the node's own id; neighbors_nodes holds one
  child Node per neighbor id (built with depth-1) while depth > 0, else it is empty.
  """
  def __init__(self, node_id: int, adj_dict: dict, depth=3):
    self.idx=node_id
    self.depth=depth
    neighbor_ids=list(adj_dict[node_id])
    if node_id not in neighbor_ids:
      neighbor_ids.append(node_id)
    self.neighbors_ids=neighbor_ids
    self.neighbors_nodes=[]
    if depth>0:
      self.neighbors_nodes=[Node(neighbor_id, adj_dict, depth=depth-1) for neighbor_id in neighbor_ids]
  def level_feedforward(self, model, id_map: dict, feats: torch.Tensor, level=0):
    """Embed this node by recursing through the neighborhood tree down to model.depth.

    At level == model.depth the base features of the neighbors are fed to the
    model; above it, the children's outputs are stacked along a new first
    dimension and fed to the model. The model is always called with
    ``level=model.depth-level``. Returns None when level > model.depth.
    """
    if level>model.depth:
      return None
    if level==model.depth:
      feature_rows=[id_map[str(idx)] for idx in self.neighbors_ids]
      return model(torch.FloatTensor(feats[feature_rows]), level=model.depth-level)
    first_repr=self.neighbors_nodes[0].level_feedforward(model, id_map, feats, level=level+1)
    other_reprs=[child.level_feedforward(model, id_map, feats, level=level+1) for child in self.neighbors_nodes[1:]]
    level_neighbors_repres=torch.stack([first_repr]+other_reprs, dim=0)
    return model(level_neighbors_repres, level=model.depth-level)

In [None]:
# for (name, param) in model.named_parameters():
#   print(f'param {name} is {param}')

In [None]:
# def train_epoch_gcn_old(gcn: GCN, optim, adj_dict: dict, id_map: dict, feats: torch.Tensor, train_ids: list, class_map, epoch, print_every=100):
#   epoch_loss=0.
#   lossfunc=torch.nn.BCEWithLogitsLoss()
#   for node_id in train_ids:
#     optim.zero_grad()
#     node_classes=torch.FloatTensor(class_map[str(node_id)])
#     node_embedding=feed(gcn, adj_dict, id_map, node_id, feats, gcn.depth)
#     node_predictions=gcn.classify(node_embedding)
#     # print(f'model predicts of shape {node_predictions.shape[0]}')
#     node_loss=lossfunc(node_predictions, node_classes)
#     node_loss.backward()
#     with torch.no_grad():
#       optim.step()
#       epoch_loss+=node_loss
#       if node_id%print_every==0:
#         print(f'node {node_id} loss is {node_loss} in epoch {epoch}')
#         # print(f'node {node_id} embedding is {node_embedding} \n\n')
#         for (name, param) in model.named_parameters():
#             print(f'param {name} is {param}')
#   epoch_loss=epoch_loss/len(train_ids)
#   print(f'epoch {epoch} has loss {epoch_loss}')

#   return (epoch_loss)

In [None]:
def train_epoch_gcn(gcn: GCN, optim, adj_dict: dict, id_map: dict, feats: torch.Tensor, train_ids: list, class_map, epoch, print_every=100):
  """Train the recursive GCN for one epoch, one node (one optimizer step) at a time.

  Args:
    gcn: model to train; its own ``depth`` controls how deep each neighborhood is expanded.
    optim: optimizer built over ``gcn``'s parameters.
    adj_dict: node id -> set of neighbor ids.
    id_map: ``str(node_id)`` -> feature row.
    feats: node feature matrix.
    train_ids: ids of the nodes to train on.
    class_map: ``str(node_id)`` -> label list.
    epoch: epoch number, used in the summary print only.
    print_every: whenever ``node_id % print_every == 0`` the running mean loss
      is recorded and the curve recorded so far is plotted.

  Returns:
    float: mean BCE-with-logits loss over ``train_ids`` (0.0 when it is empty).
  """
  epoch_loss=0.
  lossfunc=torch.nn.BCEWithLogitsLoss()
  print_losses=[]
  for nodes_done, node_id in enumerate(train_ids, start=1):
    optim.zero_grad()
    node_classes=torch.FloatTensor(class_map[str(node_id)])
    # use the depth of the model that was passed in, not of whatever global `model` exists
    current_node=Node(node_id, adj_dict, depth=gcn.depth)
    node_embedding=current_node.level_feedforward(gcn, id_map, feats, level=0)
    node_predictions=gcn.classify(node_embedding)
    node_loss=lossfunc(node_predictions, node_classes)
    node_loss.backward()
    with torch.no_grad():
      optim.step()
    # accumulate a plain float rather than a tensor
    epoch_loss+=node_loss.item()
    if node_id%print_every==0:
      # running mean over the nodes processed so far; dividing by node_id
      # breaks for node 0 and is not a count of processed nodes
      print_losses.append(epoch_loss/nodes_done)
      import matplotlib.pyplot as plt
      plt.plot(range(1, len(print_losses)+1), print_losses, label='Train loss', color='red')
      plt.legend()
      plt.show()
  if len(train_ids)==0:
    return 0.
  epoch_loss=epoch_loss/len(train_ids)
  print(f'epoch {epoch} has loss {epoch_loss}')

  return (epoch_loss)

In [None]:
# Three levels (model.depth == 2) and an embedding size of 150.
model=GCN(3, feats.shape[-1], 150, no_classes)

In [None]:
# Rebinds the global `optim` to an optimizer over the GCN's parameters.
optim=torch.optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Training length and the per-epoch loss history for the recursive GCN.
no_epochs=30
train_epoch_losses=[]

In [None]:
# Train for no_epochs epochs, keeping the mean loss returned for each epoch.
for e in range(no_epochs):
  epoch_loss=train_epoch_gcn(model, optim, adj, id_map, feats, train_ids, class_map, e, print_every=100)
  train_epoch_losses.append(epoch_loss)

# 3'. Simple variant of GCN with adjacency matrix

Multiply adjacency matrix with all h's to get next h's.

## Model definitions

In [None]:
class GCNFeatureEmbedder(nn.Module):
  """Two-layer projection: in_features -> 2*in_features -> out_features, with a ReLU in between."""
  def __init__(self, in_features, out_features, bias=False):
    super().__init__()
    hidden_features=in_features*2
    self.lin1=nn.Linear(in_features, hidden_features, bias=bias)
    self.lin2=nn.Linear(hidden_features, out_features, bias=bias)
  def forward(self, x):
    """Return the projection of x; the output itself is not activated."""
    return self.lin2(F.relu(self.lin1(x)))

In [None]:
class GCN(nn.Module):
  """Whole-graph GCN variant that propagates features with an adjacency matrix.

  Every layer owns two transforms: one applied to the normalised neighbor
  aggregate and one applied to the node's own normalised features. The first
  neighbors_depth-1 layers are linear (width 2*in_features_size); the last one
  is a pair of GCNFeatureEmbedder modules projecting to embedding_size.
  """
  def __init__(self, neighbors_depth, in_features_size, embedding_size, no_classes):
    super().__init__()
    self.depth=neighbors_depth-1
    self.emb_size=embedding_size
    self.feature_size=in_features_size
    hidden_size=in_features_size*2
    neighbor_layers=[]
    self_layers=[]
    for layer_idx in range(neighbors_depth-1):
      # only the first layer sees the raw feature width
      layer_in=in_features_size if layer_idx==0 else hidden_size
      neighbor_layers.append(nn.Linear(layer_in, hidden_size))
      self_layers.append(nn.Linear(layer_in, hidden_size))
    neighbor_layers.append(GCNFeatureEmbedder(hidden_size, embedding_size, bias=False))
    self_layers.append(GCNFeatureEmbedder(hidden_size, embedding_size, bias=False))
    self.linears_for_neighbors=nn.ModuleList(neighbor_layers)
    self.linears_for_self=nn.ModuleList(self_layers)
    self.classifier=nn.Linear(embedding_size, no_classes, bias=False)
  def forward(self, node_features: torch.Tensor, adj_matrix: torch.sparse.FloatTensor, norm_matrix: torch.sparse.FloatTensor):
    """Return the embeddings of all nodes.

    node_features must hold one row per row/column of adj_matrix, i.e. features
    have to be supplied for every node. norm_matrix is the diagonal
    normalisation matrix (inverse of each node's neighbor count on the diagonal).
    The two matrices are applied one after the other (adjacency first, then the
    normalisation) rather than being multiplied with each other beforehand —
    per the original author, two torch sparse tensors apparently can't be
    multiplied together. ReLU follows every layer, the last one included.
    """
    hidden=node_features
    for neighbor_layer, self_layer in zip(self.linears_for_neighbors, self.linears_for_self):
      neighbor_aggregate=torch.mm(norm_matrix, torch.mm(adj_matrix, hidden))
      own_normalised=torch.mm(norm_matrix,hidden)
      hidden=F.relu(neighbor_layer(neighbor_aggregate) + self_layer(own_normalised))
    return hidden
  def classify(self, embedded):
    """Map embeddings to raw class scores.

    No activation is applied: the embeddings were already activated by the last
    layer, and the scores are meant to go through a sigmoid once they are output.
    """
    return self.classifier(embedded)

## Training

### Functions

In [None]:
def get_preds_n_classes(ids_selector, all_preds, class_map: dict):
  """
  Pick the rows of `all_preds` listed in `ids_selector` and build the matching label tensor.

  Labels are looked up in `class_map` under the string form of each id.
  Both results are moved to the notebook-global `device`.
  Returns (selected predictions, labels as a FloatTensor).
  """
  selected_preds = all_preds[list(ids_selector)].to(device)
  label_rows = [class_map[str(node_id)] for node_id in ids_selector]
  selected_classes = torch.FloatTensor(label_rows).to(device)
  return selected_preds, selected_classes

In [None]:
def train_epoch_adjacency_gcn(model, optim, adj_matrix, norm_matrix, feats, train_ids, class_map, device):
  """
  Run one full-graph optimisation step of the adjacency-matrix GCN.

  A single forward pass is made over all nodes; the BCE-with-logits loss is computed
  only on the rows listed in `train_ids`, then backpropagated and applied.

  Returns (training loss as a float, logits for ALL nodes from this forward pass).
  The returned logits are not detached from the autograd graph.
  Note that get_preds_n_classes places its outputs on the notebook-global `device`,
  not on the `device` argument, which here is only used to move the model.
  """
  model = model.to(device)
  criterion = torch.nn.BCEWithLogitsLoss()

  optim.zero_grad()
  embeddings = model(feats, adj_matrix, norm_matrix)
  preds = model.classify(embeddings)
  train_preds, train_classes = get_preds_n_classes(train_ids, preds, class_map)
  loss = criterion(train_preds, train_classes)
  loss.backward()

  with torch.no_grad():
    optim.step()
  return loss.item(), preds

In [None]:
def get_accuracy(predictions, labels, probability=0.5):
  """
  Weighted F1 score of thresholded multi-label predictions (despite the name, not plain accuracy).

  predictions: tensor of shape no_nodes x no_classes holding the probability of each node
               being in each class
  labels:      same shape, binary
  probability: cut-off above which a probability counts as a positive prediction

  average='weighted' is used in f1_score because it accounts for possible class imbalances.

  Raises AssertionError when the two shapes differ.
  """
  # f-string instead of str + set concatenation, which raised TypeError before the assert could report
  assert predictions.shape == labels.shape, \
    f"Predictions and labels don't have same shape: {predictions.shape} vs. {labels.shape} respectively."
  from sklearn.metrics import f1_score
  cutoff=(predictions>probability).type(torch.FloatTensor)
  # sklearn converts its inputs to numpy, which is not possible for tensors living on the GPU
  if isinstance(labels, torch.Tensor):
    labels=labels.detach().cpu()
  score=f1_score(y_true=labels, y_pred=cutoff, average='weighted')
  return(score)

### Actual training

In [None]:
# number of full-graph training epochs handed to train_gcn_model below
no_epochs=10000

In [None]:
# presumably GCN(no_iters, in_features_size, embedding_size, no_classes) — TODO confirm against the class header
model=GCN(3, feats.shape[-1], 150, no_classes)

In [None]:
# suffix appended to every checkpoint / history file name written by train_gcn_model
save_identifier=str(model.emb_size)+'_twiceinfeats_hybrid_3iters_withselfs'

In [None]:
# note: 10e-4 is 1e-3, not 1e-4
optim=torch.optim.Adam(model.parameters(), lr=10e-4)

In [None]:
# per-epoch histories; train_gcn_model appends to these notebook-level lists in place,
# so re-run this cell before a fresh training run to avoid mixing runs
train_every_epoch_loss=[]
val_every_epoch_loss=[]
test_every_epoch_loss=[]
train_every_epoch_score=[]
val_every_epoch_score=[]
test_every_epoch_score=[]

In [None]:
def train_gcn_model(model, optim, no_epochs, save_identifier, model_type, save_every):
  """
  Train `model` full-batch for `no_epochs` epochs, recording losses and F1 scores every epoch.

  Relies on notebook globals: adj_matrix, norm_matrix, feats, train_ids, val_ids, test_ids,
  class_map, device, basepath and the six *_every_epoch_* history lists, which are appended
  to in place.

  Validation and test metrics are derived from the logits of the same forward pass that
  produced the training loss, i.e. before that epoch's optimiser step.

  Every `save_every` epochs (epoch 0 included) the model state dict and the six history
  lists are written under `basepath`, with names built from `model_type` and `save_identifier`.

  All losses are stored as plain Python floats so the pickled histories can be loaded
  and plotted without torch or a GPU.
  """
  import pickle
  loss_func=torch.nn.BCEWithLogitsLoss()
  # file-name infix -> history list pickled under that name
  # NOTE(review): these names carry no '.data' suffix and use '_adj_' infixes, whereas
  # load_losses_n_scores builds model_identifier+'train_losses'+load_identifier+'.data';
  # saved files therefore need renaming before they can be loaded back — confirm intended naming.
  history_files=[
    ('_train_losses_', train_every_epoch_loss),
    ('_adj_val_losses_', val_every_epoch_loss),
    ('_adj_test_losses_', test_every_epoch_loss),
    ('_adj_train_scores_', train_every_epoch_score),
    ('_adj_val_scores_', val_every_epoch_score),
    ('_adj_test_scores_', test_every_epoch_score),
  ]
  for e in range(no_epochs):
    train_epoch_loss, preds = train_epoch_adjacency_gcn(model, optim, adj_matrix, norm_matrix, feats, train_ids, class_map, device)
    with torch.no_grad():
      val_preds, val_classes=get_preds_n_classes(val_ids, preds, class_map)
      test_preds, test_classes=get_preds_n_classes(test_ids, preds, class_map)
      train_preds, train_classes=get_preds_n_classes(train_ids, preds, class_map)

      #.item() so that floats (not tensors, possibly on the GPU) end up in the histories, like the train loss
      val_epoch_loss=loss_func(val_preds, val_classes).item()
      test_epoch_loss=loss_func(test_preds, test_classes).item()
      #we already have the train loss so no need to calculate it

      #the network outputs logits (BCEWithLogitsLoss expects logits for numeric stability),
      #so they go through sigmoid here before being scored as probabilities
      train_epoch_score=get_accuracy(torch.sigmoid(train_preds), train_classes)
      val_epoch_score=get_accuracy(torch.sigmoid(val_preds), val_classes)
      test_epoch_score=get_accuracy(torch.sigmoid(test_preds), test_classes)

      print(f'train epoch {e} loss {train_epoch_loss} and score {train_epoch_score} \n\n\n')

      #adding to total vectors for plot
      train_every_epoch_loss.append(train_epoch_loss)
      val_every_epoch_loss.append(val_epoch_loss)
      test_every_epoch_loss.append(test_epoch_loss)
      train_every_epoch_score.append(train_epoch_score)
      val_every_epoch_score.append(val_epoch_score)
      test_every_epoch_score.append(test_epoch_score)

      if e%save_every==0:
        torch.save(model.state_dict(), basepath+model_type+'_adjacency_emb'+save_identifier)
        for infix, history in history_files:
          with open(basepath+model_type+infix+save_identifier, 'wb') as filehandle:
            pickle.dump(history, filehandle)
        

In [None]:
# full training run; checkpoints and histories are written every 100 epochs under the 'gcn_adj' prefix
train_gcn_model(model, optim, no_epochs, save_identifier, 'gcn_adj', save_every=100)

In [None]:
#not really used anymore since saving is done in training loop
#note: the training loop above writes to basepath+'gcn_adj'+'_adjacency_emb'+save_identifier,
#which is a different file from the 'gcn_adjacency_emb'+save_identifier written here
torch.save(model.state_dict(), basepath+'gcn_adjacency_emb'+save_identifier)

## Loading back and plotting 

Loading

In [None]:
# NOTE(review): within this section load_identifier is only assigned in cells further down,
# so on a fresh kernel those cells must run before this one — confirm it is not defined earlier
model.load_state_dict(torch.load(basepath+'gcn_adjacency_emb'+load_identifier))

In [None]:
!ls '$basepath'

In [None]:
def load_losses_n_scores(basepath, model_identifier, load_identifier, import_scores=False):
  """
  Unpickle the saved per-epoch histories of one model.

  Every file is read from basepath+model_identifier+<series>+load_identifier+'.data',
  where <series> is train_losses / val_losses / test_losses and, when `import_scores`
  is truthy, also train_scores / val_scores / test_scores (for some architectures
  scores weren't computed, hence the switch).

  Returns (train_loss, val_loss, test_loss), followed by
  (train_score, val_score, test_score) when scores were requested.
  """
  import pickle

  def _read_series(series_name):
    with open(basepath+model_identifier+series_name+load_identifier+'.data', 'rb') as filehandle:
      return pickle.load(filehandle)

  wanted = ['train_losses', 'val_losses', 'test_losses']
  if import_scores:
    wanted = wanted + ['train_scores', 'val_scores', 'test_scores']
  return tuple(_read_series(series_name) for series_name in wanted)

In [None]:
def plot_losses_n_scores(train_loss, val_loss, test_loss, graph_title, begin_plot_index=1000,
         train_score=None, val_score=None, test_score=None):
  """
  Plot per-epoch loss curves (and optionally score curves) on a single new figure.

  train_loss, val_loss, test_loss: sequences with one value per epoch
  graph_title: title of the figure
  begin_plot_index: number of leading epochs to skip; the x axis is 1-based, so the
                    first plotted point sits at epoch begin_plot_index+1. A series with
                    no more than begin_plot_index entries contributes an empty curve.
  train_score, val_score, test_score: optional score sequences; each one is plotted
                    only when it is not None
  """
  import matplotlib.pyplot as plt

  def _plot_series(series, label, color):
    # x values are 1-based epoch numbers matching the slice that is actually drawn
    plt.plot(range(begin_plot_index+1, len(series)+1), series[begin_plot_index:], label=label, color=color)

  plt.figure() #create new plot
  _plot_series(train_loss, 'Train loss', 'red')
  _plot_series(val_loss, 'Val loss', 'blue')
  _plot_series(test_loss, 'Test loss', 'green')
  optional_series=[(train_score, 'Train score', 'yellow'),
                   (val_score, 'Val score', 'purple'),
                   (test_score, 'Test score', 'cyan')]
  for series, label, color in optional_series:
    # identity check: `!= None` compares element-wise on arrays/tensors and cannot be used as a condition
    if series is not None:
      _plot_series(series, label, color)
  plt.xlabel('epoch')
  plt.legend()
  plt.title(graph_title)
  plt.show()

In [None]:
def get_n_plot_losses_n_scores(basepath, model_identifier,load_identifier, begin_plot_index=500, scores=False):
  """
  Load the saved histories of one model with load_losses_n_scores and plot them.

  basepath, model_identifier, load_identifier: forwarded to load_losses_n_scores to
      locate the pickled files; model_identifier+load_identifier is also the plot title
  begin_plot_index: number of leading epochs left out of the plot
  scores: when truthy, the score histories are loaded and plotted as well
  """
  loaded=load_losses_n_scores(basepath, model_identifier, load_identifier, scores)
  if scores:
    trl, vl, tel, trs, vs, tes=loaded
  else:
    trl, vl, tel=loaded
    trs, vs, tes=None, None, None
  # pass the loaded loss lists (the names tr, v, te used here before were never defined)
  plot_losses_n_scores(trl, vl, tel, model_identifier+load_identifier, begin_plot_index, trs, vs, tes)

In [None]:
# identifier of a previously saved run; overwritten if the next cell is also executed
load_identifier='_'+str(model.emb_size)+'_ladder_embedders_3_selfs'

In [None]:
# alternative: point the loaders at the run trained in this session
load_identifier=save_identifier

In [None]:
#if losses and scores are already in memory
plot_losses_n_scores(train_every_epoch_loss, val_every_epoch_loss, test_every_epoch_loss, 'Memory is a weird thing', 20,
         train_every_epoch_score, val_every_epoch_score, test_every_epoch_score)

In [None]:
# NOTE(review): get_n_plot_losses is not defined in this section (only get_n_plot_losses_n_scores is);
# presumably it comes from an earlier cell — confirm
get_n_plot_losses(basepath, 'gcn_adj_', load_identifier, begin_plot_index=100)

In [None]:
# plot the saved loss curves of several GCN runs, then of the simple NN run
# NOTE(review): get_n_plot_losses is not defined in this section; presumably it comes from an earlier cell — confirm
load_ids=['_100_ladder_embedders_3_selfs', '_150_4_hybrid', '_200_ladder_embedders_3_selfs', '_250_3_hybrid']
for load_id in load_ids:
  get_n_plot_losses(basepath, 'gcn_adj_', load_id, 10)
load_ids=['_1000_500_125']
for load_id in load_ids:
  get_n_plot_losses(basepath, 'simple_nn_', load_id, 1)

## Dummy example to test on

In [None]:
#dummy example
dum_feats=torch.FloatTensor([[1, 1, 0, 1], [1, 0, 1, 0], [0, 0, 0, 1], [1, 1, 1, 1], [0, 0, 1, 0], [1, 0, 1, 0]])
dum_classes=torch.FloatTensor([[1, 0], [1, 1], [1, 0], [0,0], [1, 1], [0, 1]])
dum_adj_matrix=torch.FloatTensor([[1, 0, 1, 1, 0, 0], [0, 1, 0, 0, 0, 1],
                                  [1, 1, 1, 0, 0, 0], [0, 1, 0, 1, 0, 1],
                                  [0, 0, 0, 1, 1, 0], [0, 0, 1, 0, 1, 1]
                                  ])
dum_sums_adj_matrix=torch.sum(dum_adj_matrix, dim=0)
dum_sums_adj_matrix[dum_sums_adj_matrix==0.]=1
dum_norm_matrix=torch.diag(dum_sums_adj_matrix**-1)
print(dum_norm_matrix)

In [None]:
# small GCN for the dummy graph; note that lr=10e-4 is 1e-3
dum_model=GCN(3, dum_feats.shape[-1], 100, dum_classes.shape[-1])
dum_optim=torch.optim.Adam(dum_model.parameters(), lr=10e-4)

In [None]:
# sanity check: fit the GCN on the 6-node dummy graph, training on all nodes at once
for e in range(1000):
  loss_func=torch.nn.BCEWithLogitsLoss()
  dum_optim.zero_grad()
  dum_embeddings=dum_model(dum_feats, dum_adj_matrix, dum_norm_matrix)
  # classify returns raw logits, which is what BCEWithLogitsLoss expects
  dum_preds=dum_model.classify(dum_embeddings)
  dum_loss=loss_func(dum_preds, dum_classes)
  dum_loss.backward()
  with torch.no_grad():
    dum_optim.step()
    # logits vs. targets are printed every epoch for visual comparison
    print(f'epoch {e}')
    print()
    # print(dum_embeddings)
    print(dum_preds)
    print(dum_classes)
    print(dum_loss.item())
    print('\n\n')

# 4. Graph Attention Network (GAT)

## Model definition

### Old forward method

In [None]:
# # newest old forward
#   def forward(self, node_features: torch.Tensor, index_mapping: dict, adj_dict: dict, focus_ids: list):
#     """
#     -node_features is a tensor of features of nodes required for all iterrations in batch
#     -index_mapping maps actual node_ids to indexes of node_features rows (which are the ones which appear in adj dict)
#     -adj_dict is an adjacency dictionary of all the nodes (no point in constructing one just with the nodes needed for iteration)
#     -focus_ids is a list of lists of all the ids that are important at a given level (from left to right: farthest to nearest)
#     focus_ids[0] will be the ids of nodes that are at level 1, i.e. immediately above leaves etc.
#      The index mapping will need to be updated at each iteration so that we have a clue what's going on
#     """

#     #START THE ITERATIONS
#     current_feats=node_features
#     for i in range(len(self.linears_for_neighbors)) :
#       #an iteration starts
#       print(f'iteration {i}')
#       linear_neighbors = self.linears_for_neighbors[i]
#       attention=self.attentions[i]
      
#       next_feats=torch.FloatTensor(len(focus_ids[i]), linear_neighbors.out_features)
#       new_index_mapping={}
#       current_row=0
#       for node in focus_ids[i]:
#         node_neighbors_feats=current_feats[[index_mapping[neighbor] for neighbor in adj_dict[node]]] #will have shape no_neighbors x no_feats_per_node
#         node_feats=current_feats[[index_mapping[node]]]
#         # print(f'node {node} neighbor feats shape {node_neighbors_feats.shape} and self feats shape {node_feats.shape}')
#         node_attention_feeder=torch.cat( (linear_neighbors(node_neighbors_feats.view(node_neighbors_feats.shape[0], -1)),
#                                           linear_neighbors(node_feats.view(1,-1).repeat(node_neighbors_feats.shape[0], 1)) ), dim=1)
#         #get attention coefficients and normalise them
#         node_attention_coefficients=F.leaky_relu(attention(node_attention_feeder), negative_slope=0.2)
#         node_attention_coefficients=F.softmax(node_attention_coefficients, dim=0)
#         # print(f'after node {node} current attention feeder has shape {current_attention_feeder.shape}')
        
#         #get linouts
#         node_neighbor_linout_feats=linear_neighbors(node_neighbors_feats)
#         #compute next feats
#         next_feats[current_row]=torch.mm(node_attention_coefficients.view(1, len(list(adj_dict[node]))),
#                                     node_neighbor_linout_feats.view(len(list(adj_dict[node])),-1))
#         #sorting the index messiness
#         new_index_mapping[node]=current_row
#         current_row += 1
#       current_feats=next_feats
#       index_mapping=new_index_mapping
#       print(f'final feats out of layer {i} have shape {current_feats.shape}')
#       current_feats=F.relu(current_feats)
#     return current_feats, index_mapping

In [None]:
# #backup "global" epoch forward
# #START THE ITERATIONS
#     current_feats=node_features
#     for i in range(len(self.linears_for_neighbors)) :
#       #an iteration starts
#       linear_neighbors = self.linears_for_neighbors[i]
#       attention=self.attentions[i]
      
#       next_feats=torch.FloatTensor(current_feats.shape[0], linear_neighbors.out_features)
#       for node in range(len(adj_dict.keys())):
#         node_neighbors_feats=current_feats[list(adj_dict[node])] #will have shape no_neighbors x no_feats_per_node
#         node_feats=current_feats[int(node)]
#         node_attention_feeder=torch.cat( (linear_neighbors(node_neighbors_feats.view(node_neighbors_feats.shape[0], -1)),
#                                           linear_neighbors(node_feats.view(1,node_feats.shape[0]).repeat(node_neighbors_feats.shape[0], 1)) ), dim=1)
#         #variant WITHOUT W
#         # node_attention_feeder=torch.cat( (node_neighbors_feats.view(node_neighbors_feats.shape[0], -1),
#         #                                       node_feats.view(1,node_feats.shape[0]).repeat(node_neighbors_feats.shape[0], 1) ), dim=1)
#         #node attention feeder will have shape no_neighbors_of_node x no_node_features*2

#         #get attention coefficients and normalise them
#         node_attention_coefficients=F.leaky_relu(attention(node_attention_feeder), negative_slope=0.2)
#         node_attention_coefficients=F.softmax(node_attention_coefficients, dim=0)
#         # print(f'after node {node} current attention feeder has shape {current_attention_feeder.shape}')
        
#         #get linouts
#         node_neighbor_linout_feats=linear_neighbors(current_feats[list(adj_dict[node])])
#         #compute next feats
#         next_feats[node]=torch.mm(node_attention_coefficients.view(1, len(list(adj_dict[node]))),
#                                     node_neighbor_linout_feats.view(len(list(adj_dict[node])),-1))
        
#       current_feats=next_feats
#       print(f'final feats out of layer {i} have shape {current_feats.shape}')
#       current_feats=F.relu(current_feats)
#     return current_feats

In [None]:
# def forward_old(self, node_features: torch.Tensor, adj_dict: dict):
#     #GETTING NODE NEIGHBORS INDECES ONCE AND FOR ALL
#     node_neighbors_indeces=[] #a list indicating up to where in the output attention feeder matrix one should look to find the features of the node neighbors
#     for node in range(len(adj_dict.keys())):
#       if node_neighbors_indeces:
#         node_neighbors_indeces.append(node_neighbors_indeces[-1]+len(adj_dict[node]))
#       else:
#         node_neighbors_indeces.append(len(adj_dict[node]))
  
#     #START THE ITERATIONS
#     current_feats=node_features
#     for i in range(len(self.linears_for_neighbors)) :
#       #an iteration starts
#       linear_neighbors = self.linears_for_neighbors[i]
#       attention=self.attentions[i]



#       #constructing the attention coefficients
#       node_neighbors_feats=current_feats[list(adj_dict[0])] #will have shape no_neighbors x no_feats_per_node
#       node_feats=current_feats[int(0)]
#       # current_attention_feeder=torch.cat( (linear_neighbors(node_neighbors_feats.view(node_neighbors_feats.shape[0], -1)),
#       #                                           linear_neighbors(node_feats.view(1,node_feats.shape[0]).repeat(node_neighbors_feats.shape[0], 1))), dim=1)
#       #variant WITHOUT W:
#       current_attention_feeder=torch.cat( (node_neighbors_feats.view(node_neighbors_feats.shape[0], -1),
#                                                 node_feats.view(1,node_feats.shape[0]).repeat(node_neighbors_feats.shape[0], 1) ), dim=1)
#       for node in range(1,len(adj_dict.keys())):
#         node_neighbors_feats=current_feats[list(adj_dict[node])] #will have shape no_neighbors x no_feats_per_node
#         node_feats=current_feats[int(node)]
#         # current_attention_feeder=torch.cat( (current_attention_feeder,
#         #                                       torch.cat( (linear_neighbors(node_neighbors_feats.view(node_neighbors_feats.shape[0], -1)),
#         #                                       linear_neighbors(node_feats.view(1,node_feats.shape[0]).repeat(node_neighbors_feats.shape[0], 1)) ), dim=1) )
#         # , dim=0)
#         #variant WITHOUT W
#         current_attention_feeder=torch.cat( (current_attention_feeder,
#                                               torch.cat( (node_neighbors_feats.view(node_neighbors_feats.shape[0], -1),
#                                               node_feats.view(1,node_feats.shape[0]).repeat(node_neighbors_feats.shape[0], 1) ), dim=1) )
#         , dim=0)
#         # print(f'after node {node} current attention feeder has shape {current_attention_feeder.shape}')
#       #current attention feeder will have shape no_links_in_graph x no_node_features*2
#       attention_coefficients=F.leaky_relu(attention(current_attention_feeder), negative_slope=0.2) #negative slope of 0.2 was used in the article
#       #attention_coefficients will have shape no_links_in_graph x 1 (1 coef for each link)


#       #CONSTRUCTING NEXT FEATURES
#       next_feats=torch.FloatTensor(current_feats.shape[0], linear_neighbors.out_features)
#       #using attention coefficients to compute next level features
#         #WE NEED TO RECOMPUTE THE RESULTS OF PASSING THROUGH THE LINEARS BECAUSE THEY WERE ALREADY USED IN PASSING THROUGH ATTENTION
#       node_neighbor_linout_feats=linear_neighbors(current_feats[list(adj_dict[0])])
#       #normalising first node attentions
#       attention_coefficients[:node_neighbors_indeces[0]]=F.softmax(attention_coefficients[:node_neighbors_indeces[0]], dim=0)
#       next_feats[0]=torch.mm(attention_coefficients[:node_neighbors_indeces[0]].view(1, node_neighbors_indeces[0]),
#                             node_neighbor_linout_feats.view(node_neighbors_indeces[0], -1))
#       for node in range(1,len(adj_dict.keys())):
#         #I want dot product between attentions of current node and the outputs of current node neighbors through through linear
#         #To use mathmul, attentions need to be shape 1 x no_neighbors_current_node
#         #                outputs through linears need to be shape no_neighbors_current_node x output_features_of_linear_layer

#         #normalising:
#         attention_coefficients[node_neighbors_indeces[node-1]:node_neighbors_indeces[node]]=F.softmax(attention_coefficients[node_neighbors_indeces[node-1]:node_neighbors_indeces[node]], dim=0)

#         #getting output feats again:
#         node_neighbor_linout_feats=linear_neighbors(current_feats[list(adj_dict[node])])

#         #computation:
#         next_feats[node]=torch.mm(attention_coefficients[node_neighbors_indeces[node-1]:node_neighbors_indeces[node]].view(1, node_neighbors_indeces[node]-node_neighbors_indeces[node-1]),
#                                     node_neighbor_linout_feats.view(node_neighbors_indeces[node]-node_neighbors_indeces[node-1],-1))
#       current_feats=next_feats
#       return attention_coefficients
#       print(f'final feats out of layer {i} have shape {current_feats.shape}')
#       current_feats=F.relu(current_feats)
#     return current_feats

Batches will consist of the features of all the nodes needed **for all iterations**, but features will be augmented with a dimension that tells the actual node they correspond to (when making a batch-feature tensor, the node certain features pertain to will no longer be the row number of those features). The model receives an adjacency dict **only of the nodes needed for all iterations** and the features corresponding to that iteration. The batch adjacency dict will be constructed in the train_epoch function, while the batch features will be constructed in a separate function which will (hopefully) act as a loader. I'll also have a torch loader just for ids so I can get batches in a random way.

### Class def

In [None]:
class GAT(nn.Module):
  """
  Graph attention network operating on the dense adjacency matrix of one connected component.

  Every layer i owns a bias-free linear projection (linears_for_neighbors[i]) and two
  bias-free attention vectors (attentions1[i], attentions2[i]) that score a projected
  node as a neighbor and as a center node respectively; the two scores are summed in forward.
  """
  def __init__(self, no_iters, in_features_size, embedding_size, no_classes):
    """
    no_iters:         number of attention layers
    in_features_size: width of the input node features
    embedding_size:   width of the embeddings returned by forward
    no_classes:       number of logits produced by classify

    The first no_iters-1 layers output 2*in_features_size features, the last one
    outputs embedding_size features.
    """
    super().__init__()
    self.no_iters=no_iters
    self.emb_size=embedding_size
    self.feature_size=in_features_size

    hidden_size=in_features_size*2
    # (input width, output width) of every layer, in order
    layer_sizes=[]
    layer_in=in_features_size
    for _ in range(no_iters-1):
      layer_sizes.append((layer_in, hidden_size))
      layer_in=hidden_size
    # the last layer reads whatever the previous layer produced; with a single layer that is
    # the raw input width (it used to be hard-coded to hidden_size, which broke no_iters=1)
    layer_sizes.append((layer_in, embedding_size))

    linears_for_neighbors=[]
    attentions1=[]
    attentions2=[]
    for layer_in, layer_out in layer_sizes:
      linears_for_neighbors.append(nn.Linear(layer_in, layer_out, bias=False))
      # attention vectors act on the OUTPUT of the layer's linear projection
      attentions1.append(nn.Linear(layer_out, 1, bias=False))
      attentions2.append(nn.Linear(layer_out, 1, bias=False))

    self.linears_for_neighbors=nn.ModuleList(linears_for_neighbors)
    self.attentions1=nn.ModuleList(attentions1)
    self.attentions2=nn.ModuleList(attentions2)
    self.classifier=nn.Linear(embedding_size, no_classes, bias=False)
  
  def forward(self, feats: torch.Tensor, adj_matrix: torch.Tensor):
    """
    -feats are all the features of all nodes in a connected component
    -adj_matrix is the adjacency matrix of that component (has dimension no_nodes_in_component^2);
     entries equal to 0 mean "no link"

    Returns the embeddings of all nodes (no_nodes x embedding_size). No non-linearity is
    applied to the features between layers.
    Every row of adj_matrix needs at least one non-zero entry (e.g. a self loop),
    otherwise the softmax over that row is taken over -inf only and yields NaN.
    """
    current_feats=feats
    # positions without a link; the same mask is reused by every layer
    no_link=(adj_matrix==0)
    for linear, attention_neighbors, attention_nodes in zip(self.linears_for_neighbors, self.attentions1, self.attentions2):
      # project once and reuse for both attention scores and for the aggregation
      projected=linear(current_feats)
      proj_neighbors=attention_neighbors(projected)
      proj_nodes=attention_nodes(projected)
      #proj_neighbors and proj_nodes are both of size no_nodes x 1

      #attention_coefs[i, j] is the attention of node j seen as (potential) neighbor of node i,
      #so row i holds the coefficients of the neighbors of node i
      attention_coefs=torch.add(proj_neighbors.view(1, -1), proj_nodes.view(-1, 1))

      #mask out non-links with -inf so that they get weight 0 after the softmax
      attention_coefs[no_link]=float('-inf')

      #LeakyReLU with negative slope 0.2 leaves the -inf entries at -inf
      attention_coefs=F.leaky_relu(attention_coefs, negative_slope=0.2)

      attention_coefs=torch.softmax(attention_coefs, dim=1) #dim=1 i.e. softmax along each ROW

      current_feats=torch.mm(attention_coefs, projected)
    return current_feats
  def classify(self, embedded):
    """Map node embeddings to one raw logit per class (sigmoid is applied by the caller / the loss)."""
    out=self.classifier(embedded)
    return out

## Dummy example to test on

In [None]:
#dummy example
dum_feats=torch.FloatTensor([[1, 1, 0, 1], [1, 0, 1, 0], [0, 0, 0, 1], [1, 1, 1, 1], [0, 0, 1, 0], [1, 0, 1, 0]])
dum_classes=torch.FloatTensor([[1, 0], [1, 1], [1, 0], [0,0], [1, 1], [0, 1]])
dum_adj_matrix=torch.FloatTensor([[1, 0, 1, 1, 0, 0], [0, 1, 0, 0, 0, 1],
                                  [1, 1, 1, 0, 0, 0], [0, 1, 0, 1, 0, 1],
                                  [0, 0, 0, 1, 1, 0], [0, 0, 1, 0, 1, 1]
                                  ])
dum_adj_dict={0: [0, 2, 3], 1: [1, 5], 2:[0, 1, 2], 3:[1, 3, 5], 4:[3,4], 5:[2,4,5]}
print(dum_adj_dict)

In [None]:
# 2-layer GAT with 100-dimensional embeddings for the dummy graph; note that lr=10e-4 is 1e-3
dum_model=GAT(2, dum_feats.shape[-1], 100, dum_classes.shape[-1])
dum_optim=torch.optim.Adam(dum_model.parameters(), lr=10e-4)

In [None]:
# smoke test of the forward pass: embeddings of the 6 dummy nodes
a=dum_model(dum_feats,dum_adj_matrix)
print(a)

In [None]:
# single training step of the GAT on the dummy graph, also printing the autograd graph of the loss
for e in range(1):
  loss_func=torch.nn.BCEWithLogitsLoss()
  dum_optim.zero_grad()
  # GAT.forward masks attention with `adj_matrix==0`, so it needs the adjacency MATRIX;
  # passing the adjacency dict made that comparison a plain False and left attention unmasked
  dum_embeddings=dum_model(dum_feats, dum_adj_matrix)
  # dum_loss=torch.sum(dum_embeddings)
  dum_preds=dum_model.classify(dum_embeddings)
  dum_loss=loss_func(dum_preds, dum_classes)
  print(torchviz.make_dot(dum_loss))
  dum_loss.backward()
  with torch.no_grad():
    dum_optim.step()
    print(f'epoch {e}')
    print()
    # print(dum_embeddings)
    print(dum_preds)
    print(dum_classes)
    print(dum_loss.item())
    print('\n\n')

## Train model

### Old functions backup

In [None]:
# def train_epoch_gat(model:GAT, optim, adj_dict, feats, train_loader, class_map, device):
#   model=model.to(device)
  
#   loss_func=torch.nn.BCEWithLogitsLoss()
#   optim.zero_grad()

#   epoch_loss=0.
#   epoch_score=0.
#   for component_id in range(len(adj_matrices_comps)):
#     batch_node_ids=[elem.item() for elem in batch_node_ids]
#     batch_focus_ids, batch_features, index_mapping = get_batch_forward_information(model.no_iters, batch_node_ids, adj_dict, feats)
    
#     #model takes as forward args: node_features: torch.Tensor, index_mapping: dict, adj_dict: dict, focus_ids: list
#     batch_preds, returned_index_mapping=model(batch_features, index_mapping, adj_dict, batch_focus_ids)
#     batch_preds=model.classify(batch_preds)
#     print('classified')

#     #we need to recover the batch preds ordering. we know that the batch classes are ordered according to the batch_node_ids
#     #we need to order the batch_preds accordingly as well
#     ordered_batch_preds=torch.Tensor(batch_preds.shape)
#     for row_index in range(len(batch_node_ids)):
#       ordered_batch_preds[row_index]=batch_preds[returned_index_mapping[batch_node_ids[row_index]]]

#     loss=loss_func(ordered_batch_preds, batch_classes)
#     print(f'loss computed at batch {batch_no} is {loss.item()}')
#     loss.backward()
#     print('backward complete')
#     with torch.no_grad():
#       optim.step()
#       epoch_loss+=loss.item()
#       no_batches+=1
#       score=get_accuracy(torch.sigmoid(ordered_batch_preds), batch_classes)
#       epoch_score+=score
#   return epoch_loss/no_batches, epoch_score/no_batches

In [None]:
# def get_batch_forward_information(no_iters, batch_node_ids, adj_dict, feats):
#   """
#   function that returns:
#   -features of leaves nodes for all trees of nodes in batch_node_ids
#   -a list of lists of node ids corresponding to each level of the reunion of the trees of nodes in batch_node_ids
#   -index_mapping: a map of the actual node ids in batch_node_ids to rows of features

#   feats is the entire features matrix
#   """
#   #we want to get the focus nodes for the no_iters of the model
#   #farthest first, nearest last; last focus nodes will be nodes in batch, first ones will be the ones just before leaves
#   batch_focus_ids=[batch_node_ids]
#   for i in range(1,no_iters):
#     iteration_nodes=set([])
#     for node_id in batch_focus_ids[0]:
#       iteration_nodes=iteration_nodes | set(adj_dict[node_id])
#     batch_focus_ids.insert(0, list(iteration_nodes))
#     assert (len(set(batch_focus_ids[0]) & set(batch_focus_ids[1])) == len(set(batch_focus_ids[1])) ), "Not all nodes were kept for next iteration"
#   for i in range(len(batch_focus_ids)):
#     print(f'there are {len(batch_focus_ids[i])} nodes for iteration {i}')
#   print('\n\n')
#   #now on position 0 of batch_focus_ids we have the direct fathers of nodes that have just leaves; we also want the batch features
#   #the batch features will just be the features of the leaves, because the adj_dict was constructed such that every node has itself as its' neighbor

#   leaf_nodes=set([])
#   for node_id in batch_focus_ids[0]:
#     leaf_nodes=leaf_nodes | set(adj_dict[node_id])

#   batch_features=torch.Tensor(len(leaf_nodes), model.feature_size)
#   index_mapping={}
#   current_row=0
#   for node in list(leaf_nodes):
#     batch_features[current_row]=feats[node]
#     index_mapping[node]=current_row
#     current_row+=1
#   return batch_focus_ids, batch_features, index_mapping

### Train loader - NO LONGER NEEDED

In [None]:
def make_batch_loader_gat(ids_selector: list, id_map: dict, features: np.array, class_map: dict, batch_size: int, shuffle, device):
  """
  Build a DataLoader that yields (node ids, class labels) batches.

  ids_selector: list of ids that are in the wanted dataset
  class_map:    maps str(node id) to that node's label vector
  batch_size, shuffle: forwarded to the DataLoader (the last, smaller batch is kept)
  device:       device the label tensor is moved to; the id tensor is left where
                torch.tensor creates it
  id_map, features: accepted but not used by this function
  """
  from torch.utils.data import DataLoader, TensorDataset

  label_rows = [class_map[str(node_id)] for node_id in ids_selector]
  labels = torch.FloatTensor(label_rows).to(device)
  node_ids = torch.tensor(ids_selector)
  return DataLoader(TensorDataset(node_ids, labels), batch_size=batch_size, shuffle=shuffle, drop_last=False)

In [None]:
# shuffled batches of 300 training node ids with their labels
train_loader=make_batch_loader_gat(ids_selector=train_ids, id_map=id_map, features=feats, class_map=class_map, batch_size=300, shuffle=True, device=device)
# val_loader=make_batch_loader_gat(ids_selector=val_ids, id_map=id_map, features=feats, class_map=class_map, batch_size=len(val_ids), shuffle=False, device=device)
# test_loader=make_batch_loader_gat(ids_selector=test_ids, id_map=id_map, features=feats, class_map=class_map, batch_size=len(test_ids), shuffle=False, device=device)

### Score function

In [None]:
def get_accuracy(predictions, labels, probability=0.5):
  """
  Weighted F1 score of thresholded multi-label predictions (despite the name, not plain accuracy).

  predictions: tensor of shape no_nodes x no_classes holding the probability of each node
               being in each class
  labels:      same shape, binary
  probability: cut-off above which a probability counts as a positive prediction

  average='weighted' is used in f1_score because it accounts for possible class imbalances.

  Raises AssertionError when the two shapes differ.
  """
  # f-string instead of str + set concatenation, which raised TypeError before the assert could report
  assert predictions.shape == labels.shape, \
    f"Predictions and labels don't have same shape: {predictions.shape} vs. {labels.shape} respectively."
  from sklearn.metrics import f1_score
  cutoff=(predictions>probability).type(torch.FloatTensor)
  # sklearn converts its inputs to numpy, which is not possible for tensors living on the GPU
  if isinstance(labels, torch.Tensor):
    labels=labels.detach().cpu()
  score=f1_score(y_true=labels, y_pred=cutoff, average='weighted')
  return(score)

### Train functions

In [None]:
def train_epoch_gat(model: "GAT", optim, connected_comps, feats_comps, adj_matrices_comps,
                    components_train_indexes, components_classes_train, no_train_comps, device):
  """
  Run one training pass over every connected component that has training nodes.

  Each such component acts as one mini-batch: forward pass through model and
  model.classify, BCE-with-logits loss on the training rows, backward pass,
  optimizer step.

  model: GAT network; moved to device and put in training mode
  optim: optimizer used for the parameter updates
  connected_comps: only its length is used, as the number of components
  feats_comps, adj_matrices_comps: per-component inputs passed to model
  components_train_indexes: per-component row indexes of the training nodes;
    components with an empty entry are skipped
  components_classes_train: per-component label tensors for the training rows
  no_train_comps: divisor used to average the summed loss
  device: torch device the model is moved to

  Returns the summed per-component training loss divided by no_train_comps.
  """
  model=model.to(device)
  model.train()
  loss_func=torch.nn.BCEWithLogitsLoss()

  epoch_train_loss=0.

  for component_id in range(len(connected_comps)):
    train_rows=components_train_indexes[component_id]
    if not train_rows:
      continue

    # Gradients are reset for every component: the optimizer steps once per
    # component, so gradients of earlier components must not leak into it.
    optim.zero_grad()

    comp_preds=model(feats_comps[component_id], adj_matrices_comps[component_id])
    comp_preds=model.classify(comp_preds)
    comp_preds_train=comp_preds[train_rows]

    # Cast the labels on the predictions' own device; .type(torch.FloatTensor)
    # would force a CPU tensor and break the loss when training on the GPU.
    comp_classes_train=components_classes_train[component_id].to(
      device=comp_preds_train.device, dtype=torch.float32)

    train_loss=loss_func(comp_preds_train, comp_classes_train)
    train_loss.backward()
    optim.step()
    epoch_train_loss+=train_loss.item()

  return epoch_train_loss/no_train_comps

In [None]:
def train_gat_model(model, optim, components_separated_data, no_epochs, save_identifier, model_type, save_every):
  """
  Train model for no_epochs epochs and track loss / score on the train, val and test splits.

  After every epoch the model is run once more over all components (without
  gradients, in eval mode) to compute the train score and the val/test loss
  and score; each sum is divided by the matching no_*_comps count.
  Every save_every epochs (starting with epoch 0) the model weights and the
  six histories are written under basepath.

  model: network exposing forward(feats, adj) and classify(...)
  optim: optimizer passed on to train_epoch_gat
  components_separated_data: object with the attributes components_{train,val,test}_indexes,
    components_classes_{train,val,test} and no_{train,val,test}_comps
  no_epochs: number of epochs to run
  save_identifier, model_type: combined into the names of the files written under basepath
  save_every: period, in epochs, of the checkpoint / history dumps

  Reads the notebook globals connected_comps, feats_comps, adj_matrices_comps, device and
  basepath, and appends to the global lists {train,val,test}_every_epoch_{loss,score}.
  Returns nothing.
  """
  import pickle

  components_train_indexes=components_separated_data.components_train_indexes
  components_val_indexes=components_separated_data.components_val_indexes
  components_test_indexes=components_separated_data.components_test_indexes
  components_classes_train=components_separated_data.components_classes_train
  components_classes_val=components_separated_data.components_classes_val
  components_classes_test=components_separated_data.components_classes_test
  no_train_comps=components_separated_data.no_train_comps
  no_val_comps=components_separated_data.no_val_comps
  no_test_comps=components_separated_data.no_test_comps

  # Stateless, so one instance serves every component and epoch.
  loss_func=torch.nn.BCEWithLogitsLoss()

  for e in range(no_epochs):
    epoch_train_loss = train_epoch_gat(model, optim, connected_comps, feats_comps, adj_matrices_comps,
                                       components_train_indexes, components_classes_train, no_train_comps, device)
    save_now=(e%save_every==0)
    if save_now:
      torch.save(model.state_dict(), basepath+model_type+save_identifier)

    #adding to total vectors for plot
    epoch_train_score=0.
    epoch_val_loss=0.
    epoch_val_score=0.
    epoch_test_loss=0.
    epoch_test_score=0.

    # Evaluate in eval mode so train-only layers (e.g. dropout) do not distort
    # the reported metrics; the previous mode is restored afterwards.
    was_training=model.training
    model.eval()
    try:
      with torch.no_grad():
        for component_id in range(len(connected_comps)):
          comp_preds=model(feats_comps[component_id], adj_matrices_comps[component_id])
          comp_preds=model.classify(comp_preds)

          train_rows=components_train_indexes[component_id]
          if train_rows:
            comp_preds_train=comp_preds[train_rows]
            comp_classes_train=components_classes_train[component_id]
            epoch_train_score+=get_accuracy(torch.sigmoid(comp_preds_train), comp_classes_train)

          val_rows=components_val_indexes[component_id]
          if val_rows:
            comp_preds_val=comp_preds[val_rows]
            comp_classes_val=components_classes_val[component_id]
            # Labels are cast on the predictions' device (no forced CPU tensor).
            val_loss=loss_func(comp_preds_val,
                               comp_classes_val.to(device=comp_preds_val.device, dtype=torch.float32))
            epoch_val_loss+=val_loss.item()
            epoch_val_score+=get_accuracy(torch.sigmoid(comp_preds_val), comp_classes_val)

          test_rows=components_test_indexes[component_id]
          if test_rows:
            comp_preds_test=comp_preds[test_rows]
            comp_classes_test=components_classes_test[component_id]
            test_loss=loss_func(comp_preds_test,
                                comp_classes_test.to(device=comp_preds_test.device, dtype=torch.float32))
            epoch_test_loss+=test_loss.item()
            epoch_test_score+=get_accuracy(torch.sigmoid(comp_preds_test), comp_classes_test)
    finally:
      model.train(was_training)

    epoch_train_score/=no_train_comps
    epoch_val_loss/=no_val_comps
    epoch_val_score/=no_val_comps
    epoch_test_loss/=no_test_comps
    epoch_test_score/=no_test_comps
    print(f'epoch {e} train loss {epoch_train_loss} and score {epoch_train_score}')
    train_every_epoch_loss.append(epoch_train_loss)
    val_every_epoch_loss.append(epoch_val_loss)
    test_every_epoch_loss.append(epoch_test_loss)
    train_every_epoch_score.append(epoch_train_score)
    val_every_epoch_score.append(epoch_val_score)
    test_every_epoch_score.append(epoch_test_score)

    if save_now:
      # file-name tag -> history list; dumped in the same order as before
      histories={
        '_train_losses_': train_every_epoch_loss,
        '_val_losses_': val_every_epoch_loss,
        '_test_losses_': test_every_epoch_loss,
        '_train_scores_': train_every_epoch_score,
        '_val_scores_': val_every_epoch_score,
        '_test_scores_': test_every_epoch_score,
      }
      for tag, history in histories.items():
        with open(basepath+model_type+tag+save_identifier, 'wb') as filehandle:
          pickle.dump(history, filehandle)
        

### Actual training

In [None]:
# Instantiate the GAT. The input width is taken from the last dimension of feats.
# NOTE(review): 3 and 200 are passed positionally — presumably the number of layers and the
# embedding size (cf. '_3layer_' and model.emb_size in the save identifier); confirm against GAT.__init__.
model=GAT(3, feats.shape[-1], 200, no_classes)

In [None]:
# Adam over all model parameters with learning rate 1e-4.
# NOTE(review): this value was spelled 10e-5, which is 1e-4 and not 1e-5 — confirm it is the intended rate.
optim = torch.optim.Adam(model.parameters(), lr=1e-4)

In [None]:
# Number of training epochs handed to train_gat_model.
no_epochs = 1_000

In [None]:
# Prefix and run tag used to build the names of the files saved under basepath.
model_type = 'gat'
save_identifier = ''.join((str(model.emb_size), '_3layer_', 'nobias', '_intermtwiceinput'))

In [None]:
# Per-epoch histories; train_gat_model appends to these module-level lists.
train_every_epoch_loss, val_every_epoch_loss, test_every_epoch_loss = [], [], []
train_every_epoch_score, val_every_epoch_score, test_every_epoch_score = [], [], []

In [None]:
# Train the model; weights and histories are written under basepath every 5 epochs.
# model_type is passed as the variable (not a repeated literal) so the files written here
# carry the same prefix the load-back cell reads.
train_gat_model(
  model,
  optim,
  components_separated_data,
  no_epochs,
  save_identifier,
  model_type=model_type,
  save_every=5,
)

In [None]:
# Restore the last saved weights. map_location lets a checkpoint written on the GPU
# be loaded on a CPU-only runtime (and vice versa).
model.load_state_dict(torch.load(basepath+model_type+save_identifier, map_location=device))

Plot

In [None]:
# NOTE(review): 'hassh' and 0 are positional arguments whose meaning is set by
# plot_losses_n_scores — confirm against its definition.
plot_losses_n_scores(
  train_every_epoch_loss,
  val_every_epoch_loss,
  test_every_epoch_loss,
  'hassh',
  0,
  train_every_epoch_score,
  val_every_epoch_score,
  test_every_epoch_score,
)

Load back the saved loss and score histories

In [None]:
# List the contents of the project folder (basepath) to see which saved files are available.
!ls "$basepath"

In [None]:
# Point save_identifier at a previously saved run; this overrides the identifier built from the current model.
save_identifier = "150_2layer_nobias_intermtwiceinput"

In [None]:
import pickle

def _load_history(tag):
  """Unpickle the history stored in basepath+model_type+tag+save_identifier."""
  # pickle.load can execute code embedded in the file: only read histories written by this notebook.
  with open(basepath+model_type+tag+save_identifier, 'rb') as filehandle:
    return pickle.load(filehandle)

train_every_epoch_loss=_load_history('_train_losses_')
val_every_epoch_loss=_load_history('_val_losses_')
test_every_epoch_loss=_load_history('_test_losses_')
train_every_epoch_score=_load_history('_train_scores_')
val_every_epoch_score=_load_history('_val_scores_')
test_every_epoch_score=_load_history('_test_scores_')

In [None]:
# Dump the loaded histories, loss then score, for train / val / test.
for history in (
  train_every_epoch_loss,
  train_every_epoch_score,
  val_every_epoch_loss,
  val_every_epoch_score,
  test_every_epoch_loss,
  test_every_epoch_score,
):
  print(history)