In [1]:
pwd

'/home/yang1641/PINNACLE'

In [4]:
cd pinnacle

/home/yang1641/PINNACLE/pinnacle


In [5]:
pwd

'/home/yang1641/PINNACLE/pinnacle'

In [7]:
# General
import numpy as np
import random
import argparse
import os
import copy

# Pytorch
import torch
import torch.nn as nn
from torch_geometric.utils.convert import to_networkx, to_scipy_sparse_matrix
from torch_geometric.data import Data
from torch_geometric.utils import negative_sampling

# Center loss
from center_loss import CenterLoss

# W&B
import wandb

# Own code
from generate_input import read_data, get_metapaths, get_centerloss_labels
import model as mdl
import utils
import minibatch_utils as mb_utils
from parse_args import get_args, get_hparams

In [8]:
# Seed
seed = 3
print("SEED:", seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed) 
np.random.seed(seed)
random.seed(seed)
# torch.manual_seed(seed)
torch.backends.cudnn.deterministic = True

SEED: 3


In [9]:
torch.cuda.is_available()

True

In [20]:
# see https://github.com/mims-harvard/PINNACLE/blob/main/pinnacle/run_pinnacle.sh
# and https://github.com/mims-harvard/PINNACLE/blob/main/pinnacle/parse_args.py

args_manual = {
    'pc_att_channels': 16,
    'feat_mat': 1024, 
    'output': 16,
    'hidden': 64,
    'lr': 0.01,
    'wd': 5e-4,
    'dropout': 0.6,
    'gradclip': 1.0,
    'n_heads': 8,
    'lmbda': 0.1,
    'theta': 0.3,
    'lr_cent': 0.1,
    'loss_type': "BCE",
    'plot': False,
    'G_f': '/scratch/gilbreth/yang1641/exome/data/PINNACLE/networks/global_ppi_edgelist.txt', 
    'ppi_dir': '/scratch/gilbreth/yang1641/exome/data/PINNACLE/networks/ppi_edgelists/', 
    'mg_f': '/scratch/gilbreth/yang1641/exome/data/PINNACLE/networks/mg_edgelist.txt', 
    'epochs': 10, 
    'resume_run': '', 
    'loader': 'graphsaint', 
    'batch_size': 8, 
    'norm': None,
    'save_prefix': '/scratch/gilbreth/yang1641/exome/results/pinnacle/reproduce/epoch10/try1'
              }

In [21]:
import argparse

# Convert dictionary to argparse Namespace
args = argparse.Namespace(**args_manual)

type(args)
print(args)

# Generate hyperparameters
hparams_raw = get_hparams(args)

Namespace(G_f='/scratch/gilbreth/yang1641/exome/data/PINNACLE/networks/global_ppi_edgelist.txt', batch_size=8, dropout=0.6, epochs=10, feat_mat=1024, gradclip=1.0, hidden=64, lmbda=0.1, loader='graphsaint', loss_type='BCE', lr=0.01, lr_cent=0.1, mg_f='/scratch/gilbreth/yang1641/exome/data/PINNACLE/networks/mg_edgelist.txt', n_heads=8, norm=None, output=16, pc_att_channels=16, plot=False, ppi_dir='/scratch/gilbreth/yang1641/exome/data/PINNACLE/networks/ppi_edgelists/', resume_run='', save_prefix='/scratch/gilbreth/yang1641/exome/results/pinnacle/reproduce/epoch10/try1', theta=0.3, wd=0.0005)
Hyperparameters: {'pc_att_channels': 16, 'feat_mat': 1024, 'output': 16, 'hidden': 64, 'lr': 0.01, 'wd': 0.0005, 'dropout': 0.6, 'gradclip': 1.0, 'n_heads': 8, 'lambda': 0.1, 'theta': 0.3, 'lr_cent': 0.1, 'loss_type': 'BCE', 'plot': False}


In [22]:
!mkdir -p /scratch/gilbreth/yang1641/exome/results/pinnacle/reproduce/epoch10

In [23]:
# Define paths using args.save_prefix
save_log = args.save_prefix + "_gnn_train.log"
save_graph = args.save_prefix + "_graph.pkl"
save_model = args.save_prefix + "_model_save.pth"
save_plots = args.save_prefix + "_train_embed_plots.pdf"
save_ppi_embed = args.save_prefix + "_protein_embed.pth"
save_mg_embed = args.save_prefix + "_mg_embed.pth"
save_labels_dict = args.save_prefix + "_labels_dict.txt"

print(f"Saving log file to: {save_log}")


log_f = open(save_log, "w")
log_f.write("Number of epochs: %s \n" % args.epochs)
log_f.write("Save model directory: %s \n" % save_model)
log_f.write("Save embeddings directory: %s, %s \n" % (save_ppi_embed, save_mg_embed))
log_f.write("Save graph: %s \n" % save_graph)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)
if device.type == 'cuda': print(torch.cuda.get_device_name(0))
best_val_acc = -1
best_model = None
eps = 10e-4

Saving log file to: /scratch/gilbreth/yang1641/exome/results/pinnacle/reproduce/epoch10/try1_gnn_train.log
Using device: cuda
NVIDIA A30


NVIDIA A30 with CUDA capability sm_80 is not compatible with the current PyTorch installation.
The current PyTorch install supports CUDA capabilities sm_37 sm_50 sm_60 sm_61 sm_70 sm_75 compute_37.
If you want to use the NVIDIA A30 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/



In [24]:
wandb.init(config = hparams_raw, project = "pinnacle", entity = "yang1641-purdue-university")

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/yang1641/.netrc


In [25]:
hparams = wandb.config

In [27]:
hparams['feat_mat']

1024

In [29]:
args.G_f

'/scratch/gilbreth/yang1641/exome/data/PINNACLE/networks/global_ppi_edgelist.txt'

In [30]:
args.ppi_dir

'/scratch/gilbreth/yang1641/exome/data/PINNACLE/networks/ppi_edgelists/'

In [31]:
args.mg_f

'/scratch/gilbreth/yang1641/exome/data/PINNACLE/networks/mg_edgelist.txt'

In [36]:
G_f = args.G_f
ppi_dir = args.ppi_dir
mg_f = args.mg_f
feat_mat_dim = hparams['feat_mat']

In [34]:
import glob
from collections import Counter
import pandas as pd
import numpy as np
import random
import networkx as nx
import torch
from torch_geometric.data import Data

In [38]:
from generate_input import split_data, read_ppi, create_data, read_global_ppi, read_data

In [39]:
G = nx.read_edgelist(G_f)
#G = read_global_ppi(G_f)

feat_mat = torch.normal(torch.zeros(len(G.nodes), feat_mat_dim), std=1)

# Read PPI layers
orig_ppi_layers, ppi_layers, ppi_train, ppi_val, ppi_test = read_ppi(ppi_dir)
print("Number of PPI layers:", len(ppi_layers), len(ppi_train), len(ppi_val), len(ppi_test))

# Read metagraph
metagraph = nx.read_edgelist(mg_f, data=False, delimiter = "\t", create_using=nx.DiGraph)
assert nx.is_connected(metagraph.to_undirected())
mg_feat_mat = torch.zeros(len(metagraph.nodes), feat_mat_dim)

Number of PPI layers: 0 0 0 0


In [42]:
ppi_dir

'/scratch/gilbreth/yang1641/exome/data/PINNACLE/networks/ppi_edgelists/'

In [43]:
# inside read_ppi

orig_ppi_layers = dict()
ppi_layers = dict()
ppi_train = dict()
ppi_val = dict()
ppi_test = dict()

In [44]:
glob.glob(ppi_dir + "*edgelist.txt")

[]

In [None]:
for f in glob.glob(ppi_dir + "*edgelist.txt"): # Expected format of filename: <PPI_DIR>/<CONTEXT>.<suffix>

        # Parse name of context
        context = f.split(ppi_dir)[1].split(".")[0].replace("_edgelist", "")
        context = "cluster:" + context.replace("_", " ")

        # Read edgelist
        ppi = nx.read_edgelist(f)

        # Relabel PPI nodes
        mapping = {n: idx for idx, n in enumerate(ppi.nodes())}
        ppi_layers[context] = nx.relabel_nodes(ppi, mapping)
        orig_ppi_layers[context] = ppi
        assert nx.is_connected(ppi_layers[context])

        # Split into train/val/test
        ppi_train[context], ppi_val[context], ppi_test[context] = split_data(len(ppi_layers[context].edges))

In [28]:
# Read data
ppi_data, mg_data, edge_attr_dict, celltype_map, tissue_neighbors, ppi_layers, metagraph = read_data(args.G_f, args.ppi_dir, args.mg_f, hparams['feat_mat'])

Number of PPI layers: 0 0 0 0
Number of nodes: 218 Number of edges: 4018
{}


AssertionError: {'endothelial cell', 'endothelial cell of vascular tree', 'corneal keratocyte', 'hillock cell of prostate epithelium', 'conjunctival epithelial cell', 'transit amplifying cell of large intestine', 'intrahepatic cholangiocyte', 'keratinocyte', 'lacrimal gland functional unit cell', 'melanocyte', 'goblet cell', 'artery endothelial cell', 'ciliated cell', 'pericyte cell', 'naive regulatory t cell', 'schwann cell', 'adipocyte', 'langerhans cell', 'ciliated epithelial cell', 'lymphatic endothelial cell', 'smooth muscle cell', 'respiratory mucous cell', 'vascular associated smooth muscle cell', 'salivary gland cell', 'retinal bipolar neuron', 'cd8-positive, alpha-beta cytotoxic t cell', 'hepatocyte', 'sperm', 'retina horizontal cell', 'liver dendritic cell', 'duodenum glandular cell', 'plasmacytoid dendritic cell', 'cardiac endothelial cell', 'fibroblast of cardiac tissue', 'pulmonary ionocyte', 'limbal stromal cell', 'dn1 thymic pro-t cell', 'fast muscle cell', 'dn4 thymocyte', 'duct epithelial cell', 'erythroid progenitor', 'club cell of prostate epithelium', 'tongue muscle cell', 'type i pneumocyte', 'hillock-club cell of prostate epithelium', 'retinal blood vessel endothelial cell', 'pancreatic beta cell', 'bronchial smooth muscle cell', 'granulocyte', 'cell of skeletal muscle', 'pancreatic alpha cell', 't follicular helper cell', 'muscle cell', 'paneth cell of epithelium of large intestine', 'pancreatic stellate cell', 'connective tissue cell', 'endothelial cell of lymphatic vessel', 'regulatory t cell', 'cd24 neutrophil', 'intestinal crypt stem cell of small intestine', 'small intestine goblet cell', 'radial glial cell', 'mesothelial cell', 'retinal ganglion cell', 'myeloid progenitor', 'epithelial cell of lacrimal sac', 'immune cell', 'pancreatic delta cell', 'skeletal muscle satellite stem cell', 'mucus secreting cell', 'endothelial cell of artery', 'cd141-positive myeloid dendritic cell', 'non-classical monocyte', 'classical monocyte', 'myofibroblast cell', 'fibroblast', 'respiratory goblet cell', 'type i nk t cell', 'ciliary body', 'acinar cell of salivary gland', 'b cell', 'corneal epithelial cell', 'ionocyte', 'immature natural killer cell', 'pancreatic acinar cell', 'myeloid dendritic cell', 'secretory cell', 'lung microvascular endothelial cell', 'basal cell of prostate epithelium', 'cd4-positive helper t cell', 'myeloid cell', 'macrophage', 'nk cell', 'epithelial cell of uterus', 'pancreatic ductal cell', 'nampt neutrophil', 'muller cell', 'memory b cell', 'plasmablast', 'serous cell of epithelium of trachea', 'hematopoietic stem cell', 'intestinal enteroendocrine cell', 'cd8-positive, alpha-beta cytokine secreting effector t cell', 'intestinal crypt stem cell', 'alveolar fibroblast', 'basal cell', 'enterocyte of epithelium of small intestine', 'epithelial cell', 'cardiac muscle cell', 'intestinal tuft cell', 'luminal cell of prostate epithelium', 'myoepithelial cell', 'adventitial cell', 'serous cell of epithelium of bronchus', 'capillary endothelial cell', 'ocular surface cell', 'gut endothelial cell', 'paneth cell of epithelium of small intestine', 'type ii pneumocyte', 'medullary thymic epithelial cell', 'dn3 thymocyte', 'cd1c-positive myeloid dendritic cell', 'monocyte', 'plasma cell', 'pancreatic pp cell', 'bladder urothelial cell', 'myometrial cell', 'microglial cell', 'vein endothelial cell', 'cd4-positive, alpha-beta memory t cell', 'transit amplifying cell of small intestine', 'intestinal crypt stem cell of large intestine', 'stromal cell', 'immature enterocyte', 'mesenchymal stem cell', 'bronchial vessel endothelial cell', 'limbal stem cell', 'kidney epithelial cell', 'enterocyte of epithelium of large intestine', 'tendon cell', 'mature conventional dendritic cell', 'club cell', 'lung ciliated cell', 'dendritic cell', 'eye photoreceptor cell', 'intermediate monocyte', 'mature nk t cell', 'tracheal goblet cell', 'luminal epithelial cell of mammary gland', 'mature enterocyte', 'retinal pigment epithelial cell', 'endothelial cell of hepatic sinusoid', 'large intestine goblet cell', 'double-positive, alpha-beta thymocyte', 'naive thymus-derived cd4-positive, alpha-beta t cell', 'fibroblast of breast'}