In [1]:
%load_ext autoreload
%autoreload 2

import os
import h5py
import pickle
import time
from pathlib import Path

import argparse
import numpy as np
import seaborn as sns
import networkx as nx
import torch
import torch.nn as nn
import pytorch_lightning as pl
import pytorch_lightning.loggers as pl_loggers
import torch_geometric
from torch_geometric.utils import from_networkx, to_networkx
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader

import florah
from florah_analysis import envs, utils, preprocess, sampling
from florah_analysis import tree_utils

from ml_collections import config_flags
from absl import flags, logging

import training_utils, models, generators

In [2]:
# Register the `my_config` config-file flag exactly once. absl flags live in a
# process-global registry, so defining the same flag again (e.g. when this cell
# is re-run in a live kernel) raises a duplicate-flag error. On a re-run the
# previously bound `_CONFIG` holder is kept as-is.
if 'my_config' not in flags.FLAGS:
    _CONFIG = config_flags.DEFINE_config_file('my_config')
# NOTE(review): flags are not parsed in a notebook unless FLAGS(argv) is called
# explicitly; reading flag values before that will fail — confirm how the
# config is meant to be loaded here.
FLAG = flags.FLAGS

AttributeError: config

In [None]:
# Location of the pickled dataset.
dset_path = '/mnt/ceph/users/tnguyen/florah/datasets/experiments/GUREFT05-Nanc1.pkl'

# SECURITY: pickle.load executes arbitrary code embedded in the file; only
# load pickles that come from a trusted source.
with open(dset_path, 'rb') as f:
    data = pickle.load(f)

# Turn every loaded networkx graph into a PyTorch Geometric object.
data = list(map(from_networkx, data))

def prepare_dataloader(
        data, train_frac=0.8, batch_size=1024, num_workers=1,
        return_norm_dict=False):
    """Split graphs into train/validation loaders with standardized features.

    The input is shuffled with NumPy's global RNG, the first
    ``int(len(data) * train_frac)`` items form the training set, and the
    per-column mean/std of the training ``x`` tensors are used to standardize
    ``x`` for every item (train and validation).

    The caller's list and its elements are left untouched: shuffling and
    normalization are applied to shallow copies, so calling this function
    again on the same ``data`` does not normalize already-normalized features.

    Parameters
    ----------
    data : sequence
        Items exposing a tensor attribute ``x``.
    train_frac : float
        Fraction of items assigned to the training split.
    batch_size : int
        Batch size used by both loaders.
    num_workers : int
        Number of worker processes used by both loaders.
    return_norm_dict : bool
        If True, also return the normalization statistics.

    Returns
    -------
    (train_loader, val_loader), or (train_loader, val_loader, norm_dict) when
    ``return_norm_dict`` is True. ``norm_dict`` holds NumPy arrays under the
    keys ``"x_mean"`` and ``"x_std"``.

    Raises
    ------
    ValueError
        If the training split would be empty.
    """
    import copy  # local import: only needed for the shallow copies below

    num_total = len(data)
    num_train = int(num_total * train_frac)
    if num_train < 1:
        raise ValueError(
            f"Training split is empty: {num_total} samples with "
            f"train_frac={train_frac}.")

    # Shuffle a copy so the caller's list order and objects are not modified.
    order = np.random.permutation(num_total)
    data = [copy.copy(data[i]) for i in order]

    # Calculate the normalization statistics from the training split only.
    x = torch.cat([d.x for d in data[:num_train]])
    x_mean = x.mean(dim=0)
    x_std = x.std(dim=0)
    # A constant column (std == 0) or a single-row split (std is NaN) would
    # turn the features into NaN/inf; fall back to unit scale for those.
    x_std = torch.where(x_std > 0, x_std, torch.ones_like(x_std))
    norm_dict = {"x_mean": x_mean.numpy(), "x_std": x_std.numpy()}
    for d in data:
        d.x = (d.x - x_mean) / x_std

    train_loader = DataLoader(
        data[:num_train], batch_size=batch_size, shuffle=True,
        num_workers=num_workers, pin_memory=True)
    val_loader = DataLoader(
        data[num_train:], batch_size=batch_size, shuffle=False,
        num_workers=num_workers, pin_memory=True)

    if return_norm_dict:
        return train_loader, val_loader, norm_dict
    return train_loader, val_loader

In [528]:

# Build the sequence classifier. Every value passed below is a variable that
# must already exist in the notebook namespace when this cell runs.
model = generators.SequenceClassifier(
    input_size=input_size,
    num_classes=num_classes,
    d_time=d_time,
    sum_features=sum_features,
    num_samples_per_graph=num_samples_per_graph,
    # NOTE(review): the variable is spelled `feautrizer_args`; it is kept
    # as-is because it has to match the name used where it is defined.
    featurizer_args=feautrizer_args,
    classifier_args=classifier_args,
    optimizer_args=optimizer_args,
    scheduler_args=scheduler_args,
)

In [529]:
# Training configuration.
max_epochs = 1000
# NOTE(review): max_steps is defined but not passed to the Trainer below —
# confirm whether a step limit is intended.
max_steps = 100000
patience = 50     # epochs without val_loss improvement before early stopping
grad_clip = 0.5   # forwarded to the Trainer as gradient_clip_val
log_dir = 'logs/'
name = 'test'

callbacks = [
    # Stop once the monitored validation loss stops improving.
    pl.callbacks.EarlyStopping(
        monitor='val_loss', patience=patience, mode='min', verbose=True),
    # Keep the five checkpoints with the lowest validation loss.
    pl.callbacks.ModelCheckpoint(
        monitor='val_loss', save_top_k=5, mode='min',
        save_weights_only=False),  # this comma was missing (SyntaxError)
    pl.callbacks.LearningRateMonitor("epoch"),
]
logger = pl_loggers.TensorBoardLogger(log_dir, name=name)
trainer = pl.Trainer(
    callbacks=callbacks,
    logger=logger,
    max_epochs=max_epochs,
    gradient_clip_val=grad_clip,
)
# `model`, `train_loader` and `val_loader` must already exist in the notebook
# namespace when this cell runs.
trainer.fit(model, train_loader, val_loader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: logs/test
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name            | Type                  | Params
----------------------------------------------------------
0 | featurizer      | TransformerFeaturizer | 397 K 
1 | time_projection | Linear                | 256   
2 | classifier      | MLPClassifier         | 49.7 K
----------------------------------------------------------
447 K     Trainable params
0         Non-trainable params
447 K     Total params
1.791     Total estimated model params size (MB)


Epoch 53: 100%|██████████| 125/125 [00:07<00:00, 15.93it/s, v_num=0, train_loss_step=0.311, train_acc_step=0.857, val_loss_step=0.473, val_acc_step=0.833, val_loss_epoch=0.426, val_acc_epoch=0.925, train_loss_epoch=0.181, train_acc_epoch=0.938] 
