In [None]:
import numpy as np
import time
import random
from copy import deepcopy
import datetime
from load_data import load_data
from basic_GNN import HGNN
from eval_tools import evaluate_results_nc
from dqn_agent import DQNAgent
from gnn_env import gnn_env
from utils import weights_init

import torch

# Draw a fresh seed for this run and push it into every RNG the notebook uses
# (Python `random`, NumPy, PyTorch CPU and, when a GPU is present, CUDA).
# To replay an earlier run, pin SEED with the commented line below.
SEED = random.randint(1,10000001)
# SEED = 8684795 # 123 4329
cuda_available = torch.cuda.is_available()
for seed_rng in (random.seed, np.random.seed, torch.manual_seed):
    seed_rng(SEED)
if cuda_available:
    torch.cuda.manual_seed(SEED)
# Prefer the GPU when one is visible, otherwise stay on the CPU.
device = torch.device('cuda' if cuda_available else 'cpu')
print('SEED', SEED)

import warnings
# Silence every Python warning for the rest of the session so cell output stays compact.
# NOTE(review): this also hides deprecation and numerical warnings raised by
# libraries -- confirm that blanket suppression is intended.
warnings.filterwarnings('ignore')
# %matplotlib inline

In [None]:
# Experiment hyperparameters. Every value here is copied into `model_config`
# in the next cell; several are additionally passed straight to load_data,
# HGNN or DQNAgent. Value lists in trailing comments are alternatives noted by
# the author, not validated ranges.

# ---- Dataset ----
data_name = 'IMDB' # dataset identifier handed to load_data
num_train = 0 # alternatives: 0 5 10 15 20; a value of 0 turns on the `semisup` flag
# ---- Optimisation ----
adam_lr = 0.005 # alternatives: 0.01 0.001 0.005
agent_lr = 1e-4 # used as the DQNAgent learning_rate; alternatives noted: 0.01 0.001 0.005
weight_decay = 0.0001 # alternatives: 5e-4, 7e-4
batch_size = 4278 # alternatives: 128 2048 4278
hid_dim = 256 # hid_dim argument of HGNN
# state_mode = 2 # 1-node feature 2-unique OH feature
# ---- Agent sizing: each coefficient is multiplied by data.val_mask.sum() ----
agent_batch_coef = 5 # -> DQNAgent batch_size
agent_replay_memory_coef = 100 # -> DQNAgent replay_memory_size
agent_norm_sample = 10 # -> DQNAgent norm_sample
agent_action_mode = 1 # 1-action on M nodes, 2-action on All nodes
# ---- Episode schedule ----
walk_length = 3 # alternatives: 2, 3, 4, 5, 6, 7; walk_length argument of HGNN
max_timesteps = 1 # alternatives: 1 10; passed to agent.learn on every call
max_episodes = 325 # alternatives: 200 325; also sets epsilon_decay_steps = max_episodes // 2
local_test = False # forwarded to load_data
# ---- GNN architecture ----
pretrain_gnn = False # bool: True / False; forwarded to HGNN
gnn_type = 'GAT' # options: GCN GAT NONE
gnn_layers = 2
agg_type = 2 # 1: agg all steps once; 2: agg by step
rnn_type = 'NONE' # options: gru lstm bi-gru bi-lstm
dropout = 0.5 # 0-1
act_type = 'relu' # options: relu elu
# ---- Reward ----
# NOTE(review): 'acc p' looks like a space-separated combination of the listed
# options -- confirm how the environment parses it.
reward_mode = 'acc p' # options: dis-eu p acc
reward_coef = 100 # 100
baseline_experience = 50
# ---- DQN network ----
agent_mlp = [128, 64, 32, 16] # mlp_layers of DQNAgent; alternative: [32, 64, 128, 64, 32]
agent_mode = 2 # 0-random 1-by-batch 2-by-item
# NOTE(review): early_stop is only stored in model_config; the training loop in
# this notebook never reads it directly -- confirm it is consumed elsewhere.
early_stop = 100

In [None]:
# Collect every hyperparameter into one dict; it is handed to the environment
# as a whole. Key order is kept stable because the dict is printed in
# insertion order below.
model_config = {
    'data_name': data_name,
    'local_test': local_test,
    # 'state_mode': state_mode,
    'pretrain_gnn': pretrain_gnn,
    'num_train': num_train,
    'walk_length': walk_length,
    'batch_size': batch_size,
    'agent_batch_coef': agent_batch_coef,
    'agent_replay_memory_coef': agent_replay_memory_coef,
    'agent_norm_sample': agent_norm_sample,
    'agent_action_mode': agent_action_mode,
    'max_timesteps': max_timesteps,
    'max_episodes': max_episodes,
    'reward_mode': reward_mode,
    'reward_coef': reward_coef,
    'baseline_experience': baseline_experience,
    'adam_lr': adam_lr, # 1e-2, 5e-3
    'agent_lr': agent_lr,
    'weight_decay': weight_decay,
    # 'num_diff_actions': num_diff_actions,
    'hid_dim': hid_dim,
    # 'jump_mode': jump_mode,
    # 'str_mode': str_mode,
    'gnn_type': gnn_type,
    'gnn_layers': gnn_layers,
    'agg_type': agg_type,
    'rnn_type': rnn_type,
    'dropout': dropout,
    'act_type': act_type,
    'agent_mlp': agent_mlp,
    'agent_mode': agent_mode,
    'early_stop': early_stop,
    'SEED': SEED,
}

# Record whether a CUDA device is visible to PyTorch.
model_config['use_cuda'] = torch.cuda.is_available()

# num_train == 0 selects the semi-supervised path; the training loop reads
# `semisup` to decide whether to compute and print Micro/Macro-F1.
semisup = (num_train == 0)
model_config['semisup'] = semisup

print('Parameters of Model are:')
# Use real names for the loop variables: `_` is the name IPython binds to the
# last cell output, and rebinding it here would shadow that shortcut.
for config_key, config_value in model_config.items():
    print(config_key, config_value)

# set up device (added after the printout above, so it is not listed there)
device = torch.device('cuda:0' if model_config['use_cuda'] else 'cpu')
model_config['device'] = device

In [None]:
# Load the dataset selected in the hyperparameter cell. load_data returns a
# pair: the data object and `adj_graph`, both of which are passed on to the
# environment.
data, adj_graph = load_data(
    data_name=data_name,
    num_train=num_train,
    local_test=local_test,
    device=device,
)

In [None]:
# Dimensions derived from the loaded data.
num_relations = data.num_relation
feat_dim = data.x.shape[1]              # second dimension of data.x
num_classes = data.y.unique().shape[0]  # number of distinct values in data.y

# Instantiate HGNN from the hyperparameters chosen above.
model = HGNN(
    pretrain_gnn=pretrain_gnn,
    walk_length=walk_length,
    num_relations=num_relations,
    feat_dim=feat_dim,
    hid_dim=hid_dim,
    out_dim=num_classes,
    gnn_type=gnn_type,
    gnn_layers=gnn_layers,
    agg_type=agg_type,
    rnn_type=rnn_type,
    dropout=dropout,
    act_type=act_type,
    device=device,
)

# Release cached GPU memory before moving the model onto the target device.
torch.cuda.empty_cache()
# Move first, then run weights_init over every submodule (Module.apply
# returns the module itself, so `model` stays bound to the same object).
model = model.to(device).apply(weights_init)
print(model)

In [None]:
# Build the environment from the model, the loaded data, the full config dict,
# `adj_graph` and the target device.
# NOTE(review): the fifth positional argument is an empty string; its meaning
# is set by gnn_env's signature, which is not visible in this notebook -- confirm.
env = gnn_env(
    model, data, model_config, adj_graph, '', device
)
# Seed the environment with the run-wide SEED.
env.seed(SEED)

In [None]:
# All agent buffer sizes scale with the number of set entries in the
# validation mask, so compute that count once.
num_val = data.val_mask.sum().tolist()

agent = DQNAgent(
    scope='dqn',
    action_num=env.action_num,
    step_num=env.walk_length,
    # Size of the replay memory (reference value: 20000).
    replay_memory_size=agent_replay_memory_coef * num_val,
    # Copy parameters from the Q estimator to the target estimator every N steps
    # (other values noted: 100, 1000).
    update_target_estimator_every=50,
    # Gamma discount factor.
    discount_factor=0.99,
    # Chance to sample a random action; epsilon decays from this start value...
    epsilon_start=1.0,
    # ...down to this final minimum (other value noted: 0.1).
    epsilon_end=0.05,
    # Number of steps to decay epsilon over (reference value: 20000).
    epsilon_decay_steps=max_episodes // 2,
    # Training batch size (reference value: 32).
    batch_size=agent_batch_coef * num_val,
    # Number of samples used to normalize the state (reference value: 100).
    norm_sample=agent_norm_sample * num_val,
    # Reference value: 0.0005.
    learning_rate=model_config['agent_lr'],
    state_shape=env.observation_space.shape,
    # Alternative layout noted: [32, 64, 128, 64, 32].
    mlp_layers=agent_mlp,
    # 0-random 1-by-batch 2-by-item
    agent_mode=agent_mode,
    # The agent is placed on the CPU regardless of where the GNN lives.
    device=torch.device('cpu'),
)
agent.seed(SEED)
# Hand the agent to the environment as its policy.
env.policy = agent

In [None]:
def _format_f1(scores, train_sizes=(0.8, 0.6, 0.4, 0.2)):
    """Render (mean, std) F1 pairs as 'mean~std (train_size)' joined by ', '.

    `scores` is paired positionally with `train_sizes`; surplus items on
    either side are dropped, exactly as `zip` does.
    """
    return ', '.join(
        '{:.6f}~{:.6f} ({:.1f})'.format(f1_mean, f1_std, train_size)
        for (f1_mean, f1_std), train_size in zip(scores, train_sizes)
    )


best_episode, best_test, best_val = 0, 0, 0
# These names are only assigned when an episode strictly improves on best_val.
# Bind them up front so the per-episode report cannot raise NameError when no
# episode has improved yet (e.g. val_acc == 0 in the first episode).
best_policy = None
best_macro, best_micro = None, None

# Training: Learning meta-policy
print("Training Meta-policy on Validation Set")
for i_episode in range(1, max_episodes+1):
    print('\nStart episode {}.'.format(i_episode))
    if i_episode == 1:
        # Warm-up, first episode only: keep calling agent.learn until the
        # agent has seen `norm_sample` steps, then until the replay memory
        # holds at least one full batch.
        while agent.total_t < agent.norm_sample:
            print('Charging agent normalizer...')
            start = time.time()  # not read anywhere in this cell
            loss, val_acc, test_acc, test_emb, test_label = agent.learn(env, max_timesteps)
        print('Agent normalizer charging is done.\n')
        while len(agent.memory.memory) < agent.memory.batch_size:
            print('Charging agent memory...')
            loss, val_acc, test_acc, test_emb, test_label = agent.learn(env, max_timesteps)
        print('Agent memory charging is done.\n')
        agent.ready_train = True
    loss, val_acc, test_acc, test_emb, test_label = agent.learn(env, max_timesteps)
    if val_acc > best_val: # check whether gain improvement on validation set
        best_policy = deepcopy(agent) # save the best policy
        best_val = val_acc
        best_test = test_acc
        best_episode = i_episode
        if semisup:
            # evaluate_results_nc returns the macro and micro score lists, in that order.
            best_macro, best_micro = evaluate_results_nc(
                embeddings=test_emb,
                labels=test_label,
                num_classes=data.y.unique().shape[0]
            )
    print("Training Meta-policy: {}, DQN Loss:{:.5}, Val_Acc: {:.5f}, Test_Acc: {:.5f}".\
                  format(i_episode, loss, val_acc, test_acc))
    print("Best Episode {}, Val_Acc: {:.5f}, Test_Acc: {:.5f}".format(
        best_episode, best_val, best_test)
    )
    # F1 scores exist only after at least one improving episode.
    if semisup and best_micro is not None:
        print('Best Micro-F1: ' + _format_f1(best_micro))
        print('Best Macro-F1: ' + _format_f1(best_macro))