# Introduction

This documentation shows how to train a neural network on the QM9 dataset and how to visualize the network with PiNNboard.

In [None]:
import os
import yaml
import tensorflow as tf

from glob import glob
from pinn.models import potential_model
from pinn.networks import pinet
from pinn.utils import get_atomic_dress
from pinn.io import load_qm9, sparse_batch
from tensorboard_plugin_pinnboard.summary import pinnboard_summary
# Set CUDA_VISIBLE_DEVICES so that only the GPU with index 0 is visible to
# CUDA-based libraries used in this session.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

# Read the dataset 

In [None]:
# Collect the paths of all .xyz structure files of the dataset.
filelist = glob('/home/yunqi/datasets/QM9/dsgdb9nsd/*.xyz')

# Read the exclusion list. The first 9 lines and the last line of the file are
# skipped; the first whitespace-separated field of every remaining line is an
# integer structure index.
# NOTE(review): presumably this is the list of uncharacterized molecules that
# accompanies QM9 -- confirm against the dataset description.
with open('/home/yunqi/datasets/QM9/3195404') as f:
    lines = f.readlines()
# Stored as a set so that the membership test in the filter below is O(1)
# per file instead of a linear scan over all excluded indices.
out = {int(line.split()[0]) for line in lines[9:-1]}
# Characters [-10:-4] of each path are the 6 characters right before the
# '.xyz' extension; they are parsed as the integer structure index.
filelist = [i for i in filelist if int(i[-10:-4]) not in out]

print("{} structures in total.".format(len(filelist)))


def dataset():
    """Return a freshly loaded QM9 dataset split 8:2 into 'train' and 'test'.

    A fixed seed (0) is passed to the loader, and a new dataset object is
    built on every call, so each consumer gets its own instance.
    """
    return load_qm9(filelist, split={'train': 8, 'test': 2}, seed=0)


# Compute the atomic dress from the training split, batched 100 structures at
# a time, for the elements H, C, N, O and F (atomic numbers 1, 6, 7, 8, 9).
# NOTE(review): presumably a per-element energy offset fitted to the training
# energies -- see pinn.utils.get_atomic_dress to confirm.
dress, error = get_atomic_dress(
    dataset()['train'].apply(sparse_batch(100)),
    [1, 6, 7, 8, 9],
    max_iter=10)

# Configure the training process

Note that `keep_checkpoint_max` is set to `None` in the RunConfig;
this keeps every checkpoint so that we can use them later to
produce the visualization.

In [None]:
# Model directory, network type and the hyperparameters of both the network
# and the potential model built on top of it.
params = {
    'model_dir': 'PiNet_QM9',
    'network': 'pinet',
    'network_params': {
        'ii_nodes': [5],
        'pi_nodes': [5],
        'pp_nodes': [5],
        'en_nodes': [3, 3],
        'depth': 2,
        'rc': 4.5,
        'basis_type': 'gaussian',
        'n_basis': 10,
        'atom_types': [1, 6, 7, 8, 9],
    },
    'model_params': {
        # NOTE(review): 627.5 looks like a Hartree -> kcal/mol factor -- confirm.
        'e_scale': 627.5,
        'e_dress': dress,
        'learning_rate': 3e-3,
        'decay_rate': 0.994,
    },
}

# keep_checkpoint_max=None keeps every checkpoint instead of discarding old
# ones; a checkpoint is saved every 60 seconds.
config = tf.estimator.RunConfig(
    save_checkpoints_secs=60,
    keep_checkpoint_max=None,
    save_summary_steps=5000,
    log_step_count_steps=5000,
)


def pre_fn(tensors):
    """Call pinet with preprocess=True on a batch, using the network params."""
    return pinet(tensors, preprocess=True, **params['network_params'])


def train():
    """Input function for training.

    Batches the training split (100 structures per batch), applies pre_fn,
    caches the result, repeats it indefinitely and shuffles with a buffer
    of 500 elements.
    """
    batched = dataset()['train'].apply(sparse_batch(100))
    preprocessed = batched.map(pre_fn)
    return preprocessed.cache().repeat().shuffle(500)


def test():
    """Input function for evaluation: batched and preprocessed test split."""
    batched = dataset()['test'].apply(sparse_batch(100))
    return batched.map(pre_fn)


train_spec = tf.estimator.TrainSpec(input_fn=train, max_steps=3e6)
eval_spec = tf.estimator.EvalSpec(input_fn=test, throttle_secs=300)

# Build the estimator and alternate between training and evaluation.
model = potential_model(params, config)
tf.estimator.train_and_evaluate(model, train_spec, eval_spec)

# Write the summary

Load all the checkpoints and write a summary for each of them.

*This might not be the best way to do this. It should also be possible to save the summaries automatically during training and evaluation.*

In [None]:
# Each checkpoint is located through its '*.index' file; dropping that last
# extension yields the checkpoint prefix.
all_ckpt = [os.path.splitext(s)[0] for s in glob('PiNet_QM9/*ckpt*index')]
# Sort the checkpoints by the integer that follows the last '-' in the prefix.
all_ckpt.sort(key=lambda x: int(x.split('-')[-1]))

# Read the parameters back from the model directory. The file is closed
# deterministically, and the loader is passed explicitly because calling
# yaml.load without a Loader is deprecated and rejected by recent PyYAML.
# NOTE(review): yaml.Loader can construct arbitrary Python objects. That is
# acceptable only because params.yml is presumably written by the local
# training run; use yaml.safe_load if the file holds plain types only.
with open('PiNet_QM9/params.yml') as f:
    params = yaml.load(f, Loader=yaml.Loader)

tf.reset_default_graph()

# We only take the first 10 elements in the test set and use them for the visualization
tensors = dataset()['test'].apply(sparse_batch(10)).take(1).repeat().make_one_shot_iterator().get_next()
pinet(tensors, **params['network_params'])

summary_ops = pinnboard_summary(params)
writer = tf.summary.FileWriter('PiNet_QM9/pinnboard')
sess = tf.Session()

try:
    for (i, ckpt) in enumerate(all_ckpt):
        # Point the variable initializers at this checkpoint, then run them
        # so that the variables take the values stored in the checkpoint.
        tf.train.init_from_checkpoint(ckpt, {'/': '/'})
        sess.run(tf.global_variables_initializer())
        summary = sess.run(summary_ops)
        # The position of the checkpoint in the sorted list is used as the
        # summary step.
        for s in summary.values():
            writer.add_summary(s, i)
finally:
    # Release the event file and the session even if a checkpoint fails.
    writer.close()
    sess.close()