In [1]:
# Load IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
from sklearn.metrics import mean_squared_error
import sys
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
import ase.io
from amp import Amp
from amp.model.neuralnetwork import NeuralNetwork
from amp.model import LossFunction
import operator
import amptorch
import copy
import matplotlib
from skorch import NeuralNetRegressor
from skorch.dataset import CVSplit
from skorch.callbacks import Checkpoint, EpochScoring
from skorch.callbacks.lr_scheduler import LRScheduler, WarmRestartLR
import skorch.callbacks.base
from amptorch.gaussian import SNN_Gaussian
from amptorch.model import BPNN, CustomMSELoss
from amptorch.skorch_model import AMP
from amptorch.skorch_model.utils import target_extractor
from amptorch.analysis import parity_plot
from torch.utils.data import DataLoader
from torch.nn import init
from skorch.utils import to_numpy
import matplotlib.pyplot as plt
import os
import random
import pandas as pd
from sklearn.model_selection import ShuffleSplit
from torch.optim.lr_scheduler import CosineAnnealingLR
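# NOTE(review): AtomsDataset_per_image, CustomMSELoss_per_image and energy_score are all
# used in later cells, but the only import that mentions them is the commented-out line
# below. On a fresh kernel those cells would raise NameError; re-enable this import once
# amptorch.modifications is confirmed to be importable.
# from amptorch.modifications import AtomsDataset_per_image, CustomMSELoss_per_image, energy_score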
from amptorch.data_preprocess import collate_amp


In [4]:
# Read every frame (index=':') of the trajectory file; the path is relative to the
# notebook's working directory.
images = ase.io.read('traj_taged_adsorptionenergy.traj', index=':')
def Split(images, test_size=0.2, random_state=None):
  '''Randomly partition ``images`` into a training list and a test list.

  A single shuffle split is drawn with scikit-learn's ``ShuffleSplit`` and the
  resulting index arrays are used to pick elements out of ``images``.

  Parameters
  ----------
  images : sequence
      Indexable collection to split (anything supporting ``len`` and ``[]``).
  test_size : float or int, default 0.2
      Forwarded to ``ShuffleSplit``; the default keeps the original 80/20 split.
  random_state : int, RandomState instance or None, default None
      Forwarded to ``ShuffleSplit``. ``None`` (the original behaviour) gives a
      different split on every call; pass an integer for a reproducible split.

  Returns
  -------
  (train_images, test_images) : tuple of lists
      Elements of ``images`` selected by the train and the test indices.
  '''
  # Only the first split is ever consumed, so a single split is requested.
  cv = ShuffleSplit(n_splits=1, test_size=test_size, random_state=random_state)
  idx_train, idx_test = next(cv.split(np.arange(len(images))))
  train_images = [images[index] for index in idx_train]
  test_images = [images[index] for index in idx_test]
  return train_images, test_images
# Split once and keep the untouched halves; the working lists are deep copies so that
# later changes to them do not touch the originals.
train_images_original, test_images_original = Split(images)
train_images, test_images = (
    copy.deepcopy(subset)
    for subset in (train_images_original, test_images_original)
)

In [5]:

# Descriptor settings passed to the dataset constructor together with SNN_Gaussian.
# NOTE(review): the key names suggest G2/G4 symmetry-function parameters and a radial
# cutoff; units and exact meaning are defined by the consumer — confirm there.
Gs = {
    "G2_etas": np.logspace(np.log10(0.05), np.log10(5.0), num=4),  # 4 log-spaced values, 0.05 .. 5.0
    "G2_rs_s": [0, 0, 0, 0],
    "G4_etas": [0.005],
    "G4_zetas": [1.0],
    "G4_gammas": [1.0, -1],
    "cutoff": 6.5,
}
# Reference energies of the test set. They are collected here, before the training
# cell attaches the trained model to the test images as their calculator.
DFT_energies_test = [atoms.get_potential_energy() for atoms in test_images]
# Restrict PyTorch to a single CPU thread.
torch.set_num_threads(1)
# Reloads the checkpointed (best validation loss) parameters at the end of training.
class train_end_load_best_valid_loss(skorch.callbacks.base.Callback):
    """Reload saved network parameters from disk when training finishes.

    Parameters
    ----------
    f_params : str, default 'valid_best_params.pt'
        File handed to ``net.load_params``. The default is the file name the
        callback previously hard-coded; it is expected to have been written by a
        checkpoint callback during training.
    """

    def __init__(self, f_params='valid_best_params.pt'):
        # Stored under the same name as the argument so that the estimator-style
        # get_params/set_params machinery can see it.
        self.f_params = f_params

    def on_train_end(self, net, X=None, y=None, **kwargs):
        """Load ``self.f_params`` into ``net``.

        ``X``, ``y`` and any extra keyword arguments are accepted only to match
        the callback hook signature; they are not used.
        """
        net.load_params(self.f_params)
# Checkpoint callback: watches the 'valid_loss_best' history entry and writes its
# files with the 'valid_best_' prefix.
cp = Checkpoint(fn_prefix='valid_best_', monitor='valid_loss_best')
# Callback instance that reloads the saved parameters once training ends.
load_best_valid_loss = train_end_load_best_valid_loss()
# This cell: descriptor settings (Gs), reference energies and checkpoint callbacks.

In [6]:
# Build the training dataset from the training images and the Gs settings.
# NOTE(review): AtomsDataset_per_image only appears in a commented-out import in the
# import cell — confirm it is in scope on a fresh kernel.
training_data = AtomsDataset_per_image(
    train_images,
    SNN_Gaussian,
    Gs,
    forcetraining=False,
    label="zeolite",
    cores=2,
    delta_data=None,
    specific_atoms=True,
)

Calculating fingerprints...
Fingerprints Calculated!


In [15]:
# Hyperparameters read by the network-construction and plotting cells.
assignments = dict(
    epochs=3000,
    learning_rate=0.005,
    hidden_layers=4,
    num_nodes=70,
    optimizer=optim.AdamW,
    batchsize=40,
    T_max=20,  # not read by any cell visible in this notebook
    weight_decay=0.0,
    LRScheduler=skorch.callbacks.WarmRestartLR,
)

In [16]:
# Assemble the skorch regressor around the BPNN module (CPU, energy-only training).
# NOTE(review): CustomMSELoss_per_image and energy_score only appear in a commented-out
# import in the import cell — confirm they are in scope on a fresh kernel.
net = NeuralNetRegressor(
    module=BPNN(
        training_data.elements,
        # Architecture list: fingerprint length followed by the two size hyperparameters.
        [training_data.fp_length, assignments["hidden_layers"], assignments["num_nodes"]],
        "cpu",
        forcetraining=False,
    ),
    criterion=CustomMSELoss_per_image,
    criterion__force_coefficient=0,
    optimizer=assignments["optimizer"],
    optimizer__weight_decay=assignments["weight_decay"],
    lr=assignments["learning_rate"],
    batch_size=assignments["batchsize"],
    max_epochs=assignments["epochs"],
    iterator_train__collate_fn=collate_amp,
    iterator_train__shuffle=True,
    iterator_valid__collate_fn=collate_amp,
    iterator_valid__shuffle=False,
    device="cpu",
    train_split=CVSplit(5),
    callbacks=[
        # Per-epoch energy score on the validation data ...
        EpochScoring(
            energy_score,
            name='energy_score_valid',
            on_train=False,
            use_caching=True,
            target_extractor=target_extractor,
        ),
        # ... and on the training data.
        EpochScoring(
            energy_score,
            name='energy_score_train',
            on_train=True,
            use_caching=True,
            target_extractor=target_extractor,
        ),
        # The scheduler's upper learning rate follows the configured learning rate
        # instead of repeating the literal, so the two cannot drift apart.
        ('lr_scheduler',
          LRScheduler(policy=assignments['LRScheduler'],
                      max_lr=assignments['learning_rate'])
        ),
        # Save the best-validation-loss parameters, then reload them after training.
        cp,
        load_best_valid_loss,
    ],
)

In [None]:
# train
calc = AMP(training_data, net, 'zeolite', specific_atoms=True)
calc.train(overwrite=True)

# Per-epoch curves taken from the skorch history.
# NOTE(review): despite the *_rmse_* names these hold the logged 'train_loss' and
# 'valid_loss' values; in the recorded run they are roughly the square of the
# energy_score_* columns — confirm which quantity is meant to be plotted.
energy_rmse_train = net.history[:, 'train_loss']
energy_rmse_valid = net.history[:, 'valid_loss']

# Attach the trained model to the test images and compare its predictions with the
# reference energies collected before training.
for image in test_images:
    image.set_calculator(calc)
pred_energies_test = [atoms.get_potential_energy() for atoms in test_images]
energy_rmse_test = np.sqrt(
    mean_squared_error(pred_energies_test, DFT_energies_test)
)

  epoch    energy_score_train    energy_score_valid    train_loss    valid_loss    cp     dur
-------  --------------------  --------------------  ------------  ------------  ----  ------
      1                6.6033                2.6512       42.4316        6.7037     +  0.5539
      2                2.6155                2.1344        6.7013        4.4581     +  0.5031
      3                2.3216                2.1732        5.3576        4.6271        0.4903
      4                2.2877                2.4651        5.3002        5.8628        0.5082
      5                2.3577                2.0610        5.3774        4.1668     +  0.5036
      6                2.1343                2.0681        4.6199        4.2394        0.5039
      7                2.1210                2.0747        4.4764        4.3058        0.5037
      8                2.0824                2.0786        4.2309        4.2429        0.5146
      9                2.0889                2.0931        4.2775        4.3787        1.9057
     10                2.0754                2.0856        4.2737        4.2840        1.5760
 

In [None]:
# plot
import seaborn as sns
%matplotlib inline
experiment = 'adamW_minibatch_cos'
epoch = assignments['epochs']
epochs = [i for i in range(1,epoch+1)]
df = pd.DataFrame(
    {
        'RMSE': energy_rmse_train + energy_rmse_valid,
        'Experiment':[experiment] * (epoch * 2),
        'Category': ['Train'] * epoch + ['valid']  * epoch,
        'epochs': epochs * 2,
    })
g = sns.relplot(x="epochs", y="RMSE", hue="Category", kind="line", data=df)
stdv = np.std([image.get_potential_energy() for image in images])
g.ax.text(max(df['epochs']), stdv, 'std')
g.ax.plot([0, max(df['epochs'])], [stdv, stdv], ls=':',linewidth=1)
g.ax.set_ylim(0,5)
label = 'test_RMSE'
value = energy_rmse_test
g.ax.text(max(df['epochs']), value, label)
g.ax.plot([0, max(df['epochs'])], [value, value], ls=':',linewidth=1)