In [1]:
# skorch/net.py --> evaluation_step --> set training=True
# skorch/net.py --> validation --> comment out "with no_grad()"

# skorch/dataset.py --> get_len(data) --> comment out "if(len_data)!=1"
# skorch/callbacks/scoring.py --> comment out "is_multimetrics"

In [2]:
import os
import sys

import numpy as np
import mongo
import pickle
import tqdm

import torch
from dogss.data import collate_pool, MergeDataset
from dogss.dogss import DOGSS

import skorch
from skorch.dataset import CVSplit
from sklearn.model_selection import ShuffleSplit, train_test_split
from sklearn.metrics import get_scorer
from skorch import NeuralNetRegressor
from skorch.callbacks import Checkpoint #needs skorch 0.4.0, conda-forge version at 0.3.0 doesn't cut it
from skorch.callbacks.lr_scheduler import LRScheduler

from utils.adamwr.adamw import AdamW



In [3]:
# Placeholder locations of the two pickled inputs; point each at the real file before running.
SDT_list_path = "path/to/dataset"  # pickle that is loaded into SDT_list
docs_path = "path/to/dataset"      # pickle that is loaded into docs

In [4]:
# NOTE(review): pickle.load can execute arbitrary code -- only load files from a trusted source.
# Context managers make sure both file handles are closed once the data is read.
with open(SDT_list_path, 'rb') as sdt_file:
    SDT_list = pickle.load(sdt_file)
with open(docs_path, 'rb') as docs_file:
    docs = pickle.load(docs_file)

# get final_pos of free atoms ONLY: sdt[-1] is indexed with sdt[-2] for every entry.
# dtype=object is explicit because the per-structure arrays presumably differ in
# length (ragged); NumPy >= 1.24 raises instead of warning when it is omitted.
target_list = np.array(
    [sdt[-1][sdt[-2]].numpy() for sdt in SDT_list],
    dtype=object,
).reshape(-1, 1)


  This is separate from the ipykernel package so we can avoid doing imports until


In [5]:
# Read the feature widths off the first dataset entry: the last axis of
# element 0 gives the node-feature size, the last axis of element 1 the
# edge-feature size.
structures = SDT_list[0]
orig_node_fea_size, edge_fea_size = (
    structures[position].shape[-1] for position in (0, 1)
)

In [6]:
# Use the GPU whenever CUDA is available, otherwise fall back to the CPU.
cuda = torch.cuda.is_available()
device = torch.device("cuda") if cuda else 'cpu'

# Checkpoint callback: save the parameters every time the validation loss reaches a new best.
cp = Checkpoint(
    fn_prefix='valid_best_',
    monitor='valid_loss_best',
)

#Callback to load the checkpoint with the best validation loss at the end of training

class train_end_load_best_valid_loss(skorch.callbacks.base.Callback):
    """Reload saved parameters into the net once training has finished.

    Parameters
    ----------
    f_params : str
        File handed to ``net.load_params`` when training ends. The default,
        'valid_best_params.pt', is presumably the file written by the
        Checkpoint callback configured with fn_prefix='valid_best_' --
        TODO confirm against the skorch version in use.
    """

    def __init__(self, f_params='valid_best_params.pt'):
        self.f_params = f_params

    def on_train_end(self, net, X=None, y=None, **kwargs):
        """Load ``self.f_params`` into ``net``; X, y and kwargs are ignored.

        The defaults and ``**kwargs`` mirror the hook signature of skorch's
        Callback base class, so the hook keeps working however skorch
        chooses to pass its arguments.
        """
        net.load_params(self.f_params)


load_best_valid_loss = train_end_load_best_valid_loss()
print('device:', device)

device: cuda


In [7]:
import torch
from torch.nn.utils import clip_grad_norm_
from torch.utils.data import Dataset, DataLoader
from torch.optim import Adam, SGD
from torch.optim.lbfgs import LBFGS

from torch.optim import lr_scheduler

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import ShuffleSplit, train_test_split
from sklearn.metrics import get_scorer
from skorch import NeuralNetRegressor
from skorch.callbacks import Checkpoint, LoadInitState #needs skorch 0.4.0, conda-forge version at 0.3.0 doesn't cut it
import skorch.callbacks.base
from skorch.dataset import CVSplit
from skorch.callbacks.lr_scheduler import WarmRestartLR, LRScheduler

In [8]:
# Split structures, targets and docs together into 90% training / 10% test
# partitions; the fixed random_state keeps the split reproducible.
(
    SDT_training, SDT_test,
    target_training, target_test,
    docs_training, docs_test,
) = train_test_split(
    SDT_list, target_list, docs,
    test_size=0.1,
    random_state=42,
)

In [9]:
# One shuffled split that holds out 10% of the data (passed to CVSplit when the net is built).
train_test_splitter = ShuffleSplit(
    n_splits=1,
    test_size=0.1,
    random_state=42,
)

batchsize = 18

# "MultiStepLR" policy with a single milestone at epoch 100 and gamma 0.1.
LR_schedule = LRScheduler(
    "MultiStepLR",
    gamma=0.1,
    milestones=[100],
)

class NewDOGSS(NeuralNetRegressor):
    """NeuralNetRegressor whose loss is the mean row-wise Euclidean distance.

    ``get_loss`` is overridden and does not reference the net's criterion.
    """

    def get_loss(self, y_pred, y_true, **kwargs):
        """Return the mean over rows of the L2 distance between prediction and target.

        Parameters
        ----------
        y_pred : torch.Tensor or tuple
            Module output. When it is a tuple, only its first element is used.
        y_true : torch.Tensor
            Target values; moved to the device of ``y_pred`` before comparing.
        **kwargs
            Accepted for signature compatibility; not used.

        Returns
        -------
        torch.Tensor
            ``mean(sqrt(clamp(sum((y_pred - y_true)**2, dim=1), min=1e-8)))``.
        """
        y_pred = y_pred[0] if isinstance(y_pred, tuple) else y_pred  # discard the 2nd output
        # Follow the prediction's device rather than hard-coding CUDA, so the
        # loss also works when the net runs on the CPU.
        y_true = y_true.to(y_pred.device)
        differ = torch.sum((y_pred - y_true) ** 2.0, dim=1)
        # Report when at least one row has an exactly-zero squared error.
        if torch.nonzero(differ).shape[0] != differ.shape[0]:
            print('zero sqrt for Loss')
        # Keep the sqrt argument strictly positive.
        differ = torch.clamp(differ, min=1e-8)
        return torch.mean(torch.sqrt(differ))


# Build the skorch wrapper around DOGSS. Keyword arguments are grouped by
# the component they configure; every value is unchanged.
net = NewDOGSS(
    DOGSS,
    # --- DOGSS module: feature sizes ---
    module__orig_node_fea_size=orig_node_fea_size,
    module__edge_fea_size=edge_fea_size,
    module__node_fea_size=103,  # 46 noted in the original as an alternative value
    module__h_fea_len=169,
    module__h_fea_len_dist=18,
    module__h_fea_len_const=18,
    module__h_fea_len_D=18,
    # --- DOGSS module: depth ---
    module__n_conv=12,  # 8 noted in the original as an alternative value
    module__n_h_dist=16,
    module__n_h_const=0,
    module__n_h_D=12,
    # --- DOGSS module: energy model and optimisation steps ---
    module__energy_mode="Harmonic",  # ["Harmonic", "Morse", "LJ"], Default = "Harmonic"
    module__min_opt_steps=30,
    module__max_opt_steps=150,
    module__momentum=0.8,
    # --- training schedule and optimizer ---
    batch_size=batchsize,  # 214 noted in the original as an alternative value
    max_epochs=200,
    lr=0.0037704604911552916,
    optimizer=AdamW,
    optimizer__weight_decay=0.000045399929762484854,
    # NewDOGSS.get_loss, as defined in this notebook, never references the criterion.
    criterion=torch.nn.L1Loss,
    # --- data handling ---
    dataset=MergeDataset,
    train_split=CVSplit(cv=train_test_splitter),
    iterator_train__collate_fn=collate_pool,
    iterator_train__shuffle=True,
    iterator_train__pin_memory=True,
    iterator_train__num_workers=0,
    iterator_valid__collate_fn=collate_pool,
    iterator_valid__pin_memory=True,
    iterator_valid__num_workers=0,
    # --- runtime ---
    device=device,
    callbacks=[cp, LR_schedule, load_best_valid_loss],
)

In [1]:
# To train DOGSS from scratch, uncomment the two lines below:
# net.initialize()
# net.fit(SDT_training, target_training)

In [None]:
## Load the pre-trained DOGSS: parameters, optimizer state and training history
net.initialize()
net.load_params(
    f_params='./valid_best_params.pt',
    f_optimizer='./valid_best_optimizer.pt',
    f_history='./valid_best_history.json',
)