In [1]:
import pickle
import torch
from torch_geometric.data import Data,DataLoader
from functions_refactor import *
from pytorch_util import *
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau

In [2]:
# Run configuration. Every name below is read by later cells, so the names are
# kept as-is; only the grouping and the comments are new.

# --- data ---
# Path template: '{}' is filled with the split name (make_submission fills it
# with 'test'). The unformatted template is also the key used to index data_dict.
data = '../Data/{}_data_ACSF_expand_PCA.pickle'
batch_size = 32

# --- architecture (forwarded to the model constructor) ---
block = MEGNet_block
head = feedforwardHead_Update
reuse = False   # presumably toggles weight sharing between blocks -- TODO confirm
dim = 128
layer1 = 3
layer2 = 3
factor = 2      # model argument; unrelated to the LR scheduler's own factor=0.5

# --- optimisation ---
epochs = 50
lr = 1e-4       # learning rate handed to Adam
clip = 0.4      # forwarded to train_type; presumably a gradient-clipping threshold -- TODO confirm

#### Train the base model (interleaved multi-head GNN)

In [3]:
# Data loaders for the training and validation splits.
train_dl, val_dl = get_data(data, batch_size)

# NOTE(review): head_mol, head_atom, head_edge and data_dict are not defined in
# any visible cell; presumably they come from the star imports -- confirm that
# this cell survives Restart Kernel -> Run All.
model = GNN_multiHead_interleave(
    reuse, block, head, head_mol, head_atom, head_edge,
    dim, layer1, layer2, factor,
    **data_dict[data],
    interleave=True
).to('cuda:0')

# The optimiser only sees the parameters selected by trainable_parameter; the
# same collection is also handed to train_type together with `clip`.
paras = trainable_parameter(model)
opt = Adam(paras, lr=lr)

# Halve the learning rate when the monitored quantity has not improved for
# 5 scheduler steps ('min' mode: lower is better).
scheduler = ReduceLROnPlateau(opt, mode='min', factor=0.5, patience=5)

model, train_loss_list, val_loss_list, bestWeight = train_type(
    opt, model, epochs, train_dl, val_dl, paras, clip, scheduler=scheduler)

epoch:0, train_loss: +0.353, val_loss: -0.137, 
train_vector: +4.19|+0.96|+0.12|-0.38|-0.58|+0.11|-0.62|-0.96, 
val_vector  : +3.88|+0.13|-0.32|-1.13|-1.07|-0.21|-1.05|-1.32

epoch:1, train_loss: -0.340, val_loss: -0.694, 
train_vector: +2.08|+0.30|-0.33|-0.97|-1.10|-0.26|-1.06|-1.38, 
val_vector  : +0.52|-0.04|-0.53|-1.15|-1.33|-0.39|-1.09|-1.54

epoch:2, train_loss: -0.655, val_loss: -0.823, 
train_vector: +0.87|+0.06|-0.50|-1.20|-1.30|-0.39|-1.21|-1.56, 
val_vector  : +0.38|-0.08|-0.59|-1.53|-1.30|-0.49|-1.30|-1.68

epoch:3, train_loss: -0.777, val_loss: -0.884, 
train_vector: +0.78|-0.09|-0.62|-1.40|-1.43|-0.48|-1.31|-1.68, 
val_vector  : +0.84|-0.41|-0.73|-1.63|-1.53|-0.57|-1.44|-1.60

epoch:4, train_loss: -0.849, val_loss: -0.876, 
train_vector: +0.74|-0.13|-0.70|-1.47|-1.53|-0.55|-1.40|-1.76, 
val_vector  : +0.79|-0.08|-0.80|-1.45|-1.61|-0.62|-1.41|-1.82

epoch:5, train_loss: -0.908, val_loss: -0.987, 
train_vector: +0.70|-0.21|-0.77|-1.53|-1.59|-0.59|-1.45|-1.82, 
val_vector  :

epoch:47, train_loss: -1.856, val_loss: -1.769, 
train_vector: -0.49|-1.38|-1.52|-2.46|-2.62|-1.25|-2.29|-2.83, 
val_vector  : -0.37|-1.26|-1.52|-2.51|-2.43|-1.26|-2.22|-2.58

epoch:48, train_loss: -1.859, val_loss: -1.792, 
train_vector: -0.49|-1.37|-1.52|-2.47|-2.63|-1.26|-2.29|-2.84, 
val_vector  : -0.59|-1.34|-1.52|-2.33|-2.41|-1.26|-2.29|-2.60

epoch:49, train_loss: -1.865, val_loss: -1.825, 
train_vector: -0.50|-1.38|-1.53|-2.48|-2.63|-1.26|-2.30|-2.84, 
val_vector  : -0.49|-1.41|-1.53|-2.54|-2.48|-1.28|-2.29|-2.59

Training completed in 4058.0178639888763s


In [4]:
# Settings that identify this run; both helpers receive them in this order,
# followed by the same '_interleave' suffix.
run_config = (reuse, block, head, data, batch_size, dim, clip,
              layer1, layer2, factor, epochs)

# Persist the loss curves, then the best weights together with the optimiser.
save_results(train_loss_list, val_loss_list, *run_config, postStr='_interleave')
save_model_type(bestWeight, opt, *run_config, postStr='_interleave')

### Make submissions: predict each coupling type and write the submission CSVs

In [13]:
def _run_tag(settings):
    """Join run settings into the underscore-separated tag used in file names.

    Each setting is converted with ``str``. If the text contains ``'}'`` (as the
    ``'{}'`` placeholder of the data-path template does), only the segment that
    follows the first ``'}'`` is kept; otherwise the text is used unchanged.
    """
    tokens = []
    for setting in settings:
        text = str(setting)
        tokens.append(text.split('}')[1] if '}' in text else text)
    return '_'.join(tokens)


def make_submission(reuse,block,head,data,batch_size,dim,clip,layer1,layer2,factor,epochs,postStr='base'):
    """Predict every coupling type on the test set and write the submission CSVs.

    For each type index 0..7 the function loads the pickled test graphs and the
    matching ids, restores the checkpoint saved for that type, runs the model in
    eval mode without gradients and stores the predictions in a ``type_<k>``
    column of the sample-submission frame (rows are matched on ``id``).

    Two files are written under ``../Submission/``; their names are built from
    the run settings plus a suffix:

    * ``...all_types<postStr>.csv`` -- the frame with one column per type;
    * ``...final<postStr>.csv`` -- ``id`` and ``scalar_coupling_constant``, the
      latter being the row-wise mean of the ``type_<k>`` columns (pandas skips
      NaN, so ids that received a prediction from one type keep that value).

    Parameters
    ----------
    reuse, block, head, dim, layer1, layer2, factor
        Forwarded to ``GNN_edgeUpdate``; also part of the file-name tag.
    data : str
        Path template containing ``'{}'``; it is formatted with ``'test'`` to
        locate the test pickles and, unformatted, indexes ``data_dict``.
    batch_size : int
        Batch size of the test ``DataLoader``.
    clip, epochs
        Only used in the file-name tag.
    postStr : str
        Suffix appended to the last tag component (checkpoint and CSV names).

    Notes
    -----
    ``GNN_edgeUpdate``, ``data_dict`` and ``pd`` are not defined in this cell;
    presumably they are provided by the notebook's star imports -- confirm.
    Requires a CUDA device (``'cuda:0'``).
    """
    device = 'cuda:0'
    n_types = 8
    run_settings = [reuse,block,head,data,batch_size,dim,clip,layer1,layer2,factor,epochs]

    # set up
    model = GNN_edgeUpdate(reuse,block,head,dim,layer1,layer2,factor,**data_dict[data]).to(device)
    submission = pd.read_csv('../Data/sample_submission.csv')

    # Test path without its '.pickle' extension; identical for every type, so
    # it is computed once instead of twice per iteration.
    test_prefix = data.format('test').split('pickle')[0][:-1]

    type_columns = []
    for type_idx in range(n_types):
        type_name = 'type_'+str(type_idx)

        # load test data and ids for this type
        # NOTE: pickle.load can execute arbitrary code -- only open trusted files.
        with open(test_prefix+'_'+type_name+'.pickle', 'rb') as handle:
            test_data = pickle.load(handle)
        test_list = [Data(**d) for d in test_data]
        # shuffle=False keeps predictions aligned with the order of test_id
        test_dl = DataLoader(test_list,batch_size,shuffle=False)
        with open(test_prefix+'_id_'+type_name+'.pickle', 'rb') as handle:
            test_id = pickle.load(handle)

        # load the checkpoint saved for this type
        checkpoint = torch.load('../Model/{}.tar'.format(_run_tag(run_settings+[type_name+postStr])))
        model.load_state_dict(checkpoint['model_state_dict'])

        # predict
        model.eval()
        batch_preds = []
        with torch.no_grad():
            for batch in test_dl:
                batch = batch.to(device)
                # the two positional flags are passed through unchanged; their
                # meaning is defined by the model's forward() -- see its source
                batch_preds.append(model(batch,False,True))
        yhat = torch.cat(batch_preds).cpu().detach().numpy()

        # join: ids absent from this type stay NaN in the new column
        submission[type_name] = submission.id.map(dict(zip(test_id,yhat)))
        type_columns.append(type_name)

    # save per-type results
    submission.to_csv('../Submission/{}.csv'.format(_run_tag(run_settings+['all_types'+postStr])),
                      index=False)

    # save final results for submission; the type columns are selected by name
    # so the average does not depend on the column layout of the sample CSV
    submission['scalar_coupling_constant'] = submission[type_columns].mean(1)
    submission = submission[['id','scalar_coupling_constant']]

    submission.to_csv('../Submission/{}.csv'.format(_run_tag(run_settings+['final'+postStr])),
                      index=False)

In [17]:
# NOTE(review): postStr is left at its default ('base'), whereas the training
# cells above save with postStr='_interleave' and train GNN_multiHead_interleave
# while make_submission builds GNN_edgeUpdate -- confirm that the checkpoints
# this call loads come from a matching run.
make_submission(
    reuse, block, head, data, batch_size, dim, clip,
    layer1, layer2, factor, epochs)