In [1]:
import pickle
import torch
from torch_geometric.data import Data,DataLoader
from functions_refactor import *
from pytorch_util import *
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau

In [5]:
# ---- run configuration -------------------------------------------------
# Every value below is read by later cells (model construction, training,
# result/model saving and submission building), so the names must stay put.

# Architecture: building blocks come from the star imports at the top.
block = MEGNet_block
head = feedforwardHead_Update
reuse = False          # forwarded as-is to GNN_edgeUpdate
dim = 128              # forwarded to GNN_edgeUpdate (presumably hidden width - TODO confirm)
layer1 = layer2 = 3    # forwarded to GNN_edgeUpdate (presumably layer counts - TODO confirm)
factor = 2             # forwarded to GNN_edgeUpdate

# Data: path template whose '{}' is filled with the split name (e.g. 'test');
# the unformatted template string is also used as the key into data_dict.
data = '../Data/{}_data_ACSF_expand_PCA.pickle'
batch_size = 32

# Optimisation
epochs = 50
lr = 1e-4              # Adam learning rate
clip = 0.4             # passed to train_type (presumably a gradient-clipping threshold - TODO confirm)

#### train base

In [6]:
# Train/validation loaders for the configured dataset template.
train_dl, val_dl = get_data(data, batch_size)

# Network on the first GPU; dataset-specific keyword arguments come from data_dict.
model = GNN_edgeUpdate(
    reuse, block, head, dim, layer1, layer2, factor,
    **data_dict[data]
).to('cuda:0')

# NOTE(review): `paras` is handed to both Adam and train_type. If
# trainable_parameter returns a one-shot iterator, the second consumer
# would see nothing - confirm it returns a list.
paras = trainable_parameter(model)
opt = Adam(paras, lr=lr)

# Halve the learning rate once the monitored quantity has stopped decreasing
# for more than 5 scheduler steps (which quantity is stepped on is decided
# inside train_type).
scheduler = ReduceLROnPlateau(opt, 'min', factor=0.5, patience=5)

model, train_loss_list, val_loss_list, bestWeight = train_type(
    opt, model, epochs, train_dl, val_dl, paras, clip,
    logLoss=False,
    scheduler=scheduler,
)

epoch:0, train_loss: +1.845, val_loss: -0.428, 
train_vector: +1.17|+0.60|+0.17|-0.36|-0.40|+0.22|-0.39|-0.74, 
val_vector  : +0.53|+0.06|-0.24|-0.83|-0.88|-0.12|-0.87|-1.07

epoch:1, train_loss: +0.819, val_loss: -0.479, 
train_vector: +0.71|+0.09|-0.27|-0.90|-0.91|-0.16|-0.87|-1.10, 
val_vector  : +0.56|+0.14|-0.46|-0.47|-1.11|-0.29|-0.97|-1.23

epoch:2, train_loss: +0.687, val_loss: -0.706, 
train_vector: +0.48|-0.05|-0.42|-1.07|-1.09|-0.29|-0.99|-1.26, 
val_vector  : +0.27|-0.26|-0.59|-0.94|-1.27|-0.39|-1.12|-1.35

epoch:3, train_loss: +0.600, val_loss: -0.757, 
train_vector: +0.33|-0.19|-0.54|-1.23|-1.23|-0.40|-1.10|-1.36, 
val_vector  : +0.21|-0.08|-0.67|-0.96|-1.33|-0.49|-1.24|-1.48

epoch:4, train_loss: +0.549, val_loss: -0.791, 
train_vector: +0.23|-0.27|-0.62|-1.32|-1.31|-0.47|-1.18|-1.44, 
val_vector  : +0.37|+0.29|-0.73|-1.53|-1.43|-0.54|-1.25|-1.50

epoch:5, train_loss: +0.512, val_loss: -0.996, 
train_vector: +0.16|-0.34|-0.69|-1.39|-1.38|-0.54|-1.24|-1.50, 
val_vector  :

epoch:47, train_loss: +0.242, val_loss: -1.589, 
train_vector: -0.57|-1.15|-1.43|-2.11|-2.19|-1.24|-1.95|-2.30, 
val_vector  : -0.70|-1.19|-1.46|-1.77|-2.12|-1.25|-2.02|-2.20

epoch:48, train_loss: +0.241, val_loss: -1.648, 
train_vector: -0.57|-1.15|-1.43|-2.12|-2.19|-1.25|-1.95|-2.30, 
val_vector  : -0.76|-1.16|-1.46|-2.13|-2.16|-1.29|-2.00|-2.23

epoch:49, train_loss: +0.239, val_loss: -1.647, 
train_vector: -0.58|-1.17|-1.44|-2.11|-2.20|-1.26|-1.96|-2.31, 
val_vector  : -0.64|-1.25|-1.48|-2.13|-2.13|-1.28|-2.03|-2.25

Training completed in 4211.929825544357s


In [7]:
# Persist the loss histories and the best weights from the training cell.
# The run settings and postStr are passed along unchanged; presumably they
# determine the output file names (make_submission rebuilds a checkpoint
# name from the same settings) - confirm in save_results / save_model_type.
save_results(
    train_loss_list, val_loss_list,
    reuse, block, head, data, batch_size, dim, clip, layer1, layer2, factor, epochs,
    postStr='MAE',
)
save_model_type(
    bestWeight, opt,
    reuse, block, head, data, batch_size, dim, clip, layer1, layer2, factor, epochs,
    postStr='MAE',
)

### make submissions

In [8]:
def make_submission(reuse,block,head,data,batch_size,dim,clip,layer1,layer2,factor,epochs,postStr='base',
                    device='cuda:0'):
    """Predict the test set with the 8 per-type checkpoints and write submission CSVs.

    For each type index 0..7 the function loads the matching test pickle and
    id pickle, restores the checkpoint saved for that type, runs inference
    and stores the predictions in a ``type_<k>`` column aligned on
    ``submission.id``. Two files are written under ``../Submission/``:

    * ``..._all_types<postStr>.csv`` - the sample submission plus one column per type;
    * ``..._final<postStr>.csv``     - ``id`` and ``scalar_coupling_constant`` only, where
      the latter is the row-wise mean of the ``type_<k>`` columns (NaNs are skipped
      by pandas, so a row predicted by a single type keeps that value).

    Parameters
    ----------
    reuse, block, head, dim, layer1, layer2, factor :
        Forwarded unchanged to ``GNN_edgeUpdate``; they must match the
        settings used when the checkpoints were trained.
    data : str
        Path template containing ``'{}'``; formatted with ``'test'`` here and
        used unformatted as the key into ``data_dict``.
    batch_size : int
        Batch size of the test ``DataLoader``.
    clip, epochs :
        Not used for inference; they only take part in the file names.
    postStr : str
        Suffix appended to the last component of every file name.
    device : str or torch.device
        Device the model and batches are moved to, and onto which checkpoint
        tensors are mapped when loading. Defaults to ``'cuda:0'`` (the
        previously hard-coded value).

    Returns
    -------
    None. The results are the two CSV files.

    Notes
    -----
    ``pd``, ``GNN_edgeUpdate`` and ``data_dict`` are not imported explicitly in
    the notebook; they are presumably provided by the star imports.
    """
    # Settings that every artefact name starts with (same order as at save time).
    run_settings = [reuse,block,head,data,batch_size,dim,clip,layer1,layer2,factor,epochs]
    # '<...>.pickle' -> '<...>' so that per-type suffixes can be appended.
    test_prefix = data.format('test').split('pickle')[0][:-1]

    # set up
    model = GNN_edgeUpdate(reuse,block,head,dim,layer1,layer2,factor,**data_dict[data]).to(device)
    submission = pd.read_csv('../Data/sample_submission.csv')

    type_columns = []
    for type_idx in range(8):
        type_tag = 'type_'+str(type_idx)

        # load test data and the ids belonging to this type
        test_data = _load_pickle(test_prefix+'_'+type_tag+'.pickle')
        test_dl = DataLoader([Data(**d) for d in test_data],batch_size,shuffle=False)
        test_id = _load_pickle(test_prefix+'_id_'+type_tag+'.pickle')

        # load the checkpoint trained for this type
        checkpoint = torch.load('../Model/{}.tar'.format(_artifact_stem(run_settings+[type_tag+postStr])),
                                map_location=device)
        model.load_state_dict(checkpoint['model_state_dict'])

        # predict
        model.eval()
        yhat_list = []
        with torch.no_grad():
            for batch in test_dl:
                yhat_list.append(model(batch.to(device),False,True))
        yhat = torch.cat(yhat_list).cpu().detach().numpy()

        # join: ids that do not belong to this type stay NaN in the column
        submission[type_tag] = submission.id.map(dict(zip(test_id,yhat)))
        type_columns.append(type_tag)

    # save types results
    submission.to_csv('../Submission/{}.csv'.format(_artifact_stem(run_settings+['all_types'+postStr])),
                      index=False)

    # save final results for submission; average only the per-type columns
    # instead of relying on their position in the frame
    submission['scalar_coupling_constant'] = submission[type_columns].mean(axis=1)
    submission = submission[['id','scalar_coupling_constant']]

    submission.to_csv('../Submission/{}.csv'.format(_artifact_stem(run_settings+['final'+postStr])),
                      index=False)


def _artifact_stem(parts):
    """Join run settings into the file stem used for checkpoints and submissions.

    Each part is stringified; if the string contains '}' (the data path
    template), only the text after the first '}' is kept.
    """
    return '_'.join(str(p).split('}')[1] if '}' in str(p) else str(p) for p in parts)


def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    # NOTE(review): pickle.load can execute arbitrary code; only use it on
    # files produced by this project's own preprocessing.
    with open(path, 'rb') as handle:
        return pickle.load(handle)

In [9]:
# Build the submission files from the checkpoints saved with the 'MAE' suffix.
make_submission(
    reuse, block, head, data, batch_size, dim, clip,
    layer1, layer2, factor, epochs,
    postStr='MAE',
)