In [1]:
import pickle
import torch
from torch_geometric.data import Data,DataLoader
from functions_refactor import *
from pytorch_util import *
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau

In [2]:
# Run configuration. These module-level names are read by the cells below:
# the training cell, the save cell and make_submission().
reuse = False  # first positional argument of GNN_multiHead
block = MEGNet_block  # passed to GNN_multiHead as `block`; presumably supplied by the star imports above
head = feedforwardHead_Update  # passed to GNN_multiHead as `head`; presumably supplied by the star imports above
# NOTE(review): self-assignment, i.e. a no-op rebind. These three names are not
# defined anywhere in this notebook, so they presumably come from the star
# imports above (otherwise this line raises NameError) -- confirm.
head_mol,head_atom,head_edge = head_mol,head_atom,head_edge
# Path template: '{}' is filled with 'test' in make_submission(); the
# unformatted template is also the key used to look up data_dict[data].
data = '../Data/{}_data_ACSF_expand_PCA_otherInfo.pickle'
batch_size = 32  # passed to get_data() and to the test DataLoader
dim = 128  # passed to GNN_multiHead
epochs = 60  # passed to train_type()
clip = 0.4  # passed to train_type(); presumably a gradient-clipping threshold -- confirm
layer1 = 3  # passed to GNN_multiHead
layer2 = 3  # passed to GNN_multiHead
factor = 2  # passed to GNN_multiHead; unrelated to the scheduler's own factor=0.5
lr = 1e-4  # Adam learning rate; not part of the run-name list built in make_submission()

#### train base

In [3]:
# Loaders for the training and validation splits.
train_dl, val_dl = get_data(data, batch_size)

# Build the multi-head GNN from the config cell and place it on the first GPU.
# data_dict[data] supplies the remaining keyword arguments for this dataset.
model = GNN_multiHead(
    reuse, block, head, head_mol, head_atom, head_edge,
    dim, layer1, layer2, factor,
    **data_dict[data]
)
model = model.to('cuda:0')

# NOTE(review): `paras` is handed to both Adam and train_type(); this assumes
# trainable_parameter() returns a reusable collection (e.g. a list) rather than
# a one-shot generator -- confirm.
paras = trainable_parameter(model)
opt = Adam(paras, lr=lr)

# Halve the learning rate when the monitored quantity has not decreased for 5 epochs.
scheduler = ReduceLROnPlateau(opt, mode='min', factor=0.5, patience=5)

model, train_loss_list, val_loss_list, bestWeight = train_type(
    opt, model, epochs, train_dl, val_dl, paras, clip,
    scheduler=scheduler,
)

epoch:0, train_loss: +1.456, val_loss: +0.102, 
train_vector: +4.32|+1.41|+0.09|-0.37|-0.54|+0.16|-0.54|-0.93, 
val_vector  : +4.00|+0.26|-0.32|-0.62|-0.93|-0.16|-0.95|-1.21

epoch:1, train_loss: +0.561, val_loss: -0.568, 
train_vector: +1.89|+0.32|-0.37|-1.00|-1.11|-0.23|-1.01|-1.35, 
val_vector  : +0.51|+0.18|-0.52|-1.39|-1.18|-0.35|-1.08|-1.42

epoch:2, train_loss: +0.228, val_loss: -0.693, 
train_vector: +0.84|+0.06|-0.52|-1.24|-1.31|-0.37|-1.19|-1.52, 
val_vector  : +0.72|-0.05|-0.68|-1.51|-1.46|-0.45|-1.19|-1.62

epoch:3, train_loss: +0.101, val_loss: -0.716, 
train_vector: +0.78|-0.07|-0.62|-1.36|-1.43|-0.45|-1.28|-1.64, 
val_vector  : +0.91|-0.41|-0.73|-1.08|-1.52|-0.54|-1.32|-1.71

epoch:4, train_loss: +0.002, val_loss: -0.859, 
train_vector: +0.74|-0.20|-0.69|-1.46|-1.52|-0.52|-1.35|-1.72, 
val_vector  : +0.75|-0.33|-0.83|-1.72|-1.56|-0.60|-1.53|-1.73

epoch:5, train_loss: -0.074, val_loss: -0.959, 
train_vector: +0.67|-0.26|-0.75|-1.52|-1.59|-0.57|-1.41|-1.78, 
val_vector  :

epoch:47, train_loss: -1.261, val_loss: -1.669, 
train_vector: -0.46|-1.36|-1.45|-2.43|-2.60|-1.19|-2.21|-2.76, 
val_vector  : -0.58|-1.20|-1.47|-2.24|-2.42|-1.21|-2.24|-2.56

epoch:48, train_loss: -1.272, val_loss: -1.635, 
train_vector: -0.47|-1.38|-1.46|-2.45|-2.60|-1.20|-2.21|-2.77, 
val_vector  : -0.05|-1.17|-1.47|-2.53|-2.43|-1.21|-2.24|-2.55

epoch:49, train_loss: -1.275, val_loss: -1.734, 
train_vector: -0.46|-1.37|-1.46|-2.45|-2.60|-1.20|-2.21|-2.77, 
val_vector  : -0.61|-1.43|-1.48|-2.51|-2.44|-1.20|-2.20|-2.56

epoch:50, train_loss: -1.283, val_loss: -1.675, 
train_vector: -0.46|-1.38|-1.47|-2.46|-2.61|-1.20|-2.22|-2.78, 
val_vector  : -0.43|-1.11|-1.47|-2.52|-2.43|-1.22|-2.23|-2.56

epoch:51, train_loss: -1.293, val_loss: -1.656, 
train_vector: -0.47|-1.40|-1.47|-2.45|-2.62|-1.20|-2.23|-2.79, 
val_vector  : -0.50|-1.20|-1.47|-2.25|-2.36|-1.22|-2.23|-2.57

epoch:52, train_loss: -1.301, val_loss: -1.634, 
train_vector: -0.48|-1.39|-1.47|-2.47|-2.63|-1.21|-2.23|-2.80, 
val_vec

In [4]:
# Persist the loss histories and the best weights / optimizer from the training
# cell. Both helpers receive the same run settings, in the same order.
save_results(
    train_loss_list, val_loss_list,
    reuse, block, head, data, batch_size, dim, clip, layer1, layer2, factor, epochs,
)
save_model_type(
    bestWeight, opt,
    reuse, block, head, data, batch_size, dim, clip, layer1, layer2, factor, epochs,
)

### make submissions

In [5]:
def _run_name(parts):
    """Join run settings with '_' into the stem shared by checkpoint and CSV names.

    Each part is stringified. If the string contains '}', only the text between
    the first '}' and the next one (or the end of the string) is kept; for the
    data path template this drops everything up to and including '{}'.
    """
    tokens = []
    for part in parts:
        text = str(part)
        tokens.append(text.split('}')[1] if '}' in text else text)
    return '_'.join(tokens)


def make_submission(reuse,block,head,data,batch_size,dim,clip,layer1,layer2,factor,epochs,postStr='base'):
    """Predict the test set with the eight per-type checkpoints and write submission CSVs.

    For every type index 0..7 this loads the matching test pickle and id pickle,
    restores the checkpoint '../Model/<run name>_type_<k><postStr>.tar' into a
    single GNN_multiHead instance, predicts, and stores the predictions in a
    'type_<k>' column keyed by submission id.

    Two files are written to '../Submission/':
      * '<run name>_all_types<postStr>.csv' -- sample submission plus the eight
        'type_<k>' columns;
      * '<run name>_final<postStr>.csv'     -- only 'id' and
        'scalar_coupling_constant', the latter being the row-wise mean of the
        type columns (pandas skips NaN by default).

    Parameters
    ----------
    reuse, block, head, dim, layer1, layer2, factor :
        Forwarded positionally to GNN_multiHead; must match the values used
        when the checkpoints were trained.
    data : str
        Path template containing '{}'; formatted with 'test' to locate the test
        pickles and used unformatted as the key into ``data_dict``.
    batch_size : int
        Batch size of the test DataLoader.
    clip, epochs :
        Only used as components of the run name.
    postStr : str, default 'base'
        Suffix appended to the last component of every file name.

    Notes
    -----
    Relies on the module-level names ``head_mol``, ``head_atom``, ``head_edge``,
    ``data_dict``, ``GNN_multiHead`` and ``pd`` being in scope, and on a CUDA
    device 'cuda:0' being available.
    """
    device = 'cuda:0'
    n_types = 8  # one test pickle / id pickle / checkpoint per type index
    settings = [reuse,block,head,data,batch_size,dim,clip,layer1,layer2,factor,epochs]

    # set up
    model = GNN_multiHead(reuse,block,head,head_mol,head_atom,head_edge,
                          dim,layer1,layer2,factor,**data_dict[data]).to(device)
    # load_state_dict() does not touch the train/eval flag, so set it once here
    model.eval()
    submission = pd.read_csv('../Data/sample_submission.csv')

    # Strip only a trailing '.pickle'; splitting on the first 'pickle' would
    # truncate the path if that word also appeared in a directory or file name.
    test_stem = data.format('test')
    if test_stem.endswith('.pickle'):
        test_stem = test_stem[:-len('.pickle')]

    type_columns = []
    for type_idx in range(n_types):
        type_tag = 'type_' + str(type_idx)

        # load test data and ids for this type
        # NOTE: pickle.load can execute arbitrary code -- only use trusted files.
        with open(test_stem + '_' + type_tag + '.pickle', 'rb') as handle:
            test_data = pickle.load(handle)
        test_list = [Data(**d) for d in test_data]
        test_dl = DataLoader(test_list,batch_size,shuffle=False)
        with open(test_stem + '_id_' + type_tag + '.pickle', 'rb') as handle:
            test_id = pickle.load(handle)

        # load model weights trained for this type
        checkpoint = torch.load('../Model/{}.tar'.format(_run_name(settings + [type_tag + postStr])))
        model.load_state_dict(checkpoint['model_state_dict'])

        # predict
        yhat_list = []
        with torch.no_grad():
            for batch in test_dl:
                batch = batch.to(device)
                # NOTE(review): meaning of the positional flags (False, True) is
                # defined by GNN_multiHead.forward -- not visible here.
                yhat_list.append(model(batch,False,True))
        yhat = torch.cat(yhat_list).cpu().detach().numpy()

        # join predictions onto the submission frame by id; ids that do not
        # belong to this type stay NaN in this column
        submit_ = dict(zip(test_id,yhat))
        submission[type_tag] = submission.id.map(submit_)
        type_columns.append(type_tag)

    # save per-type results
    submission.to_csv('../Submission/{}.csv'.format(_run_name(settings + ['all_types' + postStr])),
                      index=False)

    # save final results for submission: average the type columns by name rather
    # than by position so extra columns in the sample file cannot leak in.
    # Presumably each id is predicted by exactly one type, in which case the
    # NaN-skipping mean simply selects that prediction -- confirm.
    submission['scalar_coupling_constant'] = submission[type_columns].mean(1)
    submission = submission[['id','scalar_coupling_constant']]

    submission.to_csv('../Submission/{}.csv'.format(_run_name(settings + ['final' + postStr])),
                      index=False)

In [6]:
# Generate the submission files for the configuration above; postStr is left
# at its default ('base').
make_submission(
    reuse, block, head, data, batch_size, dim, clip,
    layer1, layer2, factor, epochs,
)