In [1]:
import pickle
import torch
import pandas as pd
from torch_geometric.data import Data,DataLoader
from functions_refactor import *
from pytorch_util import *
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau

In [2]:
# model parameters
# Every value below is read by the training, saving and submission cells; most
# of them are also embedded in the saved model / submission file names, so
# changing one means the matching checkpoint must exist (or be retrained).
reuse = False  # forwarded to GNN_edgeUpdate; exact meaning not visible here -- TODO confirm
block = MEGNet_block  # block class handed to GNN_edgeUpdate
head = feedforwardHead_Update  # head class handed to GNN_edgeUpdate
data = '../Data/{}_data_atomInfo.pickle'  # path template: '{}' is filled with the split name (e.g. 'test'); also the key into data_dict
batch_size = 32  # batch size given to get_data and to the test DataLoader
dim = 128  # forwarded to GNN_edgeUpdate; presumably the hidden feature width -- TODO confirm
epochs = 50  # number of epochs passed to train_type
clip = 0.4  # passed to train_type; presumably a gradient-clipping threshold -- TODO confirm
layer1 = 3  # forwarded to GNN_edgeUpdate; presumably a layer count -- TODO confirm
layer2 = 3  # forwarded to GNN_edgeUpdate; presumably a layer count -- TODO confirm
factor = 2  # forwarded to GNN_edgeUpdate; meaning not visible here (unrelated to the scheduler's factor=0.5)
lr = 1e-4  # initial learning rate for Adam

#### train base

In [3]:
# Build the training and validation loaders from the data path template.
train_dl,val_dl = get_data(data,batch_size)

# Instantiate the network on the first GPU; data_dict[data] supplies extra
# keyword arguments specific to the chosen dataset.
model = GNN_edgeUpdate(reuse,block,head,dim,layer1,layer2,factor,**data_dict[data]).to('cuda:0')

# Optimise only what trainable_parameter returns (presumably the parameters
# with requires_grad=True -- TODO confirm in pytorch_util / functions_refactor).
paras = trainable_parameter(model)
opt = Adam(paras,lr=lr)
# Halve the learning rate once the monitored quantity has stopped decreasing
# for `patience` epochs; which quantity is passed to scheduler.step is decided
# inside train_type (presumably the validation loss -- TODO confirm).
scheduler = ReduceLROnPlateau(opt, 'min',factor=0.5,patience=5)

# Run the training loop. Returns the model, the per-epoch train/validation loss
# histories and `bestWeight`, which the next cell hands to save_model_type.
# NOTE: long-running cell -- the recorded run below took ~4186 s for 50 epochs.
model,train_loss_list,val_loss_list,bestWeight = train_type(opt,model,epochs,train_dl,val_dl,paras,clip,scheduler=scheduler)

epoch:0, train_loss: +0.328, val_loss: -0.065, 
train_vector: +4.22|+0.99|+0.00|-0.50|-0.59|+0.08|-0.67|-0.91, 
val_vector  : +3.94|+0.45|-0.35|-1.13|-0.93|-0.21|-1.07|-1.21

epoch:1, train_loss: -0.268, val_loss: -0.648, 
train_vector: +2.59|+0.32|-0.37|-1.01|-1.10|-0.25|-1.06|-1.27, 
val_vector  : +0.95|+0.06|-0.48|-1.37|-1.31|-0.37|-1.25|-1.41

epoch:2, train_loss: -0.632, val_loss: -0.735, 
train_vector: +0.88|+0.13|-0.52|-1.25|-1.28|-0.38|-1.21|-1.44, 
val_vector  : +0.73|+0.07|-0.66|-1.17|-1.41|-0.48|-1.41|-1.55

epoch:3, train_loss: -0.747, val_loss: -0.901, 
train_vector: +0.79|-0.04|-0.61|-1.39|-1.40|-0.46|-1.31|-1.56, 
val_vector  : +0.58|-0.35|-0.72|-1.74|-1.48|-0.54|-1.35|-1.60

epoch:4, train_loss: -0.819, val_loss: -0.963, 
train_vector: +0.76|-0.13|-0.68|-1.49|-1.48|-0.52|-1.38|-1.63, 
val_vector  : +0.53|-0.36|-0.81|-1.67|-1.59|-0.60|-1.47|-1.73

epoch:5, train_loss: -0.871, val_loss: -0.977, 
train_vector: +0.72|-0.17|-0.74|-1.54|-1.54|-0.56|-1.43|-1.70, 
val_vector  :

epoch:47, train_loss: -1.769, val_loss: -1.684, 
train_vector: -0.40|-1.25|-1.44|-2.47|-2.51|-1.18|-2.22|-2.68, 
val_vector  : -0.21|-0.92|-1.46|-2.53|-2.37|-1.20|-2.27|-2.50

epoch:48, train_loss: -1.765, val_loss: -1.724, 
train_vector: -0.39|-1.22|-1.44|-2.47|-2.51|-1.18|-2.22|-2.69, 
val_vector  : -0.29|-1.14|-1.46|-2.58|-2.38|-1.20|-2.25|-2.49

epoch:49, train_loss: -1.776, val_loss: -1.767, 
train_vector: -0.40|-1.25|-1.45|-2.48|-2.52|-1.19|-2.23|-2.70, 
val_vector  : -0.43|-1.34|-1.46|-2.54|-2.40|-1.20|-2.26|-2.50

Training completed in 4186.085424900055s


In [4]:
# Persist the loss histories and the best weights returned by train_type.
# The full hyper-parameter set is passed along with them; make_submission later
# rebuilds checkpoint file names from the same settings, so presumably these
# helpers use them to name the output files -- TODO confirm in the helper source.
save_results(train_loss_list,val_loss_list,reuse,block,\
             head,data,batch_size,dim,clip,layer1,layer2,factor,epochs)
save_model_type(bestWeight,opt,reuse,block,head,data,batch_size,dim,clip,layer1,layer2,factor,epochs)

### make submissions

In [3]:
def _run_tag(*parts):
    """Return the underscore-joined file-name stem for a sequence of run settings.

    Each part is converted with ``str``. If the text contains ``'}'`` (as the
    ``'../Data/{}_data_atomInfo.pickle'`` path template does), only the piece
    following the first ``'}'`` is kept, which drops the directory and the
    placeholder. All other parts are used verbatim.
    """
    pieces = []
    for part in parts:
        text = str(part)
        pieces.append(text.split('}')[1] if '}' in text else text)
    return '_'.join(pieces)


def make_submission(reuse,block,head,data,batch_size,dim,clip,layer1,layer2,factor,epochs,postStr='base'):
    """Predict every test type with its saved checkpoint and write the submission CSVs.

    For each type index 0..7 this loads the pickled test graphs and their row
    ids, restores the matching checkpoint from ``../Model``, runs inference on
    ``cuda:0`` and stores the predictions in a ``type_<k>`` column of the sample
    submission (rows whose id is not part of that type stay NaN).

    Two files are written to ``../Submission``:

    * ``<tag>_all_types<postStr>.csv`` -- the sample submission plus the eight
      ``type_<k>`` columns;
    * ``<tag>_final<postStr>.csv`` -- only ``id`` and ``scalar_coupling_constant``,
      the latter being the row-wise mean of the ``type_<k>`` columns (NaN skipped).

    Parameters
    ----------
    reuse, block, head, dim, layer1, layer2, factor
        Forwarded to ``GNN_edgeUpdate``; they are also part of the file-name tag.
    data : str
        Path template containing a ``{}`` placeholder, filled with ``'test'``
        here. Also used as the key into ``data_dict``.
    batch_size : int
        Batch size of the test ``DataLoader``; part of the file-name tag.
    clip, epochs
        Not used for inference; only needed to rebuild the checkpoint and
        submission file names.
    postStr : str, default ``'base'``
        Suffix appended to the last component of every file name.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If the number of predictions for a type differs from the number of ids
        loaded for it (previously ``zip`` silently truncated in that case).

    Notes
    -----
    Relies on ``pd``, ``GNN_edgeUpdate`` and ``data_dict`` being available in
    the notebook namespace.
    """
    n_types = 8
    device = 'cuda:0'
    settings = [reuse,block,head,data,batch_size,dim,clip,layer1,layer2,factor,epochs]
    # '../Data/{}_data_atomInfo.pickle' -> '../Data/test_data_atomInfo'
    test_stem = data.format('test').split('pickle')[0][:-1]

    # set up
    model = GNN_edgeUpdate(reuse,block,head,dim,layer1,layer2,factor,**data_dict[data]).to(device)
    submission = pd.read_csv('../Data/sample_submission.csv')
    type_cols = []

    for type_idx in range(n_types):
        # load test data and type_id
        # NOTE(security): pickle.load executes arbitrary code -- only use trusted files.
        with open(test_stem+'_type_'+str(type_idx)+'.pickle', 'rb') as handle:
            test_data = pickle.load(handle)
        test_list = [Data(**d) for d in test_data]
        # shuffle=False keeps predictions aligned with the order of test_id
        test_dl = DataLoader(test_list,batch_size,shuffle=False)
        with open(test_stem+'_id_type_'+str(type_idx)+'.pickle', 'rb') as handle:
            test_id = pickle.load(handle)

        # load model
        checkpoint = torch.load('../Model/{}.tar'.format(
            _run_tag(*settings, 'type_'+str(type_idx)+postStr)))
        model.load_state_dict(checkpoint['model_state_dict'])

        # predict
        model.eval()
        yhat_list = []
        with torch.no_grad():
            for data_torch in test_dl:
                data_torch = data_torch.to(device)
                yhat_list.append(model(data_torch,False,True))
        yhat = torch.cat(yhat_list).cpu().detach().numpy()

        if len(test_id) != len(yhat):
            raise ValueError('type {}: {} ids but {} predictions'.format(
                type_idx, len(test_id), len(yhat)))

        # join
        column = 'type_'+str(type_idx)
        submission[column] = submission.id.map(dict(zip(test_id,yhat)))
        type_cols.append(column)

    # save types results
    submission.to_csv('../Submission/{}.csv'.format(_run_tag(*settings, 'all_types'+postStr)),
                      index=False)

    # save final results for submission
    # Average the per-type columns by name rather than by position, so an extra
    # column in the sample file cannot leak into the mean.
    submission['scalar_coupling_constant'] = submission[type_cols].mean(axis=1)
    submission = submission[['id','scalar_coupling_constant']]

    submission.to_csv('../Submission/{}.csv'.format(_run_tag(*settings, 'final'+postStr)),
                      index=False)

In [4]:
# Generate the submission files from the settings in the configuration cell.
# postStr is left at its default ('base'), so checkpoints named
# '..._type_<k>base.tar' must already exist under ../Model.
make_submission(reuse,block,head,data,batch_size,dim,clip,layer1,layer2,factor,epochs)