In [1]:
import pickle
import torch
from torch_geometric.data import Data,DataLoader
from functions_refactor import *
from pytorch_util import *
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau

In [6]:
# ---- run configuration ----

# Path template of the pickled datasets; the `{}` slot is filled with the
# split name through `data.format(...)` (e.g. `data.format('test')`).
data = '../Data/{}_data_ACSF_expand_PCA_otherInfo.pickle'

# Settings forwarded to the model constructor.
head = feedforwardHead_Update
dim = 128
layer1 = 3
layer2 = 3
factor = 2

# Settings forwarded to data loading, the optimiser and the training routine.
batch_size = 32
epochs = 50
lr = 1e-4
clip = 0.4

In [7]:
# `get_data` yields the train / validation pair consumed by the training cell.
train_dl, val_dl = get_data(data, batch_size)

In [12]:
# Build the network and move it to the first CUDA device.
# NOTE(review): `head_mol`, `head_atom`, `head_edge` and `data_dict` are not
# assigned in any visible cell -- presumably they come from the star imports;
# confirm this cell survives a fresh "Restart & Run All".
model = GNN_MataLayer(
    head, head_mol, head_atom, head_edge,
    dim, layer1, layer2, factor,
    **data_dict[data],
    interleave=True,
).to('cuda:0')

In [None]:
# Optimise only what `trainable_parameter` returns for the model.
paras = trainable_parameter(model)
opt = Adam(paras, lr=lr)

# Multiply the learning rate by 0.5 once the monitored value has stopped
# decreasing for more than 5 scheduler steps (what is monitored is decided
# inside the training routine, which receives the scheduler).
scheduler = ReduceLROnPlateau(opt, mode='min', factor=0.5, patience=5)

# The training routine hands back the model, both loss histories and the
# best weights that the save cell later writes out.
model, train_loss_list, val_loss_list, bestWeight = train_type_earlyStop(
    opt, model, epochs, train_dl, val_dl, paras, clip,
    scheduler=scheduler,
    weight=0.06,
)

In [4]:
# Persist the loss curves and the best weights of the interleaved run.
# NOTE(review): `reuse` and `block` are not assigned in any visible cell --
# presumably star-import or stale-kernel state; confirm before a fresh run.
save_results(
    train_loss_list, val_loss_list, reuse, block,
    head, data, batch_size, dim, clip, layer1, layer2, factor, epochs,
    postStr='_interleave',
)
save_model_type(
    bestWeight, opt, reuse, block,
    head, data, batch_size, dim, clip, layer1, layer2, factor, epochs,
    postStr='_interleave',
)

### make submissions

In [13]:
def _run_tag(parts):
    """Join run settings into the underscore-separated tag used in file names.

    Every part is converted with ``str``. When the text contains a ``}``,
    only the segment right after the first ``}`` (up to the next ``}``, if
    any) is kept; for the dataset path template this drops the
    ``'../Data/{}'`` prefix. All other parts are used verbatim.
    """
    tokens = []
    for part in parts:
        text = str(part)
        tokens.append(text.split('}')[1] if '}' in text else text)
    return '_'.join(tokens)


def make_submission(reuse,block,head,data,batch_size,dim,clip,layer1,layer2,factor,epochs,postStr='base',
                    n_types=8,device='cuda:0'):
    """Predict every test split with its per-type checkpoint and write the submission CSVs.

    For each type index the function loads the pickled test graphs and their
    ids, restores ``'../Model/<tag>_type_<k><postStr>.tar'`` into a
    ``GNN_edgeUpdate`` model, predicts, and stores the result in a
    ``type_<k>`` column of the sample submission. Two files are written to
    ``'../Submission/'``: ``<tag>_all_types<postStr>.csv`` (one column per
    type) and ``<tag>_final<postStr>.csv`` (``id`` plus the row-wise mean of
    the type columns as ``scalar_coupling_constant``).

    Parameters
    ----------
    reuse, block, head, dim, layer1, layer2, factor
        Forwarded to ``GNN_edgeUpdate`` and also part of the file tag.
    data : str
        Path template with one ``{}`` slot; ``data.format('test')`` locates
        the test pickles and ``data_dict[data]`` supplies extra model kwargs.
    batch_size : int
        Batch size of the test ``DataLoader``.
    clip, epochs
        Only used to rebuild the file tag of the saved checkpoints.
    postStr : str
        Suffix appended to the last component of every file tag.
    n_types : int
        Number of per-type splits / checkpoints to process (default 8, the
        value that used to be hard-coded).
    device : str
        Device for the model and the batches (default ``'cuda:0'``, the
        value that used to be hard-coded).

    Raises
    ------
    ValueError
        If the number of predictions for a type differs from the number of
        ids loaded for it (previously the pair list was silently truncated).

    Notes
    -----
    ``pd`` is not imported in the visible cells -- presumably it is
    re-exported by one of the star imports; verify.
    """
    # Settings shared by every file name this function reads or writes.
    settings = [reuse, block, head, data, batch_size, dim, clip,
                layer1, layer2, factor, epochs]

    # set up
    model = GNN_edgeUpdate(reuse, block, head, dim, layer1, layer2, factor,
                           **data_dict[data]).to(device)
    submission = pd.read_csv('../Data/sample_submission.csv')

    # '<...>.pickle' -> '<...>'; splitext only strips the real extension, so a
    # 'pickle' substring elsewhere in the path can no longer cut the stem short.
    test_stem = os.path.splitext(data.format('test'))[0]
    type_cols = []

    for type_idx in range(n_types):
        # load test data and type_id
        # NOTE: pickle.load can execute arbitrary code -- only point this at
        # files produced by this project's own preprocessing.
        with open('{}_type_{}.pickle'.format(test_stem, type_idx), 'rb') as handle:
            test_data = pickle.load(handle)
        test_list = [Data(**d) for d in test_data]
        test_dl = DataLoader(test_list, batch_size, shuffle=False)
        with open('{}_id_type_{}.pickle'.format(test_stem, type_idx), 'rb') as handle:
            test_id = pickle.load(handle)

        # load model
        checkpoint_tag = _run_tag(settings + ['type_' + str(type_idx) + postStr])
        checkpoint = torch.load('../Model/{}.tar'.format(checkpoint_tag))
        model.load_state_dict(checkpoint['model_state_dict'])

        # predict
        model.eval()
        yhat_list = []
        with torch.no_grad():
            for data_torch in test_dl:
                data_torch = data_torch.to(device)
                # The two positional flags are forwarded unchanged; their
                # meaning is defined by the model's forward -- TODO confirm.
                yhat_list.append(model(data_torch, False, True))
        yhat = torch.cat(yhat_list).cpu().detach().numpy()

        # join
        if len(test_id) != len(yhat):
            raise ValueError(
                'type {}: {} ids but {} predictions'.format(type_idx, len(test_id), len(yhat)))
        column = 'type_' + str(type_idx)
        submission[column] = submission['id'].map(dict(zip(test_id, yhat)))
        type_cols.append(column)

    # save types results
    submission.to_csv('../Submission/{}.csv'.format(_run_tag(settings + ['all_types' + postStr])),
                      index=False)

    # save final results for submission
    # Row-wise mean over the prediction columns, selected by name instead of
    # by position. pandas skips NaN by default, so ids missing from a type do
    # not poison the average (presumably each id belongs to one type -- verify).
    submission['scalar_coupling_constant'] = submission[type_cols].mean(axis=1)
    submission = submission[['id', 'scalar_coupling_constant']]

    submission.to_csv('../Submission/{}.csv'.format(_run_tag(settings + ['final' + postStr])),
                      index=False)

In [17]:
# Generate the submission files with the 'base' suffix (the function default).
# NOTE(review): the training cells above save with postStr='_interleave';
# confirm that checkpoints tagged 'base' are the ones intended here.
make_submission(
    reuse, block, head, data, batch_size, dim, clip,
    layer1, layer2, factor, epochs,
    postStr='base',
)