In [1]:
import pickle
import torch
from torch_geometric.data import Data,DataLoader
from functions_refactor import *
from pytorch_util import *
#from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau

In [2]:
# fixed parameters
block = MEGNet_block
head_mol,head_atom,head_edge = head_mol,head_atom,head_edge
clip = 0.5
batch_size = 64
threshold = 1e3
reuse = False
lr = 1e-4

In [3]:
# changing parameters
head = SimplyInteraction
data = '../Data/{}_data_stacking_0815.pickle'
dim = 22
logLoss = False
layer = 4
factor = 3
epochs = 19
aggr = 'max'
sample_rate = 0.2

In [4]:
with open(data.format('train'), 'rb') as handle:
    train_data = pickle.load(handle)
with open(data.format('val'), 'rb') as handle:
    val_data = pickle.load(handle)

In [5]:
all_data = train_data + val_data

In [6]:
def sample_feature(dict_,sample_index):
    dict_copy = copy.deepcopy(dict_)
    dict_copy['edge_attr4'] = dict_copy['edge_attr4'][:,sample_index]
    dict_copy['x'] = dict_copy['x'][:,sample_index]
    return dict_copy

In [7]:
test_df = pd.read_csv('../Data/test.csv')

In [8]:
class GNN_multiHead_interleave_stacking(torch.nn.Module):
    def __init__(self,reuse,block,head,dim,layer,factor,\
                 edge_in3=8,aggr='mean'):
        # block,head are nn.Module
        # node_in,edge_in are dim for bonding and edge_in4,edge_in3 for coupling
        super(GNN_multiHead_interleave_stacking, self).__init__()
       
        if reuse:
            self.conv = block(dim=dim,aggr=aggr)
        else:
            self.conv = nn.ModuleList([block(dim=dim,aggr=aggr) for _ in range(layer)])        
        self.head = head(dim)
        
        
    def forward(self, data,IsTrain=False,typeTrain=False,logLoss=True,weight=None):
        out = data.x
        # edge_*3 only does not repeat for undirected graph. Hence need to add (j,i) to (i,j) in edges
        edge_index3 = torch.cat([data.edge_index3,data.edge_index3[[1,0]]],1)
        n = data.edge_attr3.shape[0]
        edge_attr3 = torch.cat([data.edge_attr4,data.edge_attr4],0)
          
        for conv in self.conv:
            out,edge_attr3 = conv(out,edge_index3,edge_attr3)

        
        edge_attr3 = edge_attr3[:n]

        if typeTrain:
            if IsTrain:
                y = data.y[data.type_attr]
            edge_attr3 = edge_attr3[data.type_attr]
            edge_index3 = data.edge_index3[:,data.type_attr]
            edge_attr3_old = data.edge_attr3[data.type_attr]
        else:
            if IsTrain:
                y = data.y
            edge_index3 = data.edge_index3
            edge_attr3_old = data.edge_attr3
            
        yhat = self.head(out,edge_index3,edge_attr3,edge_attr3_old)
        
        if IsTrain:
            k = torch.sum(edge_attr3_old,0)
            nonzeroIndex = torch.nonzero(k).squeeze(1)
            abs_ = torch.abs(y-yhat).unsqueeze(1)
            loss_perType = torch.zeros(8,device='cuda:0')
            if logLoss:
                loss_perType[nonzeroIndex] = torch.log(torch.sum(abs_ * edge_attr3_old[:,nonzeroIndex],0)/k[nonzeroIndex])
                loss = torch.sum(loss_perType)/nonzeroIndex.shape[0]
                return loss,loss_perType         
            else:
                loss_perType[nonzeroIndex] = torch.sum(abs_ * edge_attr3_old[:,nonzeroIndex],0)/k[nonzeroIndex]
                loss = torch.sum(loss_perType)/nonzeroIndex.shape[0]
                loss_perType[nonzeroIndex] = torch.log(loss_perType[nonzeroIndex])
                return loss,loss_perType
        else:
            return yhat

In [12]:
for bag_i in range(10):
    print('-----start bag: {}-----'.format(bag_i))
    # sample rows
    np.random.shuffle(all_data)
    
    # sample columns
    sample_index = torch.rand(dim)>sample_rate
    d_bag_i = sample_index.sum().item()
    
    train_list = [Data(**sample_feature(d,sample_index)) for d in all_data[:70000]]
    train_dl = DataLoader(train_list,batch_size,shuffle=True)
    val_list = [Data(**sample_feature(d,sample_index)) for d in all_data[70000:]]
    val_dl = DataLoader(val_list,batch_size,shuffle=False)   

    model = GNN_multiHead_interleave_stacking(reuse,block,head,d_bag_i,layer,factor).to('cuda:0')
    paras = trainable_parameter(model)
    opt = RAdam(paras,lr=lr)
    scheduler = ReduceLROnPlateau(opt, 'min',factor=0.5,patience=5,min_lr=1e-05)
    
    model,train_loss_perType,val_loss_perType,bestWeight = train_type_earlyStop(opt,model,epochs,train_dl,val_dl,paras,clip,\
                                                                    scheduler=scheduler,logLoss=logLoss,threshold=threshold)
    # predict test
    for type_i in range(8):
        # load val data and type_id
        with open(data.format('test').split('pickle')[0][:-1]+'_type_'+str(type_i)+'.pickle', 'rb') as handle:
            test_data = pickle.load(handle)
        test_list = [Data(**sample_feature(d,sample_index)) for d in test_data]
        test_dl = DataLoader(test_list,batch_size,shuffle=False)

        with open(data.format('test').split('pickle')[0][:-1]+'_id_type_'+str(type_i)+'.pickle', 'rb') as handle:
            test_id = pickle.load(handle)

        # load model
        model.load_state_dict(bestWeight[type_i])

        # predict
        model.eval()
        yhat_list = []
        with torch.no_grad():
            for data_torch in test_dl:
                data_torch = data_torch.to('cuda:0')
                yhat_list.append(model(data_torch,False,typeTrain=True))
        yhat = torch.cat(yhat_list).cpu().detach().numpy()        

        # join
        assert yhat.shape[0]==test_id.shape[0],'yhat and test_id should have same shape'
        submit_ = dict(zip(test_id,yhat))
        test_df['bag_'+str(bag_i)+'type_'+str(type_i)] = test_df.id.map(submit_)
        #test_df['fold'+str(i)+'_type'+str(type_i)] = test_df.id.map(submit_)

-----start bag: 0-----
epoch:0, train_loss: +5.435, val_loss: -2.408, 
train_vector: +1.03|+0.69|-1.20|-1.36|-1.77|-0.87|-0.92|-2.25, 
val_vector  : -1.54|-1.76|-2.38|-2.65|-2.90|-2.17|-2.83|-3.03

epoch:1, train_loss: +0.105, val_loss: -2.465, 
train_vector: -1.53|-1.67|-2.44|-2.68|-2.97|-2.24|-2.95|-3.10, 
val_vector  : -1.54|-1.94|-2.48|-2.34|-2.98|-2.30|-3.04|-3.09

epoch:2, train_loss: +0.096, val_loss: -2.572, 
train_vector: -1.52|-1.83|-2.51|-2.91|-3.01|-2.33|-3.07|-3.17, 
val_vector  : -1.46|-1.88|-2.51|-3.07|-3.00|-2.35|-3.12|-3.18

epoch:3, train_loss: +0.093, val_loss: -2.621, 
train_vector: -1.54|-1.88|-2.53|-3.04|-3.03|-2.36|-3.11|-3.19, 
val_vector  : -1.55|-1.92|-2.55|-3.20|-3.03|-2.37|-3.15|-3.20

epoch:4, train_loss: +0.091, val_loss: -2.634, 
train_vector: -1.55|-1.90|-2.55|-3.10|-3.03|-2.37|-3.13|-3.21, 
val_vector  : -1.56|-1.95|-2.54|-3.28|-3.03|-2.37|-3.14|-3.20

epoch:5, train_loss: +0.089, val_loss: -2.603, 
train_vector: -1.57|-1.94|-2.56|-3.12|-3.05|-2.39|-3.1

epoch:8, train_loss: +0.091, val_loss: -2.585, 
train_vector: -1.49|-1.90|-2.57|-3.31|-3.05|-2.40|-3.15|-3.23, 
val_vector  : -1.19|-1.94|-2.57|-3.36|-3.01|-2.37|-3.10|-3.14

epoch:9, train_loss: +0.090, val_loss: -2.649, 
train_vector: -1.50|-1.92|-2.57|-3.32|-3.05|-2.40|-3.15|-3.23, 
val_vector  : -1.37|-2.08|-2.58|-3.36|-3.01|-2.41|-3.13|-3.24

epoch:10, train_loss: +0.090, val_loss: -2.589, 
train_vector: -1.48|-1.94|-2.57|-3.33|-3.05|-2.40|-3.16|-3.24, 
val_vector  : -1.47|-1.87|-2.57|-3.22|-2.96|-2.41|-3.08|-3.14

epoch:11, train_loss: +0.090, val_loss: -2.656, 
train_vector: -1.49|-1.90|-2.58|-3.33|-3.05|-2.40|-3.16|-3.23, 
val_vector  : -1.60|-2.07|-2.58|-3.17|-3.02|-2.40|-3.18|-3.24

epoch:12, train_loss: +0.090, val_loss: -2.663, 
train_vector: -1.50|-1.92|-2.58|-3.33|-3.06|-2.41|-3.17|-3.24, 
val_vector  : -1.54|-2.07|-2.58|-3.33|-3.05|-2.41|-3.09|-3.24

epoch:13, train_loss: +0.089, val_loss: -2.662, 
train_vector: -1.52|-1.93|-2.58|-3.33|-3.06|-2.41|-3.17|-3.24, 
val_vecto

epoch:16, train_loss: +0.085, val_loss: -2.685, 
train_vector: -1.56|-2.04|-2.58|-3.37|-3.07|-2.41|-3.19|-3.25, 
val_vector  : -1.59|-2.02|-2.60|-3.28|-3.08|-2.43|-3.22|-3.26

epoch:17, train_loss: +0.085, val_loss: -2.708, 
train_vector: -1.57|-2.04|-2.58|-3.37|-3.07|-2.41|-3.19|-3.25, 
val_vector  : -1.52|-2.12|-2.60|-3.43|-3.10|-2.43|-3.23|-3.24

epoch:18, train_loss: +0.083, val_loss: -2.720, 
train_vector: -1.59|-2.08|-2.59|-3.39|-3.08|-2.41|-3.20|-3.26, 
val_vector  : -1.59|-2.11|-2.60|-3.43|-3.10|-2.43|-3.23|-3.27

Training completed in 673.7353270053864s
-----start bag: 5-----
epoch:0, train_loss: +7.878, val_loss: -2.414, 
train_vector: +1.61|+2.07|-0.80|-1.42|-1.68|-0.44|-1.34|-1.72, 
val_vector  : -1.43|-1.71|-2.27|-3.04|-2.87|-2.18|-2.87|-2.95

epoch:1, train_loss: +0.117, val_loss: -2.535, 
train_vector: -1.45|-1.39|-2.41|-2.93|-2.94|-2.24|-2.94|-3.05, 
val_vector  : -1.37|-1.78|-2.48|-3.25|-2.96|-2.29|-3.02|-3.14

epoch:2, train_loss: +0.101, val_loss: -2.568, 
train_vect

epoch:5, train_loss: +0.091, val_loss: -2.616, 
train_vector: -1.48|-1.98|-2.54|-3.32|-3.04|-2.32|-3.10|-3.22, 
val_vector  : -1.58|-2.08|-2.53|-3.18|-2.97|-2.34|-3.10|-3.15

epoch:6, train_loss: +0.092, val_loss: -2.602, 
train_vector: -1.44|-1.95|-2.55|-3.33|-3.04|-2.35|-3.13|-3.23, 
val_vector  : -1.33|-1.81|-2.54|-3.36|-3.05|-2.34|-3.15|-3.22

epoch:7, train_loss: +0.091, val_loss: -2.603, 
train_vector: -1.45|-1.97|-2.56|-3.34|-3.05|-2.37|-3.15|-3.23, 
val_vector  : -1.46|-1.91|-2.53|-3.25|-3.03|-2.36|-3.11|-3.16

epoch:8, train_loss: +0.090, val_loss: -2.651, 
train_vector: -1.46|-1.98|-2.56|-3.34|-3.06|-2.37|-3.16|-3.23, 
val_vector  : -1.59|-1.96|-2.56|-3.37|-2.97|-2.37|-3.18|-3.20

epoch:9, train_loss: +0.088, val_loss: -2.641, 
train_vector: -1.51|-1.99|-2.56|-3.35|-3.06|-2.38|-3.17|-3.24, 
val_vector  : -1.43|-1.90|-2.56|-3.38|-3.07|-2.37|-3.17|-3.23

epoch:10, train_loss: +0.088, val_loss: -2.673, 
train_vector: -1.51|-2.01|-2.57|-3.35|-3.07|-2.38|-3.18|-3.24, 
val_vector  

epoch:13, train_loss: +0.091, val_loss: -2.602, 
train_vector: -1.50|-1.87|-2.56|-3.35|-3.06|-2.37|-3.15|-3.23, 
val_vector  : -1.51|-1.87|-2.56|-3.09|-3.08|-2.39|-3.11|-3.21

epoch:14, train_loss: +0.091, val_loss: -2.542, 
train_vector: -1.49|-1.91|-2.56|-3.35|-3.06|-2.37|-3.15|-3.23, 
val_vector  : -1.57|-1.85|-2.53|-3.19|-2.91|-2.33|-2.95|-2.99

epoch:15, train_loss: +0.091, val_loss: -2.610, 
train_vector: -1.50|-1.89|-2.57|-3.36|-3.06|-2.37|-3.16|-3.23, 
val_vector  : -1.48|-2.09|-2.57|-2.86|-3.10|-2.39|-3.16|-3.23

epoch:16, train_loss: +0.090, val_loss: -2.596, 
train_vector: -1.52|-1.90|-2.57|-3.37|-3.07|-2.37|-3.16|-3.23, 
val_vector  : -1.57|-2.01|-2.56|-3.33|-2.88|-2.38|-3.04|-3.01

epoch:17, train_loss: +0.087, val_loss: -2.614, 
train_vector: -1.54|-2.00|-2.57|-3.39|-3.07|-2.37|-3.17|-3.24, 
val_vector  : -1.53|-1.96|-2.56|-3.07|-3.09|-2.39|-3.10|-3.22

epoch:18, train_loss: +0.086, val_loss: -2.652, 
train_vector: -1.55|-2.02|-2.57|-3.39|-3.08|-2.38|-3.17|-3.24, 
val_vec

In [None]:
# with open('../Model/GNN_stacking_0815.pickle', 'wb') as handle:
#     pickle.dump(bestWeight, handle, protocol=pickle.HIGHEST_PROTOCOL)

# with open('../Model/GNN_stacking_0815.pickle', 'rb') as handle:
#     bestWeight = pickle.load(handle)

# model = GNN_multiHead_interleave_stacking(reuse,block,head,dim,layer,factor,edge_in4).to('cuda:0')    

In [13]:
test_df['scalar_coupling_constant'] = np.nanmean(test_df.iloc[:,5:],1)
#test = test[['id','yhat']]
test_df[['id','scalar_coupling_constant']].to_csv('../Submission/GNN_stacking_0815_bags',index=False)