In [None]:
import torch.nn as nn
import torch.nn.functional as F
from dgl.contrib.deprecation import deprecated
from dgl.nn.pytorch import Set2Set, NNConv

class MPNNModel(nn.Module):
    """MPNN encoder with one classification head and one regression head.

    Node features are embedded by a linear layer, refined by repeated
    ``NNConv`` + ``GRU`` message-passing steps, pooled with ``Set2Set`` and
    finally fed to two independent linear heads.

    Parameters
    ----------
    node_input_dim : int
        Size of the input node features.
    edge_input_dim : int
        Size of the input edge features.
    node_hidden_dim : int
        Size of the hidden node representation.
    edge_hidden_dim : int
        Hidden size of the edge network used by ``NNConv``.
    num_step_message_passing : int
        Number of ``NNConv`` + ``GRU`` rounds applied in ``forward``.
    num_step_set2set : int
        Second positional argument handed to ``Set2Set``.
    num_layer_set2set : int
        Third positional argument handed to ``Set2Set``.
    num_classes : int
        Number of logits produced by the classification head. Defaults to
        121, the value that used to be hard-coded, so existing callers and
        saved state dicts keep working unchanged.
    """

    @deprecated('Import MPNNPredictor from dgllife.model instead.', 'class')
    def __init__(self,
                 node_input_dim=15,
                 edge_input_dim=5,
                 node_hidden_dim=64,
                 edge_hidden_dim=128,
                 num_step_message_passing=6,
                 num_step_set2set=6,
                 num_layer_set2set=3,
                 num_classes=121):
        super(MPNNModel, self).__init__()

        self.num_step_message_passing = num_step_message_passing
        self.lin0 = nn.Linear(node_input_dim, node_hidden_dim)
        # The edge network emits node_hidden_dim * node_hidden_dim values per
        # edge, matching NNConv's in_feats * out_feats below.
        edge_network = nn.Sequential(
            nn.Linear(edge_input_dim, edge_hidden_dim), nn.ReLU(),
            nn.Linear(edge_hidden_dim, node_hidden_dim * node_hidden_dim))
        self.conv = NNConv(in_feats=node_hidden_dim,
                           out_feats=node_hidden_dim,
                           edge_func=edge_network,
                           aggregator_type='sum')
        self.gru = nn.GRU(node_hidden_dim, node_hidden_dim)

        self.set2set = Set2Set(node_hidden_dim, num_step_set2set, num_layer_set2set)
        # lin1 consumes 2 * node_hidden_dim features from the Set2Set readout.
        self.lin1 = nn.Linear(2 * node_hidden_dim, node_hidden_dim)

        # Task heads: class logits and a single regression value.
        self.lin2_1 = nn.Linear(node_hidden_dim, num_classes)
        self.lin2_2 = nn.Linear(node_hidden_dim, 1)

    def forward(self, g, n_feat, e_feat):
        """Compute both task outputs for the graph(s) in ``g``.

        Parameters
        ----------
        g
            Graph object passed straight to ``NNConv`` and ``Set2Set``.
        n_feat : Tensor
            Node features whose last dimension is ``node_input_dim``.
        e_feat : Tensor
            Edge features whose last dimension is ``edge_input_dim``.

        Returns
        -------
        list
            ``[class_logits, regression_output]``: the outputs of ``lin2_1``
            (last dimension ``num_classes``) and ``lin2_2`` (last dimension
            1). No activation is applied to either head.
        """
        out = F.relu(self.lin0(n_feat))                 # (B1, H1)
        h = out.unsqueeze(0)                            # (1, B1, H1)

        for _ in range(self.num_step_message_passing):
            m = F.relu(self.conv(g, out, e_feat))       # (B1, H1)
            # The GRU sees a length-1 sequence; h carries the node state
            # from one message-passing step to the next.
            out, h = self.gru(m.unsqueeze(0), h)
            out = out.squeeze(0)

        out = self.set2set(g, out)
        out = F.relu(self.lin1(out))

        out1 = self.lin2_1(out)  # for classification
        out2 = self.lin2_2(out)  # for regression
        return [out1, out2]

import numpy as np
import time
import torch
import torch.nn as nn
from torch.nn import BCEWithLogitsLoss
from torch.utils.data import DataLoader
from dgl import model_zoo,DGLGraph
import math
import networkx as nx
import matplotlib.pyplot as plt
from utils_mpnn_multitask import Meter, set_random_seed, collate, EarlyStopping,load_brl_dataset,regress
import argparse
# from sklearn import svm, datasets
# from sklearn.model_selection import train_test_split
# from sklearn.metrics import plot_confusion_matrix
import seaborn as sn
import pandas as pd
from sklearn import metrics
import collections
# Reset argv so that parse_args() below only ever sees the declared defaults
# (presumably needed because this runs inside a notebook kernel).
import sys; sys.argv=['']; del sys
parser = argparse.ArgumentParser(description='Molecule Regression')
parser.add_argument('-m', '--model', type=str,default='MPNN',help='Model to use')
parser.add_argument('-p', '--pre-trained', action='store_true', default=False, help='Whether to skip training and use a pre-trained model')
args = parser.parse_args().__dict__

# Fixed experiment configuration, merged into the parsed arguments.
training_setting= {
    'random_seed': 0,
    'batch_size': 64,
    'num_epochs': 2000,
    'node_in_feats': 7,
    'edge_in_feats': 6,
    'lr': 0.001,
    'patience': 300,
    'metric_name': 'l1',
    'weight_decay': 0,
    'n_task':41,
}
args.update(training_setting)
args['device'] = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
args['data_path_x']='data/data_mpnn/'
args['data_path_label']='data/label_multitask/'
set_random_seed(args['random_seed'])

# Select the second GPU only when it actually exists. The CUDA calls are
# guarded so that the CPU fallback chosen for args['device'] above is usable
# on hosts without CUDA or with a single GPU.
if torch.cuda.is_available():
    print(torch.cuda.current_device())
    if torch.cuda.device_count() > 1:
        torch.cuda.set_device(1)
    print(torch.cuda.current_device())

train_loader,val_loader,test_loader=load_brl_dataset(args)
if args['pre_trained']:
    args['num_epochs'] = 0
    # NOTE(review): 'exp' is never set on args in the visible code, so this
    # branch would raise KeyError; it also leaves optimizer, stopper and the
    # loss functions undefined for the later cells. Confirm whether the branch
    # is still needed.
    model = model_zoo.chem.load_pretrained(args['exp'])
else:
    model = MPNNModel(node_input_dim=args['node_in_feats'],
                      edge_input_dim=args['edge_in_feats'])
    if args['model'] in ['SCHNET', 'MGCN']:
        # NOTE(review): train_set is not defined anywhere in the visible code;
        # this only runs for a non-default --model value.
        model.set_mean_std(train_set.mean, train_set.std, args['device'])
    # One criterion per head: cross-entropy for the class logits and
    # element-wise L1 for the regression output.
    loss_fn_cablenumber =nn.CrossEntropyLoss()
    loss_fn_area =nn.L1Loss(reduction='none')
    optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'], weight_decay=args['weight_decay'])
    stopper = EarlyStopping(mode='lower', patience=args['patience'],
                            filename='model_saved/multitask/model2_5000/early_stop.pth')

model.to(args['device'])

    
def run_a_train_epoch_cr(args, epoch, model, data_loader,
                      loss_criterion_cablenumber,loss_criterion_area, optimizer):
    """Train ``model`` for one pass over ``data_loader`` and print a summary.

    Each batch must unpack into ``(bg, labels_cablenumber, labels_area)``.
    ``regress(args, model, bg)`` supplies the predictions: index 0 is scored
    against ``labels_cablenumber`` and index 1 against ``labels_area``. The
    two mean losses are summed and used for a single optimizer step.

    The printed line contains the mean of the ``args['metric_name']`` metric
    accumulated for index 1, followed by ``100 - 100 * correct / total`` for
    index 0. That second value is labelled ``accuracy(%)`` in the message but
    is computed as the percentage of misclassified samples.

    Returns nothing.
    """
    model.train()
    area_meter = Meter()
    n_correct, n_seen = 0, 0

    for bg, labels_cablenumber, labels_area in data_loader:
        labels_cablenumber = labels_cablenumber.to(args['device'])
        labels_area = labels_area.to(args['device'])

        prediction = regress(args, model, bg)
        class_out = prediction[0]
        area_out = prediction[1]

        # Joint objective: unweighted sum of the two per-task mean losses.
        class_loss = loss_criterion_cablenumber(class_out, labels_cablenumber).float().mean()
        area_loss = loss_criterion_area(area_out, labels_area).float().mean()
        joint_loss = class_loss + area_loss

        optimizer.zero_grad()
        joint_loss.backward()
        optimizer.step()

        area_meter.update(area_out, labels_area)

        # Predicted class = index of the largest value along dimension 1.
        hits = torch.max(class_out.data, 1)[1] == labels_cablenumber
        n_seen += labels_cablenumber.size(0)
        n_correct += hits.sum().item()

    total_score_acc = 100 - 100 * n_correct / n_seen
    total_score = np.mean(area_meter.compute_metric(args['metric_name']))
    summary = 'epoch {:d}/{:d}, training {} {:.4f} / accuracy(%) {:.4f}'
    print(summary.format(epoch + 1, args['num_epochs'], args['metric_name'],
                         total_score, total_score_acc))
    
def run_an_eval_epoch_cr(args, model, data_loader):
    """Score ``model`` on ``data_loader`` without computing gradients.

    Each batch must unpack into ``(bg, labels_cablenumber, labels_area)``.
    Predictions come from ``regress(args, model, bg)``: index 1 is accumulated
    in a ``Meter`` against ``labels_area``, and the position of the largest
    value along dimension 1 of index 0 is compared with
    ``labels_cablenumber``.

    Returns
    -------
    tuple
        ``(total_score, total_score_acc)``: the mean of the
        ``args['metric_name']`` metric, and ``100 - 100 * correct / total``,
        i.e. the percentage of misclassified samples.
    """
    model.eval()
    area_meter = Meter()
    n_correct, n_seen = 0, 0

    with torch.no_grad():
        for bg, labels_cablenumber, labels_area in data_loader:
            labels_cablenumber = labels_cablenumber.to(args['device'])
            labels_area = labels_area.to(args['device'])

            prediction = regress(args, model, bg)
            area_meter.update(prediction[1], labels_area)

            hits = torch.max(prediction[0].data, 1)[1] == labels_cablenumber
            n_seen += labels_cablenumber.size(0)
            n_correct += hits.sum().item()

        total_score_acc = 100 - 100 * n_correct / n_seen
        total_score = np.mean(area_meter.compute_metric(args['metric_name']))

    return total_score, total_score_acc

In [None]:
# Restore model and optimizer state from a saved checkpoint.
# map_location remaps the stored tensors onto the device chosen for this run,
# so a checkpoint written on another device still loads here.
state = torch.load('model_saved/multitask/model2_5000/last_4000.pth',
                   map_location=args['device'])
model.load_state_dict(state['model_state_dict'])
optimizer.load_state_dict(state['optimizer'])

In [None]:
    
# Train for a fixed number of epochs, validating and checkpointing each epoch.
num_epochs = 1000
# The progress messages below and inside run_a_train_epoch_cr read the total
# from args['num_epochs']; keep it equal to the length of the loop being run.
args['num_epochs'] = num_epochs
for epoch in range(num_epochs):
    st = time.time()
    # Periodic snapshot every 500 epochs (skipping epoch 0).
    # NOTE(review): this path omits the 'model2_5000/' directory used by every
    # other checkpoint in this file; confirm that is intended.
    if epoch % 500 == 0 and epoch != 0:
        print('save')
        torch.save({'model_state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict()}, 'model_saved/multitask/'+str(epoch)+'.pth')
    # Train
    run_a_train_epoch_cr(args, epoch, model, train_loader, loss_fn_cablenumber, loss_fn_area, optimizer)
    # Validation and early stop
    # val_acc is 100 - 100 * correct / total, so lower is better; it is the
    # value handed to the stopper, which was created with mode='lower'.
    val_score,val_acc = run_an_eval_epoch_cr(args, model, val_loader)
    early_stop = stopper.step(val_acc, model,optimizer)
    print('epoch {:d}/{:d}, validation {} {:.4f}, accuracy(%) {:.4f}, best validation {} {:.4f}, time{:.1f}'.format(
        epoch + 1, args['num_epochs'], args['metric_name'], val_score,val_acc,args['metric_name'], stopper.best_score,time.time()-st))
    # Early stopping is deliberately disabled: the stopper is only stepped.
    #if early_stop:
    #    break
    # Overwrite the rolling "last" checkpoint after every epoch.
    torch.save({'model_state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict()}, 'model_saved/multitask/model2_5000/last.pth')

In [None]:
# Restore a checkpoint, then continue with a ten times smaller learning rate.
# map_location remaps the stored tensors onto the device chosen for this run.
state = torch.load('model_saved/multitask/model2_5000/early_stop1.pth',
                   map_location=args['device'])
model.load_state_dict(state['model_state_dict'])
optimizer.load_state_dict(state['optimizer'])

# Print the learning rates before and after scaling every parameter group.
for param_group in optimizer.param_groups:
    print(param_group['lr'])
for param_group in optimizer.param_groups:
    param_group['lr'] = param_group['lr']*0.1
for param_group in optimizer.param_groups:
    print(param_group['lr'])
 

In [None]:
    
# Train for a fixed number of epochs, validating and checkpointing each epoch.
num_epochs = 5000
# The progress messages below and inside run_a_train_epoch_cr read the total
# from args['num_epochs']; keep it equal to the length of the loop being run.
args['num_epochs'] = num_epochs
for epoch in range(num_epochs):
    st = time.time()
    # Periodic snapshot every 500 epochs (skipping epoch 0).
    if epoch % 500 == 0 and epoch != 0:
        print('save')
        torch.save({'model_state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict()}, 'model_saved/multitask/model2_5000/'+str(epoch)+'.pth')
    # Train
    run_a_train_epoch_cr(args, epoch, model, train_loader, loss_fn_cablenumber, loss_fn_area, optimizer)
    # Validation and early stop
    # val_acc is 100 - 100 * correct / total, so lower is better; it is the
    # value handed to the stopper, which was created with mode='lower'.
    val_score,val_acc = run_an_eval_epoch_cr(args, model, val_loader)
    early_stop = stopper.step(val_acc, model,optimizer)
    print('epoch {:d}/{:d}, validation {} {:.4f}, accuracy(%) {:.4f}, best validation {} {:.4f}, time{:.1f}'.format(
        epoch + 1, args['num_epochs'], args['metric_name'], val_score,val_acc,args['metric_name'], stopper.best_score,time.time()-st))
    # Early stopping is deliberately disabled: the stopper is only stepped.
    #if early_stop:
    #    break
    # Overwrite the rolling "last" checkpoint after every epoch.
    torch.save({'model_state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict()}, 'model_saved/multitask/model2_5000/last.pth')

In [None]:
# Evaluate the early-stopping checkpoint on the test loader.
# map_location remaps the stored tensors onto the device chosen for this run.
state = torch.load('model_saved/multitask/model2_5000/early_stop.pth',
                   map_location=args['device'])
model.load_state_dict(state['model_state_dict'])
optimizer.load_state_dict(state['optimizer'])

model.eval()
# *_p collect predictions, *_t collect targets; cn = class head, area =
# regression head.
cn_p=[];area_p=[];cn_t=[];area_t=[]
with torch.no_grad():
    for bg, labels_cablenumber, labels_area in test_loader:
        # Targets are only turned into NumPy arrays here, so they are not
        # moved to the compute device first.
        cn_t.append(labels_cablenumber.detach().cpu().numpy())
        area_t.append(labels_area.detach().cpu().numpy())

        prediction = regress(args, model, bg)
        cn_p.append(prediction[0].detach().cpu().numpy())
        area_p.append(prediction[1].detach().cpu().numpy())

# Predicted class = index of the largest value along axis 1.
cn_p = np.argmax(np.concatenate(cn_p, axis=0), axis=1)
area_p = np.concatenate(area_p, axis=0)

cn_t = np.concatenate(cn_t, axis=0)
area_t = np.concatenate(area_t, axis=0)

print(cn_p.shape, area_p.shape, cn_t.shape, area_t.shape)

acc = np.sum(cn_p == cn_t) / len(cn_p)
print('acc: ', acc)

# Indices of wrongly (w_idx) and correctly (c_idx) classified samples.
w_idx = np.where(cn_p != cn_t)
c_idx = np.where(cn_p == cn_t)
# Close the .npz archive once the array has been read.
# NOTE(review): indexing this array with w_idx / c_idx assumes the file is in
# the same order in which the loader yields samples; confirm.
with np.load('data/label_multitask/a_label_test.npz') as label_file:
    area = label_file['arr_0']
print('acc mean',np.mean(area[w_idx]),np.mean(area[c_idx]))

import sklearn.metrics as metrics
# Scatter of actual vs. predicted regression values, coloured by whether the
# class prediction for the same sample was right (blue) or wrong (red).
t_area=area_t.copy();p_area=area_p.copy()
plt.plot(t_area[c_idx].flatten(),p_area[c_idx].flatten(),'.',color='deepskyblue',alpha=0.5)
plt.plot(t_area[w_idx].flatten(),p_area[w_idx].flatten(),'.',color='crimson',alpha=0.3)
plt.ylim(-0.1,1.1)
plt.xlim(-0.1,1.1)
plt.xlabel('actual')
plt.ylabel('pred')
plt.grid()
plt.savefig('images/multitask/test_area.png', bbox_inches = 'tight')
plt.show()

# Regression summary statistics.
y_t=area_t.copy();y_p=area_p.copy()
print(y_t.shape,y_p.shape)
mae = metrics.mean_absolute_error(y_t, y_p)
mse = metrics.mean_squared_error(y_t, y_p)
rmse=np.sqrt(mse)
print('mae', mae, '| rmse:',rmse)
print('actual mean', np.mean(y_t),'| pred mean',np.mean(y_p))
print(np.corrcoef(y_t.flatten(),y_p.flatten()))
print('rmse/range',rmse/(np.max(y_t)-np.min(y_t)))
# NOTE(review): this divides element-wise by y_t and relies on y_t and y_p
# having the same shape; a zero target yields inf/nan. Confirm both.
print('mape',np.mean(np.abs(y_t-y_p)/y_t))
iqr= np.subtract(*np.percentile(y_t, [75, 25]))
print('rmse/iqr',rmse/iqr)
print('rmse/mean',rmse/np.mean(y_t))
print('actual min max', np.min(y_t),np.max(y_t))
print('pred min max', np.min(y_p),np.max(y_p))

In [None]:
# Evaluate the early-stopping checkpoint on the validation loader.
# map_location remaps the stored tensors onto the device chosen for this run.
state = torch.load('model_saved/multitask/model2_5000/early_stop.pth',
                   map_location=args['device'])
model.load_state_dict(state['model_state_dict'])
optimizer.load_state_dict(state['optimizer'])

model.eval()
# *_p collect predictions, *_t collect targets; cn = class head, area =
# regression head.
cn_p=[];area_p=[];cn_t=[];area_t=[]
with torch.no_grad():
    for bg, labels_cablenumber, labels_area in val_loader:
        # Targets are only turned into NumPy arrays here, so they are not
        # moved to the compute device first.
        cn_t.append(labels_cablenumber.detach().cpu().numpy())
        area_t.append(labels_area.detach().cpu().numpy())

        prediction = regress(args, model, bg)
        cn_p.append(prediction[0].detach().cpu().numpy())
        area_p.append(prediction[1].detach().cpu().numpy())

# Predicted class = index of the largest value along axis 1.
cn_p = np.argmax(np.concatenate(cn_p, axis=0), axis=1)
area_p = np.concatenate(area_p, axis=0)

cn_t = np.concatenate(cn_t, axis=0)
area_t = np.concatenate(area_t, axis=0)

print(cn_p.shape, area_p.shape, cn_t.shape, area_t.shape)

acc = np.sum(cn_p == cn_t) / len(cn_p)
print('acc: ', acc)

# Indices of wrongly (w_idx) and correctly (c_idx) classified samples.
w_idx = np.where(cn_p != cn_t)
c_idx = np.where(cn_p == cn_t)
# Close the .npz archive once the array has been read.
# NOTE(review): indexing this array with w_idx / c_idx assumes the file is in
# the same order in which the loader yields samples; confirm.
with np.load('data/label_multitask/a_label_val.npz') as label_file:
    area = label_file['arr_0']
print('acc mean',np.mean(area[w_idx]),np.mean(area[c_idx]))

import sklearn.metrics as metrics
# Scatter of actual vs. predicted regression values, coloured by whether the
# class prediction for the same sample was right (blue) or wrong (red).
t_area=area_t.copy();p_area=area_p.copy()
plt.plot(t_area[c_idx].flatten(),p_area[c_idx].flatten(),'.',color='deepskyblue',alpha=0.5)
plt.plot(t_area[w_idx].flatten(),p_area[w_idx].flatten(),'.',color='crimson',alpha=0.3)
plt.ylim(-0.1,1.1)
plt.xlim(-0.1,1.1)
plt.xlabel('actual')
plt.ylabel('pred')
plt.grid()
plt.savefig('images/multitask/val_area.png', bbox_inches = 'tight')
plt.show()

# Regression summary statistics.
y_t=area_t.copy();y_p=area_p.copy()
print(y_t.shape,y_p.shape)
mae = metrics.mean_absolute_error(y_t, y_p)
mse = metrics.mean_squared_error(y_t, y_p)
rmse=np.sqrt(mse)
print('mae', mae, '| rmse:',rmse)
print('actual mean', np.mean(y_t),'| pred mean',np.mean(y_p))
print(np.corrcoef(y_t.flatten(),y_p.flatten()))
print('rmse/range',rmse/(np.max(y_t)-np.min(y_t)))
# NOTE(review): this divides element-wise by y_t and relies on y_t and y_p
# having the same shape; a zero target yields inf/nan. Confirm both.
print('mape',np.mean(np.abs(y_t-y_p)/y_t))
iqr= np.subtract(*np.percentile(y_t, [75, 25]))
print('rmse/iqr',rmse/iqr)
print('rmse/mean',rmse/np.mean(y_t))
print('actual min max', np.min(y_t),np.max(y_t))
print('pred min max', np.min(y_p),np.max(y_p))

In [None]:
# Evaluate the early-stopping checkpoint on the training loader.
# map_location remaps the stored tensors onto the device chosen for this run.
state = torch.load('model_saved/multitask/model2_5000/early_stop.pth',
                   map_location=args['device'])
model.load_state_dict(state['model_state_dict'])
optimizer.load_state_dict(state['optimizer'])

model.eval()
# *_p collect predictions, *_t collect targets; cn = class head, area =
# regression head.
cn_p=[];area_p=[];cn_t=[];area_t=[]
with torch.no_grad():
    for bg, labels_cablenumber, labels_area in train_loader:
        # Targets are only turned into NumPy arrays here, so they are not
        # moved to the compute device first.
        cn_t.append(labels_cablenumber.detach().cpu().numpy())
        area_t.append(labels_area.detach().cpu().numpy())

        prediction = regress(args, model, bg)
        cn_p.append(prediction[0].detach().cpu().numpy())
        area_p.append(prediction[1].detach().cpu().numpy())

# Predicted class = index of the largest value along axis 1.
cn_p = np.argmax(np.concatenate(cn_p, axis=0), axis=1)
area_p = np.concatenate(area_p, axis=0)

cn_t = np.concatenate(cn_t, axis=0)
area_t = np.concatenate(area_t, axis=0)

print(cn_p.shape, area_p.shape, cn_t.shape, area_t.shape)

acc = np.sum(cn_p == cn_t) / len(cn_p)
print('acc: ', acc)

# Indices of wrongly (w_idx) and correctly (c_idx) classified samples.
w_idx = np.where(cn_p != cn_t)
c_idx = np.where(cn_p == cn_t)
# Close the .npz archive once the array has been read.
# NOTE(review): indexing this array with w_idx / c_idx assumes the file is in
# the same order in which the loader yields samples. If train_loader shuffles,
# the two will not line up; confirm.
with np.load('data/label_multitask/a_label_train.npz') as label_file:
    area = label_file['arr_0']
print('acc mean',np.mean(area[w_idx]),np.mean(area[c_idx]))

import sklearn.metrics as metrics
# Scatter of actual vs. predicted regression values, coloured by whether the
# class prediction for the same sample was right (blue) or wrong (red).
t_area=area_t.copy();p_area=area_p.copy()
plt.plot(t_area[c_idx].flatten(),p_area[c_idx].flatten(),'.',color='deepskyblue',alpha=0.5)
plt.plot(t_area[w_idx].flatten(),p_area[w_idx].flatten(),'.',color='crimson',alpha=0.3)
plt.ylim(-0.1,1.1)
plt.xlim(-0.1,1.1)
plt.xlabel('actual')
plt.ylabel('pred')
plt.grid()
plt.savefig('images/multitask/train_area.png', bbox_inches = 'tight')
plt.show()

# Regression summary statistics.
y_t=area_t.copy();y_p=area_p.copy()
print(y_t.shape,y_p.shape)
mae = metrics.mean_absolute_error(y_t, y_p)
mse = metrics.mean_squared_error(y_t, y_p)
rmse=np.sqrt(mse)
print('mae', mae, '| rmse:',rmse)
print('actual mean', np.mean(y_t),'| pred mean',np.mean(y_p))
print(np.corrcoef(y_t.flatten(),y_p.flatten()))
print('rmse/range',rmse/(np.max(y_t)-np.min(y_t)))
# NOTE(review): this divides element-wise by y_t and relies on y_t and y_p
# having the same shape; a zero target yields inf/nan. Confirm both.
print('mape',np.mean(np.abs(y_t-y_p)/y_t))
iqr= np.subtract(*np.percentile(y_t, [75, 25]))
print('rmse/iqr',rmse/iqr)
print('rmse/mean',rmse/np.mean(y_t))
print('actual min max', np.min(y_t),np.max(y_t))
print('pred min max', np.min(y_p),np.max(y_p))