In [1]:
import pickle
import torch
from torch_geometric.data import Data,DataLoader
from functions_refactor import *
from pytorch_util import *
from torch.optim import Adam

In [2]:
# model parameters
# Every value below is consumed by the model constructor, the data loaders or
# the training call further down in this notebook.
reuse = False  # if True the model builds a single block per stage instead of a ModuleList of blocks
block = MEGNet_block  # message-passing block class; the model instantiates it as block(dim=..., aggr=...)
head = feedforwardHead_Update  # prediction head class; the model instantiates it as head(dim)
data = '../Data/{}_data_ACSF_expand_PCA.pickle'  # path template; get_data2 fills '{}' with 'train' / 'val'. Also used as the key into data_dict
batch_size = 32  # number of graphs per mini-batch handed to the loaders
dim = 128  # width of the node/edge embeddings produced by the input MLPs
epochs = 10  # passed to train_type
clip = 0.4  # passed to train_type; presumably a gradient-clipping threshold -- TODO confirm
layer1 = 3  # number of blocks in the first stage (conv1) when reuse is False
layer2 = 3  # number of blocks in the second stage (conv2) when reuse is False
factor = 2  # the input MLPs use an intermediate width of dim*factor
lr = 1e-4  # learning rate given to Adam

In [4]:
class Data2(Data):
    r"""A :class:`torch_geometric.data.Data` whose :meth:`to` can change the
    dtype of its tensors without touching index tensors.

    Attributes whose key contains the substring ``'index'`` are treated as
    index tensors and are left alone by dtype casts (for instance a cast to
    ``torch.float16``), while device moves still relocate every tensor.
    NOTE(review): presumably this exists so that mixed-precision tooling can
    call ``batch.to(torch.float16)`` on a whole batch -- confirm.
    """

    def apply_ignore_index(self, func, *keys):
        r"""Applies the function :obj:`func` to all tensor attributes
        :obj:`*keys` whose key does not contain ``'index'``. If :obj:`*keys`
        is not given, :obj:`func` is applied to all such present attributes.

        Returns:
            The object itself, modified in place.
        """
        for key, item in self(*keys):
            if torch.is_tensor(item) and 'index' not in key:
                self[key] = func(item)
        return self

    def to(self, device, *keys):
        r"""Performs tensor dtype and/or device conversion to all attributes
        :obj:`*keys`.
        If :obj:`*keys` is not given, the conversion is applied to all present
        attributes.

        Args:
            device: Forwarded unchanged to :meth:`torch.Tensor.to`. A device
                specification (``str``, ``int`` ordinal or
                :class:`torch.device`) moves every tensor, index tensors
                included. Anything else (e.g. a :class:`torch.dtype`) is
                treated as a cast and skips attributes whose key contains
                ``'index'``.

        Returns:
            The object itself, modified in place.
        """
        # The previous check was `'cuda' in str(device)`, which also sent
        # 'cpu' (and bare integer ordinals) down the index-skipping path and
        # left index tensors behind on the old device. Deciding on the
        # argument's type keeps CUDA moves and dtype casts exactly as before
        # while making every device move complete.
        if isinstance(device, (str, int, torch.device)):
            return self.apply(lambda x: x.to(device), *keys)
        return self.apply_ignore_index(lambda x: x.to(device), *keys)

def get_data2(data,batch_size):
    """Create the training and validation loaders from pickled graph records.

    Args:
        data: Path template containing one ``{}`` placeholder; it is formatted
            with ``'train'`` and ``'val'`` to locate the two pickle files.
        batch_size: Number of graphs per mini-batch for both loaders.

    Returns:
        A ``(train_dl, val_dl)`` tuple of :class:`DataLoader2`. Only the
        training loader shuffles.

    Note:
        Each pickle must hold an iterable of dicts, because every record is
        expanded as keyword arguments into :class:`Data2`. ``pickle.load`` can
        execute arbitrary code, so only point this at trusted files.
    """
    def _read_split(split):
        # One pickle file per split, located via the path template.
        with open(data.format(split), 'rb') as fh:
            return pickle.load(fh)

    train_records = _read_split('train')
    val_records = _read_split('val')

    train_graphs = [Data2(**record) for record in train_records]
    train_loader = DataLoader2(train_graphs, batch_size, shuffle=True)
    val_graphs = [Data2(**record) for record in val_records]
    val_loader = DataLoader2(val_graphs, batch_size, shuffle=False)

    return train_loader, val_loader

class Batch2(Data2):
    r"""A plain old python object modeling a batch of graphs as one big
    (disconnected) graph. With :class:`Data2` being the base class, all its
    methods can also be used here.
    In addition, single graphs can be reconstructed via the assignment vector
    :obj:`batch`, which maps each node to its respective graph identifier.
    """

    def __init__(self, batch=None, **kwargs):
        super().__init__(**kwargs)
        self.batch = batch

    @staticmethod
    def from_data_list(data_list, follow_batch=[]):
        r"""Merges a python list of data objects into a single
        :class:`Batch2`.

        Every attribute is gathered across the graphs and concatenated along
        the dimension reported by ``__cat_dim__``. Attributes for which
        ``__cumsum__`` is true are shifted by the number of nodes of all
        preceding graphs. The assignment vector :obj:`batch` is created on the
        fly, and one extra ``<key>_batch`` assignment vector is created for
        each key in :obj:`follow_batch`.

        Raises:
            ValueError: If an attribute is neither a tensor, an int nor a
                float.
        """
        merged_keys = list(set.union(*[set(graph.keys) for graph in data_list]))
        assert 'batch' not in merged_keys

        merged = Batch2()

        # Start every collected attribute as an empty per-graph list.
        for name in merged_keys:
            merged[name] = []
        for name in follow_batch:
            merged['{}_batch'.format(name)] = []
        merged.batch = []

        node_offset = 0
        for graph_id, graph in enumerate(data_list):
            for name in graph.keys:
                value = graph[name]
                if graph.__cumsum__(name, value):
                    # Shift by the nodes of all graphs merged so far.
                    value = value + node_offset
                merged[name].append(value)

            for name in follow_batch:
                length = graph[name].size(graph.__cat_dim__(name, graph[name]))
                assignment = torch.full((length, ), graph_id, dtype=torch.long)
                merged['{}_batch'.format(name)].append(assignment)

            num_nodes = graph.num_nodes
            if num_nodes is not None:
                assignment = torch.full((num_nodes, ), graph_id, dtype=torch.long)
                merged.batch.append(assignment)
                node_offset += num_nodes

        # Only the last graph's node count decides whether `batch` is dropped.
        if num_nodes is None:  # pragma: no cover
            merged.batch = None

        for name in merged.keys:
            collected = merged[name]
            first = collected[0]
            if torch.is_tensor(first):
                merged[name] = torch.cat(
                    collected, dim=data_list[0].__cat_dim__(name, first))
            elif isinstance(first, (int, float)):
                merged[name] = torch.tensor(collected)
            else:
                raise ValueError('Unsupported attribute type.')

        # Custom methods of the source data class are not copied onto the batch.
        return merged.contiguous()

    @property
    def num_graphs(self):
        """Returns the number of graphs in the batch, read from the last
        entry of the assignment vector."""
        last_graph_id = self.batch[-1].item()
        return last_graph_id + 1
    
    
class DataLoader2(torch.utils.data.DataLoader):
    r"""Data loader which merges data objects into a mini-batch by collating
    each list of samples with :meth:`Batch2.from_data_list`.

    Args:
        dataset (Dataset): The dataset from which to load the data.
        batch_size (int, optional): How many samples per batch to load.
            (default: :obj:`1`)
        shuffle (bool, optional): If set to :obj:`True`, the data will be
            reshuffled at every epoch (default: :obj:`False`)
        follow_batch (list or tuple, optional): Creates assignment batch
            vectors for each key in the list. (default: :obj:`[]`)
        **kwargs: Forwarded unchanged to
            :class:`torch.utils.data.DataLoader`.
    """

    def __init__(self, dataset, batch_size=1, shuffle=False, follow_batch=[],
                 **kwargs):
        def _collate(data_list):
            # Merge the sampled graphs into one Batch2 object.
            return Batch2.from_data_list(data_list, follow_batch)

        super().__init__(dataset, batch_size, shuffle, collate_fn=_collate,
                         **kwargs)


In [3]:
# Build the train/validation loaders with get_data. That function is not
# defined in this notebook; presumably it comes from one of the star imports
# in the first cell -- TODO confirm. The notebook-local variant is get_data2.
train_dl,val_dl = get_data(data,batch_size)

In [5]:
# no apex
# Baseline run: build the model on cuda:0, optimise with Adam and train with
# apex switched off (UseAmp=False, AMP_clip=False).
# data_dict and trainable_parameter are not defined in this notebook;
# presumably they come from the star imports, with data_dict[data] supplying
# the input-size keyword arguments of the model -- TODO confirm.
model = GNN_edgeUpdate(reuse,block,head,dim,layer1,layer2,factor,**data_dict[data]).to('cuda:0')
paras = trainable_parameter(model)
opt = Adam(paras,lr=lr)
# train_type returns the trained model, the per-epoch train/validation loss
# lists and the best weights (as named by the assignment targets below).
model,train_loss_list,val_loss_list,bestWeight = train_type(opt,model,epochs,train_dl,val_dl,paras,clip,\
                                                            UseAmp=False,AMP_clip=False)

epoch:0, train_loss: +0.351, val_loss: -0.104, 
train_vector: +4.16|+0.90|+0.13|-0.42|-0.58|+0.12|-0.61|-0.91, 
val_vector  : +3.79|+0.22|-0.28|-1.13|-1.07|-0.18|-0.96|-1.22

epoch:1, train_loss: -0.366, val_loss: -0.524, 
train_vector: +1.74|+0.36|-0.34|-0.99|-1.11|-0.24|-1.04|-1.31, 
val_vector  : +1.22|+0.15|-0.53|-1.20|-1.22|-0.37|-0.90|-1.34

epoch:2, train_loss: -0.647, val_loss: -0.682, 
train_vector: +0.85|+0.10|-0.51|-1.24|-1.31|-0.38|-1.20|-1.49, 
val_vector  : +0.44|+0.62|-0.64|-1.11|-1.33|-0.47|-1.37|-1.59

epoch:3, train_loss: -0.759, val_loss: -0.899, 
train_vector: +0.79|-0.05|-0.62|-1.38|-1.43|-0.47|-1.30|-1.60, 
val_vector  : +0.47|-0.23|-0.75|-1.52|-1.50|-0.56|-1.41|-1.69

epoch:4, train_loss: -0.838, val_loss: -0.910, 
train_vector: +0.75|-0.16|-0.69|-1.47|-1.52|-0.53|-1.38|-1.70, 
val_vector  : +0.72|-0.15|-0.80|-1.51|-1.64|-0.62|-1.51|-1.75

epoch:5, train_loss: -0.897, val_loss: -0.989, 
train_vector: +0.73|-0.21|-0.76|-1.55|-1.59|-0.59|-1.44|-1.76, 
val_vector  :

In [5]:
# apex-O1
# Same setup as the baseline run, but the model and optimizer are wrapped by
# amp.initialize at opt_level "O1" before training, and train_type is told to
# use amp (UseAmp=True).
# `amp` is not imported explicitly in this notebook; presumably it is apex.amp
# brought in by one of the star imports -- TODO confirm.
model = GNN_edgeUpdate(reuse,block,head,dim,layer1,layer2,factor,**data_dict[data]).to('cuda:0')
paras = trainable_parameter(model)
opt = Adam(paras,lr=lr)
model, opt = amp.initialize(model, opt, opt_level="O1")
# NOTE(review): `paras` was collected before amp.initialize wrapped the model;
# confirm that train_type's use of it is still valid afterwards.
model,train_loss_list,val_loss_list,bestWeight = train_type(opt,model,epochs,train_dl,val_dl,paras,clip,\
                                                            UseAmp=True,AMP_clip=False)

Selected optimization level O1:  Insert automatic casts around Pytorch functions and Tensor methods.

Defaults for this optimization level are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Processing user overrides (additional kwargs that are not None)...
After processing overrides, optimization options are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0
Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0
epoch:0, train_loss: +0.431, val_loss: -0.095, 
train_vector: +4.35|+1.30|+0.15|-0.41|-0.57|+0.14|-0.59|-0.91, 
val_vector  : +4.15|+0.14|-0.32|-1.24|-1.

In [6]:
# Rebuild the loaders with the notebook-local get_data2, so that the batches
# are Data2/Batch2 objects (whose .to() skips '*index*' tensors on dtype casts)
# instead of whatever get_data produced earlier.
train_dl,val_dl = get_data2(data,batch_size)

In [7]:
class GNN_edgeUpdate(torch.nn.Module):
    """Graph network that updates node and edge embeddings in two stages and
    predicts one value per ``edge_index3`` entry.

    Stage one runs ``conv1`` over ``(edge_index, edge_attr)``. Stage two runs
    ``conv2`` over ``edge_index3`` (made bidirectional in :meth:`forward`),
    using the embedding of ``[edge_attr3, edge_attr4]`` as edge features. The
    ``head`` module then maps the node embeddings and the updated stage-two
    edge embeddings to the prediction.

    Args:
        reuse: If true, each stage holds a single block that is applied
            ``layer1`` / ``layer2`` times (shared weights). Otherwise each
            stage holds that many independent blocks.
        block: ``nn.Module`` class built as ``block(dim=dim, aggr=aggr)`` and
            called as ``block(x, edge_index, edge_attr) -> (x, edge_attr)``.
        head: ``nn.Module`` class built as ``head(dim)`` and called as
            ``head(x, edge_index3, edge_attr3)``.
        dim: Width of all embeddings.
        layer1, layer2: Number of block applications in stage one / two.
        factor: The input MLPs use an intermediate width of ``dim*factor``.
        node_in, edge_in: Input dims for bonding (node and edge features).
        edge_in4, edge_in3: Input dims for coupling (``edge_attr4`` and
            ``edge_attr3``).
        aggr: Aggregation mode forwarded to ``block``.
    """

    def __init__(self,reuse,block,head,dim,layer1,layer2,factor,
                 node_in,edge_in,edge_in4,edge_in3=8,aggr='mean'):
        # block,head are nn.Module
        # node_in,edge_in are dim for bonding and edge_in4,edge_in3 for coupling
        super(GNN_edgeUpdate, self).__init__()
        # Remembered so forward() can unroll a shared block the right number
        # of times when reuse is true.
        self.reuse = reuse
        self.layer1 = layer1
        self.layer2 = layer2

        hidden = dim*factor
        self.lin_node = Sequential(BatchNorm1d(node_in),Linear(node_in, hidden),LeakyReLU(),
                                   BatchNorm1d(hidden),Linear(hidden, dim),LeakyReLU())

        self.edge1 = Sequential(BatchNorm1d(edge_in),Linear(edge_in, hidden),LeakyReLU(),
                                BatchNorm1d(hidden),Linear(hidden, dim),LeakyReLU())

        self.edge2 = Sequential(BatchNorm1d(edge_in4+edge_in3),Linear(edge_in4+edge_in3, hidden),LeakyReLU(),
                                BatchNorm1d(hidden),Linear(hidden, dim),LeakyReLU())
        if reuse:
            # Kept as bare modules (not wrapped in a ModuleList) so that the
            # parameter names in the state_dict stay "conv1.*" / "conv2.*".
            self.conv1 = block(dim=dim,aggr=aggr)
            self.conv2 = block(dim=dim,aggr=aggr)
        else:
            self.conv1 = nn.ModuleList([block(dim=dim,aggr=aggr) for _ in range(layer1)])
            self.conv2 = nn.ModuleList([block(dim=dim,aggr=aggr) for _ in range(layer2)])

        self.head = head(dim)

    def _stage_blocks(self, conv, n_layers):
        """Return the sequence of blocks to apply for one stage.

        With ``reuse`` the single shared block is repeated ``n_layers`` times.
        Iterating over the bare module, as the code did before, is not
        possible for an ordinary ``nn.Module``.
        """
        if self.reuse:
            return [conv] * n_layers
        return conv

    @staticmethod
    def _per_type_loss(y, yhat, type_cols, logLoss):
        """Mean absolute error per type column, and its average.

        ``type_cols`` weights each sample per column (presumably a one-hot
        type encoding -- TODO confirm); columns whose weights sum to zero are
        skipped and stay 0 in the returned vector.

        Returns:
            ``(loss, loss_perType)``. ``loss_perType`` always holds the log of
            the per-type MAE. ``loss`` is the mean over the present types of
            log-MAE when ``logLoss`` is true, otherwise of the plain MAE.
        """
        k = torch.sum(type_cols,0)
        nonzeroIndex = torch.nonzero(k).squeeze(1)
        abs_ = torch.abs(y-yhat).unsqueeze(1)
        mae = torch.sum(abs_ * type_cols[:,nonzeroIndex],0)/k[nonzeroIndex]
        # Allocate the buffer with the dtype/device of the values written into
        # it. The former hard-coded float16 on 'cuda:0' only worked for
        # half-precision runs on the first GPU; in that setting the result is
        # unchanged.
        # NOTE(review): 8 matches the default edge_in3; presumably it is the
        # number of type columns -- confirm before using another edge_in3.
        loss_perType = torch.zeros(8, device=mae.device, dtype=mae.dtype)
        if logLoss:
            loss_perType[nonzeroIndex] = torch.log(mae)
            loss = torch.sum(loss_perType)/nonzeroIndex.shape[0]
        else:
            loss_perType[nonzeroIndex] = mae
            loss = torch.sum(loss_perType)/nonzeroIndex.shape[0]
            # Reported per-type values are always on the log scale.
            loss_perType[nonzeroIndex] = torch.log(loss_perType[nonzeroIndex])
        return loss,loss_perType

    def forward(self, data,IsTrain=False,typeTrain=False,logLoss=True):
        """Run the network on one batch.

        Args:
            data: Batch exposing ``x``, ``edge_index``, ``edge_attr``,
                ``edge_index3``, ``edge_attr3`` and ``edge_attr4``; also ``y``
                when ``IsTrain`` and ``type_attr`` when ``typeTrain``.
            IsTrain: If true, return the loss instead of the prediction.
            typeTrain: If true, restrict targets, predictions and type columns
                to the entries selected by ``data.type_attr``.
            logLoss: Selects log-MAE (true) or plain MAE (false) for the
                scalar loss.

        Returns:
            ``yhat`` when ``IsTrain`` is false, otherwise
            ``(loss, loss_perType)``.
        """
        out = self.lin_node(data.x)
        # edge_*3 only does not repeat for undirected graph. Hence need to add (j,i) to (i,j) in edges
        edge_index3 = torch.cat([data.edge_index3,data.edge_index3[[1,0]]],1)
        n = data.edge_attr3.shape[0]
        temp_ = self.edge2(torch.cat([data.edge_attr3,data.edge_attr4],1))
        edge_attr3 = torch.cat([temp_,temp_],0)

        edge_attr = self.edge1(data.edge_attr)
        for conv in self._stage_blocks(self.conv1, self.layer1):
            out,edge_attr = conv(out,data.edge_index,edge_attr)

        for conv in self._stage_blocks(self.conv2, self.layer2):
            out,edge_attr3 = conv(out,edge_index3,edge_attr3)

        # Drop the mirrored (j,i) half again: one embedding per original entry.
        edge_attr3 = edge_attr3[:n]
        if typeTrain:
            if IsTrain:
                y = data.y[data.type_attr]
            edge_attr3 = edge_attr3[data.type_attr]
            edge_index3 = data.edge_index3[:,data.type_attr]
            edge_attr3_old = data.edge_attr3[data.type_attr]
        else:
            if IsTrain:
                y = data.y
            edge_index3 = data.edge_index3
            edge_attr3_old = data.edge_attr3

        yhat = self.head(out,edge_index3,edge_attr3)

        if not IsTrain:
            return yhat
        return self._per_type_loss(y, yhat, edge_attr3_old, logLoss)

In [8]:
# apex-O2
# Same setup as the earlier runs, now with amp.initialize at opt_level "O2"
# and with AMP_clip=True passed to train_type.
# NOTE(review): AMP_clip presumably switches gradient clipping to the
# amp-managed parameters -- confirm against train_type.
# GNN_edgeUpdate here is the class defined in the cell above, which shadows any
# class of the same name obtained through the star imports.
model = GNN_edgeUpdate(reuse,block,head,dim,layer1,layer2,factor,**data_dict[data]).to('cuda:0')
paras = trainable_parameter(model)
opt = Adam(paras,lr=lr)
model, opt = amp.initialize(model, opt, opt_level="O2")
model,train_loss_list,val_loss_list,bestWeight = train_type(opt,model,epochs,train_dl,val_dl,paras,clip,\
                                                            UseAmp=True,AMP_clip=True)

Selected optimization level O2:  FP16 training with FP32 batchnorm and FP32 master weights.

Defaults for this optimization level are:
enabled                : True
opt_level              : O2
cast_model_type        : torch.float16
patch_torch_functions  : False
keep_batchnorm_fp32    : True
master_weights         : True
loss_scale             : dynamic
Processing user overrides (additional kwargs that are not None)...
After processing overrides, optimization options are:
enabled                : True
opt_level              : O2
cast_model_type        : torch.float16
patch_torch_functions  : False
keep_batchnorm_fp32    : True
master_weights         : True
loss_scale             : dynamic
Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0
Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0
Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0
epoch:0, train_loss: +0.386, val_loss: -0.024, 
train_vector: +