In [1]:
import os
import sys
import random
os.environ["KERAS_BACKEND"] = "tensorflow"

import glob
try:
    if not ("CUDA_VISIBLE_DEVICES" in os.environ):
        os.environ['CUDA_VISIBLE_DEVICES']='0'
        print("importing setGPU")
        import setGPU
except:
    print("Could not import setGPU, please make sure you configure CUDA_VISIBLE_DEVICES manually")
    pass

import pickle
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix, accuracy_score
import pandas
import time
from tqdm import tqdm
import itertools
import io
import sklearn
import sklearn.cluster
import tensorflow as tf
from numpy.lib.recfunctions import append_fields

import scipy
import scipy.special

from mpnn import MessagePassing, ReadoutGraph, Aggregation

importing setGPU
Could not import setGPU, please make sure you configure CUDA_VISIBLE_DEVICES manually


In [2]:
import tensorflow as tf
import json, os
import numpy as np

# Tested with TensorFlow 2.1.0
print('version={}, CUDA={}, GPU={}, TPU={}'.format(
    tf.__version__, tf.test.is_built_with_cuda(),
    # GPU attached?
    len(tf.config.list_physical_devices('GPU')) > 0,
    # TPU accessible? (only works on Colab)
    'COLAB_TPU_ADDR' in os.environ))

version=2.1.0, CUDA=True, GPU=True, TPU=False


In [3]:
try:
    num_gpus = len(os.environ["CUDA_VISIBLE_DEVICES"].split(","))
    print("num_gpus=", num_gpus)
    if num_gpus > 1:
        strategy = tf.distribute.MirroredStrategy()
    else:
        strategy = tf.distribute.OneDeviceStrategy("gpu:0")
except Exception as e:
    print(e)
    print("fallback to CPU")
    strategy = tf.distribute.OneDeviceStrategy("cpu")

num_gpus= 1


In [4]:
#tf.config.experimental_run_functions_eagerly(True)

In [5]:
def dist(A,B):
    na = tf.reduce_sum(tf.square(A), -1)
    nb = tf.reduce_sum(tf.square(B), -1)
 
    na = tf.reshape(na, [tf.shape(na)[0], -1, 1])
    nb = tf.reshape(nb, [tf.shape(na)[0], 1, -1])
    Dsq = tf.clip_by_value(na - 2*tf.linalg.matmul(A, B, transpose_a=False, transpose_b=True) + nb, 1e-12, 1e12)
    D = tf.sqrt(Dsq)
    return D


In [6]:
#Given a list of [Nbatch, Nelem, Nfeat] input nodes, computes the dense [Nbatch, Nelem, Nelem] adjacency matrices
class Distance(tf.keras.layers.Layer):

    def __init__(self, dist_shape, *args, **kwargs):
        super(Distance, self).__init__(*args, **kwargs)

    def call(self, inputs1, inputs2):
        #compute the pairwise distance matrix between the vectors defined by the first two components of the input array
        #inputs1, inputs2: [Nbatch, Nelem, distance_dim] embedded coordinates used for element-to-element distance calculation
        D = dist(inputs1, inputs2)
      
        #adjacency between two elements should be high if the distance is small.
        #this is equivalent to radial basis functions. 
        #self-loops adj_{i,i}=1 are included, as D_{i,i}=0 by construction
        adj = tf.math.exp(-1.0*D)

        #optionally set the adjacency matrix to 0 for low values in order to make the matrix sparse.
        #need to test if this improves the result.
        #adj = tf.keras.activations.relu(adj, threshold=0.01)

        return adj

In [7]:
class InputEncoding(tf.keras.layers.Layer):
    def __init__(self, num_input_classes):
        super(InputEncoding, self).__init__()
        self.num_input_classes = num_input_classes
        
    def call(self, X):
        #X: [Nbatch, Nelem, Nfeat] array of all the input detector element feature data

        #X[:, :, 0] - categorical index of the element type
        Xid = tf.one_hot(tf.cast(X[:, :, 0], tf.int32), self.num_input_classes)

        #X[:, :, 1:] - all the other non-categorical features
        Xprop = X[:, :, 1:]
        return tf.concat([Xid, Xprop], axis=-1)

In [8]:
## Graph Highway network
# https://arxiv.org/pdf/2004.04635.pdf
#https://github.com/gcucurull/jax-ghnet/blob/master/models.py 
class GHConv(tf.keras.layers.Layer):
    def __init__(self, k, *args, **kwargs):
        self.activation = kwargs.pop("activation")
        self.hidden_dim = args[0]
        self.k = k

        super(GHConv, self).__init__(*args, **kwargs)

        self.W_t = self.add_weight(shape=(self.hidden_dim, self.hidden_dim), name="w_t", initializer="random_normal")
        self.b_t = self.add_weight(shape=(self.hidden_dim, ), name="b_t", initializer="zeros")
        self.W_h = self.add_weight(shape=(self.hidden_dim, self.hidden_dim), name="w_h", initializer="random_normal")
        self.theta = self.add_weight(shape=(self.hidden_dim, self.hidden_dim), name="theta", initializer="random_normal")
 
    def call(self, x, adj):
        #compute the normalization of the adjacency matrix
        in_degrees = tf.reduce_sum(adj, axis=-1)
        #add epsilon to prevent numerical issues from 1/sqrt(x)
        norm = tf.expand_dims(tf.pow(in_degrees + 1e-6, -0.5), -1)
        norm_k = tf.pow(norm, self.k)
        adj_k = tf.pow(adj, self.k)

        f_hom = tf.linalg.matmul(x, self.theta)
        f_hom = tf.linalg.matmul(adj_k, f_hom*norm_k)*norm_k

        f_het = tf.linalg.matmul(x, self.W_h)
        gate = tf.nn.sigmoid(tf.linalg.matmul(x, self.W_t) + self.b_t)
        #tf.print(tf.reduce_mean(f_hom), tf.reduce_mean(f_het), tf.reduce_mean(gate))

        out = gate*f_hom + (1-gate)*f_het
        return out

## Simple Graph Conv layer
class SGConv(tf.keras.layers.Dense):
    def __init__(self, k, *args, **kwargs):
        super(SGConv, self).__init__(*args, **kwargs)
        self.k = k
    
    def call(self, inputs, adj):
        W = self.weights[0]
        b = self.weights[1]

        #compute the normalization of the adjacency matrix
        in_degrees = tf.reduce_sum(adj, axis=-1)
        #add epsilon to prevent numerical issues from 1/sqrt(x)
        norm = tf.expand_dims(tf.pow(in_degrees + 1e-6, -0.5), -1)
        norm_k = tf.pow(norm, self.k)

        support = (tf.linalg.matmul(inputs, W))
     
        #k-th power of the normalized adjacency matrix is nearly equivalent to k consecutive GCN layers
        adj_k = tf.pow(adj, self.k)
        out = tf.linalg.matmul(adj_k, support*norm_k)*norm_k

        return self.activation(out + b)

In [9]:
#Simple message passing based on a matrix multiplication
class DNNSuperCluster(tf.keras.Model):
    
    def __init__(self, activation=tf.nn.selu, nclass_labels=2, hidden_dim=256, distance_dim=256, num_conv=4, convlayer="ghconv", dropout=0.1):
        super(DNNSuperCluster, self).__init__()
        self.activation = activation
        self.nclass_labels = nclass_labels

        #self.enc = InputEncoding(3)
        
        # layers for distance coordinate extraction
        self.layer_distcoords1 = tf.keras.layers.Dense(hidden_dim, activation=activation, name="distcoords1")
        self.layer_distcoords2 = tf.keras.layers.Dense(hidden_dim, activation=activation, name="distcoords2")
        #self.layer_distcoords3 = tf.keras.layers.Dense(hidden_dim, activation=activation, name="distcoords3")
        self.layer_distcoords = tf.keras.layers.Dense(distance_dim, activation="linear", name="distcoords")

        # layers for feature extraction 
        self.layer_input1 = tf.keras.layers.Dense(hidden_dim, activation=activation, name="input1")
        self.layer_input1_do = tf.keras.layers.Dropout(dropout)
        self.layer_input2 = tf.keras.layers.Dense(hidden_dim, activation=activation, name="input2")
        self.layer_input2_do = tf.keras.layers.Dropout(dropout)
        self.layer_input3 = tf.keras.layers.Dense(2*hidden_dim, activation=activation, name="input3")
        self.layer_input3_do = tf.keras.layers.Dropout(dropout)
        
        self.layer_dist = Distance(distance_dim, name="distance")

        # Graph convolutions
        if convlayer == "sgconv":
            self.layer_conv1 = SGConv(num_conv, 2*hidden_dim, activation=activation, name="conv1")
            #self.layer_conv2 = SGConv(num_conv, 2*hidden_dim+len(class_labels), activation=activation, name="conv2")
        elif convlayer == "ghconv":
            self.layer_conv1 = GHConv(num_conv, 2*hidden_dim, activation=activation, name="conv1")
            #self.layer_conv2 = GHConv(num_conv, 2*hidden_dim+len(class_labels), activation=activation, name="conv2")

        # Output layers
        self.layer_id1 = tf.keras.layers.Dense(2*hidden_dim, activation=activation, name="id1")
        self.layer_id2 = tf.keras.layers.Dense(hidden_dim, activation=activation, name="id2")
        #self.layer_id3 = tf.keras.layers.Dense(hidden_dim, activation=activation, name="id3")
        self.layer_id = tf.keras.layers.Dense(nclass_labels, activation="linear", name="out_id")
        #self.layer_charge = tf.keras.layers.Dense(1, activation="linear", name="out_charge")
        
        #self.layer_momentum1 = tf.keras.layers.Dense(2*hidden_dim, activation=activation, name="momentum1")
        #self.layer_momentum2 = tf.keras.layers.Dense(hidden_dim, activation=activation, name="momentum2")
        #self.layer_momentum3 = tf.keras.layers.Dense(hidden_dim, activation=activation, name="momentum3")
        #self.layer_momentum = tf.keras.layers.Dense(3, activation="linear", name="out_momentum")
 
    def predict_distancematrix(self, inputs, training=True):

        x = self.layer_distcoords1(inputs)
        x = self.layer_distcoords2(x)
        #x = self.layer_distcoords3(x)
        distcoords = self.layer_distcoords(x)

        dm = self.layer_dist(distcoords, distcoords)
        
        # masking if the first element is -1
        msk_elem = tf.expand_dims(tf.cast(inputs[:, :, 0] != -1, dtype=tf.float32), -1)
        dm = dm*msk_elem

        return dm

    #@tf.function(input_signature=[tf.TensorSpec(shape=[None, 15], dtype=tf.float32)])
    def call(self, inputs, training=True):
        X = inputs
        msk_input = tf.expand_dims(tf.cast(X[:, :, 0] != -1, tf.float32), -1)

        dm = self.predict_distancematrix(X, training=training)

        x = self.layer_input1(X)
        x = self.layer_input1_do(x, training)
        x = self.layer_input2(x)
        x = self.layer_input2_do(x, training)
        x = self.layer_input3(x)
        x = self.layer_input3_do(x, training)
        x = self.layer_conv1(x, dm)
        x = self.layer_id1(x)
        x = self.layer_id2(x)
        #x = self.layer_id3(x)
        out_id_logits = self.layer_id(x)
        
        energies = tf.expand_dims(X[:,:,3], axis=-1)
        # add the cluster energies in the output, in the future we can add here corrections
        output = tf.concat([out_id_logits,energies], axis=-1)
        # mask to 0 the padded output
        output_masked = output * msk_input
        
        #return masked output logits and the predicted total energy
        return output_masked

# Loss definition

In [10]:
def separate_true(y, nclass_labels):
    # one-hot encoding for true label (signal,PU,noise)
    # the padded elements have -1 so they are one_hot to (0,0)
    y_onehot = tf.one_hot(tf.cast(y[:,1:], tf.int32), nclass_labels)
    true_en = y[:,0]
    return y_onehot, true_en

def true_mask(y):
    # mask for elements that should be included in supercluster
    in_sc = tf.cast(y[:,1:] == 1., tf.float32)
    # number of padding elements
    padded = tf.reduce_sum(tf.cast(y[:,1:] == -1., tf.float32), axis=-1)
    return in_sc, padded
    
    
def separate_pred(ypred):
    ens = ypred[:,:,2]
    ypred_onehot = ypred[:,:,:2]
    # 0 not include in energy sum, 1 include in energy sum
    # masked elements have pred_id=0 so they do not enter in the energy sum
    pred_id = tf.cast(tf.argmax(ypred_onehot, axis=-1), tf.float32)
    # predicted total energy
    pred_en =  tf.reduce_sum( ens * pred_id, axis=-1)
    # one-hot encoding for true label (signal,PU,noise)
    return ypred_onehot, pred_en, pred_id
    

In [11]:
def mse_unreduced(true, pred):
    return tf.math.pow(true-pred,2)

def msle_unreduced(true, pred):
    return tf.math.pow(tf.math.log(tf.math.abs(true) + 1.0) - tf.math.log(tf.math.abs(pred) + 1.0), 2)


#@tf.function
def my_loss_full(y_true, y_pred):
    y_true_onehot, true_en = separate_true(y_true, args.nclass_labels)
    y_pred_onehot, pred_en, pred_id = separate_pred(y_pred)
    # since the padded y_true is -1 -> it gives [0,0] when it is onehot. The ypred for batched is [0,0] so the loss
    # is automatically 0 for padded samples
    l1 = tf.nn.softmax_cross_entropy_with_logits(y_true_onehot, y_pred_onehot)
    
    # true energy loss
    mask_outsc = tf.cast(true_en == 0., tf.float32)
    mask_insc = tf.cast(true_en != 0., tf.float32)
    n_outsc = tf.reduce_sum(mask_outsc)
    n_insc = tf.reduce_sum(mask_insc)
    
    l2_en = mse_unreduced(true_en, pred_en)
    
    # separate mean resolution for windows with Caloparticle or not
    l2_en_outsc = tf.reduce_sum(l2_en * mask_outsc) / n_outsc
    l2_en_insc = tf.reduce_sum(l2_en * mask_insc) / n_insc
    
    ltot = 1e4*tf.reduce_mean(l1) + 20* l2_en_insc + 10* l2_en_outsc
    
    return ltot


In [12]:
def energy_resolution_outsc(y_true, y_pred):
    y_true_onehot, true_en = separate_true(y_true, args.nclass_labels)
    y_pred_onehot, pred_en, pred_id = separate_pred(y_pred)
    mask_outsc = tf.cast(true_en == 0., tf.float32)
    n_outsc = tf.reduce_sum(mask_outsc)
    
    return tf.reduce_sum(mse_unreduced(true_en, pred_en)*mask_outsc) / n_outsc

def energy_resolution_insc(y_true, y_pred):
    y_true_onehot, true_en = separate_true(y_true, args.nclass_labels)
    y_pred_onehot, pred_en, pred_id = separate_pred(y_pred)
    mask_insc = tf.cast(true_en != 0., tf.float32)
    n_insc = tf.reduce_sum(mask_insc)
    
    return tf.reduce_sum(mse_unreduced(true_en, pred_en)*mask_insc) / n_insc

In [13]:
def get_tpfn_metrics(y_true, y_pred):
    y_true_mask, n_padded = true_mask(y_true)
    y_false_mask = (tf.cast(y_true_mask == 0., tf.float32))
    
    # pred_id contains the last n_padded elements to 0 that will be always True negatives
    y_pred_onehot, pred_en, pred_id = separate_pred(y_pred)
    
    n_pos = tf.reduce_sum(y_true_mask, axis=-1)
    n_neg = tf.reduce_sum(y_false_mask, axis=-1) - n_padded
    
    n_tot = n_neg + n_pos
    
    true_pos = tf.reduce_sum(pred_id * y_true_mask, axis=-1)
    false_neg = n_pos - true_pos
    
    false_pos = tf.reduce_sum(pred_id * y_false_mask, axis=-1)
    true_neg = n_neg - false_pos
    
    return n_tot, true_pos, false_neg, false_pos, true_neg, 

In [14]:
def precision(tp,tn,fp,fn):
    return tp/(tp+fp)

def recall(tp,tn,fp,fn):
    return tp/(tp+fn)

def accuracy(tp,tn,fp,fn):
    return (tp+tn)/(tp+tn+fp+fn)

In [15]:
class Precision(tf.keras.metrics.Metric):

    def __init__(self, name='precision', **kwargs):
        super(Precision, self).__init__(name=name, **kwargs)
        self.tp = self.add_weight(name='tp', initializer='zeros')
        self.fp = self.add_weight(name='fp', initializer='zeros')

    def update_state(self, y_true, y_pred, sample_weight=None):
        n_tot, true_pos, false_neg, false_pos, true_neg = get_tpfn_metrics(y_true, y_pred)
        self.tp.assign_add(tf.reduce_sum(true_pos))
        self.fp.assign_add(tf.reduce_sum(false_pos))

    def result(self):
        return self.tp / (self.tp + self.fp)

    def reset_states(self):
        self.tp.assign(0)
        self.fp.assign(0)
        
class Recall(tf.keras.metrics.Metric):

    def __init__(self, name='recall', **kwargs):
        super(Recall, self).__init__(name=name, **kwargs)
        self.tp = self.add_weight(name='tp', initializer='zeros')
        self.fn = self.add_weight(name='fn', initializer='zeros')

    def update_state(self, y_true, y_pred, sample_weight=None):
        n_tot, true_pos, false_neg, false_pos, true_neg = get_tpfn_metrics(y_true, y_pred)
        self.tp.assign_add(tf.reduce_sum(true_pos))
        self.fn.assign_add(tf.reduce_sum(false_neg))

    def result(self):
        return self.tp / (self.tp + self.fn)

    def reset_states(self):
        self.tp.assign(0)
        self.fn.assign(0)


### Settings

In [16]:
data_path = "/storage/ECAL/training_data/window_data/electrons/recordio_v3"
models_path = "/storage/ECAL/deepcluster/models/gcn_models_v5/"

#rain_steps_per_epoch = 
#eval_steps_per_epoch = 3e5 // batch_size
from collections import namedtuple
Args = namedtuple('args', [ 'models_path', 'load','nepochs','ntrain','nval','nfeatures',
                            'n_seed_features','batch_size','lr_decay','lr',
                            'nhidden','distance_dim','num_conv','dropout','convlayer',
                           'nclass_labels', 'opt'])

args = Args( 
        models_path = models_path,
        load = False,
        nepochs = 300,
        ntrain = 700000,
        nval = 100000,
        batch_size = 65,
        nfeatures = 13,
        n_seed_features = 13,
        lr_decay = 0.96,
        lr = 0.001,
        nhidden = 128,
        distance_dim = 128,
        num_conv = 2,
        dropout = 0.1,
        convlayer = 'ghconv',
        nclass_labels=2,
        opt='adam'
        )

### Dataset loading

In [17]:
def _parse_tfr_element(element):
    parse_dic = {
        'X':      tf.io.FixedLenFeature([], tf.string),
        'X_seed': tf.io.FixedLenFeature([], tf.string),
        'y':      tf.io.FixedLenFeature([], tf.string),
        'n_clusters': tf.io.FixedLenFeature([], tf.int64)
    }
    example_message = tf.io.parse_single_example(element, parse_dic)

    X = example_message['X']
    X_seed = example_message['X_seed']
    y = example_message['y']
    nclusters = example_message['n_clusters']
    
    arr_X = tf.io.parse_tensor(X, out_type=tf.float32)
    arr_X_seed = tf.io.parse_tensor(X_seed, out_type=tf.float32)
    arr_y = tf.io.parse_tensor(y, out_type=tf.float32)
    
    #https://github.com/tensorflow/tensorflow/issues/24520#issuecomment-577325475
    arr_X.set_shape(     tf.TensorShape((None, args.nfeatures)))
    arr_X_seed.set_shape(tf.TensorShape((1, args.n_seed_features)))
    arr_y.set_shape(     tf.TensorShape((None,)))
 
    return arr_X, arr_X_seed, nclusters, arr_y
  
def _stack_seed_features(arr_X, arr_X_seed, nclusters, arr_y):
    X = tf.concat([arr_X,tf.broadcast_to(arr_X_seed,[nclusters,arr_X_seed.shape[1]] )],
                  axis=1)
    return X,arr_y

In [18]:
# padding shape
ps = ([None,args.nfeatures+args.n_seed_features],[None,])

# Create datasets from TFRecord files.
dataset = tf.data.TFRecordDataset(tf.io.gfile.glob('{}/training-*'.format(data_path)))
dataset = dataset.map(_parse_tfr_element,num_parallel_calls=tf.data.experimental.AUTOTUNE)
dataset = dataset.map(_stack_seed_features,num_parallel_calls=tf.data.experimental.AUTOTUNE) # deterministic=False in TFv2.3
dataset = dataset.shuffle(10000, reshuffle_each_iteration=True)

ds_train = dataset.take(args.ntrain).padded_batch(args.batch_size, padded_shapes=ps, drop_remainder=True,padding_values=(-1.,-1.))
ds_test = dataset.skip(args.ntrain).take(args.nval).padded_batch(args.batch_size, padded_shapes=ps, drop_remainder=True,padding_values=(-1.,-1.))

ds_train_r = ds_train.repeat(args.nepochs)
ds_test_r = ds_test.repeat(args.nepochs)


In [19]:
idata = iter(ds_train_r)
d = next(idata)

In [20]:
d[1]

<tf.Tensor: shape=(65, 20), dtype=float32, numpy=
array([[ 86.267654,   1.      ,   1.      , ...,  -1.      ,  -1.      ,
         -1.      ],
       [  0.      ,   0.      ,  -1.      , ...,  -1.      ,  -1.      ,
         -1.      ],
       [  0.      ,   0.      ,   0.      , ...,  -1.      ,  -1.      ,
         -1.      ],
       ...,
       [  0.      ,   0.      ,   0.      , ...,  -1.      ,  -1.      ,
         -1.      ],
       [105.83477 ,   1.      ,   1.      , ...,  -1.      ,  -1.      ,
         -1.      ],
       [ 77.395004,   1.      ,   0.      , ...,  -1.      ,  -1.      ,
         -1.      ]], dtype=float32)>

In [21]:
def get_unique_run():
    previous_runs = os.listdir(args.models_path)
    if len(previous_runs) == 0:
        run_number = 1
    else:
        run_number = max([int(s.split('run_')[1]) for s in previous_runs]) + 1
    return run_number

In [22]:
if args.lr_decay > 0:
        lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
            args.lr,
            decay_steps=3*int(args.ntrain//args.batch_size),
            decay_rate=args.lr_decay
        )
else:
    lr_schedule = args.lr


In [23]:
with strategy.scope():
    opt = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
    
    model = DNNSuperCluster(hidden_dim=args.nhidden, nclass_labels=args.nclass_labels, distance_dim=args.distance_dim, 
                            num_conv=args.num_conv, convlayer=args.convlayer, dropout=args.dropout)
   

In [33]:
if not os.path.isdir(args.models_path):
    os.makedirs(args.models_path)

name =  'run_{:02}'.format(get_unique_run())

outdir = args.models_path + name

if os.path.isdir(outdir):
    print("Output directory exists: {}".format(outdir), file=sys.stderr)

print(outdir)

with open(outdir + "/args.txt",'w') as config:
    config.write(str(args))
    


/storage/ECAL/deepcluster/models/gcn_models_v5/run_21


In [25]:
callbacks = []
tb = tf.keras.callbacks.TensorBoard(
    log_dir=outdir, histogram_freq=5, 
    write_graph=True, 
    write_images=True,
    update_freq='epoch',
    embeddings_freq = 3 ,
    #profile_batch=(10,90),
    profile_batch=0,
)
tb.set_model(model)
callbacks += [tb]

terminate_cb = tf.keras.callbacks.TerminateOnNaN()
callbacks += [terminate_cb]

cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=outdir + "/weights.{epoch:02d}-{val_loss:.6f}.hdf5",
    save_weights_only=True,
    verbose=0
)
cp_callback.set_model(model)
callbacks += [cp_callback]

loss_fn = my_loss_full



In [26]:
with strategy.scope():
    model.compile(optimizer=args.opt, loss=loss_fn,
        metrics=[Precision(),Recall(), energy_resolution_insc,energy_resolution_outsc])

    for X, y in ds_train:
        ypred = model(X)
        l = loss_fn(y, ypred)
        break

    

In [27]:
yoh,true_en = separate_true(y, args.nclass_labels)

In [28]:
model.summary()

Model: "dnn_super_cluster"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
distcoords1 (Dense)          multiple                  3456      
_________________________________________________________________
distcoords2 (Dense)          multiple                  16512     
_________________________________________________________________
distcoords (Dense)           multiple                  16512     
_________________________________________________________________
input1 (Dense)               multiple                  3456      
_________________________________________________________________
dropout (Dropout)            multiple                  0         
_________________________________________________________________
input2 (Dense)               multiple                  16512     
_________________________________________________________________
dropout_1 (Dropout)          multiple            

In [29]:
if args.load:
    #ensure model input size is known
    for X, y in ds_train:
        model(X)
        break

    model.load_weights(args.load)
if args.nepochs > 0:
    ret = model.fit(ds_train_r,
        validation_data=ds_test_r, epochs=args.nepochs,
        steps_per_epoch=args.ntrain//args.batch_size, validation_steps=args.nval//args.batch_size,
        verbose=True,
        callbacks=callbacks
    )

Train for 10769 steps, validate for 1538 steps
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
 2482/10769 [=====>........................] - ETA: 3:02 - loss: 4521.1873 - precision: 0.8272 - recall: 0.8717 - energy_resolution_insc: 23.0212 - energy_resolution_outsc: 20.0968

KeyError: 'val_loss'

In [31]:
str(args)

"args(models_path='/storage/ECAL/deepcluster/models/gcn_models_v5/', load=False, nepochs=300, ntrain=700000, nval=100000, nfeatures=13, n_seed_features=13, batch_size=65, lr_decay=0.96, lr=0.001, nhidden=128, distance_dim=128, num_conv=2, dropout=0.1, convlayer='ghconv', nclass_labels=2, opt='adam')"