In [1]:
import edward as ed
import numpy as np
import tensorflow as tf

from edward.models import Normal

In [2]:
import autograd.numpy as np
import autograd.numpy.random as npr

from neuralfingerprint import load_data

from autograd import grad

task_params = {'target_name' : 'measured log solubility in mols per litre',
               'data_file'   : 'delaney.csv'}
N_train = 800
N_val   = 20
N_test  = 20

model_params = dict(fp_length=50,    # Usually neural fps need far fewer dimensions than morgan.
                    fp_depth=4,      # The depth of the network equals the fingerprint radius.
                    conv_width=20,   # Only the neural fps need this parameter.
                    h1_size=100,     # Size of hidden layer of network on top of fps.
                    L2_reg=np.exp(-2))

print "Loading data..."
traindata, valdata, testdata = load_data(
    task_params['data_file'], (N_train, N_val, N_test),
    input_name='smiles', target_name=task_params['target_name'])
train_inputs, train_targets = traindata
val_inputs,   val_targets   = valdata
test_inputs,  test_targets  = testdata

print "Task params", task_params
print
#     print "Starting Morgan fingerprint experiment..."
#     test_loss_morgan = run_morgan_experiment()
#     print "Starting neural fingerprint experiment..."
#     test_loss_neural = run_conv_experiment()
#     print
#print "Morgan test RMSE:", test_loss_morgan, "Neural test RMSE:", test_loss_neural



Loading data...
Task params {'target_name': 'measured log solubility in mols per litre', 'data_file': 'delaney.csv'}



In [3]:
from edward.models import *
from edward.util import Progbar
import edward as ed
import numpy as np
import tensorflow as tf
from keras.layers import Embedding, Dense

H = 5
D = 2
V = 10
E = 2
batch_size = 10
M = 10
nb_steps = 5

X_train = np.zeros([10000, nb_steps ], dtype=np.int32)
y_train = np.zeros([10000, 1 ], dtype=np.int32)

Using TensorFlow backend.


In [4]:
with tf.variable_scope('model', reuse=True):

    ##### params ######
    W = tf.Variable(tf.random_normal([3, H, H]), name='W', dtype=tf.float32)
    U = tf.Variable(tf.random_normal([3, D, H]), name='U', dtype=tf.float32)
    b = tf.Variable(tf.random_normal([H]),       name='b', dtype=tf.float32)
    ####################
    
    y_ph = tf.placeholder(tf.int32, [batch_size, 1], name='y_ph')

    x = tf.placeholder(tf.int32, [batch_size, nb_steps ], name='x')     

    def gru_cell(hprev, xt):
        #  update gate
        z = tf.sigmoid(tf.matmul(xt,U[0]) + tf.matmul(hprev,W[0]) )
        #  reset gate
        r = tf.sigmoid(tf.matmul(xt,U[1]) + tf.matmul(hprev,W[1]) )
        #  intermediate
        h = tf.tanh(tf.matmul(xt,U[2])    + tf.matmul( (r*hprev),W[2]) )
        # new state
        return (1-z)*h + (z*hprev)
      
    # embed -->
    x_ = Embedding(V, D, name='Embedding')(x)
    
    # initial state of RNN
    h = tf.zeros(shape=(batch_size, H)) # initial state

    for t in range(nb_steps-1):
        h = gru_cell(h, x_[:,t,:])

    ############# Varitational Inference #########################    
    qz = Normal(loc=Dense(E, name='Dense_qz_mean')(h),
               scale=Dense(E, name='Dense_qz_scale',activation='softplus')(h))
        
    z = Normal(loc=tf.zeros([M, E]), scale=tf.ones([M, E]))    
    #######################################################   
    
    y = Dense(1, activation='sigmoid', name='Dense_y')(z.value())
    y = Bernoulli(y)

inference = ed.KLqp({z: qz}, data={y: y_ph})


In [25]:
### Run -->
optimizer = tf.train.RMSPropOptimizer(0.01, epsilon=1.0)
inference.initialize(optimizer=optimizer,scale={y: len(X_train) / batch_size}) # always redefine inference before
sess = ed.get_session()
tf.global_variables_initializer().run()

n_epoch = 100
n_iter_per_epoch = 100

for epoch in range(n_epoch):
  avg_loss = 0.0

  pbar = Progbar(n_iter_per_epoch)
  for t in range(1, n_iter_per_epoch + 1):
    pbar.update(t)   
    batch = np.random.randint(0, len(X_train)-1, batch_size)
    info_dict = inference.update({x: X_train[batch], y_ph: y_train[batch]})
    avg_loss  += info_dict['loss']
    
  # Print a lower bound to the average marginal likelihood for an
  # image.
  avg_loss = avg_loss / n_iter_per_epoch
  avg_loss = avg_loss / batch_size
  print("log p(x) >= {:0.3f}".format(avg_loss))

100/100 [100%] ██████████████████████████████ Elapsed: 0s
log p(x) >= 724.890
100/100 [100%] ██████████████████████████████ Elapsed: 0s
log p(x) >= 698.066
100/100 [100%] ██████████████████████████████ Elapsed: 0s
log p(x) >= 696.108
100/100 [100%] ██████████████████████████████ Elapsed: 0s
log p(x) >= 695.783
100/100 [100%] ██████████████████████████████ Elapsed: 0s
log p(x) >= 695.217
100/100 [100%] ██████████████████████████████ Elapsed: 0s
log p(x) >= 694.988
100/100 [100%] ██████████████████████████████ Elapsed: 0s
log p(x) >= 694.796
100/100 [100%] ██████████████████████████████ Elapsed: 0s
log p(x) >= 694.669
100/100 [100%] ██████████████████████████████ Elapsed: 0s
log p(x) >= 694.556
100/100 [100%] ██████████████████████████████ Elapsed: 0s 
log p(x) >= 694.401
100/100 [100%] ██████████████████████████████ Elapsed: 0s
log p(x) >= 694.338
100/100 [100%] ██████████████████████████████ Elapsed: 0s
log p(x) >= 694.319
100/100 [100%] ██████████████████████████████ Elapsed: 0s
log p

KeyboardInterrupt: 

In [10]:
x_test = np.ones([10, nb_steps ])


In [24]:
y_post = ed.copy(y, {z: qz})
sess.run(y_post, {x: x_test})

array([[1],
       [1],
       [0],
       [1],
       [1],
       [0],
       [1],
       [0],
       [1],
       [1]], dtype=int32)

In [24]:
dd = y.sample()
print(dd.eval())

[[1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]]
