In [1]:
# Example regression script using neural fingerprints.
#
# Compares Morgan fingerprints to neural fingerprints.

import autograd.numpy as np
import autograd.numpy.random as npr

from neuralfingerprint import load_data
from neuralfingerprint import build_morgan_deep_net
from neuralfingerprint import build_conv_deep_net
from neuralfingerprint import normalize_array, adam
from neuralfingerprint import build_batched_grad
from neuralfingerprint.util import rmse

from autograd import grad

# Which CSV file to read and which of its columns is the regression target.
task_params = {
    'target_name': 'measured log solubility in mols per litre',
    'data_file': 'delaney.csv',
}

# Sizes of the three splits requested from load_data (train, validation, test).
N_train, N_val, N_test = 800, 20, 20

# Hyperparameters of the fingerprint and of the network stacked on top of it.
model_params = {
    'fp_length': 50,           # Usually neural fps need far fewer dimensions than morgan.
    'fp_depth': 4,             # The depth of the network equals the fingerprint radius.
    'conv_width': 20,          # Only the neural fps need this parameter.
    'h1_size': 100,            # Size of hidden layer of network on top of fps.
    'L2_reg': np.exp(-2),
}

# Optimisation settings consumed by train_nn.
train_params = {
    'num_iters': 100,
    'batch_size': 100,
    'init_scale': np.exp(-4),  # Multiplier applied to the random initial weights.
    'step_size': np.exp(-6),
}

# Architecture of the dense network that consumes the fingerprint vector.
# Its input width is the fingerprint length, followed by a single hidden layer.
vanilla_net_params = {
    'layer_sizes': [model_params['fp_length'], model_params['h1_size']],  # One hidden layer.
    'normalize': True,
    'L2_reg': model_params['L2_reg'],
    'nll_func': rmse,
}

def train_nn(pred_fun, loss_fun, num_weights, train_smiles, train_raw_targets, train_params, seed=0,
             validation_smiles=None, validation_raw_targets=None):
    """Fit the network weights with Adam and return a predictor in the raw target units.

    Args:
        pred_fun: callable (weights, smiles) -> predictions; its output is passed
            through the inverse of the target normalisation before being reported.
        loss_fun: callable (weights, smiles, targets); differentiated with autograd's grad.
        num_weights: length of the flat weight vector to initialise.
        train_smiles: training inputs.
        train_raw_targets: training targets in their original units; they are
            normalised with normalize_array before optimisation.
        train_params: dict providing 'init_scale', 'batch_size', 'num_iters' and 'step_size'.
        seed: seed of the RandomState used to draw the initial weights.
        validation_smiles: optional held-out inputs; when given, the validation RMSE
            is printed every time training progress is printed.
        validation_raw_targets: targets matching validation_smiles, in original units.

    Returns:
        A tuple (predict_func, trained_weights, training_curve). predict_func maps new
        inputs to predictions in the original target units, and training_curve holds
        the loss recorded at every logged iteration.

    Raises:
        ValueError: if validation_smiles is supplied without validation_raw_targets.
    """
    # Fail before training starts instead of deep inside the first logging callback.
    if validation_smiles is not None and validation_raw_targets is None:
        raise ValueError("validation_raw_targets must be provided together with validation_smiles")

    print("Total number of weights in the network: %s" % (num_weights,))
    init_weights = npr.RandomState(seed).randn(num_weights) * train_params['init_scale']

    num_print_examples = 100
    train_targets, undo_norm = normalize_array(train_raw_targets)

    # Progress is monitored on a fixed leading subset of the training data, so the
    # slices are taken once here rather than on every logging step.
    monitor_smiles = train_smiles[:num_print_examples]
    monitor_targets = train_targets[:num_print_examples]
    monitor_raw_targets = train_raw_targets[:num_print_examples]

    training_curve = []

    # The parameter names (including `iter`, which shadows the builtin) are kept
    # unchanged because the optimizer's calling convention is not visible here.
    def callback(weights, iter):
        """Log loss and RMSE every tenth iteration."""
        if iter % 10 != 0:
            return
        print("max of weights %s" % (np.max(np.abs(weights)),))
        train_preds = undo_norm(pred_fun(weights, monitor_smiles))
        cur_loss = loss_fun(weights, monitor_smiles, monitor_targets)
        training_curve.append(cur_loss)
        train_rmse = rmse(train_preds, monitor_raw_targets)
        print("Iteration %s loss %s train RMSE %s" % (iter, cur_loss, train_rmse))
        if validation_smiles is not None:
            validation_preds = undo_norm(pred_fun(weights, validation_smiles))
            validation_rmse = rmse(validation_preds, validation_raw_targets)
            print("Validation RMSE %s : %s" % (iter, validation_rmse))

    # Build gradient using autograd.
    grad_fun = grad(loss_fun)
    grad_fun_with_data = build_batched_grad(grad_fun, train_params['batch_size'],
                                            train_smiles, train_targets)

    # Optimize weights.
    trained_weights = adam(grad_fun_with_data, init_weights, callback=callback,
                           num_iters=train_params['num_iters'], step_size=train_params['step_size'])

    def predict_func(new_smiles):
        """Returns to the original units that the raw targets were in."""
        return undo_norm(pred_fun(trained_weights, new_smiles))

    return predict_func, trained_weights, training_curve



print("Loading data...")

# Request the three splits in one call; each returned split unpacks below into
# an (inputs, targets) pair.
traindata, valdata, testdata = load_data(
    task_params['data_file'],
    (N_train, N_val, N_test),
    input_name='smiles',
    target_name=task_params['target_name'])

(train_inputs, train_targets) = traindata
(val_inputs, val_targets) = valdata
(test_inputs, test_targets) = testdata


def print_performance(pred_func):
    """Print train and validation RMSE of a predictor and return the validation RMSE.

    Args:
        pred_func: callable mapping a collection of inputs to predictions that are
            compared directly with train_targets / val_targets.

    Returns:
        The RMSE of pred_func on the validation split (val_inputs, val_targets).
    """
    train_rmse = rmse(pred_func(train_inputs), train_targets)
    # This figure comes from the validation split, so it is labelled as such;
    # the held-out test split is not touched by this function.
    val_rmse = rmse(pred_func(val_inputs), val_targets)

    print("\nPerformance (RMSE) on " + task_params['target_name'] + ":")
    print("Train: %s" % (train_rmse,))
    print("Validation: %s" % (val_rmse,))
    print("-" * 80)
    return val_rmse

def run_morgan_experiment():
    """Train the Morgan-fingerprint network and return its RMSE on the test split.

    The train/validation summary is still printed through print_performance, but the
    returned value is computed on (test_inputs, test_targets) so that it is directly
    comparable with the value returned by run_conv_experiment.

    Returns:
        RMSE of the trained predictor on the test split.
    """
    loss_fun, pred_fun, net_parser = build_morgan_deep_net(
        model_params['fp_length'], model_params['fp_depth'], vanilla_net_params)
    num_weights = len(net_parser)

    # Only the predictor is needed afterwards; weights and loss curve are discarded.
    predict_func, _trained_weights, _training_curve = train_nn(
        pred_fun, loss_fun, num_weights, train_inputs, train_targets,
        train_params, validation_smiles=val_inputs, validation_raw_targets=val_targets)

    print_performance(predict_func)

    test_predictions = predict_func(test_inputs)
    return rmse(test_predictions, test_targets)

def run_conv_experiment():
    """Train the neural-fingerprint network and return its RMSE on the test split.

    The convolutional part uses fp_depth hidden layers, each conv_width wide, and is
    followed by the dense network described by vanilla_net_params.
    """
    depth = model_params['fp_depth']
    hidden_widths = [model_params['conv_width']] * depth
    arch = dict(num_hidden_features=hidden_widths,
                fp_length=model_params['fp_length'],
                normalize=1)

    loss_fun, pred_fun, conv_parser = build_conv_deep_net(
        arch, vanilla_net_params, model_params['L2_reg'])

    # Train with validation monitoring; only the predictor is used afterwards.
    predictor, _weights, _curve = train_nn(
        pred_fun, loss_fun, len(conv_parser), train_inputs, train_targets,
        train_params, validation_smiles=val_inputs, validation_raw_targets=val_targets)

    return rmse(predictor(test_inputs), test_targets)

# Show which dataset and target column this run is configured for, then a blank line.
print("Task params" + " " + str(task_params))
print("")

# Both experiments are left disabled in this notebook; the original calls are
# kept below for reference.
# print "Starting Morgan fingerprint experiment..."
# test_loss_morgan = run_morgan_experiment()
# print "Starting neural fingerprint experiment..."
# test_loss_neural = run_conv_experiment()
# print
# print "Morgan test RMSE:", test_loss_morgan, "Neural test RMSE:", test_loss_neural




Loading data...
Task params {'target_name': 'measured log solubility in mols per litre', 'data_file': 'delaney.csv'}



In [4]:
# Peek at a single training input (the second entry of the training split).
print("Training data : ")
print(train_inputs[1])

Training data : 
Cc1occc1C(=O)Nc2ccccc2


In [5]:
# Echo the hyperparameter dict so the settings in use are visible in the notebook.
model_params

{'L2_reg': 0.1353352832366127,
 'conv_width': 20,
 'fp_depth': 4,
 'fp_length': 50,
 'h1_size': 100}

## Edward inference

In [6]:
import edward as ed
import tensorflow as tf
from edward.models import Normal

In [7]:
# Fix the random seed through Edward before any model ops are created.
# NOTE(review): presumably this seeds both NumPy and TensorFlow — confirm against the Edward docs.
ed.set_seed(42)

In [8]:
H = 50      # number of hidden units
D = 10      # number of features
lr = 5e-03  # learning rate for the Adam optimizer created in the inference cell

# Small subset used for the Edward experiment: the first ten training examples.
# NOTE(review): train_inputs entries are SMILES strings (see the printed example
# above), so x_train is not numeric at this point — confirm how it should be
# featurised before it is fed to the model.
x_train = train_inputs[:10]
N = len(x_train)

# Targets as a column with one row per example.
y_train = np.reshape(train_targets[:10], [N, 1])

In [9]:
def rnn_cell(hprev, xt):
    """Single recurrent step: tanh(hprev . Wh + xt . Wx + bh).

    Wh, Wx and bh are module-level names looked up when the function is called,
    so they must exist before the cell is used.
    """
    recurrent_term = ed.dot(hprev, Wh)
    input_term = ed.dot(xt, Wx)
    return tf.tanh(recurrent_term + input_term + bh)


In [10]:
def _mean_field_normal(scope_name, shape):
    """Build one Normal variational factor inside its own name scope.

    The location and the pre-softplus scale are both trainable variables
    initialised from a standard normal; softplus keeps the scale positive.
    """
    with tf.name_scope(scope_name):
        loc = tf.Variable(tf.random_normal(shape), name="loc")
        raw_scale = tf.Variable(tf.random_normal(shape), name="scale")
        return Normal(loc=loc, scale=tf.nn.softplus(raw_scale))


# Priors: standard normal over every weight matrix and bias of the RNN.
# The scope is captured so the likelihood below can be placed in the same scope.
with tf.name_scope("model") as model_scope:
    Wh = Normal(loc=tf.zeros([H, H]), scale=tf.ones([H, H]), name="Wh")
    Wx = Normal(loc=tf.zeros([D, H]), scale=tf.ones([D, H]), name="Wx")
    Wy = Normal(loc=tf.zeros([H, 1]), scale=tf.ones([H, 1]), name="Wy")
    bh = Normal(loc=tf.zeros(H), scale=tf.ones(H), name="bh")
    by = Normal(loc=tf.zeros(1), scale=tf.ones(1), name="by")

# Variational family: one independent Normal factor per latent variable, created
# in the same order as before so the graph-level seed yields the same initial values.
with tf.name_scope("posterior"):
    qWh = _mean_field_normal("qWh", [H, H])
    qWx = _mean_field_normal("qWx", [D, H])
    qWy = _mean_field_normal("qWy", [H, 1])
    qbh = _mean_field_normal("qbh", [H])
    qby = _mean_field_normal("qby", [1])

# Inputs, recurrent states and likelihood belong to the model, not the posterior.
# Passing the captured scope string re-enters "model/" instead of creating "model_1/".
with tf.name_scope(model_scope):
    # NOTE(review): Wx has D rows, so the second dimension is presumably meant
    # to be D rather than None — confirm before tightening the shape.
    x = tf.placeholder(tf.float32, [N, None])
    # tf.scan applies rnn_cell along the first dimension of x, starting from a zero state.
    h = tf.scan(rnn_cell, x, initializer=tf.zeros(H))
    y = Normal(loc=tf.matmul(h, Wy) + by, scale=1.0)

In [11]:
# Pair every latent variable with its variational factor and bind the observed data.
# NOTE(review): x_train is a slice of train_inputs, whose entries are SMILES strings,
# while x is a float32 placeholder — presumably the inputs need to be converted to
# numeric features first; confirm.
# NOTE(review): the recorded output of this cell ends in
# "AttributeError: 'NoneType' object has no attribute 'outer_context'". Passing the
# optimizer below is not expected to resolve that; presumably it relates to the
# tf.scan loop in the model — confirm.
inference = ed.KLqp({Wh: qWh, bh: qbh,
                     Wx: qWx,
                     Wy: qWy, by: qby}, data={x: x_train, y: y_train})

# Adam with the learning rate `lr` chosen in the setup cell.
optimizer = tf.train.AdamOptimizer(learning_rate=lr)

# The optimizer has to be handed to run(); previously it was built but never
# used, so `lr` had no effect on training.
inference.run(optimizer=optimizer, logdir='log')

  not np.issubdtype(value.dtype, np.float) and \


AttributeError: 'NoneType' object has no attribute 'outer_context'