In [1]:
import os
import urllib
import urllib.request
from dataclasses import dataclass

# select the GPU before TensorFlow (or anything that imports it) is loaded
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

import kerastuner as kt
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

np.random.seed(0)

2021-10-28 16:11:42.022088: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-9.0/lib64
2021-10-28 16:11:42.022115: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
  import kerastuner as kt


In [2]:
# Fetch the solubility dataset only when no local copy exists, so that
# "Restart & Run All" does not hit the network every time. Delete
# `solubility.npz` to force a fresh download.
data_url = "https://github.com/whitead/dmol-book/raw/master/data/solubility.npz"
data_path = "solubility.npz"
if not os.path.exists(data_path):
    # download under a temporary name and rename afterwards, so an
    # interrupted transfer is never mistaken for a complete file
    tmp_path = data_path + ".part"
    urllib.request.urlretrieve(data_url, tmp_path)
    os.replace(tmp_path, data_path)
with np.load(data_path) as r:
    pos_data, neg_data = r["positives"], r["negatives"]

# create labels (1 for each positive example, 0 for each negative one) and
# stitch it all into one tensor; rows are ordered positives first, which is
# the same order used for `features` below
labels = np.concatenate(
    (
        np.ones((pos_data.shape[0], 1), dtype=pos_data.dtype),
        np.zeros((neg_data.shape[0], 1), dtype=pos_data.dtype),
    ),
    axis=0,
)
features = np.concatenate((pos_data, neg_data), axis=0)

In [57]:
@dataclass
class Config:
    """Settings shared by the data pipeline and the model cells.

    The generated ``__init__`` accepts the fields positionally in the order
    they are declared here, so the order is part of the interface.
    """

    # number of distinct tokens; passed to the embedding layer as input_dim
    vocab_size: int
    # total number of labelled examples
    example_number: int
    # examples per batch for the train/val/test datasets
    batch_size: int
    # size of the shuffle buffer applied to the training dataset
    buffer_size: int
    # number of units in the recurrent (GRU) layer
    rnn_units: int
    # width of the first dense hidden layer
    hidden_dim: int
    # dimensionality of the learned token embeddings
    embedding_dim: int
    # L2 penalty coefficient applied to the dense kernels
    reg_strength: float
    # (initial) learning rate handed to the optimizer schedule
    lr: float
        
# settings used for the baseline model and the dataset pipeline below
config = Config(
    vocab_size=21,  # include gap
    example_number=len(labels),
    batch_size=16,
    buffer_size=10000,
    rnn_units=16,
    hidden_dim=32,
    embedding_dim=8,
    reg_strength=1e-4,
    lr=1e-4,
)

In [58]:
# Shuffle the raw arrays once, before the TF dataset is built, so that the
# contiguous test/val/train slices taken below are random subsets.
# NOTE: this rebinds `labels` and `features`; re-running the cell applies a
# further permutation to the already-shuffled arrays.
i = np.arange(len(labels))
np.random.shuffle(i)
labels, features = labels[i], features[i]
data = tf.data.Dataset.from_tensor_slices((features, labels))

# number of examples, sequence length, and size of each held-out split (10%)
N = len(data)
L = features[0].shape[-1]
split = int(0.1 * N)

# first `split` examples -> test, next `split` -> validation, rest -> train
test_data = data.take(split).batch(config.batch_size)
nontest = data.skip(split)
val_data = nontest.take(split).batch(config.batch_size)
train_data = (
    nontest.skip(split)
    .shuffle(config.buffer_size)
    .batch(config.batch_size)
    .prefetch(tf.data.experimental.AUTOTUNE)
)

In [59]:
# sanity check: print the shape of one training batch next to L
for x, y in train_data.take(1).as_numpy_iterator():
    print(x.shape, L)

(16, 200) 200


In [60]:
inputs = tf.keras.Input(shape=(L,))

# make embedding and indicate that 0 should be treated as padding mask
e = tf.keras.layers.Embedding(
    input_dim=config.vocab_size,
    output_dim=config.embedding_dim,
    mask_zero=True,
)(inputs)

# average of the token embeddings along the sequence axis; concatenated with
# the recurrent features below as a sequence-wide summary
counts = tf.keras.layers.GlobalAveragePooling1D()(e)

# RNN layer
x = tf.keras.layers.GRU(config.rnn_units)(e)
x = tf.keras.layers.Concatenate()([x, counts])
x = tf.keras.layers.LayerNormalization()(x)
# a dense hidden layer
x = tf.keras.layers.Dense(
    config.hidden_dim,
    activation='relu',
    kernel_regularizer=tf.keras.regularizers.l2(config.reg_strength),
)(x)
x = tf.keras.layers.LayerNormalization()(x)
# a second, narrower dense hidden layer
x = tf.keras.layers.Dense(
    config.hidden_dim // 4,
    activation='relu',
    kernel_regularizer=tf.keras.regularizers.l2(config.reg_strength),
)(x)
x = tf.keras.layers.LayerNormalization()(x)
# single unit with no activation: the output is a logit, not a probability,
# which is why the loss and metrics below use from_logits=True / threshold=0
yhat = tf.keras.layers.Dense(1)(x)

model = tf.keras.Model(inputs=inputs, outputs=yhat, name='sol-rnn')

# With its default alpha=0, CosineDecay brings the learning rate to exactly 0
# after `decay_steps` optimizer steps and keeps it there, so any training
# beyond that point would not update the weights. Tie the schedule length to
# the real number of training steps instead of a fixed 1000.
schedule_epochs = 3  # keep equal to the `epochs` value passed to model.fit
decay_steps = max(1, schedule_epochs * len(train_data))
lr_decayed_fn = tf.keras.optimizers.schedules.CosineDecay(
    config.lr, decay_steps)

model.compile(
    tf.optimizers.Adam(lr_decayed_fn),
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    metrics=[
        tf.keras.metrics.AUC(from_logits=True),
        # logits > 0 correspond to predicted probability > 0.5
        tf.keras.metrics.BinaryAccuracy(threshold=0),
    ],
)


In [61]:
# train the baseline model, evaluating on the validation split as it goes
result = model.fit(
    train_data,
    validation_data=val_data,
    epochs=3,
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


## Hyperparameter search

In [65]:
def build_model(hp):
    """Build and compile one candidate model for the hyperparameter search.

    Args:
        hp: hyperparameter container supplied by the tuner. Each
            ``hp.Choice`` call below declares one searchable option and
            returns the value selected for the current trial.

    Returns:
        A compiled ``tf.keras.Sequential`` model whose single output unit
        produces a logit (no activation on the last layer).
    """
    model = tf.keras.Sequential()

    # make embedding and indicate that 0 should be treated as padding mask
    model.add(tf.keras.layers.Embedding(
        input_dim=config.vocab_size,
        output_dim=hp.Choice('embedding_dim', [8, 16, 64]),
        mask_zero=True))

    # RNN layer
    model.add(tf.keras.layers.GRU(hp.Choice('rnn_units', [32, 64, 128])))
    # two dense hidden layers sharing one activation; the second is 4x narrower
    hd = hp.Choice('hidden_dim', [32, 64, 128])
    activation = hp.Choice('activation', ['relu', 'tanh', 'softplus'])
    model.add(tf.keras.layers.Dense(hd, activation=activation))
    model.add(tf.keras.layers.Dense(hd // 4, activation=activation))
    # no activation: the model emits a logit, matching from_logits=True below
    model.add(tf.keras.layers.Dense(1))

    model.compile(
        tf.optimizers.Adam(hp.Choice('lr', [0.1, 1e-2, 1e-4])),
        loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
        metrics=[
            # Name the metric explicitly: the tuner objective looks up
            # "val_auc", and an auto-generated metric name may receive a
            # numeric suffix when several AUC instances exist in one session.
            tf.keras.metrics.AUC(from_logits=True, name='auc'),
            # logits > 0 correspond to predicted probability > 0.5
            tf.keras.metrics.BinaryAccuracy(threshold=0),
        ],
    )

    return model

# Hyperband search over build_model, ranking trials by validation AUC
# (larger is better)
search_objective = kt.Objective("val_auc", direction="max")
tuner = kt.Hyperband(
    build_model,
    objective=search_objective,
    max_epochs=25,
    factor=3,
    directory='/var/tmp/tuning',
    project_name='intro_to_kt',
)


In [10]:
# The search was commented out, which left `best_hps` (read by the following
# cell) undefined on a fresh kernel. It is restored here so the notebook runs
# top to bottom.
# NOTE(review): the search is slow (the recorded output below reports about
# 15 minutes); confirm whether results saved in the tuner's `directory` are
# reused on a re-run.

# abandon a trial when validation loss has not improved for 5 epochs
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

tuner.search(train_data, epochs=50, validation_data=val_data, callbacks=[stop_early])
# keep only the single best hyperparameter set found by the search
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

Trial 13 Complete [00h 01m 46s]
val_auc: 0.5

Best val_auc So Far: 0.6387796998023987
Total elapsed time: 00h 14m 40s
INFO:tensorflow:Oracle triggered exit


In [13]:
# selected value of each tuned hyperparameter, shown as one tuple
tuple(
    best_hps.get(hp_name)
    for hp_name in ('lr', 'rnn_units', 'embedding_dim', 'activation', 'hidden_dim')
)

(0.01, 32, 8, 'tanh', 32)