In [40]:
import numpy as np

import tensorflow as tf
from tensorflow.keras.layers import Dropout, Dense
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy, Reduction

import ethicml

In [2]:
class AlwaysDropout(tf.keras.layers.Dropout):
    """Dropout layer whose noise stays switched on in every mode.

    The parent layer is always invoked with ``training=True``, so units are
    dropped during evaluation and prediction as well as during fitting.
    """

    def call(self, inputs, training=None):
        """Apply dropout to ``inputs``; the ``training`` argument is ignored."""
        force_training = True  # override whatever mode the caller asked for
        return super().call(inputs, training=force_training)

In [86]:
# --- Network / optimisation sizes ---
batch_size = 32      # examples per gradient step
hidden_units = 100   # width of the single hidden layer

# --- Dropout and regularisation hyperparameters ---
keep_prob = 0.9      # probability of keeping a unit; dropout rate is 1 - keep_prob
length_scale = 1.0   # enters the weight-decay formula squared
precision = 100.0    # enters the weight-decay formula in the denominator

In [87]:
# L2 coefficient shared by every regularised weight in both models.
# NOTE(review): the denominator uses `batch_size`; dropout-as-Bayesian-approximation
# derivations usually divide by the number of training examples instead -- confirm
# which one is intended here.
weight_decay = (keep_prob * length_scale**2) / (2 * batch_size * precision)

# One-hidden-layer network whose dropout layers stay active in every mode
# (see AlwaysDropout). The final layer emits a single raw logit.
NN_drop = tf.keras.Sequential(
    [
        AlwaysDropout(1 - keep_prob),
        Dense(
            hidden_units,
            activation="tanh",
            kernel_regularizer=l2(weight_decay),
            use_bias=True,
            bias_regularizer=l2(weight_decay),
        ),
        AlwaysDropout(1 - keep_prob),
        Dense(
            1,
            activation=None,
            kernel_regularizer=l2(weight_decay),
            use_bias=False,
        ),
    ]
)

In [88]:
# Baseline one-hidden-layer network built from standard Keras Dropout layers
# and a sigmoid hidden activation. The final layer emits a single raw logit.
nn_layers = [
    Dropout(1 - keep_prob),
    Dense(
        hidden_units,
        activation="sigmoid",
        kernel_regularizer=l2(weight_decay),
        use_bias=True,
        bias_regularizer=l2(weight_decay),
    ),
    Dropout(1 - keep_prob),
    Dense(
        1,
        activation=None,
        kernel_regularizer=l2(weight_decay),
        use_bias=False,
    ),
]
NN = tf.keras.Sequential(nn_layers)

In [53]:
# Load the Adult dataset through ethicml.
# presumably x = features, s = sensitive attribute, y = labels -- confirm against ethicml
x, s, y = ethicml.data.load.load_data(ethicml.data.Adult())

# Convert each part to a NumPy array; the targets are cast to float32.
x = x.to_numpy()
s = s.to_numpy()
y = y.to_numpy().astype(np.float32)

In [54]:
from scipy import stats

# Standardise every feature column to zero mean and unit variance.
# A column with zero variance makes zscore divide by zero and return NaN,
# which would propagate into the loss. Record those columns first and set
# them to 0 afterwards (a constant feature carries no information anyway).
constant_columns = np.std(x, axis=0) == 0
x = stats.zscore(x, axis=0)
x[:, constant_columns] = 0.0

In [55]:
# One fifth of the examples is held out for validation.
num_examples = len(x)
num_test = num_examples // 5

# Shuffle once with a fixed seed and never reshuffle, so that take()/skip()
# below carve the same ordering into disjoint test and train parts.
dataset = (
    tf.data.Dataset.from_tensor_slices((x, y))
    .cache()
    .shuffle(num_examples, seed=888, reshuffle_each_iteration=False)
)

# The whole held-out part is served as a single batch.
test_ds = dataset.take(num_test).batch(num_test)

# Remaining examples are shuffled again with a buffer of `num_test` elements
# (smaller than the training part itself) and cut into full-size batches only.
train_ds = (
    dataset.skip(num_test)
    .shuffle(num_test, seed=888)
    .batch(batch_size, drop_remainder=True)
)

In [95]:
# Optimisation settings used by the compile/fit cell.
epochs = 10            # full passes over the training dataset
learning_rate = 0.005  # step size handed to the Adam optimizer

In [96]:
# Compile and fit every selected model with identical settings.
# The models output raw logits (last Dense layer has no activation), hence
# from_logits=True on the loss.
for model in [NN]:  # [NN_drop, NN]:
    model.compile(
        # `lr` is a deprecated alias that newer Keras releases reject;
        # `learning_rate` is the supported keyword.
        optimizer=Adam(learning_rate=learning_rate),
        loss=BinaryCrossentropy(from_logits=True, reduction=Reduction.SUM_OVER_BATCH_SIZE),
        metrics=['accuracy'],
    )
    # Validate on the held-out split after every epoch.
    model.fit(train_ds, epochs=epochs, validation_data=test_ds, validation_freq=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
