# Pulse sequence reward prediction with supervised learning
_Written by Will Kaufman_

In [None]:
import os
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
# from rl_pulse.environments import spin_sys_discrete
from matplotlib import pyplot as plt

In [None]:
# Directory of the run to analyze; later cells load 'data.npz' and the saved
# model ('my_model') from here.
data_path = '../data/2020-08/2020-08-21-143037/'

In [None]:
# Open the .npz archive stored under data_path.
npz_path = os.path.join(data_path, 'data.npz')
npz_files = np.load(npz_path)

In [None]:
# Pull the two arrays used below out of the archive, in the same order.
actions, rewards = (npz_files[key] for key in ('actions', 'rewards'))

In [None]:
# TODO figure out why I can't load the model...
model_path = os.path.join(data_path, 'my_model')
model = keras.models.load_model(model_path)

In [None]:
# argmax() indexes the flattened rewards array, so integer-dividing by 24
# recovers the leading (sample) index of the largest reward
# (assumes 24 reward entries per sample — TODO confirm).
index = int(rewards.argmax() // 24)

In [None]:
# Convert the selected sample's actions and rewards to float32 tensors.
a_sample, r_sample = (
    tf.convert_to_tensor(array[index, ...], dtype=tf.float32)
    for array in (actions, rewards)
)

In [None]:
# The model is called on batched input, so add a leading batch axis and then
# squeeze the size-1 dimensions back out of the prediction.
batched_sample = tf.expand_dims(a_sample, axis=0)
r_pred = tf.squeeze(model(batched_sample))

In [None]:
# Difference between the recorded rewards and the model's prediction; squeeze
# drops any size-1 dimensions from the prediction before subtracting.
residuals = tf.subtract(r_sample, tf.squeeze(r_pred))

In [None]:
# Print the sample, its recorded rewards, the prediction, and the residuals
# in that order under compact NumPy print settings.
with np.printoptions(precision=2, suppress=True):
    for tensor in (a_sample, r_sample, r_pred, residuals):
        print(tensor)

In [None]:
# Display the input shape the loaded model reports.
model.input_shape

In [None]:
# One-hot encode a 24-step sequence over 5 actions: row i of the identity
# matrix is the one-hot vector for action i, so indexing by the action list
# yields a fresh (24, 5) float array.
action0 = np.eye(5)[[4, 1, 2, 4, 3, 0] * 4]

In [None]:
# Every one of the 24 steps selects action 4, so set that whole column to 1.
action1 = np.zeros((24, 5))
action1[:, 4] = 1

In [None]:
# One-hot encode a 12-step pattern repeated twice (24 steps, 5 actions).
action2 = np.eye(5)[[1, 4, 0, 2, 4, 3, 2, 4, 3, 1, 4, 0] * 2]  # 4, 0, 2, 4, 3, 1

In [None]:
# Display the encoded sequence to check it by eye.
action2

In [None]:
# Run each hand-built sequence through the model (with a batch axis added)
# and squeeze the prediction back down.
reward0, reward1, reward2 = (
    tf.squeeze(model(tf.expand_dims(sequence, axis=0)))
    for sequence in (action0, action1, action2)
)

In [None]:
# Overlay the three predicted reward curves; legend labels follow plot order.
for curve in (reward0, reward1, reward2):
    plt.plot(curve)
plt.legend(['WHH-4', 'delay', '???'])

## Working backwards: optimizing reward by gradient ascent

In [None]:
# Sanity check: call the model on a random input of shape (1, 24, 5).
model(tf.random.uniform(shape=(1,24,5)))

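Following the loss-calculation approach from the [TensorFlow DeepDream tutorial](https://www.tensorflow.org/tutorials/generative/deepdream#calculate_loss): the model input itself is optimized by following the gradient of a loss with respect to it.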

In [None]:
def calc_loss(actions, model):
    """Return the scalar loss minimized when optimizing an action sequence.

    The loss is the negated sum of the model output at the last index of
    axis 1, plus two small penalties computed on ``actions`` itself.

    Args:
        actions: Batched tensor of action values. It is summed along axis 2,
            so it needs at least three dimensions (presumably
            (batch, time, action) — TODO confirm).
        model: Callable applied to ``actions``; its result is sliced as
            ``[:, -1, :]`` and so must be rank 3.

    Returns:
        Scalar tensor: reward term + binarization penalty + row-sum penalty.
    """
    penalty_weight = 1e-2
    predicted = model(actions)

    # Maximizing the last-step output is the same as minimizing its negative.
    final_step = predicted[:, -1, :]
    reward_term = -tf.reduce_sum(final_step)

    # a * (1 - a) is zero at 0 and 1 and largest at 0.5, so penalizing it
    # pushes every entry towards 0 or 1.
    binarize_term = penalty_weight * tf.reduce_sum(actions * (1 - actions))

    # Make each row look like a single action by keeping its sum close to 1.
    row_totals = tf.reduce_sum(actions, axis=2)
    row_sum_term = penalty_weight * tf.reduce_sum(tf.abs(row_totals - 1))

    return reward_term + binarize_term + row_sum_term

In [None]:
# Starting point for the optimization: every entry at 0.5.
# Alternative random start: action_random = tf.random.uniform(shape=(1,24,5))
action_random = 0.5 + tf.zeros(shape=(1, 24, 5))

In [None]:
# 50 steps of plain gradient descent on the input itself (step size 0.1),
# clipping every entry back into [0, 1] after each step.
for _ in range(50):
    with tf.GradientTape() as grad_tape:
        # action_random is a tensor rather than a tf.Variable, so the tape
        # has to be told explicitly to track it.
        grad_tape.watch(action_random)
        loss = calc_loss(action_random, model)

    descent_step = 1e-1 * grad_tape.gradient(loss, action_random)
    action_random = tf.clip_by_value(action_random - descent_step, 0, 1)
# This is the loss computed in the final iteration, i.e. before the last
# update was applied.
print(loss)

In [None]:
# Replace the first step of the optimized sequence with a hard one-hot for
# action 1 and keep the remaining steps as they are.
first_action = tf.convert_to_tensor(
    np.array([[[0, 1, 0, 0, 0]]], dtype=np.float32)
)
action_random = tf.concat([first_action, action_random[:, 1:, :]], axis=1)

In [None]:
# Print the optimized sequence under NumPy print options of 3 decimals and no
# scientific notation (assumes tensor printing honours NumPy's print
# options — TODO confirm).
with np.printoptions(precision=3, suppress=True):
    print(action_random)

In [None]:
# Evaluate the model on the optimized sequence.
model(action_random)

In [None]:
# Keras models expose their layers through the `layers` list attribute; there
# is no `get_layers()` method, so the original call raised AttributeError.
# Assumes the first layer of the model is the LSTM — TODO confirm.
lstm = model.layers[0]

In [None]:
# Stand-alone LSTM layer with 64 units that returns the full output sequence
# and accepts variable-length sequences of 5 features.
lstm1 = keras.layers.LSTM(
    units=64,
    return_sequences=True,
    input_shape=(None, 5),
)

In [None]:
# A freshly constructed layer has no weight variables until it is built, and
# set_weights() rejects a weight list whose length differs from the layer's
# current weights. Build for (batch, time, 5) inputs first, matching the
# 5-feature inputs fed to the model elsewhere in this notebook.
# Assumes `lstm` has the same number of units as `lstm1` — TODO confirm.
if not lstm1.built:
    lstm1.build((None, None, 5))
lstm1.set_weights(lstm.get_weights())