In [None]:
import os
import sys
import logging
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # FATAL
logging.getLogger('tensorflow').setLevel(logging.DEBUG)

try:
    from google.colab import drive
    drive.mount('/content/drive')
    !pip install -q ruamel.yaml
    !pip install -q tensorboard-plugin-profile
    project_path = '/content/drive/MyDrive/Colab Projects/QuantumFlow'
except:
    project_path = os.path.expanduser('~/QuantumFlow')

In [None]:
# Work from the project root and make the `quantumflow` package importable.
os.chdir(project_path)
# Guard against duplicates so re-running this cell does not keep growing sys.path.
if project_path not in sys.path:
    sys.path.append(project_path)

import tensorflow as tf
import numpy as np

import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

import quantumflow
import quantumflow.crazynet

In [None]:
# Problem sizes shared by the smoke tests below.
n_dim, batch_size, n_inputs = 2, 1, 10

# A single query location at the origin and randomly placed support points.
x = np.zeros((batch_size, n_dim), dtype=np.float32)  # (batch_size, n_dim)
x_inputs = np.random.randn(batch_size, n_inputs, n_dim)  # (batch_size, n_inputs, n_dim)
# Squaring the normal samples makes every input value non-negative.
inputs = np.square(np.random.randn(batch_size, n_inputs, 1))  # (batch_size, n_inputs, 1)

### metric_scaled_dot_product_attention

In [None]:
# Sizes for the attention smoke test: two queries attending over all support points.
size_q, size_k = 2, n_inputs
depth, depth_v = 5, 7

# Per-query scalars; the leading factors (0.0 and 1.0) are the values to tweak.
ones_q = np.ones(shape=(batch_size, size_q), dtype=np.float32)
alpha = 0.0 * ones_q / 10  # (..., size_q)
beta = 1.0 * ones_q  # (..., size_q)

# Differences between the first `size_q` support points and all support points, as float32.
xdiff = quantumflow.crazynet.get_xdiff(x_inputs[:, :size_q], x_inputs).numpy().astype(np.float32)
print(alpha.shape, beta.shape, xdiff.shape)

In [None]:
# All-zero queries, keys and values in float32.
q = np.zeros((batch_size, size_q, depth), dtype=np.float32)    # (..., size_q, depth)
k = np.zeros((batch_size, size_k, depth), dtype=np.float32)    # (..., size_k, depth)
v = np.zeros((batch_size, size_k, depth_v), dtype=np.float32)  # (..., size_k, depth_v)

# Hand-set individual entries; q and k stay zero, only the first value row is switched on.
q[0, 0, 1] = 0
k[0, 0, 1] = 0
v[0, 0, :] = 1

In [None]:
# Run the metric attention kernel on the hand-built tensors.
output, attention_weights = quantumflow.crazynet.metric_scaled_dot_product_attention(
    q, k, v, alpha, beta, xdiff
)

In [None]:
# Show values and shapes of both results, weights first.
for label, tensor in (('attention_weights\n', attention_weights), ('output\n', output)):
    print(label, tensor.numpy(), tensor.shape)

### MetricMultiHeadAttention

In [None]:
# Layer width and number of attention heads for the multi-head test.
d_model, num_heads = 128, 8

In [None]:
# Build the multi-head attention layer under test.
layer = quantumflow.crazynet.MetricMultiHeadAttention(
    d_model,
    num_heads,
)

In [None]:
# Apply the layer (argument order: v, k, q, xdiff) and report the result shapes.
output, attention_weights = layer(v, k, q, xdiff)
for result in (output, attention_weights):
    print(result.shape)

### CrazyNet

In [None]:
# Hyper-parameters for a minimal CrazyNet instance.
num_outputs, num_layers = 2, 1
dropout_rate = 0.0
dff, dff_final = 512, [512, 64]
scale = 5.0

# Echo the inputs with their shapes:
#   x        (batch_size, n_dim)
#   x_inputs (batch_size, n_inputs, n_dim)
#   inputs   (batch_size, n_inputs, 1)
for array in (x, x_inputs, inputs):
    print(array, array.shape)

In [None]:
# Instantiate the network with the hyper-parameters defined above.
crazynet = quantumflow.crazynet.CrazyNet(
    num_outputs, num_layers, d_model, num_heads,
    dff, dff_final, dropout_rate, scale,
)

In [None]:
# Forward pass on the test inputs; as the cell's last expression, the result is displayed.
crazynet(x, x_inputs, inputs)

## 1d test

In [None]:
# 101 evenly spaced points on [0, 1], shaped (1, 101, 1).
x_inputs = np.linspace(0, 1, 101).reshape(1, -1, 1)
# Pairwise differences of the points with themselves, as float32.
xdiff = quantumflow.crazynet.get_xdiff(x_inputs, x_inputs).numpy().astype(np.float32)

# A small single-head layer keeps the attention pattern easy to inspect.
d_model, num_heads = 16, 1

# Non-negative random features, one d_model-sized vector per point.
net = np.abs(np.random.randn(batch_size, 101, d_model) / 3)

In [None]:
# Fresh attention layer sized for the 1d test.
layer = quantumflow.crazynet.MetricMultiHeadAttention(
    d_model,
    num_heads,
)

In [None]:
# Self-attention: the same features serve as values, keys and queries.
output, attention_weights = layer(net, net, net, xdiff)
for result in (output, attention_weights):
    print(result.shape)

In [None]:
# Attention weights of the first batch element / first head as an image.
fig, ax = plt.subplots(figsize=(5, 5))
ax.imshow(attention_weights[0, 0])
plt.show()

In [None]:
# Layer output of the first batch element plotted against the point positions.
fig, ax = plt.subplots(figsize=(20, 3))
ax.plot(x_inputs[0], output[0, :])
plt.show()

In [None]:
# Slice of the attention weights at index 1 of the last axis, transposed so that
# the first plotted axis lines up with the point positions.
weights_slice = attention_weights[0, :, :, 1].numpy().transpose()
fig, ax = plt.subplots(figsize=(20, 3))
ax.plot(x_inputs[0], weights_slice)
plt.show()

# Test Training

In [None]:
def function_x(x, y):
    """Return the x coordinate divided by 50; `y` is accepted but not used."""
    scaled_x = x / 50.0
    return scaled_x

def function_y(x, y):
    """Return the y coordinate divided by 50; `x` is accepted but not used."""
    scaled_y = y / 50.0
    return scaled_y

def function_1(x, y):
    """Sum of two positive Gaussian bumps minus a broader third one.

    The bumps are centred at (40, 50), (62, 62) and (50, 50) with amplitudes
    5, 3 and -4 respectively.
    """
    bump_a = 5*np.exp(-(x - 40)**2/200 - (y - 50)**2/50)
    bump_b = 3*np.exp(-(x - 62)**2/193 - (y - 62)**2/342)
    dip = 4*np.exp(-(x - 50)**2/393 - (y - 50)**2/342)
    return bump_a + bump_b - dip

def function_c(x, y):
    """Single isotropic Gaussian bump centred at (50, 50) with peak value 1."""
    exponent = -(x - 50)**2/200 - (y - 50)**2/200
    return np.exp(exponent)

def function_d(x, y):
    """Two equal Gaussian bumps centred at (50, 40) and (50, 60)."""
    # Both bumps share the same x-dependence; only the y centre differs.
    x_term = -(x - 50)**2/100
    lower = np.exp(x_term - (y - 40)**2/100)
    upper = np.exp(x_term - (y - 60)**2/100)
    return lower + upper

In [None]:
# --- Model hyper-parameters ---
num_outputs, num_layers = 1, 8
d_model, num_heads = 128, 8
dff, dff_final = 512, [512, 64]
dropout_rate = 0.1
scale = 50.0

# --- Optimisation settings ---
batch_size = 256
steps = 100000

# --- Task: the network sees samples of `input_fn` and must reproduce `target_fn` ---
input_fn, target_fn = function_d, function_c

def gen_x(target_fn, batch_size):
    """Draw random 2-D query points and evaluate the target function on them.

    Points are sampled uniformly from the square [10, 90) x [10, 90).

    Args:
        target_fn: callable ``f(x, y)`` applied to the two coordinate columns.
        batch_size: number of points to draw.

    Returns:
        (x, targets): float32 arrays of shape (batch_size, 2) and (batch_size, 1).
    """
    # The bounds are given explicitly: a single positional argument to
    # np.random.uniform is `low` (with `high` left at 1.0), not the upper bound.
    x = np.random.uniform(low=10.0, high=90.0, size=(batch_size, 2)).astype(np.float32)
    targets = target_fn(x[:, 0], x[:, 1])[:, np.newaxis].astype(np.float32)
    return x, targets

def gen_inputs(input_fn, batch_size):
    """Build the fixed 7 x 7 support grid and its input values, repeated per batch element.

    The grid covers 20, 30, ..., 80 on both axes, with the first coordinate
    varying slowest.

    Args:
        input_fn: callable ``f(x, y)`` evaluated on the grid coordinates.
        batch_size: number of identical copies along the leading axis.

    Returns:
        (x_inputs, inputs): float32 arrays of shape (batch_size, 49, 2) and
        (batch_size, 49, 1).
    """
    ticks = np.arange(20, 81, 10)
    first, second = np.meshgrid(ticks, ticks, indexing='ij')
    points = np.stack([first.ravel(), second.ravel()], axis=-1)

    values = input_fn(points[:, 0], points[:, 1]).astype(np.float32)

    # Every batch element receives the same grid and the same values.
    x_inputs = np.repeat(points[np.newaxis, :, :], batch_size, axis=0).astype(np.float32)
    inputs = np.repeat(values[np.newaxis, :, np.newaxis], batch_size, axis=0).astype(np.float32)
    return x_inputs, inputs


@tf.function
def validation_call(x_fine, x_inputs_fine, inputs_fine):
    """Graph-compiled forward pass of the global `model`, called with training=False."""
    predictions = model(x_fine, x_inputs_fine, inputs_fine, training=False)
    return predictions

def validation_fn():
    """Evaluate the model on the 100 x 100 integer grid, one row (fixed y) at a time.

    Uses the module-level `target_fn`, `input_fn`, `gen_inputs` and
    `validation_call`.

    Returns:
        (zz_output, err): the stacked model outputs and the residuals
        ``target - output``, both with the row index y as leading axis.
    """
    predictions, residuals = [], []
    x_coords = np.arange(100)

    for y in range(100):
        # One grid row: x runs over 0..99 while y is held fixed.
        x_fine = np.stack([x_coords, np.full(100, y)], axis=-1).astype(np.float32)
        targets_fine = target_fn(x_fine[:, 0], x_fine[:, 1])[:, np.newaxis].astype(np.float32)

        x_inputs_fine, inputs_fine = gen_inputs(input_fn, x_fine.shape[0])
        outputs_fine = validation_call(x_fine, x_inputs_fine, inputs_fine)

        predictions.append(outputs_fine)
        residuals.append(targets_fine - outputs_fine)

    return np.stack(predictions, axis=0), np.stack(residuals, axis=0)

# The network to train, built from the hyper-parameters defined at the top of this cell.
model = quantumflow.crazynet.CrazyNet(
    num_outputs, num_layers, d_model, num_heads,
    dff, dff_final, dropout_rate, scale,
)

def loss_fn(outputs, targets):
    """Mean squared error between model outputs and targets, averaged over all elements."""
    residual = outputs - targets
    return tf.reduce_mean(tf.square(residual))

# Adam with a small initial learning rate; the training loop decays it later on.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)

# Training history: per-step loss values and periodic validation snapshots.
losses, validation = [], []

In [None]:
# Evaluation grid on the integer coordinates 0..99 of both axes. These are exactly
# the points validation_fn() evaluates (np.arange(100) per row), so `zz` can be
# compared element-wise with the validation outputs. The earlier
# linspace(0, 100, 100) had a spacing of 100/99 and therefore did not line up.
grid_axis = np.linspace(0, 99, 100)
xx, yy = np.meshgrid(grid_axis, grid_axis)

# --- Input function: surface plus the support points the network gets to see ---
zz = input_fn(xx, yy)
x_inputs, inputs = gen_inputs(input_fn, 1)

fig = plt.figure(figsize=(10, 10))
ax = plt.axes(projection='3d')
surf = ax.plot_surface(xx, yy, zz, rstride=1, cstride=1, cmap=matplotlib.cm.coolwarm, linewidth=0, antialiased=True)
# inputs has shape (1, n_points, 1); pass a flat vector of heights to scatter.
ax.scatter(x_inputs[0, :, 0], x_inputs[0, :, 1], inputs[0, :, 0], c='k')
plt.show()

# --- Target function: surface plus one random batch of training points ---
# `zz` is deliberately left holding the target surface; later cells compare against it.
zz = target_fn(xx, yy)
x, targets = gen_x(target_fn, batch_size)

fig = plt.figure(figsize=(10, 10))
ax = plt.axes(projection='3d')
surf = ax.plot_surface(xx, yy, zz, rstride=1, cstride=1, cmap=matplotlib.cm.coolwarm, linewidth=0, antialiased=True)
# targets has shape (batch_size, 1); pass a flat vector of heights to scatter.
ax.scatter(x[:, 0], x[:, 1], targets[:, 0], c='k')
plt.show()

In [None]:
@tf.function
def _train_step(x, targets, x_inputs, inputs):
    """One traced optimisation step; every data array is an explicit tensor argument."""
    with tf.GradientTape() as tape:
        outputs = model(x, x_inputs, inputs, training=True)
        loss_value = loss_fn(outputs, targets)

    grads = tape.gradient(loss_value, model.trainable_weights)
    optimizer.apply_gradients(zip(grads, model.trainable_weights))
    return loss_value


def step_fn(x, targets):
    """Run one training step on the query batch and return its loss.

    The support points are taken from the module-level `x_inputs` / `inputs`
    at call time and handed to the traced step as arguments. A tf.function
    that reads such Python globals directly keeps the values they had when it
    was first traced, so later reassignments would be silently ignored.

    Args:
        x: query points of the batch.
        targets: target values for `x`.

    Returns:
        The scalar loss tensor of this step.
    """
    return _train_step(x, targets, x_inputs, inputs)


# Column headers for the progress table printed during training.
print(f"{'step':>10} {'loss':>20} {'mean(abs(err))':>20} {'max(abs(err))':>20}")

for step in range(steps):
    # New random query points every step; the support grid is regenerated as well.
    x, targets = gen_x(target_fn, batch_size)
    x_inputs, inputs = gen_inputs(input_fn, batch_size)

    loss_value = step_fn(x, targets)
    losses.append(loss_value.numpy())

    # Every 100 steps: evaluate on the dense grid, log the error statistics, keep the snapshot.
    if not step % 100:
        zz_output, err = validation_fn()
        abs_err = np.abs(err)
        print(f"{step:10d} {loss_value:20f} {np.mean(abs_err):20f} {np.max(abs_err):20f}")
        validation.append(zz_output)

    # Once more than 25000 losses are recorded, shrink the learning rate by 0.01% per step.
    if len(losses) > 25000:
        decayed_rate = optimizer.learning_rate*0.9999
        optimizer.learning_rate.assign(decayed_rate)
        

In [None]:
# Training loss per step on a logarithmic axis.
fig, ax = plt.subplots(figsize=(20, 3))
ax.plot(losses)
ax.set_yscale('log')
plt.show()

In [None]:
# For each output channel: maximum absolute deviation from `zz` across the stored snapshots.
n_channels = zz_output.shape[-1]
for z in range(n_channels):
    max_errors = [np.max(np.abs(zz - snapshot[:, :, z])) for snapshot in validation]
    plt.figure(figsize=(20, 3))
    plt.plot(max_errors)
    plt.show()

In [None]:
# Surface plot of the latest validation output, one figure per output channel.
n_channels = zz_output.shape[-1]
for z in range(n_channels):
    predicted = zz_output[:, :, z]
    fig = plt.figure(figsize=(10, 10))
    ax = plt.axes(projection='3d')
    surf = ax.plot_surface(
        xx, yy, predicted,
        rstride=1, cstride=1, cmap=matplotlib.cm.coolwarm, linewidth=0, antialiased=False,
    )
    plt.show()

In [None]:
# Surface plot of (output - zz) per channel; the z-axis is limited to the value range of `zz`.
n_channels = zz_output.shape[-1]
z_low, z_high = np.min(zz), np.max(zz)
for z in range(n_channels):
    deviation = zz_output[:, :, z]-zz
    fig = plt.figure(figsize=(10, 10))
    ax = plt.axes(projection='3d')
    surf = ax.plot_surface(
        xx, yy, deviation,
        rstride=1, cstride=1, cmap=matplotlib.cm.coolwarm, linewidth=0, antialiased=False,
    )
    ax.set_zlim(z_low, z_high)
    plt.show()

# Multi-Batch dim

In [None]:
# Same smoke test as at the top, but with two leading batch axes
# (batch_size, inner_batch_size) in front of every array.
n_dim, batch_size, inner_batch_size, n_inputs = 2, 1, 4, 10
batch_dims = (batch_size, inner_batch_size)

x = np.zeros(batch_dims + (n_dim,), dtype=np.float32)
x_inputs = np.random.randn(*batch_dims, n_inputs, n_dim)
inputs = np.square(np.random.randn(*batch_dims, n_inputs, 1))

print(x.shape, x_inputs.shape, inputs.shape)

size_q, size_k = 2, n_inputs
depth, depth_v = 5, 7

# Per-query scalars; the leading factors (0.0 and 1.0) are the values to tweak.
ones_q = np.ones(shape=batch_dims + (size_q,), dtype=np.float32)
alpha = 0.0 * ones_q / 10  # (..., size_q)
beta = 1.0 * ones_q  # (..., size_q)
xdiff = quantumflow.crazynet.get_xdiff(x_inputs[:, :, :size_q], x_inputs).numpy().astype(np.float32)
print(alpha.shape, beta.shape, xdiff.shape)

q = np.zeros(batch_dims + (size_q, depth), dtype=np.float32)    # (..., size_q, depth)
k = np.zeros(batch_dims + (size_k, depth), dtype=np.float32)    # (..., size_k, depth)
v = np.zeros(batch_dims + (size_k, depth_v), dtype=np.float32)  # (..., size_k, depth_v)

# Hand-set individual entries across the whole inner batch axis.
q[0, :, 0, 1] = 0
k[0, :, 0, 1] = 0
v[0, :, 0, :] = 1

print(q.shape, k.shape, v.shape)

In [None]:
# The attention kernel applied to inputs carrying the extra batch axis.
output, attention_weights = quantumflow.crazynet.metric_scaled_dot_product_attention(
    q, k, v, alpha, beta, xdiff
)

In [None]:
d_model, num_heads = 128, 8

# All-zero features for every support point.
net = np.zeros((batch_size, inner_batch_size, n_inputs, d_model), dtype=np.float32)  # (..., n_inputs, d_model)
# Pairwise differences between all support points, as float32.
xdiff = quantumflow.crazynet.get_xdiff(x_inputs, x_inputs).numpy().astype(np.float32)

layer = quantumflow.crazynet.MetricMultiHeadAttention(
    d_model,
    num_heads,
)

In [None]:
# Self-attention on the multi-batch features; report the result shapes.
output, attention_weights = layer(net, net, net, xdiff)
for result in (output, attention_weights):
    print(result.shape)

In [None]:
# Hyper-parameters for a minimal CrazyNet instance.
num_outputs, num_layers = 2, 1
dropout_rate = 0.0
dff, dff_final = 512, [512, 64]
scale = 5.0

# Shapes of the inputs:
#   x        (..., n_dim)
#   x_inputs (..., n_inputs, n_dim)
#   inputs   (..., n_inputs, 1)
for array in (x, x_inputs, inputs):
    print(array.shape)

In [None]:
# Instantiate the network for the multi-batch forward pass.
crazynet = quantumflow.crazynet.CrazyNet(
    num_outputs, num_layers, d_model, num_heads,
    dff, dff_final, dropout_rate, scale,
)

In [None]:
# Forward pass with the extra batch axis; as the cell's last expression, the result is displayed.
crazynet(x, x_inputs, inputs)