# In [None]:
# Import libraries
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

# Reference arrays stored on disk.  In the code visible here, `xgrid` and `C`
# are only read by plot() to overlay reference consumption curves; `egrid`
# and `V` are loaded but not referenced.
egrid, xgrid, V, C = map(
    np.load,
    ('egrid.npy', 'xgrid.npy', 'V.npy', 'C.npy'),
)

# Number of paths simulated in every training step
batch_size = 15000

# Adam with the Keras default hyper-parameters
optimizer = tf.keras.optimizers.Adam()

# Model parameters
σ_ε = 0.02058  # scale applied to the standard-normal productivity shock
λ_ε = 0.99     # persistence of e from one period to the next
T = 9          # number of decision periods (one network per period)
γ = 2.0        # curvature parameter passed to the utility function u
β = 0.97       # per-period discount factor

# Prices
r = 0.07  # return earned on wealth x
w = 5     # wage that multiplies exp(e)


# function that creates a network
def net():
    """Build one feed-forward policy network.

    The model takes two inputs, passes them through two hidden ReLU layers
    of 32 units each and finishes with a single sigmoid unit, so its output
    lies in the unit interval.

    Returns:
        A newly created ``tf.keras.Sequential`` model.
    """
    layers = tf.keras.layers

    model = tf.keras.Sequential()
    model.add(layers.Dense(32, activation='relu', input_shape=(2,)))
    model.add(layers.Dense(32, activation='relu'))
    model.add(layers.Dense(1, activation='sigmoid'))
    return model


# One network per period: n[t] is the policy evaluated at time t
n = [net() for _ in range(T)]

# Flat list holding every network's weights, in period order; this is the
# variable list handed to the optimizer in training_step()
Θ = [weight for model in n for weight in model.weights]

# Constants read by normalize() to centre and scale the network inputs
μx, σx = 2.0, 1.0
μe, σe = 0.0, 0.14


def normalize(x, e):
    """Standardise the two state variables and stack them column-wise.

    Args:
        x: wealth; the module-level ``μx`` is subtracted and the result is
            divided by ``σx``.
        e: productivity; the module-level ``μe`` is subtracted and the
            result is divided by ``σe``.

    Returns:
        The two standardised inputs concatenated along axis 1, with the
        wealth column(s) first.
    """
    x_scaled = (x - μx) / σx
    e_scaled = (e - μe) / σe
    return tf.concat([x_scaled, e_scaled], axis=1)


# %% Economic Model ------------------------------------------------------
def u(C, γ):
    """Evaluate CRRA utility of consumption.

    Consumption is floored at 1e-3 before the utility is evaluated, so zero
    or negative consumption yields a large negative but finite utility
    instead of inf/nan.

    Args:
        C: consumption tensor.
        γ: curvature (relative risk aversion) parameter.

    Returns:
        ``C**(1 - γ) / (1 - γ)`` evaluated on the floored consumption, or
        ``log(C)`` when ``γ`` is the plain number 1.
    """
    C_pos = tf.maximum(C, 1e-3)

    # The power form divides by zero at γ == 1; its conventional limit is
    # log utility.  Only plain Python numbers are tested here so that a
    # tensor-valued γ still takes the general branch unchanged.
    if isinstance(γ, (int, float)) and γ == 1:
        return tf.math.log(C_pos)

    return C_pos**(1 - γ) / (1 - γ)


def simulate():
    """Simulate one batch of paths under the current policy networks.

    Initial wealth is drawn uniformly from [.01, 4) and initial productivity
    uniformly from [-.25, .25).  In each period ``t < T`` the network
    ``n[t]`` maps the normalised state to a share of disposable income that
    is carried over as next-period wealth; the remainder is consumed.
    Productivity follows ``e' = λ_ε * e + σ_ε * shock`` with a fresh
    standard-normal shock every period.  In period ``T`` all disposable
    income is consumed.

    Returns:
        A pair ``(V, [x, e])`` where ``V`` is the discounted sum of period
        utilities for every path (shape ``[batch_size, 1]``) and ``x`` / ``e``
        are dicts mapping each period ``0..T`` to the simulated wealth and
        productivity tensors.
    """
    def draw_shock():
        # A new standard-normal draw for every path on each call
        return tf.random.normal(shape=[batch_size, 1])

    # State paths, keyed by period
    x = {0: tf.random.uniform([batch_size, 1], minval=.01, maxval=4)}
    e = {0: tf.random.uniform([batch_size, 1], minval=-.25, maxval=.25)}

    # Discounted utility, accumulated period by period
    V = 0

    for t in range(T):
        # The network sees normalised wealth and productivity at t and
        # returns the share of disposable income that is saved
        share = n[t](normalize(x[t], e[t]))

        # Disposable income
        income = (1 + r) * x[t] + tf.exp(e[t]) * w

        # Whatever is not saved is consumed
        V += β**t * u((1 - share) * income, γ)

        # Next states
        e[t + 1] = λ_ε * e[t] + σ_ε * draw_shock()
        x[t + 1] = share * income

    # Terminal period: consume all disposable income
    terminal_income = (1 + r) * x[T] + tf.exp(e[T]) * w
    V += β**T * u(terminal_income, γ)

    return V, [x, e]


@tf.function
def training_step():
    """Run one gradient step that raises mean lifetime utility.

    A batch is simulated with simulate(), the negative batch mean of the
    discounted utility is used as the loss, and the optimizer updates all
    network weights in ``Θ``.

    Returns:
        A pair ``(EV, [x, e])``: the batch-mean discounted utility before the
        update, and the simulated wealth / productivity paths as returned by
        simulate().
    """
    # gradient() is called exactly once below, so a non-persistent tape is
    # enough; it releases its resources as soon as that call returns.
    with tf.GradientTape() as tape:
        V, [x, e] = simulate()
        objective = -tf.reduce_mean(V)

    # optimization
    grads = tape.gradient(objective, Θ)
    optimizer.apply_gradients(zip(grads, Θ))

    return -objective, [x, e]


def plot(results):
    """Plot learned consumption against wealth for periods 0, 3 and 6.

    For each of the three periods the simulated wealth ``results[0][t]`` is
    fed to network ``n[t]`` with productivity fixed at zero, and the implied
    consumption is drawn as a scatter (blue, red, black).  The reference
    curves ``C[t, :, 10]`` are then drawn over ``xgrid`` for the same
    periods.

    NOTE(review): index 10 on the last axis of ``C`` presumably selects the
    grid point matching e = 0 — confirm against ``egrid``.

    Args:
        results: the ``[x, e]`` list returned by training_step(); only the
            wealth dict ``results[0]`` is read.
    """
    periods = (0, 3, 6)
    colors = ('b', 'r', 'k')

    def scatter_policy(t, color):
        wealth = results[0][t]
        e_zero = tf.zeros_like(wealth)

        # Share of disposable income that is saved at zero productivity
        share = n[t](normalize(wealth, e_zero))

        # Disposable income
        income = (1 + r) * wealth + tf.exp(e_zero) * w

        # Consumption is the part of income that is not saved
        plt.scatter(wealth, (1 - share) * income, s=1, color=color)

    # All scatters first, then all reference lines
    for t, color in zip(periods, colors):
        scatter_policy(t=t, color=color)

    for t in periods:
        plt.plot(xgrid, C[t, :, 10])

    plt.show()
    plt.pause(1e-9)


# Train; on every 1500th iteration (starting with the first) print the
# batch-mean objective and plot the current policies
for iteration in range(1000000):
    EV, results = training_step()
    if iteration % 1500:
        continue
    print(EV)
    plot(results)

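# %% Simulated state moments -------------------------------------------
# The policies are plotted every 1500th iteration of the training loop to
# monitor progress; the code below computes per-period moments of the
# simulated states from the last training batch.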

# Per-period mean and standard deviation of simulated wealth and
# productivity, taken from the paths returned by the last training step.
# NOTE(review): these assignments rebind μx, σx, μe and σe from scalars to
# dicts keyed by period.  normalize() reads those globals and subtracts /
# divides by them directly, so eager calls to it after this point look like
# they would fail — confirm whether that is intended.
x, e = results
μx = {t: np.mean(x[t].numpy()) for t in range(T)}
σx = {t: np.std(x[t].numpy()) for t in range(T)}

μe = {t: np.mean(e[t].numpy()) for t in range(T)}
σe = {t: np.std(e[t].numpy()) for t in range(T)}


t = 0
