In [None]:
import tensorflow as tf

# Model and simulation parameters, read as module-level globals below.
Spot = 36   # initial stock price
σ = 0.2     # stock volatility
K = 40      # strike price
r = 0.06    # risk-free rate
n = 20000  # number of simulated paths
m = 50      # number of exercise dates
T = 1       # maturity
order = 6   # polynomial order (passed to train_step, which does not read it)
Δt = T / m  # interval between two exercise dates


def create_polynomials():
    """Build the regression model used for a single exercise date.

    Returns:
        A ``tf.keras.Sequential`` model made of one ``Dense`` layer with a
        single output unit.

    NOTE: despite the name, no polynomial feature expansion is built here;
    the model only contains the one ``Dense(1)`` layer.
    """
    return tf.keras.Sequential([tf.keras.layers.Dense(1)])


# One independent model per exercise date t = 1 .. m-1; train_step looks the
# model up by t while stepping backwards through those dates.
networks = {t: create_polynomials() for t in range(1, m)}


# simulates the stock price evolution
def advance(S, r, σ, Δt, n):
    """Move the simulated prices forward by one time step of length Δt.

    Applies the update ``S + r*S*Δt + σ*S*dB`` where ``dB`` is a standard
    normal sample (same shape as ``S``) scaled by ``sqrt(Δt)``.

    Args:
        S: tensor of current prices.
        r: drift coefficient (risk-free rate).
        σ: volatility coefficient.
        Δt: length of the time step.
        n: not used by this function; kept so existing callers still work.

    Returns:
        Tensor of prices after the step, same shape as ``S``.
    """
    step_scale = tf.sqrt(Δt)
    brownian_increment = step_scale * tf.random.normal(shape=tf.shape(S))
    drift_term = r * S * Δt
    diffusion_term = σ * S * brownian_increment
    return S + drift_term + diffusion_term


# A single plain-SGD optimizer instance; train_step calls
# optimizer.apply_gradients with the weights of each per-date network in turn.
# NOTE(review): some Keras versions bind an optimizer to the first variable
# set it sees — confirm that sharing one instance is supported by the
# installed version.
optimizer = tf.keras.optimizers.SGD(learning_rate=0.001)
# Alternatives that were tried:
#optimizer = tf.keras.optimizers.RMSprop(0.01)
#optimizer = tf.keras.optimizers.Adam(0.01)


# LSMC algorithm
#@tf.function
def train_step(order, Spot, σ, K, r):
    """Run one least-squares Monte Carlo training pass and price the put.

    The function
      1. simulates ``n`` price paths over ``m`` exercise dates with
         ``advance``,
      2. walks backwards from date ``m - 1`` to date ``1``, taking one
         gradient step on ``networks[t]`` so that it regresses the discounted
         value of the next date on the current price, and
      3. prices the option from the first date on each path at which
         immediate exercise is in the money and beats the predicted
         continuation value.

    Reads the module-level globals ``n``, ``m``, ``Δt``, ``networks`` and
    ``optimizer``, and updates the weights of every model in ``networks``.

    Args:
        order: not used by this function; kept so existing callers still work.
        Spot: initial stock price.
        σ: volatility passed to ``advance``.
        K: strike price of the put.
        r: risk-free rate used for the drift and for discounting.

    Returns:
        Scalar tensor: the average over paths of the exercised payoff,
        discounted back to time 0.
    """
    # Re-seed on every call so that each pass starts from the same seed.
    tf.random.set_seed(0)

    # Simulate the price paths: S[t] holds the prices at date t, shape (n, 1).
    S = {0: Spot * tf.ones((n, 1))}
    for t in range(m):
        S[t + 1] = advance(S[t], r, σ, Δt, n)

    discount = tf.exp(-r * Δt)
    # Immediate exercise value of the put at every date.
    CFL = {t: tf.maximum(0., K - S[t]) for t in range(m + 1)}

    # Backward induction: value_tp1 is the path value at t + 1 discounted
    # back to t, i.e. the regression target for the continuation value.
    value_tp1 = CFL[m] * discount
    CV = {m: tf.zeros_like(S[0])}  # nothing to continue into at maturity

    for t in range(m - 1, 0, -1):
        network = networks[t]
        features = S[t]
        target = value_tp1

        with tf.GradientTape() as tape:
            prediction = network(features)
            mse_loss = 0.5 * tf.reduce_mean((prediction - target) ** 2)
        Θ = network.trainable_variables
        grads = tape.gradient(mse_loss, Θ)
        optimizer.apply_gradients(zip(grads, Θ))

        # Continuation value predicted by the network after the update.
        CV[t] = network(features)
        # Exercise only when the option is in the money AND the payoff beats
        # continuation. Without the in-the-money test, a negative predicted
        # continuation value would "exercise" for a zero payoff and wipe out
        # the path's future cash flow, which also disagrees with the pricing
        # rule below that only counts strictly positive payoffs.
        exercise = tf.logical_and(CFL[t] > 0., CFL[t] > CV[t])
        value_t = tf.where(exercise, CFL[t], value_tp1)
        value_tp1 = discount * value_t

    # Payoff matrix of shape (m, n): row t - 1 holds the payoff received if
    # the option is exercised at date t, and zero where it is not exercised.
    POF = tf.reshape(
        tf.stack(
            [
                tf.where(CV[t] < CFL[t], CFL[t], tf.zeros_like(CFL[t]))
                for t in range(1, m + 1)
            ],
            axis=0,
        ),
        (m, n),
    )

    # Keep only the first exercise on each path. The running count of
    # exercise flags equals 1 exactly from the first flagged date onwards,
    # which avoids relying on how argmax resolves ties.
    exercised = POF > 0
    exercise_count = tf.cumsum(tf.cast(exercised, tf.int32), axis=0)
    first_exercise = tf.logical_and(exercised, tf.equal(exercise_count, 1))
    FPOF = tf.where(first_exercise, POF, tf.zeros_like(POF))

    # Row i corresponds to exercise date i + 1, so a payoff in row i is
    # discounted over (i + 1) * Δt to bring it back to time 0.
    exercise_dates = tf.reshape(tf.cast(tf.range(1, m + 1), tf.float32), (m, 1))
    dFPOF = FPOF * tf.math.exp(-r * exercise_dates * Δt)

    PRICE = tf.reduce_sum(dFPOF) / n
    return PRICE

#print(train_step(order, Spot, σ, K, r))

#for iteration in range(1000000):
# Run 1000 training passes and report the price estimate on every 100th one.
for iteration in range(1000):
    PRICE = train_step(order, Spot, σ, K, r)
    if iteration % 100:
        continue
    print(PRICE)

tf.Tensor(3.964233, shape=(), dtype=float32)
tf.Tensor(4.27945, shape=(), dtype=float32)
tf.Tensor(4.2792115, shape=(), dtype=float32)
tf.Tensor(4.279047, shape=(), dtype=float32)
tf.Tensor(4.279131, shape=(), dtype=float32)
tf.Tensor(4.279191, shape=(), dtype=float32)
tf.Tensor(4.2793336, shape=(), dtype=float32)
tf.Tensor(4.2795577, shape=(), dtype=float32)
tf.Tensor(4.27978, shape=(), dtype=float32)
tf.Tensor(4.279594, shape=(), dtype=float32)
