In [None]:
import torch
import torch.nn as nn
import time
import matplotlib.pyplot as plt
import numpy as np
import random
import pandas as pd

class FNN(nn.Module):
    '''Fully-connected network with BatchNorm1d after every hidden Linear layer.

    Layout: for each width in ``num_hiddens`` a ``Linear -> BatchNorm1d ->
    activate_fun`` stage, followed by one plain ``Linear`` to ``output_dim``
    (no normalisation or activation on the output).

    Args:
        input_dim: number of input features.
        num_hiddens: non-empty sequence of hidden-layer widths.
        output_dim: number of output features.
        activate_fun: callable applied after each batch-norm layer.
        device: device the module is moved to at the end of construction.
    '''
    def __init__(self, input_dim: int, num_hiddens, output_dim: int, activate_fun, device=torch.device("cpu")):
        super().__init__()
        self.num_hiddens = num_hiddens
        self.activate_fun = activate_fun
        self.input_dim = input_dim
        self.output_dim = output_dim

        self.dense_layers = nn.ModuleList()
        self.batch_norms = nn.ModuleList()

        # Feature widths along the hidden stack: input_dim -> h0 -> h1 -> ...
        widths = [input_dim] + list(num_hiddens)
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            self.dense_layers.append(nn.Linear(fan_in, fan_out))
            self.batch_norms.append(nn.BatchNorm1d(fan_out))

        # Output head: it has no matching batch-norm entry.
        self.dense_layers.append(nn.Linear(num_hiddens[-1], output_dim))

        self.to(device)

    def forward(self, x):
        '''Run the hidden stages, then the final Linear layer, on ``x``.'''
        hidden = x
        # zip stops at the shorter list (batch_norms), so the output head is
        # skipped here and applied on its own below.
        for dense, norm in zip(self.dense_layers, self.batch_norms):
            hidden = self.activate_fun(norm(dense(hidden)))

        return self.dense_layers[-1](hidden)
    
# Prefer the GPU when one is visible. `device` is a torch.device on both
# branches (the CPU fallback used to be the plain string 'cpu'), so code that
# reads attributes such as `device.type` behaves the same on either machine.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

In [2]:

def sample_path(n_sample, d, n_time, h, lam, delta, device='cpu'):
    """Simulate Euler paths of a jump-diffusion started at X_0 = 1.

    Each step applies
        X_{i+1} = X_i + cos(X_i) * h + dW_i + J_i
    where dW_i is a Gaussian increment with standard deviation sqrt(h) and J_i
    is the sum of a Poisson(lam * h) number of Uniform(-delta, delta) marks,
    drawn independently for every sample and coordinate.

    All random numbers are drawn on the CPU and the results are moved to
    ``device`` at the end.

    The jump sums are built in one vectorised pass instead of a Python loop
    over every (sample, coordinate) entry. The distribution is the same, but
    the random stream is consumed in a different order, so a fixed seed yields
    different (equally valid) paths than the per-entry loop did.

    Args:
        n_sample: number of paths.
        d: state dimension.
        n_time: number of time steps.
        h: step size.
        lam: jump intensity; the per-step Poisson rate is ``lam * h``.
        delta: half-width of the uniform jump-size distribution.
        device: device the returned tensors are moved to.

    Returns:
        Tuple ``(dW_sample, X_sample, Jumps_sample)`` with shapes
        ``(n_sample, d, n_time)``, ``(n_sample, d, n_time + 1)`` and
        ``(n_sample, d, n_time)``.
    """
    sqrt_h = float(h) ** 0.5

    # Brownian increments for every step at once.
    dW_sample = torch.randn(n_sample, d, n_time) * sqrt_h

    # Number of jumps per (sample, coordinate, step).
    jump_counts = torch.poisson(torch.full((n_sample, d, n_time), float(lam * h)))
    max_jumps = int(jump_counts.max().item()) if jump_counts.numel() > 0 else 0

    if max_jumps > 0:
        # Draw max_jumps candidate marks per entry and keep only the first
        # `count` of them; masked-out marks contribute zero to the sum.
        # Memory scales with max_jumps, which stays small for modest lam * h.
        marks = torch.empty(n_sample, d, n_time, max_jumps).uniform_(-delta, delta)
        slot = torch.arange(max_jumps, dtype=jump_counts.dtype)
        active = slot < jump_counts.unsqueeze(-1)
        Jumps_sample = (marks * active).sum(dim=-1)
    else:
        Jumps_sample = torch.zeros(n_sample, d, n_time)

    # Only the state recursion is inherently sequential.
    X_sample = torch.ones(n_sample, d, n_time + 1)
    for i in range(n_time):
        x_now = X_sample[:, :, i]
        X_sample[:, :, i + 1] = x_now \
            + torch.cos(x_now) * h \
            + dW_sample[:, :, i] \
            + Jumps_sample[:, :, i]

    dW_sample = dW_sample.to(device)
    Jumps_sample = Jumps_sample.to(device)
    X_sample = X_sample.to(device)

    return dW_sample, X_sample, Jumps_sample


def f(t, X, Y, Z,U, d, delta):
    """Evaluate the driver term used in the Y updates.

    Args:
        t: scalar tensor (it is passed to ``torch.exp``).
        X: state tensor; reductions are taken over ``dim=1``.
        Y: current Y values, combined with the ``keepdim`` reductions of X.
        Z: multiplied elementwise with ``cos(X)`` and summed over ``dim=1``.
        U: accepted for interface symmetry but never read in this body.
           NOTE(review): confirm that the driver is meant to be independent of U.
        d: exponent applied to ``2 * delta`` in the last term.
        delta: jump half-width.

    Returns:
        ``-(Y - 0.5 * e^t * mean(sin X) + sum(Z * cos X) + jump_part)``.
    """
    width = 2 * delta
    exp_t = torch.exp(t)

    scaled_mean_sin = exp_t * torch.mean(torch.sin(X), dim=1, keepdim=True)
    half_mean_sin = 0.5 * scaled_mean_sin
    z_dot_cos = torch.sum(Z * torch.cos(X), dim=1, keepdim=True)

    shifted_cos_gap = torch.mean(torch.cos(X - delta) - torch.cos(X + delta), dim=1, keepdim=True)
    # For small delta and large d the width**d factor is tiny, so the second
    # summand is then numerically negligible.
    jump_part = width**2 * exp_t * shifted_cos_gap - width**2 * scaled_mean_sin * width**d

    return -(Y - half_mean_sin + z_dot_cos + jump_part)

def g(t, X):
    """Return ``exp(t) * mean(sin(X))``, averaging over ``dim=1`` with ``keepdim=True``.

    ``t`` must be a tensor because it is passed to ``torch.exp``.
    """
    mean_sin = torch.sin(X).mean(dim=1, keepdim=True)
    return torch.exp(t) * mean_sin

def test_loss(dW_sample, X_sample, time_grid, delta, h, device='cpu'):
    """Roll Y forward with a closed-form Z and plot its squared mismatch against ``g``.

    Starting from ``g(time_grid[0], X_0)``, each step subtracts ``f * h`` (with
    U passed as zeros) and adds ``sum(Z * dW)``, where
    ``Z = exp(t) * cos(X) / d``. After every step the mean squared difference
    to ``g`` at the next grid point is recorded; the series is plotted on a
    log axis and its last entry is returned.

    Args:
        dW_sample: 3-D tensor; its last two sizes give ``d`` and the step count.
        X_sample: state paths indexed as ``X_sample[:, :, step]``.
        time_grid: grid of times, indexed from 0 up to the step count.
        delta: forwarded to ``f``.
        h: step size.
        device: accepted but not used in this body.

    Returns:
        The mean squared mismatch after the final step (a Python float).
    """
    _batch, dim, n_steps = dW_sample.shape
    Y = g(time_grid[0], X_sample[:, :, 0])

    mse_per_step = []
    for k in range(n_steps):
        t_k = time_grid[k]
        x_k = X_sample[:, :, k]

        Z = torch.exp(t_k) * torch.cos(x_k) / dim
        drift = f(t_k, x_k, Y, Z, torch.zeros_like(Y), dim, delta)
        Y = Y - drift * h
        Y = Y + torch.sum(Z * dW_sample[:, :, k], dim=1, keepdim=True)

        residual = Y - g(time_grid[k + 1], X_sample[:, :, k + 1])
        mse_per_step.append(torch.mean(residual**2).item())

    plt.plot(mse_per_step)
    plt.yscale('log')
    plt.show()

    return mse_per_step[-1]

In [None]:
# --- Reproducibility: seed every RNG the notebook imports ---
seed = 2024
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)

# --- Problem setup ---
T = 1          # time horizon
d = 100        # state dimension
n_time = 10    # number of time steps
h = T/n_time   # step size

delta = 0.1             # jump size
lam = (2*delta)**2      # jump activity

n_sample = 64
time_grid = torch.linspace(0, T, n_time + 1,device=device)

# --- Generate the sample paths and evaluate g along them ---
dW_sample, X_sample, Jumps_sample = sample_path(
    n_sample, d, n_time, h, lam, delta, device=device
)
# Stack the per-step values along the last axis and bring them to the CPU
# so they can be handed to matplotlib.
Y_sample = torch.stack(
    [g(time_grid[k], X_sample[:, :, k]) for k in range(n_time + 1)],
    dim=2,
).cpu()

# --- One curve per sample path ---
plt.figure(figsize=(12, 6))
t_axis = time_grid.cpu()
for y_path in Y_sample[:, 0, :]:
    plt.plot(t_axis, y_path)

plt.xlabel('t')
plt.ylabel('u')
plt.grid(True)
plt.show()

# Sample mean of g at the first grid point, then the final mismatch from test_loss.
y0_estimate = Y_sample[:,:,0].mean().item()
print('y0',y0_estimate)
final_test_loss = test_loss(dW_sample, X_sample,time_grid, delta, h, device)
print('test_loss',final_test_loss)

In [None]:
# One network per interior time step (n_time - 1 of each kind). The Z networks
# are built first, then the U networks, then the three initial-value
# parameters, so the random initialisation is consumed in that order.
#
# NOTE(review): the Z networks use output_dim=1 while z_init below has shape
# (1, d); after the first step Z is therefore a per-sample scalar that gets
# broadcast across the d coordinates. Confirm this is intended.
hidden_Z = [d + 10] * 3
hidden_U = [d + 10] * 2

modelList_Z = [
    FNN(input_dim=d, num_hiddens=list(hidden_Z), output_dim=1,
        activate_fun=nn.ReLU(), device=device)
    for _ in range(n_time - 1)
]
modelList_U = [
    FNN(input_dim=d, num_hiddens=list(hidden_U), output_dim=1,
        activate_fun=nn.ReLU(), device=device)
    for _ in range(n_time - 1)
]

# Trainable values used at the first time step, drawn uniformly from the given ranges.
y_init = nn.Parameter(torch.empty((1, 1), device=device).uniform_(0.82, 0.83))
z_init = nn.Parameter(torch.empty((1, d), device=device).uniform_(-0.1, 0.1))
u_init = nn.Parameter(torch.empty((1, 1), device=device).uniform_(-0.2, 0.2))


def loss_fun(dW_sample, X_sample, Jumps_sample):
    """Mean squared terminal mismatch between the simulated Y and ``g``.

    Y starts from the trainable ``y_init`` and is advanced over ``n_time``
    steps by subtracting ``f * h`` and adding ``sum(Z * dW) + U``. Z and U come
    from ``z_init`` / ``u_init`` at the first step and from the per-step
    networks afterwards. The terminal difference to ``g`` is clamped to
    [-50, 50] before squaring.

    Reads module-level state: ``y_init``, ``z_init``, ``u_init``,
    ``modelList_Z``, ``modelList_U``, ``time_grid``, ``n_time``, ``h``, ``d``
    and ``delta``.

    Args:
        dW_sample: Brownian increments, indexed ``[:, :, step]``.
        X_sample: state paths, indexed ``[:, :, step]``.
        Jumps_sample: jump increments, indexed ``[:, :, step]``.

    Returns:
        Scalar loss tensor.
    """
    # Take the batch size from the data rather than a global, so the function
    # works for any number of paths passed in.
    batch_size = X_sample.shape[0]
    Y = y_init.expand(batch_size, -1)
    Z = z_init.expand(batch_size, -1)
    U = u_init.expand(batch_size, -1)

    for i in range(n_time):
        Y = Y - f(time_grid[i], X_sample[:, :, i], Y, Z,U, d, delta) * h
        Y = Y + torch.sum(Z * dW_sample[:, :, i], dim=1, keepdim=True) + U

        if i < n_time - 1:
            Z = modelList_Z[i](X_sample[:, :, i + 1])
            # NOTE(review): this U is consumed by step i + 1 but is built from
            # the jump of step i; confirm whether Jumps_sample[:, :, i + 1]
            # was intended.
            U = modelList_U[i](Z * Jumps_sample[:, :, i])

    # Index the terminal time explicitly instead of relying on the loop
    # variable still being in scope after the loop.
    term_delta = Y - g(time_grid[n_time], X_sample[:, :, n_time])
    clamp_term_delta = torch.clamp(term_delta, -50, 50)
    loss = torch.mean(clamp_term_delta**2)

    return loss

# Everything the optimizer updates: the three initial-value parameters first,
# then the weights of every Z network, then those of every U network.
params = [y_init, z_init, u_init]
params.extend(weight for net in modelList_Z for weight in net.parameters())
params.extend(weight for net in modelList_U for weight in net.parameters())

optimizer = torch.optim.Adam(params, lr=0.0005)


# Define the learning rate schedule function
def lr_lambda(step):
    """Return the learning-rate multiplier for ``step``.

    The multiplier is ``target_lr / 0.0005``, so it yields the listed target
    rates when the optimizer's base learning rate is 0.0005:

        step <  4000          -> 0.0005
        4000 <= step < 5000   -> 0.0001
        5000 <= step < 6500   -> 0.00005
        step >= 6500          -> 0.00001
    """
    base_lr = 0.0005
    # (exclusive upper bound on step, target learning rate)
    stages = (
        (4000, 0.0005),
        (5000, 0.0001),
        (6500, 0.00005),
    )
    for upper_bound, target_lr in stages:
        if step < upper_bound:
            return target_lr / base_lr
    return 0.00001 / base_lr  # final stage: 0.00001

# Create the LambdaLR scheduler
scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda)


start_time = time.time()
train_his = []

# Each iteration draws a fresh batch of paths, takes one Adam step on the
# terminal-mismatch loss, and advances the learning-rate schedule.
for i in range(7000):
    dW_sample, X_sample, Jumps_sample = sample_path(n_sample, d, n_time, h, lam, delta,
                                                    device=device)
    loss = loss_fun(dW_sample, X_sample, Jumps_sample)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    scheduler.step()

    # Log the first iteration and every 100th one.
    if (i+1) % 100 == 0 or i == 0:
        # Read each quantity once so the printed line and the stored record agree.
        elapsed = time.time() - start_time
        loss_value = loss.item()
        y0_value = torch.mean(y_init).item()
        current_lr = optimizer.param_groups[0]["lr"]  # value after scheduler.step()

        # The learning rate is printed in scientific notation: with four fixed
        # decimals the later stages (5e-5, 1e-5) were not shown correctly.
        print(f'iter:{i+1}, loss:{loss_value:.4e}, time:{elapsed:.2f}, y0:{y0_value:.4f},lr:{current_lr:.1e}')
        train_his.append({'iter':i+1, 'loss': loss_value, 'y0': y0_value, 'time': elapsed, 'lr': current_lr})
    

In [None]:
# Training loss at each logged iteration, on a log-scaled y axis.
loss_iters = [record['iter'] for record in train_his]
loss_values = [record['loss'] for record in train_his]
plt.plot(loss_iters, loss_values)
plt.yscale('log')
plt.show()

In [None]:
# Learned initial value y0 at each logged iteration.
y0_iters = [record['iter'] for record in train_his]
y0_values = [record['y0'] for record in train_his]
plt.plot(y0_iters, y0_values)
plt.show()

In [7]:
# Persist the logged training history, one row per logged iteration and no index column.
history_csv = 'd100-sum_train_his.csv'
df = pd.DataFrame(train_his)
df.to_csv(history_csv, index=False)