<a href="https://colab.research.google.com/github/wqiu96/summer_project/blob/master/DeepBSDE_pytorch/solver_pytorch_v03.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!git clone https://github.com/wqiu96/summer_project.git

Cloning into 'summer_project'...
remote: Enumerating objects: 157, done.[K
remote: Counting objects: 100% (157/157), done.[K
remote: Compressing objects: 100% (157/157), done.[K
remote: Total 625 (delta 66), reused 0 (delta 0), pack-reused 468[K
Receiving objects: 100% (625/625), 2.53 MiB | 18.94 MiB/s, done.
Resolving deltas: 100% (314/314), done.


In [2]:
cd summer_project/DeepBSDE_pytorch/

/content/summer_project/DeepBSDE_pytorch


In [0]:
import logging
import time
import numpy as np
import torch
from torch import nn
from torch.nn import functional as F
from torch import optim
from torch.autograd import Variable
from torch.distributions import uniform
import torchvision
from equation_pytorch import get_equation
from config_pytorch import get_config


# Momentum coefficient handed to optim.SGD in the training cell.
MOMENTUM = 0.99
# Not referenced anywhere in the visible part of this notebook.
EPSILON = 1e-6
# Threshold on |delta|: the training loss is quadratic below it and linear above it.
DELTA_CLIP = 50.0


class Net(nn.Module):
    """Small feed-forward sub-network: two ReLU + LayerNorm hidden layers, then a linear output.

    Args:
        num_hiddens: sequence of four layer widths, read as
            ``[input, hidden1, hidden2, output]``.
    """

    def __init__(self,num_hiddens):
        super(Net, self).__init__()
        self.num_hiddens = num_hiddens

        # All linear layers are bias-free; each LayerNorm supplies its own affine shift.
        self.fc1 = nn.Linear(num_hiddens[0], num_hiddens[1], bias=False)
        self.norm1 = nn.LayerNorm(num_hiddens[1])
        self.fc2 = nn.Linear(num_hiddens[1], num_hiddens[2], bias=False)
        self.norm2 = nn.LayerNorm(num_hiddens[2])
        self.fc3 = nn.Linear(num_hiddens[2], num_hiddens[3], bias=False)

    def forward(self, x):
        """Map ``x`` (last dimension ``num_hiddens[0]``) to an output of width ``num_hiddens[3]``."""
        # h1 = LayerNorm(relu(x w1))
        x = self.norm1(F.relu(self.fc1(x)))
        # h2 = LayerNorm(relu(h1 w2)); this must use norm2, whose shape matches fc2's
        # output. Reusing norm1 here left norm2 untrained and failed outright
        # whenever the two hidden widths differed.
        x = self.norm2(F.relu(self.fc2(x)))
        # output = h2 w3 (no activation, no normalisation)
        x = self.fc3(x)
        return x
      

class DeepNet(nn.Module):
    """Time-stepping network that rolls ``y`` forward from a learnable initial value.

    One ``Net`` sub-network is created per interior time step; each produces the
    ``z`` used in the following step of the recursion.

    Args:
        num_hiddens: layer widths forwarded to every ``Net``.
        config: object exposing ``y_init_range`` (lower and upper bound for the
            initial guess of ``y``).
        bsde: equation object exposing ``dim``, ``num_time_interval``, ``delta_t``,
            ``sample()`` and ``f_tf(t, x, y, z)``.
    """

    def __init__(self,num_hiddens,config,bsde):
        super(DeepNet, self).__init__()
        self.num_hiddens = num_hiddens
        self._config = config
        self._bsde = bsde
        # make sure consistent with FBSDE equation
        self._dim = bsde.dim
        self._num_time_interval = bsde.num_time_interval
        # one sub-network per interior time step
        self.linears = nn.ModuleList([Net(num_hiddens) for i in range(bsde.num_time_interval - 1)])
        # learnable initial value of y, drawn uniformly from config.y_init_range
        m = uniform.Uniform(config.y_init_range[0],config.y_init_range[1])
        self.y_init = torch.nn.Parameter(m.sample())

    def forward(self,x):
        """Propagate ``y`` through all time steps and return its terminal value.

        Args:
            x: tensor indexed as ``x[:, t]``; each column is fed to a ``Net``.
                # assumes shape (dim, num_time_interval + 1) - TODO confirm against bsde.sample()

        Returns:
            Tensor of shape ``(1, 1)`` (the result of adding a ``(1, dim) @ (dim, 1)``
            product to the running value).
        """
        # A fresh set of increments is drawn on every call, independent of ``x``.
        dw_train = self._bsde.sample()[0].astype(np.float32)
        time_stamp = np.arange(0, self._num_time_interval) * self._bsde.delta_t
        # Initial z is re-drawn uniformly from [-1, 1) on each call and is not a Parameter.
        # NOTE(review): confirm this is intended rather than a trainable initial z.
        z = 2*torch.rand([self._dim,1],dtype=torch.float32) - 1
        y_ = self.y_init
        # Use the instance's own step count instead of the notebook-global ``bsde``.
        for t in range(self._num_time_interval - 1):
            dw = torch.from_numpy(dw_train[:, t]).view(1,self._dim)
            y_ = y_ - self._bsde.delta_t* (self._bsde.f_tf(time_stamp[t], x[:, t], y_, z)) + torch.mm(dw, z)
            # NOTE(review): z for the next step is computed from x[:, t]; check whether
            # x[:, t + 1] was intended.
            z = (self.linears[t](x[:,t]) / self._dim).view(self._dim,1)
        # terminal step: uses the last increment and the second-to-last column of x
        dw = torch.from_numpy(dw_train[:, -1]).view(1,self._dim)
        y_ = y_ - self._bsde.delta_t * (self._bsde.f_tf(time_stamp[-1], x[:, -2], y_, z)) + torch.mm(dw, z)
        return y_


Result:
- AllenCahn: sometimes my code gives a result similar to the original code's; sometimes the result is NaN (because the loss is too small to learn from?).
- HJB: same as the original
- PricingOption: same as or better than the original (the original code has no early-stopping break)
- PricingDefaultRisk: same as or better than the original (the original code has no early-stopping break)
- BurgesType: same as or better than the original (the original code has no early-stopping break)
- QuadraticGradients: same as or better than the original (the original code has no early-stopping break)
- ReactionDiffusion: same as or better than the original (maybe, because the loss is too late)

In [24]:
# Set up the HJB problem and the network that approximates its solution.
config = get_config('HJB')
bsde = get_equation('HJB', config.dim, config.total_time, config.num_time_interval)
deepNet = DeepNet(config.num_hiddens, config, bsde)

# Plain SGD with momentum; the learning rate differs from the original implementation.
optimizer = optim.SGD(deepNet.parameters(), lr=0.0000001, momentum=MOMENTUM)
# Disabled learning-rate schedule, kept for reference:
#torch.optim.lr_scheduler.MultiStepLR(optimizer, [15,25,35], gamma=0.1, last_epoch=-1)

train_loss = []
for epoch in range(10000):
    # Draw a fresh sample for this step and run the network on it.
    x_ = bsde.sample()[1].astype(np.float32)
    x = torch.from_numpy(x_)
    out = deepNet(x)

    # Mismatch between the network output and the terminal condition.
    delta = out - bsde.g_tf(bsde.total_time, x[:, -1])
    abs_delta = torch.abs(delta)
    # Quadratic penalty inside the clip range, linear outside it.
    loss = torch.where(
        abs_delta < DELTA_CLIP,
        torch.pow(delta, 2),
        2 * DELTA_CLIP * abs_delta - DELTA_CLIP ** 2,
    ).mean()

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    train_loss.append(loss.item())

    # Progress report every 100 epochs.
    if epoch % 100 == 0:
        print(epoch, loss.item(), deepNet.y_init,out)
    # Stop early once the scaled loss falls to the tolerance.
    if 1000*loss <= 0.0001:
        print(epoch, loss.item(), deepNet.y_init,out)
        break
    

0 0.4980943202972412 Parameter containing:
tensor(0.7549, requires_grad=True) tensor([[1.0443]], grad_fn=<AddBackward0>)
100 0.026739289984107018 Parameter containing:
tensor(0.7547, requires_grad=True) tensor([[0.5727]], grad_fn=<AddBackward0>)
200 0.37423697113990784 Parameter containing:
tensor(0.7547, requires_grad=True) tensor([[1.5609]], grad_fn=<AddBackward0>)
300 1.0942531824111938 Parameter containing:
tensor(0.7549, requires_grad=True) tensor([[1.8347]], grad_fn=<AddBackward0>)
400 0.10216192156076431 Parameter containing:
tensor(0.7552, requires_grad=True) tensor([[-0.0401]], grad_fn=<AddBackward0>)
500 0.8812061548233032 Parameter containing:
tensor(0.7555, requires_grad=True) tensor([[1.3443]], grad_fn=<AddBackward0>)
600 5.90281343460083 Parameter containing:
tensor(0.7555, requires_grad=True) tensor([[-0.1688]], grad_fn=<AddBackward0>)
700 0.678302526473999 Parameter containing:
tensor(0.7558, requires_grad=True) tensor([[0.9748]], grad_fn=<AddBackward0>)
800 0.681772172