This file is for the assignment 5 of Monte Carlo Method at University Paris Dauphine master 2 MASEF

Author: Yu Xiang

Contact: shawnxiangyu@yahoo.com

In [155]:
# import libaries and set the setting for plot
%matplotlib inline
import numpy as np
from scipy import linalg
from scipy.stats import norm
import time
import matplotlib.pyplot as plt
import math
from scipy.stats.stats import pearsonr 
import pandas as pd


# choose a large font size by default and use tex for math
fontsize = 10
params = {'axes.labelsize': fontsize + 2,
      'font.size': fontsize + 2,
      'legend.fontsize': fontsize + 2,
      'xtick.labelsize': fontsize,
      'ytick.labelsize': fontsize}
plt.rcParams.update(params)

# Exercise 1: Payoff Regularization


In [158]:
# parameters
r = 0 
sigma = 0.25 
X0 = 100
T = 1 
a = 95
b = 105

A stochastic process $X_t$ is said to follow a GBM if it satisfies the following stochastic differential equation (SDE):

$$ dX_t = \mu X_t\,dt + \sigma X_t\,dW_t $$

For an arbitrary initial value $X_0$ the above SDE has the analytic solution (under Itô calculus|Itô's interpretation):
$$ X_t = X_0\exp\left( \left(\mu - \frac{\sigma^2}{2} \right)t + \sigma W_t\right)$$

####  Confidence interval
According to central limit theorem:  
Let $\{X_1,..., X_n\}$ be a random sample of size $n$ — that is, a sequence of independent and identically distributed (i.i.d.) random variables drawn from a distribution of expected value given by $\mu$ and finite variance given by $\sigma$. Suppose we are interested in the ample mean|sample average: 

$$S_n := \frac{X_1+\cdots+X_n}{n}$$

Then, we have: 
        
$$\sqrt{n}\left(S_n - \mu\right)\ \xrightarrow{d}\ N\left(0,\sigma^2\right)$$   

The population variance $\sigma^2$ could be estimated by the sample variance $\sigma_n^2$

The sample variance is calculated as follows: 
$$s^2 = \frac{n}{n-1} \sigma_y^2 =  \frac{n}{n-1}(\operatorname{E}\left[Y^2 \right] - \operatorname{E}[Y]^2) $$




In [159]:
# Compute the European call  via Monte Carlo Method Based on Black Scholes model
def sim_XT_WT(T=1, nr_steps=500, X0=100, r=0, sigma=0.25, nr_sims=10000): 
    
    dt = T / nr_steps  # time grid: tg[0] = 0,  tg[-1] = T, tg length: n + 1 (t0, t1, ..., tn)
    XT = np.ones((nr_sims, 1)) * X0
    W =  np.zeros((nr_sims, 1))
        
    for i in range(nr_steps): 
        dW = dt ** 0.5 * np.random.randn(nr_sims, 1)
        XT *= np.exp((r - sigma ** 2 / 2) * dt + sigma * dW)  
        W += dW
    return XT, W



def G(Xs, a=95, b=105):
    
    inbetw_id = np.logical_and(Xs >= a, Xs <= b)
    bigger_than_b = Xs > b
    Gx = (np.log(Xs) - np.log(a)) * inbetw_id + (np.log(b) - np.log(a)) * bigger_than_b
    
    return Gx

In [166]:
# 1.1
nr_sims = [10000, 50000, 100000]
nr_cases = len(nr_sims)

value_conf_interval = np.zeros((nr_cases, 5))
rug_value_conf_interval = np.zeros((nr_cases, 5))

for i in range(nr_cases): 
    nr_simi = nr_sims[i]
    Xs, W = sim_XT_WT(nr_sims=nr_simi)
    
    Gx = G(Xs)
    rug_Ep = np.mean(Gx * W / (sigma * T)) 
    
    
    inbetw_id = np.logical_and(Xs >= a, Xs <= b)
    ep = np.sum(inbetw_id) /len(inbetw_id) # expectaction of the probablity
    
    
    var_n = nr_simi /(nr_simi - 1) * (ep - ep ** 2) 
    std_n = var_n ** 0.5
    half_int = 1.96 * std_n / nr_simi ** 0.5 
    conf_interval = np.round([ep - half_int,  ep + half_int],4)
    
    value_conf_interval[i, 0] = nr_simi
    value_conf_interval[i, 1] = ep

    value_conf_interval[i, 2:4] = conf_interval
    value_conf_interval[i, 4] = half_int * 2
    
    
    rug_var = nr_simi /(nr_simi - 1) * (np.mean((Gx * W / (sigma * T)) ** 2)  - rug_Ep ** 2)
    rug_half_int = 1.96 * rug_var ** 0.5 / nr_simi ** 0.5 
    rug_conf_interval = np.round([rug_Ep - rug_half_int,  rug_Ep + rug_half_int],4)
    
    
    rug_value_conf_interval[i, 0] = nr_simi
    rug_value_conf_interval[i, 1] = rug_Ep

    rug_value_conf_interval[i, 2:4] = rug_conf_interval
    rug_value_conf_interval[i, 4] = rug_half_int * 2
 

 
NMC = value_conf_interval
PRMC = rug_value_conf_interval

In [167]:
# normal simulation result
import pandas as pd
result = pd.DataFrame(NMC,
                      columns=['Nr_sims', 'probability', 
                               '95% conf_lower_bound', '95% conf_upper_bound','interval_length'])

result

Unnamed: 0,Nr_sims,probability,95% conf_lower_bound,95% conf_upper_bound,interval_length
0,10000.0,0.1626,0.1554,0.1698,0.014466
1,50000.0,0.15682,0.1536,0.16,0.006375
2,100000.0,0.15679,0.1545,0.159,0.004507


In [168]:
# regularization result

import pandas as pd
result = pd.DataFrame(PRMC,
                      columns=['Nr_sims', 'probability', 
                               '95% conf_lower_bound', '95% conf_upper_bound','interval_length'])



result

Unnamed: 0,Nr_sims,probability,95% conf_lower_bound,95% conf_upper_bound,interval_length
0,10000.0,0.156094,0.1515,0.1607,0.009203
1,50000.0,0.156341,0.1543,0.1584,0.004095
2,100000.0,0.157888,0.1564,0.1593,0.002917


### Comment: 

As we can see, the regularization methods has smaller variance compared to the normal simulation. 
Both methods's variance decreases when the number of simulation increases, and they seems to converges to a same result. 

In [169]:
# 1.3 Compare the result
abs_p_diff = NMC[:,1] - PRMC[:,1]
abs_conf_inter_diff = NMC[:,4] - PRMC[:,4] 
times_conf_inter_len = NMC[:,4] / PRMC[:,4] 
Compare = np.hstack((NMC, PRMC[:,1:], abs_p_diff[:,np.newaxis],
                     abs_conf_inter_diff[:,np.newaxis], times_conf_inter_len[:,np.newaxis]))


# display summary and compare
import pandas as pd
Exercise_1_result = pd.DataFrame(Compare,
                      columns=['Nr_sim','N_p','N_lower', 'N_upper',
                               'N_intlen', 'PR_p','PR_lower', 'PR_upper',
                               'PR_intlen','diff_p', 'diff_intlen', 'times_intlen'])


Exercise_1_result

Unnamed: 0,Nr_sim,N_p,N_lower,N_upper,N_intlen,PR_p,PR_lower,PR_upper,PR_intlen,diff_p,diff_intlen,times_intlen
0,10000.0,0.1626,0.1554,0.1698,0.014466,0.156094,0.1515,0.1607,0.009203,0.006506,0.005263,1.571877
1,50000.0,0.15682,0.1536,0.16,0.006375,0.156341,0.1543,0.1584,0.004095,0.000479,0.00228,1.556685
2,100000.0,0.15679,0.1545,0.159,0.004507,0.157888,0.1564,0.1593,0.002917,-0.001098,0.00159,1.545204


## Comment: 
Basically, the normal Monte Carlo methods's confidence interval length is about 1.55 times that of Payoff Regularization. 

# Exercise 2: Control Variate

In [175]:
# 2.1 simple/ Normal Monte Carlo simulation

r = 0 
sigma = 0.3
X0 = 100
T = 1

Ks = [80, 150]
nr_sims = [10000,  100000, 1000000]
nr_cases = len(Ks) * len(nr_sims)
value_conf_interval = np.zeros((nr_cases, 6))

i = 0
for ki in range(len(Ks)): 
    
    K = Ks[ki]
    
    for si in range(len(nr_sims)): 
        nr_simi = nr_sims[si]
        
        XT, WT = sim_XT_WT(T=T, X0=X0, r=r, sigma=sigma, nr_sims=nr_simi)
        
        price = np.mean(np.exp(-r * T) * np.maximum(XT - K, 0))
        var_n = nr_simi / (nr_simi - 1) * (np.mean((np.exp(-r * T) \
                        * np.maximum(XT - K,0)) ** 2) - price **2) 
    
        # 95% interval corresponds to 1.96 * std_n / (sqrt(n))
        half_int = 1.96 * var_n ** 0.5 / nr_simi ** 0.5 

        conf_interval = np.round([price - half_int,  price + half_int],4)      
        value_conf_interval[i, 0] = nr_simi
        value_conf_interval[i, 1] = K
        value_conf_interval[i, 2] = price
        
        value_conf_interval[i, 3:5] = conf_interval
        value_conf_interval[i, 5] = half_int * 2
        
        i += 1


NMC = value_conf_interval

In [176]:
# display the result from simple Monte Carlo Method
import pandas as pd
result = pd.DataFrame(NMC,
                      columns=['Nr_sims', 'strike_K', 'price',
                               '95% conf_lower_bound', '95% conf_upper_bound','interval_length'])


result

Unnamed: 0,Nr_sims,strike_K,price,95% conf_lower_bound,95% conf_upper_bound,interval_length
0,10000.0,80.0,23.422864,22.8959,23.9498,1.053871
1,100000.0,80.0,23.471429,23.3051,23.6378,0.332714
2,1000000.0,80.0,23.53127,23.4787,23.5838,0.105072
3,10000.0,150.0,1.39483,1.249,1.5406,0.291618
4,100000.0,150.0,1.495566,1.4453,1.5458,0.10051
5,1000000.0,150.0,1.48241,1.4669,1.4979,0.031013


In [177]:
# 2.2 Monte Carlo simulation with Control Variate
r = 0 
sigma = 0.3
X0 = 100
T = 1

Ks = [80, 150]
nr_sims = [10000,  100000, 1000000]
nr_cases = len(Ks) * len(nr_sims)
cv_value_conf_interval = np.zeros((nr_cases, 7))

i = 0

for ki in range(len(Ks)): 
    
    K = Ks[ki]
    
    for si in range(len(nr_sims)): 
        nr_simi = nr_sims[si]
        
        half_nrsim = int(nr_simi / 2)
        
        XT, WT = sim_XT_WT(T=T, X0=X0, r=r, sigma=sigma, nr_sims=nr_simi)
        Y = np.exp(-r * T) * np.maximum(XT - K, 0)
        
        XM = XT[:half_nrsim]
        XN = XT[half_nrsim:] 
        XM_mean = np.mean(XM)
        XN_mean = np.mean(XN)
        
        
        YM = Y[:half_nrsim]
        YN = Y[half_nrsim:]
        YM_mean = np.mean(YM)
        YN_mean = np.mean(YN)
        
        
        # half the simulation is used for calculating b * star
        b_star = np.sum((XM-XM_mean) * (YM-YM_mean)) / np.sum((XM_mean) ** 2)  
        
        # remaining half is used to calculate the price and variance
        price = 1 / half_nrsim * np.sum(YN + b_star * (XN - XN_mean))
        
        
        # now we need to calculate the variance 
        VarY =  nr_simi / (nr_simi - 1) * (np.mean(YN ** 2) - YN_mean ** 2)
        rho_XY = pearsonr(XN, YN)[0][0]
        Var_YN = VarY * (1 - rho_XY ** 2)
        
        # 95% interval corresponds to 1.96 * std_n / (sqrt(n))
        half_int = 1.96 * Var_YN ** 0.5 / nr_simi ** 0.5 

        conf_interval = np.round([price - half_int,  price + half_int],4)      
        cv_value_conf_interval[i, 0] = nr_simi
        cv_value_conf_interval[i, 1] = K
        cv_value_conf_interval[i, 2] = price
        
        
        cv_value_conf_interval[i, 3:5] = conf_interval
        cv_value_conf_interval[i, 5] = half_int * 2
        
        cv_value_conf_interval[i, 6] = rho_XY
        
        i += 1


CVMC = cv_value_conf_interval

In [178]:
# display the result from simple Monte Carlo Method
import pandas as pd
result = pd.DataFrame(CVMC,
                      columns=['Nr_sims', 'strike_K', 'price',
                               '95% conf_lower_bound', '95% conf_upper_bound','interval_length', 'rho_xy'])


result

Unnamed: 0,Nr_sims,strike_K,price,95% conf_lower_bound,95% conf_upper_bound,interval_length,rho_xy
0,10000.0,80.0,23.29537,23.1756,23.4151,0.239464,0.973276
1,100000.0,80.0,23.424591,23.3874,23.4618,0.074327,0.974582
2,1000000.0,80.0,23.505126,23.4933,23.5169,0.023571,0.97457
3,10000.0,150.0,1.454525,1.3314,1.5777,0.246279,0.568048
4,100000.0,150.0,1.512922,1.4721,1.5538,0.081737,0.575315
5,1000000.0,150.0,1.49596,1.4832,1.5088,0.025614,0.573287


In [180]:
# 2.3 Compare the result
abs_p_diff = NMC[:,2] - CVMC[:,2]
abs_conf_inter_diff = NMC[:,5] - CVMC[:,5] 
times_conf_inter_len = NMC[:,5] / CVMC[:,5] 
Compare = np.hstack((NMC, CVMC[:,2:], abs_p_diff[:,np.newaxis],
                     abs_conf_inter_diff[:,np.newaxis], times_conf_inter_len[:,np.newaxis]))


# display summary and compare
import pandas as pd
Exercise_2_result = pd.DataFrame(Compare,
                      columns=['Nr_sim','K', 'N_p','N_lower', 'N_upper',
                               'N_intlen', 'CV_p','CV_lower', 'CV_upper',
                               'CV_intlen','CV_rho', 'diff_p', 'diff_intlen', 'times_intlen'])


Exercise_2_result

Unnamed: 0,Nr_sim,K,N_p,N_lower,N_upper,N_intlen,CV_p,CV_lower,CV_upper,CV_intlen,CV_rho,diff_p,diff_intlen,times_intlen
0,10000.0,80.0,23.422864,22.8959,23.9498,1.053871,23.29537,23.1756,23.4151,0.239464,0.973276,0.127494,0.814408,4.400965
1,100000.0,80.0,23.471429,23.3051,23.6378,0.332714,23.424591,23.3874,23.4618,0.074327,0.974582,0.046838,0.258388,4.476382
2,1000000.0,80.0,23.53127,23.4787,23.5838,0.105072,23.505126,23.4933,23.5169,0.023571,0.97457,0.026144,0.081501,4.457771
3,10000.0,150.0,1.39483,1.249,1.5406,0.291618,1.454525,1.3314,1.5777,0.246279,0.568048,-0.059696,0.045339,1.184096
4,100000.0,150.0,1.495566,1.4453,1.5458,0.10051,1.512922,1.4721,1.5538,0.081737,0.575315,-0.017356,0.018773,1.229679
5,1000000.0,150.0,1.48241,1.4669,1.4979,0.031013,1.49596,1.4832,1.5088,0.025614,0.573287,-0.01355,0.005399,1.210776


## Comment: 
as we could see, when the correlation is big (CV_rho is near 1), the effect of variance reduction is obvious. The normal simulation almost have around 4.4 times confidence interval length compared to that of CVMC. However, when the correlation is small, the effect of variance reduction also decreases. 

# Exercise 3: Importance Sampling

In [181]:
# 3.1 Normal simulation
Ks = [2, 3.5, 5]
nr_sims = [1000000]

nr_cases = len(Ks) * len(nr_sims)

value_conf_interval = np.zeros((nr_cases, 6))

i = 0
for ki in range(len(Ks)): 
    
    K = Ks[ki]
    
    for si in range(len(nr_sims)): 
        
        nr_simi = nr_sims[si]
        X = np.random.randn(nr_simi, 1)
        
        biggerK_id = X > K
        
        p = np.sum(biggerK_id) / nr_simi
        
        var_n = nr_simi /(nr_simi - 1) * (p - p ** 2) 
        
        half_int = 1.96 * var_n ** 0.5 / nr_simi ** 0.5 
        conf_interval = [p - half_int,  p + half_int]
  
        value_conf_interval[i, 0] = nr_simi
        value_conf_interval[i, 1] = K
        value_conf_interval[i, 2] = p
        
        value_conf_interval[i, 3:5] = conf_interval
        value_conf_interval[i, 5] = half_int * 2
        
        i += 1
        
        
NMC = value_conf_interval        

In [182]:
# display normal Monte Carlo simulation result
result = pd.DataFrame(NMC,
                      columns=['Nr_sims', 'K', 'probability',
                               '95% conf_lower_bound', '95% conf_upper_bound','interval_length'])


result

Unnamed: 0,Nr_sims,K,probability,95% conf_lower_bound,95% conf_upper_bound,interval_length
0,1000000.0,2.0,0.022916,0.022623,0.023209,0.000587
1,1000000.0,3.5,0.000231,0.000201,0.000261,6e-05
2,1000000.0,5.0,0.0,0.0,0.0,0.0


In [183]:
# 3.2 Importance Sampling of simulation
Ks = [2, 3.5, 5]
nr_sims = [1000000]

nr_cases = len(Ks) * len(nr_sims)

value_conf_interval = np.zeros((nr_cases, 6))

i = 0
for ki in range(len(Ks)): 
    
    K = Ks[ki]
    
    for si in range(len(nr_sims)): 
        
        nr_simi = nr_sims[si]
        X = np.random.randn(nr_simi, 1)
        
        h = 1.1 * K
        biggerK_id = (X + h) > K
        
        XH = np.exp(- h * X - h ** 2 / 2) * biggerK_id
        p = np.mean(XH)
        
        
        var_n = nr_simi /(nr_simi - 1) * (np.mean(XH ** 2) - p ** 2) 
        
        half_int = 1.96 * var_n ** 0.5 / nr_simi ** 0.5 
        conf_interval = [p - half_int,  p + half_int]
  
        value_conf_interval[i, 0] = nr_simi
        value_conf_interval[i, 1] = K
        value_conf_interval[i, 2] = p
        
        value_conf_interval[i, 3:5] = conf_interval
        value_conf_interval[i, 5] = half_int * 2
        
        i += 1
        
ISMC = value_conf_interval      

In [184]:
# display Importance sampling Monte Carlo
import pandas as pd
result = pd.DataFrame(ISMC,
                      columns=['Nr_sims', 'K', 'probability',
                               '95% conf_lower_bound', '95% conf_upper_bound','interval_length'])


result

Unnamed: 0,Nr_sims,K,probability,95% conf_lower_bound,95% conf_upper_bound,interval_length
0,1000000.0,2.0,0.02270866,0.02264168,0.02277564,0.0001339619
1,1000000.0,3.5,0.0002324889,0.0002315765,0.0002334012,1.824651e-06
2,1000000.0,5.0,2.879761e-07,2.865716e-07,2.893807e-07,2.809124e-09


In [185]:
# Compare the result
abs_p_diff = NMC[:,2] - ISMC[:,2]
abs_conf_inter_diff = NMC[:,5] - ISMC[:,5] 
times_conf_inter_len = NMC[:,5] / ISMC[:,5] 
Compare = np.hstack((NMC, ISMC[:,2:], abs_p_diff[:,np.newaxis],
                     abs_conf_inter_diff[:,np.newaxis], times_conf_inter_len[:,np.newaxis]))


# display summary and compare
import pandas as pd
Exercise_3_result = pd.DataFrame(Compare,
                      columns=['Nr_sim', 'K', 'N_p','N_lower', 'N_upper',
                               'N_intlen', 'IS_p','IS_lower', 'IS_upper',
                               'IS_intlen','abs_diff_p', 'abs_diff_intlen', 'times_intlen'])


Exercise_3_result

Unnamed: 0,Nr_sim,K,N_p,N_lower,N_upper,N_intlen,IS_p,IS_lower,IS_upper,IS_intlen,abs_diff_p,abs_diff_intlen,times_intlen
0,1000000.0,2.0,0.022916,0.022623,0.023209,0.000587,0.02270866,0.02264168,0.02277564,0.0001339619,0.0002073369,0.0004526105,4.378652
1,1000000.0,3.5,0.000231,0.000201,0.000261,6e-05,0.0002324889,0.0002315765,0.0002334012,1.824651e-06,-1.488857e-06,5.774734e-05,32.648437
2,1000000.0,5.0,0.0,0.0,0.0,0.0,2.879761e-07,2.865716e-07,2.893807e-07,2.809124e-09,-2.879761e-07,-2.809124e-09,0.0


## Comment: 

The effect of variance reduction is obvious. Here, we choose h = K. For K = 2 and 3.5, the confidence interval length is narrowed by around 4.4 and 32.6 times. For K = 5, the difference is negligiable due to the numerical precision, i.e. when K = 5, a variable following standard normal distrbution could hardly be over K. 