In [1]:
%matplotlib inline
import pandas as pd
import qrbook_funcs as qf
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from tabulate import tabulate
import scipy.stats as spst

#### Q1 Prove that the vector x_sol=(1, 1/2, −1) is the optimal solution to the problem

In [42]:
C=np.array([[0.5194, 0.2494, 0.2207], [0.2494, 0.3568, 0.1166], [0.2207, 0.1166, 0.4155]])
m=np.array([1.4234, 0.3112, -1.1365]).reshape(3,1)
Xsol=np.array([1,0.5,-1]).reshape(3,1)

In [46]:
# Compute the Gradient of f(x), which is used in my handwriting answer sheet
gradient_f=np.matmul(C,Xsol)-m
gradient_f

array([[-1.00000000e+00],
       [ 5.55111512e-17],
       [ 1.00000000e+00]])

#### Q3 Compute the Box M test statistic M(1,2) given in (4.52), where S1 is the example 3x3 currency covariance matrix we’ve been using (Swissie, pound, yen up to the end of last year), and S2 is the 3x3 sample currency covariance matrix arising from the Monte Carlo simulation in Chapter 7 (which can be obtained from np.cov(r_trial.T) after the code following (7.12)). Apply Levene’s test (section 4.2.2.2) to the three variances. Can we accept the hypothesis that they are the same? Apply the Box M test as described in section 4.2.2.3 to the two covariance matrices. Can we accept the hypothesis that the two matrices are the same?

In [4]:
#Get 3 currencies until the end of previous year. 
lastday=qf.LastYearEnd()
#Swiss franc, pound sterling, Japanese Yen
seriesnames=['DEXSZUS','DEXUSUK','DEXJPUS']
cdates,ratematrix=qf.GetFREDMatrix(seriesnames,enddate=lastday)

#Convert levels to log-returns
#First take logs of the currency levels
#Currency exchange rates are usually expressed in the direction
#that will make the rate > 1
#Swissie and yen are in currency/dollar, but
#pounds is in dollar/currency. Reverse signs
#so everything is in dollar/currency

#Do each currency separately to account for separate missing data patterns
#dlgs is a list of lists of length 3 corresponding to the 3 currencies
#The value in dlgs is nan if there is missing data for the present or previous day's observation
#Otherwise it is the log of today/yesterday
multipliers=[-1,1,-1]
dlgs=[]
for i in range(len(multipliers)):
    lgrates=[]
    previous=-1
    for t in range(len(ratematrix)):
        if pd.isna(ratematrix[t][i]) or ratematrix[t][i]<=0:
            lgrates.append(np.nan)    #Append a nan
        else:
            if previous < 0:    #This is the first data point
                lgrates.append(np.nan)
            else:
                lgrates.append(np.log(ratematrix[t][i]/previous)*multipliers[i])
            previous=ratematrix[t][i]
    dlgs.append(lgrates)

#dlgs is the transpose of what we want - flip it
dlgs=np.transpose(dlgs)

#Delete any time periods that don't have data
lgdates=[]
difflgs=[]
for t in range(len(dlgs)):
    if all(pd.notna(dlgs[t])):
        #include this time period
        difflgs.append(dlgs[t])
        lgdates.append(cdates[t])

#Mean vector and covariance matrix are inputs to efficient frontier calculations
d=np.array(difflgs)
m=np.mean(d,axis=0)
c=np.cov(d.T)

In [18]:
#Generate random draws; use fixed seed to be replicable
count=12036 

#Show the Cholesky decomposition of the CHF-GPB-JPY covariance matrix
chol=np.linalg.cholesky(c)

seed=np.random.seed(12345678)
s_trial=np.random.normal(0,1,size=[int(count),3])
logr_trial=np.matmul(chol,s_trial.T).T+m

#logr_trial has Monte Carlo log-returns; transform to returns
r_trial=np.exp(logr_trial)-1

#CHF-GBP-JPY 
w=np.array([1/3]*3).T

#Get trial portfolio returns
r_ptrial=np.matmul(r_trial,w)
statnames,mettrial,tabtrial=qf.StatsTable(r_ptrial)
headers=['Statistic','Value']
print(tabulate(tabtrial, headers, tablefmt='fancy_grid'))

╒════════════════════╤═══════════════╕
│ Statistic          │         Value │
╞════════════════════╪═══════════════╡
│ Count              │ 12036         │
├────────────────────┼───────────────┤
│ Min                │    -0.0203071 │
├────────────────────┼───────────────┤
│ Max                │     0.0214615 │
├────────────────────┼───────────────┤
│ Mean               │     6.65e-05  │
├────────────────────┼───────────────┤
│ Median             │     8.36e-05  │
├────────────────────┼───────────────┤
│ Standard Deviation │     0.0052779 │
├────────────────────┼───────────────┤
│ Skewness           │     0.0010422 │
├────────────────────┼───────────────┤
│ Excess Kurtosis    │     0.0373164 │
├────────────────────┼───────────────┤
│ Jarque-Bera        │     0.700526  │
├────────────────────┼───────────────┤
│ Chi-Squared p      │     0.704503  │
├────────────────────┼───────────────┤
│ Serial Correlation │     0.0013274 │
├────────────────────┼───────────────┤
│ 99% VaR            │   

In [19]:
# first column
r_trial[:,0]

array([ 0.00411198, -0.01075888,  0.00358579, ..., -0.00592292,
       -0.00307315,  0.00125228])

In [23]:
d.shape

(12036, 3)

In [39]:
# S1 is the covariance matrix using the data up to last year
S1=c*10000
# S2 is from the simulation above
S2=np.cov(r_trial.T) * 10000

# T1 is the sample size
T1=len(lgdates)
# T2 is the sample size
T2=count

In [40]:
S1, S2

(array([[0.51941274, 0.24943128, 0.22066643],
        [0.24943128, 0.35676267, 0.11657374],
        [0.22066643, 0.11657374, 0.41545275]]),
 array([[0.52151159, 0.25497548, 0.22517401],
        [0.25497548, 0.36411532, 0.12335733],
        [0.22517401, 0.12335733, 0.41460933]]))

### (1) Below is the Levene's test

In [27]:
# Apply Levene's Test to three-currency example with previous years compared to latest year
#Note the results shown are the same as
#scipy.stats.levene(d[:prev_year_n,k],d[prev_year_n:,k],center='mean')

# threshold is 1%
thresh=.01
one_minus_thresh=(1-thresh)*100

def levene(T1,T2,x1,x2):

    m1=np.average(x1)
    m2=np.average(x2)
    z1j=[np.abs(x1[j]-m1) for j in range(T1)]
    z2j=[np.abs(x2[j]-m2) for j in range(T2)]
    z1=np.average(z1j)
    z2=np.average(z2j)

    levene_mult=(T1+T2-2)*T1*T2/(T1+T2)

    levene_denom=np.sum((z1j-z1)**2)+np.sum((z2j-z2)**2)
    levene_stat=levene_mult*(z1-z2)**2/levene_denom

    p_value = 1 - spst.f.cdf(levene_stat, 1, T1+T2-2)

    return(levene_stat,p_value)

for i in range(3):
    lstat, p_value = levene(T1,T2,d[:,i],r_trial[:,i])
    print("Levene statistic for "+seriesnames[i]+": ",lstat,", p-value: ",p_value)
    if p_value < thresh:
        str_p="Reject null hypothesis of equal variances at %2.f" % one_minus_thresh
    else:
        str_p="Cannot reject null hypothesis of equal variances at %2.f" % one_minus_thresh
    str_p+="% significance"
    print(str_p)

Levene statistic for DEXSZUS:  127.20267337053697 , p-value:  1.1102230246251565e-16
Reject null hypothesis of equal variances at 99% significance
Levene statistic for DEXUSUK:  166.24256039528214 , p-value:  1.1102230246251565e-16
Reject null hypothesis of equal variances at 99% significance
Levene statistic for DEXJPUS:  177.52316917940098 , p-value:  1.1102230246251565e-16
Reject null hypothesis of equal variances at 99% significance


### (2) Below is the Box test

In [44]:
def BoxM(T1,T2,s1,s2):
    #Tests for equality of two covariance matrices, s1 and s2
    #T1 and T2 are numbers of observations for s1 and s2
    #Returns M statistic and p-value

    #Make sure dimension is common
    if len(s1)!=len(s2):
        print("Error: different dimensions in Box M Test:",len(s1),len(s2))
        return(0,0)
    
    #Matrices are pxp
    p=len(s1)

    #Form the combined matrix
    scomb=(T1*s1+T2*s2)/(T1+T2)

    #Box M statistic
    Mstat=(T1+T2-2)*np.log(np.linalg.det(scomb))-(T1-1)*np.log(np.linalg.det(s1))-(T2-1)*np.log(np.linalg.det(s2))

    #Multipliers from equation (49) in Box 1949.
    A1=(2*p**2+3*p-1)/(6*(p+1))
    A1*=(1/(T1-1)+1/(T2-1)-1/(T1+T2-2))

    A2=(p-1)*(p+2)/6
    A2*=(1/(T1-1)**2+1/(T2-1)**2-1/(T1+T2-2)**2)

    discrim=A2-A1**2

    #Degrees of freedom
    df1=p*(p+1)/2

    if discrim <= 0:
        #Use chi-square (Box 1949 top p. 329)
        test_value=Mstat*(1-A1)
        p_value=1-spst.chi2.cdf(test_value,df1)
    else:
        #Use F Test (Box 1949 equation (68))
        df2=(df1+2)/discrim
        b=df1/(1-A1-(df1/df2))
        test_value=Mstat/b
        p_value=1-spst.f.cdf(test_value,df1,df2)
    
    return(test_value,p_value)

In [50]:
#Apply to sample variances
stat, p_value = BoxM(T1,T2,S1,S2)
print("Box M-stat and p-value is:",stat,p_value)


if p_value < thresh:
    str_p="Reject null hypothesis of equal variances at %2.f" % one_minus_thresh
else:
    str_p="Cannot reject null hypothesis of equal variances at %2.f" % one_minus_thresh
str_p+="% significance"
print(str_p)

Box M-stat and p-value is: 0.48531478642713505 0.8198217436021252
Cannot reject null hypothesis of equal variances at 99% significance
