In [1]:
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from scipy.optimize import minimize
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook as tqdm
%matplotlib inline

__Vasicek Model:__ $dr_t = \alpha(b-r_t)dt + \sigma dW_t$   
Точное решение: $r_t \sim N\left(r_0 e^{-\alpha t} + b (1-e^{-\alpha t}),\,\tfrac{\sigma^2}{2\alpha}\left(1-e^{-2\alpha t}\right)\right)
$

In [2]:
def minimizeNLogLikelihood(r):
    dt = 1 
    rt = r    
    N = r.shape[0]
    params0 = np.array([0.0000001,0.0000001,0.0000001])
    
    def VasicekLogLikelihood(params):
        alpha, b, sigma = params[0], params[1], params[2] 
        return -(N-1)*np.log(alpha/(sigma**2*(1-np.exp(-2*alpha*dt)))) \
            - alpha/(sigma**2*(1-np.exp(-2*alpha*dt)))*np.sum(-(rt[:-1] - rt[1:]*np.exp(-alpha*dt)-b*(1-np.exp(-alpha*dt)))**2) 
    
    params_opt = minimize(VasicekLogLikelihood, params0, method='Powell')
    return params_opt.x

In [3]:
def ClosestTradeDate(day, dates):
    return dates[dates <= day][-1]
    

In [4]:
def estimate_params(components):
    params = np.zeros((3,3))

    for i in range(3):
        params[i] = minimizeNLogLikelihood(components[:,i])

    return params

In [5]:
def load_bonds(bond_count, forDate):
 
    bond_info = {}

    for i in range( bond_count):
        bond_i = i

        coupons =pd.read_excel('офз_график_выплат.xlsx', bond_i)[['Date', 'Coupon']].astype({'Date':'M8[D]'})

        coupons_date = coupons['Date'].values



        info = pd.read_excel('офз_описание.xlsx',bond_i, index_col=0)

        coupons_rate = coupons['Coupon'].values/2/100
        coupons_rate = coupons_rate[coupons_date > forDate]

        coupons_date = coupons_date[coupons_date > forDate].reshape(-1,1)
        expDate = np.datetime64(info.loc['Дата погашения'].values[0])

#         print('bond number', i)
#         print('expDate', expDate)
#         print('coupon', coupons_rate.mean())
#         print()



        face = info.loc['Номинальная стоимость'].values[0]

        bond_info[i] = {
            'coupons_date': coupons_date,
            'coupons_rate': coupons_rate,
            'expDate': expDate,
            'face': face
        }
    return bond_info


In [6]:
def VasicekPath(params,r0, N):
    dt = 1    
    alpha, b, sigma = params[0], params[1], params[2]
    r = [] # варианты аля-ставки на один из аля-TtM  
    for j in range(N):
        r1 = r0 + alpha*(b-r0)*dt + sigma*np.sqrt(dt)*np.random.randn()
        r.append(r1)
    return r

In [7]:
from scipy.interpolate import interp1d

  
def interpolate(bond_count, pca, r0, bond_info):
    
    for bond_i in range(bond_count):
    #переводим аля-ставки в разности реальных ставок 
        inverse_r = pca.inverse_transform(bond_info[bond_i]['simulations'])

    #переводим разности в кривую YTM. 
        inverse_r = r0.values + inverse_r
    

        x = [0]+list(r0.index*360)
        xnew = np.arange(1, 30*360, 1)

        inverse_r_interpolate = np.zeros((inverse_r.shape[0], len(xnew)))

        for i, ofz in enumerate(inverse_r):
            y = [0]+list(ofz)

            f = interp1d(x, y)
            ynew = f(xnew)

            inverse_r_interpolate[i] = ynew
        bond_info[bond_i]['inverse_r_interpolate'] = inverse_r_interpolate
        
    return bond_info

In [8]:
def PriceBond(bond_params, tradeDate,  N=10):
    coupons_date = bond_params['coupons_date']
    couponRate = bond_params['coupons_rate']
    FaseValue = bond_params['face']/10
    inverse_r_interpolate = bond_params['inverse_r_interpolate']

    TimeToMaturity = np.apply_along_axis(lambda x: int((x - tradeDate)/np.timedelta64(1,'D')), 1, coupons_date)
    B = np.zeros((N, TimeToMaturity[-1]))
    discount_factor = ((1+inverse_r_interpolate[:, :TimeToMaturity[-1]])**np.arange(TimeToMaturity[-1]))[:, ::-1]


    for i in range(N):    
        #для каждого купона считаем его динамику цены (делим значение купона на показатель дисконтирования)
        for j,_ in enumerate(TimeToMaturity):        
            V = FaseValue*couponRate[j] if j != len(TimeToMaturity)-1 else FaseValue*(1+couponRate[j])  
            b = (V / discount_factor[i, :TimeToMaturity[j]])  
            B[i, :TimeToMaturity[j]] += b
        
    return B

In [9]:
def estimate_r(bond_count, params, r0_diff, N=100):
    for bond_i in range(bond_count):
        r = []    
        for j in range(params.shape[0]):        
            r.append(VasicekPath(params[j], r0_diff[j], N))

        bond_info[bond_i]['simulations'] = np.array(r).T
    return bond_info

In [10]:
def estimate_price(bond_count, bond_info, N, tradeDate):

    for i in range(bond_count):

        p = PriceBond(bond_info[i], tradeDate,  N=N)

        bond_info[i]['price_path'] = p
        
        
    return bond_info

### смотрим на весь портфель из облигаций на 1 день

In [11]:
def load_real_price(bond_count, tradeDate, forecastDate):
    p0 = np.array([])
    p_last = np.array([])

    for i in range(bond_count):
        bond_price = pd.read_excel('офз_данные.xlsx', i, index_col=2).iloc[:, 6:7]
        bond_price.columns=['price']


        p0 = np.append(p0, bond_price.loc[tradeDate]['price'])
        p_last = np.append(p_last, bond_price.loc[forecastDate]['price'])

#         bond_result = bond_info[i]
#         dates_index = pd.date_range(tradeDate1, bond_result['expDate'])[:-1]
#         p_path = pd.DataFrame(bond_result['price_path'], columns=dates_index)
        


    return p0, p_last

In [12]:
def pricePortfolioInitial(p_real, bond_value = 1):
    w = bond_value/p_real
    return np.sum(w*p_real), w

def pricePortfolio(price, w):
    if len(price.shape) == 2:
        return np.sum(w[:, None]*price, 0)
    else: 
        return np.sum(w*price)

In [13]:
def rebalance(amount0, p_path):
    w = amount0 / sum(amount0)
    
    count = amount0 / p_path[:, 0]
    
    for i in range(1, p_path.shape[1]):
        p = p_path[:, i]
        port_value = sum(p * count)
        count = (port_value * w) / p
        
        
    return sum(count * p)


def estimate_VaR(amount0, p_pathes, N, alpha=0.9):
    p0_real = p_pathes[:, 0, 0]
    
    port_value_0 = sum(amount0)
    
    port_value_last = np.zeros(N)
    
    for i in range(N):
        p_path = np.array([x[i] for x in p_pathes])
        port_value_last[i] = rebalance(amount0, p_path)
        
    distr = (port_value_0 - port_value_last) / port_value_0

    return np.percentile(distr, alpha*100)
    
def real_loss(p0, p_last, amount0):
    count = amount0 / p0
    return((sum(amount0) - sum(count * p_last)) / sum(amount0))
    

In [14]:
def make_true_price_path(bond_count, bond_info, p0_real, N, days_for_predict):
    
    
    for i in range(bond_count):
        yields = bond_info[i]['price_path'][:, 1:] / bond_info[i]['price_path'][:, :-1]
        yields = yields[:, :days_for_predict]

        bond_info[i]['price_path'] = np.cumprod(np.concatenate((p0_real[i] + np.zeros((N,1)), 
                   yields), 1), 1)
        
        
    return  bond_info    

In [16]:
ir = pd.read_excel('офз.xlsx', index_col=0, header=1) / 100/365

dates = np.array(['-'.join(x.split('.')[::-1]) for x in ir.index]).astype('M8[D]')
ir.index = dates
ir.sort_index(inplace=True)
dates = dates[::-1][1:]

ir_diff = ir.diff().dropna()# берем разности

pca = PCA(n_components=3)
components = pca.fit_transform(ir_diff)




In [None]:
result = []
startDate0 = np.datetime64('2017-01-12')
days_for_predict = 1
bond_count = 5
N = 100
    
#result = pd.DataFrame(columns=['forDate', 'VaR', 'loss'])

for forecastDate in dates[dates >= np.datetime64('2018-01-12')][:]:
    
    startDate = startDate0 + (forecastDate - np.datetime64('2018-01-12'))
    
    tradeDate =  ClosestTradeDate(forecastDate-1, dates)
    
    print('last traid date before forecase', tradeDate)
    print('forecast date', forecastDate)
    bond_info = load_bonds(bond_count, tradeDate)
    
    components_for = components[(dates <= tradeDate) & (dates >= startDate)]
    params = estimate_params(components_for)
    
    r0_diff = components_for[-1]
    r0 = ir.loc[tradeDate]
    
    bond_info = estimate_r(bond_count, params,  r0_diff, N=N)
    bond_info = interpolate(bond_count, pca, r0, bond_info)
    bond_info = estimate_price(bond_count, bond_info, N, tradeDate)
    
    p0, p_last = load_real_price(bond_count, tradeDate, forecastDate)

    bond_info = make_true_price_path(bond_count, bond_info, p0, N, days_for_predict)
    
    porfolio_path = np.array([bond_info[i]['price_path'] for i in range(bond_count)])

    amount0 = np.array([100, 100, 100, 200, 100])
    VaR = estimate_VaR(amount0, porfolio_path, N)
    
    loss = real_loss(p0, p_last, amount0 )
    
    print('VaR', VaR, 'loss', loss)
    print()
    result.append([forecastDate, VaR, loss])

last traid date before forecase 2018-01-11
forecast date 2018-01-12
VaR -0.0002162730113310166 loss -0.0025002267847846343

last traid date before forecase 2018-01-12
forecast date 2018-01-15
VaR -0.00021404632907937564 loss -0.00040372820721472634

last traid date before forecase 2018-01-15
forecast date 2018-01-16
VaR -0.00022188997391034112 loss 0.0002946092244284652

last traid date before forecase 2018-01-16
forecast date 2018-01-17
VaR -0.00021122443792410194 loss 0.0006727409665902921

last traid date before forecase 2018-01-17
forecast date 2018-01-18
VaR -0.0002111562149556221 loss -0.00022599447329848014

last traid date before forecase 2018-01-18
forecast date 2018-01-19
VaR -0.00021015148871941845 loss -0.0010039912709536717

last traid date before forecase 2018-01-19
forecast date 2018-01-22
VaR -0.00020994335513184597 loss 0.0005502348802080329

last traid date before forecase 2018-01-22
forecast date 2018-01-23
VaR -0.0002072212772787907 loss -0.00018466690092452609

las

VaR -0.00021027566285675904 loss -0.0027184666858693163

last traid date before forecase 2018-04-19
forecast date 2018-04-20
VaR -0.0002070845237812288 loss -0.0006466183918045469

last traid date before forecase 2018-04-20
forecast date 2018-04-23
VaR -0.00020521194318149586 loss 0.0017938346363475452

last traid date before forecase 2018-04-23
forecast date 2018-04-24
VaR -0.0002017163816512948 loss -0.002572280067640804

last traid date before forecase 2018-04-24
forecast date 2018-04-25
VaR -0.00020533654784600231 loss 0.0014939324219172552

last traid date before forecase 2018-04-25
forecast date 2018-04-26
VaR -0.00020398183093601574 loss 0.0013877907739663442

last traid date before forecase 2018-04-26
forecast date 2018-04-27
VaR -0.00020329379266945577 loss -0.0027562907751189415

last traid date before forecase 2018-04-27
forecast date 2018-04-28
VaR -0.00020887371263389318 loss -0.00034058666050384545

last traid date before forecase 2018-04-28
forecast date 2018-05-03
VaR -

In [30]:
result = pd.DataFrame(result, columns=['forDate', 'VaR', 'loss'])


In [31]:
sum(result['loss'] > result['VaR'])/result.shape[0]

0.7142857142857143