In [2]:
# -*- coding: utf-8 -*-
from datetime import datetime
import json
import numpy as np
import pandas as pd
from pandas_datareader import data as wb
from scipy.stats import norm, gmean

In [62]:
START_DATE = '2017-1-1' # Earliest date of historical prices to download (passed as `start=` to the data reader).
YEARS = 20 # Years until retirement; also the length of the simulated horizon.
DAYS_IN_YEAR = 253 # Average number of trading days in the year
PORTFOLIO_VALUE = 1000 # Starting value of the portfolio; the summary cell reports it in pounds (£).

In [5]:
def get_portfolio():
    """Load the portfolio definition from ``./data/portfolio.json``.

    Returns
    -------
    object
        Whatever is stored under the top-level ``'portfolio'`` key of the
        JSON document (the notebook uses it as a list of asset dicts).
    """
    with open('./data/portfolio.json', 'r') as source:
        document = json.load(source)
    return document['portfolio']

# Read the portfolio definition once; later cells index into it and build a DataFrame from it.
portfolio_data = get_portfolio()


In [61]:
portfolio_data

[{'ticker': '^FTM', 'name': 'FTSE 250', 'type': 'index', 'weight': 0.1},
 {'ticker': 'VEMAX',
  'name': 'Vanguard Emerging Markets',
  'type': 'stock',
  'weight': 0.8},
 {'ticker': 'UU.L',
  'name': 'United Utilities',
  'type': 'stock',
  'weight': 0.1},
 {'ticker': 'BHP.L', 'name': 'BHP Group', 'type': 'stock', 'weight': 0.1}]

In [64]:
# Test: for an entry with type = 'stock', fetch the data from Yahoo; selecting
# the 'Adj Close' column gives a Series.
test_data = wb.DataReader(portfolio_data[1]['ticker'], 'yahoo', start=START_DATE)['Adj Close']

In [72]:
test_data.index

DatetimeIndex(['2017-01-03', '2017-01-04', '2017-01-05', '2017-01-06',
               '2017-01-09', '2017-01-10', '2017-01-11', '2017-01-12',
               '2017-01-13', '2017-01-17',
               ...
               '2020-03-13', '2020-03-16', '2020-03-17', '2020-03-18',
               '2020-03-19', '2020-03-20', '2020-03-23', '2020-03-24',
               '2020-03-25', '2020-03-26'],
              dtype='datetime64[ns]', name='Date', length=813, freq=None)

In [50]:
def get_simulation(data, years=None, days_in_year=None, iterations=10):
    """Estimate an asset's growth factor over the horizon by Monte Carlo.

    Daily log returns are measured from the historical prices. Then
    ``iterations`` independent price paths are simulated, each day multiplying
    the previous price by ``exp(drift + stdev * Z)`` where ``Z`` is a standard
    normal draw from NumPy's global random state.

    Parameters
    ----------
    data : pandas.Series
        Historical prices indexed by date, e.g. the ``'Close'`` or
        ``'Adj Close'`` column selected from a ``wb.DataReader`` result. Rows
        may be in chronological or reverse-chronological order (the sample in
        this notebook's notes lists 2020-03-26 before 2020-03-25); they are
        sorted by index before use.
    years : int, optional
        Length of the simulated horizon in years. Defaults to the module-level
        ``YEARS``.
    days_in_year : int, optional
        Trading days per year. Defaults to the module-level ``DAYS_IN_YEAR``.
    iterations : int, optional
        Number of independent simulated paths (default 10).

    Returns
    -------
    float
        Geometric mean, across the simulated paths, of
        ``final price / starting price``. One scalar per asset.
    """
    if years is None:
        years = YEARS
    if days_in_year is None:
        days_in_year = DAYS_IN_YEAR

    # Work on oldest-to-newest prices with gaps removed. Without the sort, a
    # newest-first series yields time-reversed returns (the drift changes
    # sign) and iloc[-1] would be the oldest price rather than the latest.
    prices = data.sort_index().dropna()

    # Logarithmic returns from one trading day to the next.
    log_returns = np.log(1 + prices.pct_change())
    # Mean and variance of the daily log returns.
    u = log_returns.mean()
    var = log_returns.var()
    # Drift: the mean log return corrected by half the variance.
    drift = u - (0.5 * var)
    # Standard deviation of the daily log returns.
    stdev = log_returns.std()

    # Core step: use the historical statistics to simulate the future.
    # t_intervals: number of future trading days to simulate.
    # iterations: number of paths, each t_intervals days long.
    t_intervals = int(days_in_year * years)

    # Simulated daily growth factors, one column per path. Drawing standard
    # normals directly avoids the inverse-CDF round trip (and its -inf at 0).
    shocks = np.random.standard_normal((t_intervals, iterations))
    daily_returns = np.exp(drift + stdev * shocks)

    # Start every path from the most recent price and compound the daily
    # factors; row k holds the price after k + 1 simulated days, so exactly
    # t_intervals days are applied.
    start_price = prices.iloc[-1]
    price_list = start_price * np.cumprod(daily_returns, axis=0)

    # Total growth of each path: final price relative to the starting price.
    asset_returns = price_list[-1] / start_price

    # Geometric mean because the values are multiplicative growth factors.
    return gmean(asset_returns)

In [51]:
def get_data(stock):
    """Download an asset's price history and run the simulation on it.

    Parameters
    ----------
    stock : dict
        One portfolio entry, e.g.
        ``{'ticker': '^FTM', 'name': 'FTSE 250', 'type': 'index', 'weight': 0.1}``.
        Only ``'type'`` and ``'ticker'`` are read here.

    Returns
    -------
    The value returned by ``get_simulation`` for the downloaded price series
    (a single number per asset).
    """
    # Indices and stocks come from two different sources: entries with
    # type 'index' use stooq's 'Close' column, everything else uses yahoo's
    # 'Adj Close' column.
    is_index = stock['type'] == 'index'
    source, column = ('stooq', 'Close') if is_index else ('yahoo', 'Adj Close')

    history = wb.DataReader(stock['ticker'], source, start=START_DATE)
    return get_simulation(history[column])

In [52]:
# Build one row per portfolio entry and add a `returns` column holding the
# get_data() result for each asset (each call downloads prices and simulates).
portfolio = pd.DataFrame(portfolio_data).assign(returns = [get_data(stock) for stock in portfolio_data])

In [53]:
portfolio

Unnamed: 0,ticker,name,type,weight,returns
0,^FTM,FTSE 250,index,0.1,1.704453
1,VEMAX,Vanguard Emerging Markets,stock,0.8,0.992251
2,UU.L,United Utilities,stock,0.1,4.0017
3,BHP.L,BHP Group,stock,0.1,1.55677


In [60]:

def get_expected_portfolio_return(porfolio):
    """Return the weighted sum of the assets' simulated returns.

    Parameters
    ----------
    porfolio : pandas.DataFrame
        Table with a ``'weight'`` column and a ``'returns'`` column, one row
        per asset. (The parameter name keeps its original spelling so that
        existing keyword callers continue to work.)

    Returns
    -------
    float
        ``sum(weight * returns)`` over all rows. Weights are used exactly as
        given; they are not normalised to sum to 1.
    """
    # Read both columns from the argument; the previous version took the
    # weights from the global `portfolio` frame instead of the parameter.
    return np.sum(porfolio['weight'] * porfolio['returns'])
  
# Weighted growth multiple of the whole portfolio, then the projected value
# obtained by scaling the starting value with it.
expected_portfolio_return = get_expected_portfolio_return(portfolio)
final_value = expected_portfolio_return * PORTFOLIO_VALUE

# NOTE(review): `returns` holds final/initial price ratios, so the percentage
# printed below is the final value relative to the start (e.g. 152% means a
# 52% gain) — confirm that the "Expected Portfolio Return" label is intended.
print('Estimated value of Portfolio in {} : £{:,.2f} \nExpected Portfolio Return: {:,.2f}%'.format(datetime.now().year + YEARS, final_value, expected_portfolio_return * 100))   

# Yearly income we can expect from the final value, using the 4% drawdown rule.
print('Estimated Income £{:,.2f}'.format(final_value * 0.04))

1.520093293635971
Estimated value of Portfolio in 2060 : £1,520.09 
Expected Portfolio Return: 152.01%
Estimated Income £60.80


In [10]:
# Scratch check: pass a 4x2 array of uniform random numbers through the
# inverse normal CDF to see the kind of values the simulation draws.
norm.ppf(np.random.rand(4, 2))

array([[-1.62556935, -0.35702068],
       [-1.43228425,  0.14106065],
       [ 1.16846513,  0.80822364],
       [-1.19204524,  0.88260446]])