In [16]:
%config InLineBackend.figure_format='retina'

In [24]:
import numpy as np
import pandas as pd
import altair as alt
import warnings
warnings.filterwarnings('ignore')
np.random.seed(42)

In [19]:
def correlated_streams(n: int, mean: float, risk: float, corr: float,
                       num_samples: int = 10_000) -> np.ndarray:
    """
    Generate n correlated, normally distributed return streams.

    Every stream is drawn with the same expected value ``mean`` and the same
    standard deviation ``risk``; every pair of distinct streams has the same
    correlation ``corr``.

    Samples come from ``np.random.multivariate_normal``, i.e. NumPy's global
    random state, so call ``np.random.seed`` beforehand for reproducible output.

    Parameters
    ----------
    n : int
        Number of return streams to generate.
    mean : float
        Expected value shared by all streams.
    risk : float
        Standard deviation shared by all streams.
    corr : float
        Pairwise correlation between any two distinct streams.
    num_samples : int, default 10_000
        Number of observations drawn per stream.

    Returns
    -------
    np.ndarray
        Array of shape ``(n, num_samples)``: one row per stream.
    """
    means = np.full(n, mean)

    # Constant-correlation matrix: `corr` off the diagonal, 1 on the diagonal.
    corr_mat = np.full((n, n), corr, dtype=np.dtype("d"))
    np.fill_diagonal(corr_mat, 1)
    # All streams share one standard deviation, so the covariance matrix is
    # just the correlation matrix scaled by the common variance.
    cov_mat = corr_mat * risk**2

    streams = np.random.multivariate_normal(means, cov_mat, size=num_samples)

    # multivariate_normal returns (num_samples, n); transpose so that each
    # row is one stream.
    return streams.T
        

In [27]:
# Small demo: five streams with a 10% mean, 15% risk and 0.6 pairwise correlation.
n = 5
mean = 10
std = 15
corr = 0.6
streams = correlated_streams(n=n, mean=mean, risk=std, corr=corr)

In [28]:
streams.mean(axis=1)

array([10.00222395,  9.88422942,  9.94152682,  9.91113114,  9.94114824])

In [29]:
streams.std(axis=1)

array([14.97744151, 15.13176711, 15.06834027, 15.1321375 , 15.12785865])

In [26]:
streams

array([[ -2.51237014, -29.52319219,  39.51539455, ...,  -9.0968578 ,
         25.5230905 ,   8.8425909 ],
       [ -0.89574053,  13.93734329,  72.15486409, ...,  -9.39101375,
         70.02544873, -36.12234934],
       [-26.91537347,  58.90324544,  47.08517556, ..., -28.36420705,
        -62.08907707,  27.13298684],
       [ 34.91504831,  27.97122928, -43.54781673, ...,  68.83134022,
         11.30372425,  33.14512221],
       [-37.89306004,  17.97733555,  12.50989227, ...,  -1.50070738,
        -27.86640717,   7.47617213]])

In [30]:
np.corrcoef(streams)

array([[1.        , 0.60398834, 0.60501612, 0.59762202, 0.60245373],
       [0.60398834, 1.        , 0.60919205, 0.60012463, 0.60940807],
       [0.60501612, 0.60919205, 1.        , 0.60260022, 0.60928793],
       [0.59762202, 0.60012463, 0.60260022, 1.        , 0.60770469],
       [0.60245373, 0.60940807, 0.60928793, 0.60770469, 1.        ]])

In [31]:
def aggregate_risk(returns_streams: np.ndarray, n: int) -> float:
    """
    Return the pooled risk (std) of the first n streams in returns_streams.

    The first n rows are averaged observation by observation (an equally
    weighted combination) and the standard deviation of that averaged stream
    is returned.

    Parameters
    ----------
    returns_streams : np.ndarray
        Return streams, one stream per row.
    n : int
        Number of leading streams to pool; must satisfy
        ``1 <= n <= len(returns_streams)``.

    Returns
    -------
    float
        Standard deviation of the average of the first n streams.

    Raises
    ------
    ValueError
        If n is smaller than 1 or larger than the number of streams.
    """
    # n == 0 would divide by zero and a negative n would silently slice from
    # the end, so reject both explicitly.
    if n < 1:
        raise ValueError(f"n must be at least 1, got: {n}")
    if len(returns_streams) < n:
        raise ValueError(f"len of return_stream less than n: {n}")

    return (np.sum(returns_streams[:n], axis=0) / n).std()

In [41]:
# Simulation grid: for every (risk level, correlation) pair, draw max_assets
# correlated streams and record the pooled risk of the first 1..max_assets.
max_assets = 20
assets = range(1, max_assets + 1)

mean = 10  # average mean return of 10%
risk_levels = range(1, 15)

# Rows: every (risk_level, num_assets) combination. Columns (one per
# correlation level) are added while the simulation runs.
index = pd.MultiIndex.from_product(
    [risk_levels, assets],
    names=["risk_level", "num_assets"],
)
simulated_data = pd.DataFrame(index=index)

for risk in risk_levels:
    for corr in np.arange(0.0, 0.8, 0.1):
        return_streams = correlated_streams(max_assets, mean, risk, corr)
        # Every slot is overwritten below, one per portfolio size.
        risk_level = np.empty(max_assets)
        for slot, num_assets in enumerate(assets):
            risk_level[slot] = aggregate_risk(return_streams, num_assets)
        # Round only the column label so float steps such as 0.30000000000000004
        # show up as 0.3.
        simulated_data.loc[(risk, ), round(corr, 1)] = risk_level
simulated_data.columns.names = ["correlation"]



In [42]:
simulated_data.query("risk_level==14")

Unnamed: 0_level_0,correlation,0.0,0.1,0.2,0.3,0.4,0.5,0.6,0.7
risk_level,num_assets,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
14,1,13.940044,14.052212,14.019872,13.916254,14.11321,14.229461,13.913509,14.019948
14,2,9.906013,10.438326,10.743339,11.323233,11.860038,12.140276,12.476484,12.905944
14,3,8.080338,8.889536,9.548746,10.305909,10.954035,11.516892,11.989504,12.513196
14,4,6.925624,8.01958,8.86397,9.713392,10.470868,11.105878,11.731565,12.324642
14,5,6.205322,7.466439,8.37572,9.35221,10.186392,10.911455,11.574425,12.216736
14,6,5.700413,7.081689,8.065595,9.119634,9.96365,10.765848,11.471063,12.127635
14,7,5.301572,6.808721,7.826437,8.962939,9.805478,10.64908,11.387857,12.073691
14,8,4.957155,6.573748,7.638013,8.834362,9.705645,10.586701,11.335413,12.036084
14,9,4.676249,6.373963,7.502516,8.714214,9.611202,10.505008,11.277297,11.996258
14,10,4.452441,6.226457,7.380717,8.629474,9.518935,10.454021,11.230156,11.970614


In [60]:
def plot_risk_level(data: pd.DataFrame, risk_level: int) -> "alt.LayerChart":
    """
    Plot pooled risk against the number of assets for one risk level.

    One line is drawn per correlation level. Hovering near a line selects
    its correlation, which changes the line width and shows a tooltip.

    Parameters
    ----------
    data : pd.DataFrame
        Frame that can be filtered with ``risk_level == <value>`` and that
        carries a ``num_assets`` index level. Its column axis is expected to
        be named ``correlation`` (as ``simulated_data`` is built), since the
        stacked column of that name drives colour, selection and tooltip.
    risk_level : int
        Risk level to plot.

    Returns
    -------
    alt.LayerChart
        Layered chart made of the invisible selection points plus the lines.
    """
    subset = data.query(f"risk_level=={risk_level}")
    # Wide -> long: one row per (risk_level, num_assets, correlation) with the
    # simulated value in a column called 'risk'.
    stacked = subset.stack().reset_index(name='risk')

    chart = alt.Chart(data=stacked)

    # NOTE(review): `alt.selection(type='single')` / `add_selection` look like
    # the Altair 4 spelling; newer releases prefer `selection_point` /
    # `add_params` -- confirm against the installed Altair version.
    highlight = alt.selection(type='single', on='mouseover',
                              fields=['correlation'], nearest=True)

    base = chart.encode(
        alt.X("num_assets", axis=alt.Axis(title="Number of Assets")),
        alt.Y("risk", axis=alt.Axis(title="Risk[%]")),
        alt.Color("correlation:N", scale=alt.Scale(scheme='set2')),
    )
    # Fully transparent points exist only to carry the mouseover selection
    # (and the chart size/title).
    points = base.mark_circle().encode(
        opacity=alt.value(0)
    ).add_selection(
        highlight
    ).properties(
        height=400,
        width=600,
        title="Risk % by number of assets in portfolio"
    )
    # Line width is 1 where the selection predicate is false and 3 where it holds.
    lines = base.mark_line().encode(
        size=alt.condition(~highlight, alt.value(1), alt.value(3)),
        tooltip=["correlation"]
    )
    return points + lines

In [61]:
plot_risk_level(simulated_data, 10)