In [2]:
%config InLineBackend.figure_format='retina'

In [1]:
import numpy as np
import pandas as pd
import altair as alt
import warnings
# Silence all warnings so they do not clutter the rendered notebook output.
warnings.simplefilter('ignore')
# Seed NumPy's legacy global random state so the simulations are reproducible.
np.random.seed(seed=42)

In [2]:
def correlated_streams(
    n: int, mean: float, risk: float, corr: float, num_samples: int = 10_000
) -> np.ndarray:
    """
    Simulate ``n`` equally correlated, normally distributed return streams.

    Every stream has expected value ``mean`` and standard deviation ``risk``,
    and every pair of distinct streams has correlation ``corr``. Samples are
    drawn from NumPy's legacy global random state, so call ``np.random.seed``
    beforehand if reproducible output is required.

    Parameters
    ----------
    n : int
        Number of return streams to generate.
    mean : float
        Expected value of each stream.
    risk : float
        Standard deviation of each stream.
    corr : float
        Correlation between any two distinct streams. For ``n > 1`` it must
        lie in ``[-1 / (n - 1), 1]``.
    num_samples : int, default 10_000
        Number of observations drawn per stream.

    Returns
    -------
    np.ndarray
        Array of shape ``(n, num_samples)``; each row is one stream.

    Raises
    ------
    ValueError
        If ``corr`` cannot be the common correlation of ``n`` streams.
    """
    # An equicorrelation matrix has eigenvalues 1 + (n - 1) * corr and 1 - corr,
    # so it is a valid (positive semi-definite) correlation matrix only inside
    # these bounds. Without this check NumPy merely warns and still returns
    # samples, which goes unnoticed when warnings are suppressed.
    if n > 1 and not -1 / (n - 1) <= corr <= 1:
        raise ValueError(
            f"corr must be in [{-1 / (n - 1)}, 1] for n={n}, got: {corr}"
        )

    means = np.full(n, mean)

    # Constant off-diagonal correlation with ones on the diagonal.
    corr_mat = np.full((n, n), corr, dtype=np.dtype("d"))
    np.fill_diagonal(corr_mat, 1)
    # All streams share the same risk, so cov = corr * risk**2.
    cov_mat = corr_mat * risk**2

    streams = np.random.multivariate_normal(means, cov_mat, size=num_samples)

    # multivariate_normal returns (num_samples, n); transpose to one row per stream.
    return streams.T
        

In [3]:
# Demo: five streams, each with mean 10 and risk 15, with pairwise correlation 0.6.
n = 5
mean = 10
std = 15
corr = 0.6
streams = correlated_streams(n, mean=mean, risk=std, corr=corr)

In [4]:
# Sanity check: the sample mean of each stream (one value per row).
np.mean(streams, axis=1)

array([10.12229747,  9.92797016,  9.98877207, 10.05103342,  9.90978558])

In [5]:
# Sanity check: the sample standard deviation of each stream (one value per row).
np.std(streams, axis=1)

array([15.07254044, 15.05168254, 15.17926238, 15.2192544 , 15.14908131])

In [6]:
# Display the simulated streams; each row is one stream.
streams

array([[  2.68277374,  26.2961793 ,  11.78031229, ...,  11.55402765,
          1.2952331 ,   9.08595787],
       [ 15.30537123,   7.39542686,  -2.82396479, ...,  24.86409114,
         14.01618077,  15.55587422],
       [ -5.2118669 ,  15.43746428,  15.57786758, ...,  -0.3456664 ,
        -14.55323808,  17.66691734],
       [ -1.09300273,   2.52764164,  25.44016093, ...,   1.64963583,
         20.81314878,  -1.26545688],
       [  7.59665087,  12.82385898,  28.68642545, ...,   1.39094052,
         16.2208108 ,   5.443885  ]])

In [7]:
# Sanity check: pairwise correlation matrix of the streams (rows are the variables).
np.corrcoef(streams, rowvar=True)

array([[1.        , 0.60676484, 0.61222918, 0.61179636, 0.60301561],
       [0.60676484, 1.        , 0.61036834, 0.61049393, 0.61073826],
       [0.61222918, 0.61036834, 1.        , 0.61526424, 0.61265281],
       [0.61179636, 0.61049393, 0.61526424, 1.        , 0.605607  ],
       [0.60301561, 0.61073826, 0.61265281, 0.605607  , 1.        ]])

In [8]:
def aggregate_risk(returns_streams: np.ndarray, n: int) -> float:
    """
    Return the pooled risk (standard deviation) of the first ``n`` streams.

    The first ``n`` rows of ``returns_streams`` are averaged sample by sample,
    i.e. combined into an equal-weight portfolio, and the standard deviation
    of that averaged stream is returned.

    Parameters
    ----------
    returns_streams : np.ndarray
        Two-dimensional array with one return stream per row.
    n : int
        Number of leading streams to pool; must satisfy
        ``1 <= n <= len(returns_streams)``.

    Returns
    -------
    float
        Standard deviation of the equal-weight average of the first ``n`` streams.

    Raises
    ------
    ValueError
        If ``n`` is smaller than 1 or larger than the number of streams.
    """
    # n < 1 would divide by zero or slice from the wrong end, so reject it.
    if n < 1:
        raise ValueError(f"n must be at least 1, got: {n}")
    if len(returns_streams) < n:
        raise ValueError(f"len of return_stream less than n: {n}")

    # Average across streams (axis 0) per sample, then measure the spread.
    return np.mean(returns_streams[:n], axis=0).std()

In [10]:
# Simulation grid: pooled risk of portfolios holding 1..max_assets assets, for
# every combination of per-asset risk level and pairwise correlation.
max_assets = 20
assets = range(1, max_assets + 1)

mean = 10  # average mean return of 10%
risk_levels = range(1, 15)

index = pd.MultiIndex.from_product(
    [risk_levels, assets],
    names=["risk_level", "num_assets"],
)
simulated_data = pd.DataFrame(index=index)

for risk in risk_levels:
    for corr in np.arange(0.0, 0.8, 0.1):
        return_streams = correlated_streams(max_assets, mean, risk, corr)

        # Pooled risk of the first k streams, for k = 1..max_assets.
        risk_level = np.zeros(max_assets)
        for slot, num_assets in enumerate(assets):
            risk_level[slot] = aggregate_risk(return_streams, num_assets)

        # Float steps are not exact (e.g. 3 * 0.1 != 0.3), so round the
        # correlation to get clean column labels.
        simulated_data.loc[(risk,), round(corr, 1)] = risk_level

simulated_data.columns.names = ["correlation"]



In [11]:
# Inspect the rows for risk level 14: one row per portfolio size, one column per correlation.
simulated_data.query("risk_level==14")

Unnamed: 0_level_0,correlation,0.0,0.1,0.2,0.3,0.4,0.5,0.6,0.7
risk_level,num_assets,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
14,1,14.139732,14.083466,14.075192,14.211693,14.016018,14.237709,14.047985,14.124923
14,2,9.97189,10.354355,10.939584,11.367708,11.798633,12.26122,12.441364,13.00799
14,3,8.109977,8.819959,9.606007,10.213402,10.950501,11.577389,11.903425,12.604708
14,4,6.986998,7.949406,8.896181,9.628273,10.510822,11.179527,11.647545,12.408229
14,5,6.264615,7.401336,8.439524,9.254206,10.242789,10.937341,11.493025,12.281325
14,6,5.716743,6.977136,8.096929,9.013415,10.042193,10.778337,11.398521,12.207259
14,7,5.295934,6.67127,7.88007,8.83433,9.888923,10.683292,11.307895,12.158006
14,8,4.947562,6.405502,7.712366,8.731689,9.763589,10.60242,11.246477,12.135535
14,9,4.661358,6.229094,7.544881,8.634614,9.666221,10.528369,11.204164,12.112906
14,10,4.44271,6.074949,7.430312,8.528163,9.601574,10.485156,11.166506,12.091789


In [12]:
def plot_risk_level(data: pd.DataFrame, risk_level: int) -> "alt.LayerChart":
    """
    Plot pooled portfolio risk against the number of assets for one risk level.

    One line is drawn per correlation level. Hovering near a line thickens it
    and shows its correlation in a tooltip.

    Parameters
    ----------
    data : pd.DataFrame
        Frame whose index has the levels ``risk_level`` and ``num_assets`` and
        whose column axis is named ``correlation`` (the layout of
        ``simulated_data``).
    risk_level : int
        Value of the ``risk_level`` index level to plot.

    Returns
    -------
    alt.LayerChart
        Invisible hover-target points layered with the risk lines.

    Raises
    ------
    ValueError
        If ``data`` has no rows for ``risk_level``.
    """
    subset = data.query(f"risk_level=={risk_level}")
    # Fail loudly instead of silently rendering a blank chart.
    if subset.empty:
        raise ValueError(f"no data for risk_level: {risk_level}")

    # Long format: one row per (risk_level, num_assets, correlation) with its risk.
    stacked = subset.stack().reset_index(name='risk')

    chart = alt.Chart(data=stacked)

    # NOTE(review): alt.selection(type='single') and add_selection look like the
    # Altair 4 API; newer releases appear to prefer selection_point/add_params -
    # confirm against the installed Altair version before changing.
    highlight = alt.selection(type='single', on='mouseover',
                              fields=['correlation'], nearest=True)

    base = chart.encode(
        alt.X("num_assets", axis=alt.Axis(title="Number of Assets")),
        alt.Y("risk", axis=alt.Axis(title="Risk[%]")),
        alt.Color("correlation:N", scale=alt.Scale(scheme='set2')),
    )
    # Fully transparent points exist only to carry the hover selection.
    points = base.mark_circle().encode(
        opacity=alt.value(0)
    ).add_selection(
        highlight
    ).properties(
        height=400,
        width=600,
        title="Risk % by number of assets in portfolio"
    )
    # Lines are thin by default and thicker for the highlighted correlation.
    lines = base.mark_line().encode(
        size=alt.condition(~highlight, alt.value(1), alt.value(3)),
        tooltip=["correlation"],
    )
    return points + lines

In [13]:
# Chart the diversification effect for assets with a 10% risk level.
plot_risk_level(simulated_data, risk_level=10)

The plot shows how diversification benefits portfolios whose assets each have a risk level of 10 percent.
More highly correlated portfolios do not benefit as much from increased diversification.
You get only a small reduction in risk by adding highly correlated assets beyond a total of three or four.
In contrast, you can halve the risk by adding just six or seven uncorrelated or, more realistically, weakly correlated assets to a portfolio.
The benefits of diversification are reduced risk through exposure to different sources of trading revenue.


The insight that Dalio brings is that constructing a diversified portfolio from a combination of uncorrelated return streams significantly reduces your overall risk, which in turn raises your return-to-risk (Sharpe) ratio.
By the careful mixing of uncorrelated assets, you can capture this true low-risk Alpha.
This gives you the ability to add leverage and greatly increase your potential returns.
This is the strategy that Dalio and Bridgewater have used successfully in their [risk parity](https://www.bridgewater.com/research-and-insights/the-all-weather-story) approach.