In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random

In [2]:
df = pd.read_csv('data.csv')
df = df.dropna()  # VRTX has null PX_LAST in 2015-05-12
df.head()

Unnamed: 0,Date,PX_LAST,CUR_MKT_CAP,PX_VOLUME,Ticker,Market_Value
0,2014-01-02,76.27,134027.7199,5212460.0,DIS,10222290.0
1,2014-01-03,76.11,133746.5552,4061042.0,DIS,10179450.0
2,2014-01-06,75.82,133236.9441,6816693.0,DIS,10102030.0
3,2014-01-07,76.34,134150.7295,4511157.0,DIS,10241070.0
4,2014-01-08,75.22,132182.5763,10914858.0,DIS,9942773.0


In [3]:
tickers = df['Ticker'].unique()
tickers

array(['DIS', 'UHS', 'GOOGL', 'LOW', 'BSX', 'JPM', 'EOG', 'SBAC', 'STT',
       'DVN', 'VRTX', 'BK', 'ALL', 'WY', 'OXY', 'ON', 'EXC', 'UAL',
       'AKAM', 'SMCI', 'LYV', 'AMD', 'VLO', 'NI', 'DRI', 'SJM', 'APH',
       'MAS', 'CMG', 'TXT'], dtype=object)

In [4]:
n = random.randint(1, len(tickers))
n

16

In [5]:
random_tickers = random.sample(list(tickers), n)
random_tickers

['UAL',
 'DVN',
 'LYV',
 'SMCI',
 'MAS',
 'SBAC',
 'ON',
 'UHS',
 'ALL',
 'DRI',
 'VLO',
 'APH',
 'VRTX',
 'CMG',
 'DIS',
 'GOOGL']

In [7]:
def compute_ex_post_returns(df: pd.DataFrame, tickers: list) -> pd.Series:
    selected_data = df[df['Ticker'].isin(tickers)].copy()
    selected_data['Return'] = selected_data.groupby('Ticker')['PX_LAST'].pct_change()  # Daily percentual returns
    selected_data = selected_data.dropna(subset=['Return'])
    print(selected_data)
    ex_post_returns = selected_data.groupby('Date')['Return'].mean()  # Ex-post returns on an equal-weighted portfolio
    
    return ex_post_returns

In [8]:
ex_post_returns = compute_ex_post_returns(df, random_tickers)
ex_post_returns

             Date  PX_LAST  CUR_MKT_CAP   PX_VOLUME Ticker  Market_Value  \
1      2014-01-03   76.110  133746.5552   4061042.0    DIS  1.017945e+07   
2      2014-01-06   75.820  133236.9441   6816693.0    DIS  1.010203e+07   
3      2014-01-07   76.340  134150.7295   4511157.0    DIS  1.024107e+07   
4      2014-01-08   75.220  132182.5763  10914858.0    DIS  9.942773e+06   
5      2014-01-09   74.900  131620.2468   8077726.0    DIS  9.858356e+06   
...           ...      ...          ...         ...    ...           ...   
72959  2023-12-22   46.087   63242.1023   5231100.0    CMG  2.914639e+06   
72960  2023-12-26   46.390   63658.4378   5337400.0    CMG  2.953115e+06   
72961  2023-12-27   46.149   63326.9063   7292700.0    CMG  2.922473e+06   
72962  2023-12-28   45.568   62529.3645   7513250.0    CMG  2.849338e+06   
72963  2023-12-29   45.739   62764.8396   9245250.0    CMG  2.870801e+06   

         Return  
1     -0.002098  
2     -0.003810  
3      0.006858  
4     -0.014671

Date
2014-01-03    0.006294
2014-01-06   -0.003118
2014-01-07    0.009601
2014-01-08    0.007944
2014-01-09    0.005338
                ...   
2023-12-22   -0.000352
2023-12-26    0.006601
2023-12-27   -0.002660
2023-12-28   -0.001217
2023-12-29   -0.005606
Name: Return, Length: 2515, dtype: float64

In [8]:
def compute_portfolio_variance(ex_post_returns: pd.Series) -> float:
    return ex_post_returns.var()

In [9]:
portfolio_variance = compute_portfolio_variance(ex_post_returns)
portfolio_variance

0.00019062323047179975

In [10]:
# Repeat the process 10 times
def simulate_portfolio_variances(df: pd.DataFrame, n: int, repetitions: int = 10) -> list:
    portfolio_variances = []
    for _ in range(repetitions):
        random_tickers = random.sample(list(tickers), n)
        ex_post_returns = compute_ex_post_returns(df, random_tickers)
        portfolio_variance = compute_portfolio_variance(ex_post_returns)
        portfolio_variances.append(portfolio_variance)
    return portfolio_variances

In [11]:
portfolio_variances = simulate_portfolio_variances(df, n)
portfolio_variances

[0.00017626452520553613,
 0.00017915776792694398,
 0.0001816863890711591,
 0.00022457248975621885,
 0.00017956523579215354,
 0.00020893693874472774,
 0.00020332250609331114,
 0.00020086232915445386,
 0.00018771253230115,
 0.00020256495403626144]

In [12]:
# Repeat for n = 1, 2, ..., 30
def simulate_variances_accross_portfolio_sizes(df: pd.DataFrame, max_n: int, repetitions: int = 10) -> pd.DataFrame:
    all_variances = {}
    
    for n in range(1, max_n + 1):
        variances = simulate_portfolio_variances(df, n, repetitions)
        all_variances[n] = variances
        
    return pd.DataFrame(all_variances)

In [13]:
all_variances = simulate_variances_accross_portfolio_sizes(df, 30)
all_variances

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,21,22,23,24,25,26,27,28,29,30
0,0.000561,0.000294,0.000187,0.000284,0.000175,0.000234,0.000191,0.000167,0.000271,0.000212,...,0.000174,0.000174,0.000194,0.000188,0.00019,0.000177,0.000189,0.000172,0.000178,0.000176
1,0.000568,0.000268,0.000161,0.000129,0.000208,0.000212,0.000191,0.000227,0.000159,0.000211,...,0.000177,0.000165,0.000195,0.000172,0.000183,0.000187,0.000161,0.000178,0.000176,0.000176
2,0.000244,0.000534,0.00015,0.000309,0.000159,0.000207,0.000134,0.000207,0.000188,0.00018,...,0.000183,0.000197,0.000171,0.000183,0.000189,0.000182,0.000177,0.000183,0.00017,0.000176
3,0.000282,0.000429,0.000451,0.000195,0.000215,0.000181,0.000224,0.000255,0.000235,0.000187,...,0.000185,0.000204,0.000176,0.000176,0.000192,0.000181,0.000169,0.00018,0.000181,0.000176
4,0.000282,0.000263,0.000195,0.000291,0.000134,0.000289,0.000203,0.000188,0.000193,0.000173,...,0.00021,0.000187,0.000174,0.000189,0.000185,0.00017,0.000175,0.000178,0.000176,0.000176
5,0.000957,0.000224,0.000233,0.000271,0.00031,0.0002,0.000197,0.000218,0.000216,0.00023,...,0.000213,0.000184,0.000174,0.000172,0.000179,0.000193,0.000169,0.000184,0.00018,0.000176
6,0.000294,0.000192,0.000348,0.000363,0.000224,0.000248,0.000251,0.000288,0.000155,0.000251,...,0.000189,0.000182,0.000174,0.000187,0.00018,0.000172,0.00017,0.000178,0.000175,0.000176
7,0.000917,0.000398,0.000145,0.000146,0.000297,0.000197,0.000136,0.000238,0.00026,0.000191,...,0.000174,0.000188,0.000178,0.000167,0.000179,0.00019,0.000182,0.000178,0.000177,0.000176
8,0.000391,0.000556,0.00025,0.000382,0.000141,0.000208,0.000201,0.000196,0.000206,0.000179,...,0.000188,0.000176,0.000171,0.000188,0.000176,0.000178,0.000176,0.000181,0.000171,0.000176
9,0.000308,0.000186,0.000501,0.000233,0.000275,0.000233,0.000193,0.000199,0.000165,0.000173,...,0.000181,0.000162,0.000182,0.000186,0.000171,0.000174,0.000179,0.000172,0.00017,0.000176
