In [2]:
import os
import numpy as np
import pandas as pd

# Load the data
def load_data(ticker):
    dir = os.path.join('Database', 'US', f"{ticker}-1d-5.csv")
    data = pd.read_csv(dir, index_col=0)
    data.index = pd.to_datetime(data.index, utc=True)
    data.index = data.index.tz_convert('US/Eastern')

    return data


tickers = ['AAPL', 'MSFT', 'GOOG', 'AMZN', 'CRM',
           'DIS', 'CVX', 'COP', 'CMCSA', 'DHR']
market = '^GSPC'

In [34]:
import statsmodels.api as sm

tickers_data = {}
for ticker in tickers:
    tickers_data[ticker] = load_data(ticker)

market_data = load_data(market)

def calculate_beta(stock_returns, market_returns):
    model = sm.OLS(stock_returns, sm.add_constant(market_returns))
    results = model.fit()

    # Extract the beta
    beta = results.params.values[1]

    idio_vol = np.std(results.resid)

    return beta, idio_vol


def add_beta(ticker_data, market_data):
    period = 66
    beta_holder = {}
    iv_holder = {}

    merged_df = pd.merge(
        ticker_data['Close'],
        market_data['Close'],
        how = 'left',
        left_index=True, right_index=True,
        suffixes=('_stock', '_market')
        )
    merged_df.dropna(inplace=True)
                         
    for i, date in enumerate(merged_df.index):
        if i < period:
            continue

        stock_returns = merged_df.iloc[i-period:i]['Close_stock'].pct_change()
        market_returns = merged_df.iloc[i-period:i]['Close_market'].pct_change()

        stock_returns.dropna(inplace=True)
        market_returns.dropna(inplace=True)

        beta, idio_vol = calculate_beta(stock_returns, market_returns)
        beta_holder[date] = beta

    beta_holder = pd.Series(beta_holder)
    beta_holder.name = 'Beta'

    beta_holder.index = pd.to_datetime(beta_holder.index, utc=True)
    beta_holder.index = beta_holder.index.tz_convert('US/Eastern')

    iv_holder = pd.Series(iv_holder)
    iv_holder.name = 'IdioVol'
    iv_holder.index = pd.to_datetime(iv_holder.index, utc=True)
    iv_holder.index = iv_holder.index.tz_convert('US/Eastern')

    # Merge the beta with the original data
    ticker_data = pd.merge(ticker_data, beta_holder, how='left', left_index=True, right_index=True)
    ticker_data = pd.merge(ticker_data, iv_holder, how='left', left_index=True, right_index=True)

    return ticker_data

for ticker, ticker_data in tickers_data.items():
    print (ticker, "is about to be processed.")
    tickers_data[ticker] = add_beta(ticker_data, market_data)


AAPL is about to be processed.
MSFT is about to be processed.
GOOG is about to be processed.
AMZN is about to be processed.
CRM is about to be processed.
DIS is about to be processed.
CVX is about to be processed.
COP is about to be processed.
CMCSA is about to be processed.
DHR is about to be processed.


In [46]:
start_dates = []
end_dates = []
df = tickers_data['AAPL']

for year in df.index.year.unique():
    for month in range(1, 13):
        df_tm = df[(df.index.year == year) & (df.index.month == month)]

        if len(df_tm) == 0:
            continue

        start_dates.append(df_tm.index[0])
        end_dates.append(df_tm.index[-1])
        

betas_holder = []
iv_holder = []
for ticker in tickers:
    betas_holder.append(tickers_data[ticker]['Beta'])
    iv_holder.append(tickers_data[ticker]['IdioVol'])

betas_holder = pd.concat(betas_holder, axis=1)
betas_holder.columns = tickers

iv_holder = pd.concat(iv_holder, axis=1)
iv_holder.columns = tickers

N = 2
returns = []
for date in start_dates:

    day_returns = []
    top_n_beta = betas_holder.loc[date].nlargest(N).index.tolist()
    bottom_n_beta = betas_holder.loc[date].nsmallest(N).index.tolist()

    top_n_iv = iv_holder.loc[date, top_n_beta].nlargest(N).index.tolist()
    bottom_n_iv = iv_holder.loc[date, bottom_n_beta].nlargest(N).index.tolist()

    for ticker in top_n_beta:
        df_tmp = tickers_data[ticker].loc[date:, 'Close'].iloc[:30]
        day_returns.append(-1*(df_tmp.iloc[-1] / df_tmp.iloc[0] - 1))

    for ticker in bottom_n_beta:
        df_tmp = tickers_data[ticker].loc[date:, 'Close'].iloc[:30]
        day_returns.append(df_tmp.iloc[-1] / df_tmp.iloc[0] - 1)

    returns.append(np.mean(day_returns))


sharpe = np.mean(returns) / np.std(returns) * np.sqrt(252)

print (sharpe)



-1.671081325177757
