In [175]:
# Campaign-level table. Each listed window-boundary column is handed to
# `parse_dates` so it is loaded as a datetime rather than as a string.
df = pd.read_csv(
    '../data/engineered_factset_campaign_data.csv',
    parse_dates=[
        '18_months_pre_announcement_date',
        '1_year_pre_announcement_date',
        '6_months_pre_announcement_date',
        '90_days_pre_announcement_date',
        'campaign_announcement_date',
        '6_months_post_announcement_date',
        '1_year_post_announcement_date',
        '18_months_post_announcement_date',
    ],
)

In [82]:
# Market series from sp.csv (presumably the S&P index -- confirm against the
# data source): keep only the date and adjusted close, order chronologically,
# and derive the day-over-day percentage change of the price.
df_sp = (
    pd.read_csv('../data/sp.csv', parse_dates=['Date'])
    [['Date', 'Adj Close']]
    .rename(columns={'Date': 'date', 'Adj Close': 'price'})
    .sort_values('date')
    .assign(sp_daily_return=lambda prices: prices['price'].pct_change())
)

In [101]:
# Per-company price history with a daily return and the same-day market return.
df_price = (
    pd.read_csv('../data/FactSet_Pricing.txt', parse_dates=['FSDate'])
    .rename(columns={
        'FactSetID': 'company_id',
        'FSDate': 'date',
        'FGPRICE': 'price',
        'FGVolume': 'volume',
    })
    # Rows must be in date order within each company before pct_change.
    .sort_values(['company_id', 'date'])
    .assign(price=lambda prices: prices['price'].astype(float))
    # Returns are computed inside each company so the first row of one company
    # is never compared with the last row of the previous one.
    .assign(daily_return=lambda prices: prices.groupby('company_id')['price'].pct_change())
    # Daily moves beyond +/-50% are capped at the bound.
    .assign(daily_return=lambda prices: prices['daily_return'].clip(lower=-0.50, upper=0.50))
    # Left join: price rows on dates missing from df_sp keep a NaN market return.
    .merge(df_sp[['date', 'sp_daily_return']], how='left', on='date')
)

In [None]:
def _rolling_beta(company_prices, window=250):
    """Rolling beta of one company's daily returns against the market.

    Parameters
    ----------
    company_prices : pd.DataFrame
        Rows for a single company, already in date order, with the columns
        'daily_return' and 'sp_daily_return'.
    window : int
        Number of consecutive rows in each rolling window.

    Returns
    -------
    pd.Series
        cov(daily_return, sp_daily_return) / var(sp_daily_return) per row,
        indexed like `company_prices`. NaN until a full window is available.
    """
    market = company_prices['sp_daily_return']
    rolling_cov = company_prices['daily_return'].rolling(window=window).cov(market)
    rolling_var = market.rolling(window=window).var()
    return rolling_cov / rolling_var


# The top-level pd.rolling_cov / pd.rolling_var helpers no longer exist in
# pandas, so the .rolling() accessor is used instead. The window is evaluated
# separately for each company: df_price stacks all companies in one frame, and
# a single frame-wide window would blend the tail of one company's history
# into the head of the next.
company_betas = [
    _rolling_beta(company_prices)
    for _, company_prices in df_price.groupby('company_id')
]

# Column assignment aligns on the index; df_price comes out of pd.merge and
# therefore has a fresh, unique integer index. The empty-Series fallback
# avoids pd.concat raising on an empty list when df_price has no rows.
df_price['beta'] = (
    pd.concat(company_betas) if company_betas else pd.Series(dtype=float)
)

In [141]:
# Attach every campaign of a company to that company's price rows, then keep
# only prices dated inside the campaign's 18-months-pre .. 18-months-post
# window (both ends inclusive). Price rows with no matching campaign carry
# missing window bounds after the left join and never pass the comparisons.
df_price_subset = (
    df_price
    .merge(
        df[[
            'campaign_id',
            'company_id',
            '18_months_pre_announcement_date',
            '1_year_pre_announcement_date',
            '6_months_pre_announcement_date',
            '90_days_pre_announcement_date',
            'campaign_announcement_date',
            '6_months_post_announcement_date',
            '1_year_post_announcement_date',
            '18_months_post_announcement_date',
        ]],
        how='left',
        on='company_id',
    )
    .loc[lambda merged: (
        (merged['date'] >= merged['18_months_pre_announcement_date'])
        & (merged['date'] <= merged['18_months_post_announcement_date'])
    )]
)

In [144]:
def _window_market_return(gb, start_col, end_col):
    """Sum `sp_daily_return` over rows whose `date` lies in [start_col, end_col]."""
    in_window = gb['date'].between(gb[start_col], gb[end_col])
    return gb.loc[in_window, 'sp_daily_return'].sum()


def _market_beta(gb):
    """cov(daily_return, sp_daily_return) / var(sp_daily_return) on complete rows."""
    # Both moments come from the same rows (those where the stock return AND
    # the market return are present); otherwise the variance would include
    # market observations that the covariance never saw.
    paired = gb[['daily_return', 'sp_daily_return']].dropna()
    return paired.cov().iloc[0, 1] / paired['sp_daily_return'].var()


def calculate_beta(gb):
    """Summarise one campaign's price window: market returns and beta.

    Parameters
    ----------
    gb : pd.DataFrame
        Price rows of one group, with the columns 'date', 'daily_return',
        'sp_daily_return' and the campaign window-boundary date columns
        (constant within the group; the first row's value is reported).

    Returns
    -------
    pd.Series
        * the 18-months-pre, announcement and 18-months-post dates;
        * six `*_market_return` values, each the plain sum (not a compounded
          product) of `sp_daily_return` over the named window, with both
          window ends inclusive -- so the announcement date itself counts in
          the pre windows as well as in the post windows;
        * 'beta': covariance of stock and market daily returns divided by the
          market variance, over rows where both returns are present. NaN when
          fewer than two such rows exist.
    """
    announcement = 'campaign_announcement_date'
    return pd.Series({
        '18_months_pre_announcement_date': gb['18_months_pre_announcement_date'].iloc[0],
        'campaign_announcement_date': gb[announcement].iloc[0],
        '18_months_post_announcement_date': gb['18_months_post_announcement_date'].iloc[0],
        '18_months_pre_date_market_return': _window_market_return(gb, '18_months_pre_announcement_date', announcement),
        '1_year_pre_date_market_return': _window_market_return(gb, '1_year_pre_announcement_date', announcement),
        '6_months_pre_date_market_return': _window_market_return(gb, '6_months_pre_announcement_date', announcement),
        '6_months_post_date_market_return': _window_market_return(gb, announcement, '6_months_post_announcement_date'),
        '1_year_post_date_market_return': _window_market_return(gb, announcement, '1_year_post_announcement_date'),
        '18_months_post_date_market_return': _window_market_return(gb, announcement, '18_months_post_announcement_date'),
        'beta': _market_beta(gb),
    })

# One summary row per (campaign_id, company_id) pair.
df_price_beta = (
    df_price_subset
    .groupby(['campaign_id', 'company_id'])
    .apply(calculate_beta)
)

In [145]:
# Preview the first five summary rows.
df_price_beta.head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,18_months_pre_announcement_date,campaign_announcement_date,18_months_post_announcement_date,18_months_pre_date_market_return,1_year_pre_date_market_return,6_months_pre_date_market_return,6_months_post_date_market_return,1_year_post_date_market_return,18_months_post_date_market_return,beta
campaign_id,company_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0000396364C,000DRZ-E,2006-07-22,2008-01-22,2009-07-22,0.070913,-0.073259,-0.146721,-0.026223,-0.370881,-0.201813,0.550437
0000411278C,000DS9-E,2010-11-29,2012-05-29,2013-11-29,0.143743,0.027883,0.11586,0.076779,0.231829,0.326371,0.548463
0000556550C,001NXZ-E,2006-09-24,2008-03-24,2009-09-24,0.046874,-0.043876,-0.11054,-0.099703,-0.402938,-0.12423,0.764134
0000719478C,00286W-E,2010-02-05,2011-08-05,2013-02-05,0.141713,0.072626,-0.082935,0.02664,0.02664,0.02664,0.820827
0000792044C,0015VV-E,2002-10-21,2004-04-21,2005-10-21,0.237738,0.201746,0.081929,-0.015247,-0.006055,0.005784,0.154293


In [156]:
# Persist the per-campaign summary; the index is written along with the columns.
df_price_beta.to_csv(path_or_buf='../data/factset_campaign_betas.csv')