## Use SX5E and bloomberg download data to get me started

In [417]:
import pandas as pd
import numpy as np
from datetime import datetime


In [436]:
#initial cleaning function
def clean_sheet(dataframe):
    adj=dataframe.dropna(axis=1,how='all')
    adj=dataframe.set_index(adj.columns[0]) #use stock tickers as the index
    return adj

In [437]:
#pull the data from excel, form a dictionary of tab names and dataframes
tabs_to_grab=['Quarter','Date','Reported EPS','Comp EPS','Consensus EPS','50d MA','3d return','Volume','Momentum',
             'Revision','Market cap','PE','property','Price']

data_dict={}

for s in tabs_to_grab:
    data_dict[s]=clean_sheet(pd.read_excel(r'C:\Users\Eric.Li\Documents\Post result code\Catalyst.xlsx',sheet_name=s))


In [438]:
#signal formation and use the date as the base dataframe
signal=data_dict['Date'][(data_dict['3d return']>2) & (data_dict['Revision']>0.02)]
signal=signal.dropna(axis=1,how='all')
signal=signal.dropna(axis=0,how='all')

In [441]:
#Stack signals into a list of tuples (stocks, dates), convert dates into a common format of datetime
signal_list=[]
for index, row in signal.iterrows():
    adj_row=row.dropna()
    for s in adj_row.values:
        ts=(s - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's') 
        adj_time=datetime.utcfromtimestamp(ts).strftime("%d/%b/%Y") #transform the datetime into a common format
        signal_list.append((adj_row.name,adj_time))

  


In [442]:
# grab the price time series and transform it into the same datetime format
price=data_dict["Price"]
new_column=pd.to_datetime(price.columns).strftime("%d/%b/%Y").tolist()
price.columns=new_column

In [443]:
# create a dictionary to have signal information (tuple) as keys and return data time series as values to make it easier to check
#; replace datetime with number of days post announcement to make it more comparable 
def signal_return_dict(signal_list,price_dataframe,holding):
    signal_dict={}
    for s in signal_list:
        price_series=price_dataframe.loc[s[0]]
        return_series=price_series.diff(1)/price_series
        day0=return_series.index.tolist().index(s[1]) #index of day 0
        target_series=return_series.iloc[day0:day0+min(holding, len(return_series[day0:]))]
        target_series.index=range(min(holding, len(return_series[day0:]))) #replace datetime with number of days post announcement
        target_series.name=s
        signal_dict[s]=target_series

    return signal_dict

return_dict=signal_return_dict(signal_list,price,63)

In [408]:
# obtain the dataframe of return time series for the signal stocks
return_series_list=[]
for index,key in return_dict.items():
    return_series_list.append(key)
    
return_df=pd.concat(return_series_list,axis=1)
return_df=return_df.dropna(how='any',axis=1) #drop stocks with limited history to make it more comparable

In [412]:
return_df.cumsum(axis=0)

Unnamed: 0_level_0,ADS GY,ALV GY,ASML NA,ASML NA,BAS GY,BMW GY,BMW GY,CRH ID,DG FP,DPW GY,...,NOKIA FH,PHIA NA,SAN FP,SAN SQ,SIE GY,SIE GY,SIE GY,TEF SQ,VIV FP,VIV FP
Unnamed: 0_level_1,08/Mar/2017,16/Feb/2017,19/Jul/2017,19/Oct/2016,24/Oct/2017,03/Aug/2017,04/May/2017,25/Aug/2016,07/Feb/2017,11/May/2016,...,02/Feb/2017,24/Oct/2016,28/Oct/2016,25/Jan/2017,04/Aug/2016,04/May/2016,25/Jan/2016,23/Feb/2017,31/Aug/2017,09/Nov/2016
0,0.086074,-0.002526,0.054700,0.021609,-0.009877,0.006551,0.004582,0.022873,-0.009200,0.021768,...,0.054382,0.041735,0.035979,0.038730,0.043005,0.013705,0.002038,0.017651,-0.004929,0.003596
1,0.104046,0.024808,0.072111,0.015351,0.000447,0.030767,0.024784,0.026893,0.035476,0.023657,...,0.050774,0.041369,0.029356,0.037043,0.056657,0.021321,0.081398,0.027224,0.044628,0.086083
2,0.111848,0.032428,0.068311,0.035309,0.017398,0.026446,0.033572,0.049646,0.043324,0.038180,...,0.042588,0.054917,0.030623,0.036113,0.068700,0.029522,0.057322,0.025661,0.035672,0.077121
3,0.114349,0.040890,0.069070,0.037694,0.030294,0.023847,0.029551,0.044546,0.042596,0.038180,...,0.066989,0.046905,0.041757,0.006508,0.091759,0.033392,0.034902,0.031257,0.038401,0.079675
4,0.089862,0.046897,0.067550,0.031853,0.028051,0.014855,0.040488,0.043228,0.051247,0.036501,...,0.075348,0.056642,0.047843,-0.003977,0.084172,0.048009,0.053614,0.057889,0.036413,0.058548
5,0.112134,0.042068,0.073972,0.030913,0.028051,0.014605,0.036273,0.034754,0.043400,0.044090,...,0.083205,0.046809,0.037642,0.006979,0.102785,0.047161,0.045266,0.056475,0.029908,0.060111
6,0.130981,0.030465,0.078110,0.032373,0.052944,0.013354,0.003031,0.047871,0.051758,-0.005444,...,0.077939,0.032027,0.047466,0.022665,0.102785,0.035470,0.028287,0.064988,0.027651,0.055138
7,0.131800,0.028629,0.054622,0.035698,0.052736,0.016471,0.004290,0.060978,0.044501,-0.000419,...,0.096035,0.024206,0.055972,0.031298,0.099517,0.039317,0.014374,0.075003,0.051157,0.014277
8,0.122155,0.033197,0.041356,0.038392,0.063250,0.013971,-0.003323,0.047863,0.034385,0.002472,...,0.099898,0.010807,0.111190,0.009647,0.087708,0.039317,0.018250,0.080427,0.062774,0.003261
9,0.118005,0.053776,0.039793,0.015388,0.067867,0.015718,-0.006331,0.047535,0.039055,0.009362,...,0.104597,0.022004,0.108981,-0.010675,0.082962,0.035348,0.011272,0.086310,0.089852,0.001329


In [416]:
return_df.columns[2]


('ASML NA', '19/Jul/2017')