# Use SX5E and bloomberg download data to get me started

In [46]:
import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
%matplotlib inline

In [47]:
#initial cleaning function
def clean_sheet(dataframe):
    adj=dataframe.dropna(axis=1,how='all')
    adj=dataframe.set_index(adj.columns[0]) #use stock tickers as the index
    return adj

In [202]:
#pull the data from excel, form a dictionary of tab names and dataframes
tabs_to_grab=['Quarter','Date','Reported EPS','Comp EPS','Consensus EPS','50d MA','3d return','3d rel return','Volume','Momentum',
             'Revision','Market cap','PE','Property','Price']

data_dict={}

for s in tabs_to_grab:
    data_dict[s]=clean_sheet(pd.read_excel(r'C:\Users\Eric.Li\Documents\Post result data\2016-2018.xlsx',sheet_name=s))

In [203]:
# grab the price time series and transform it into the same datetime format; also generate abs_return, rel_return dataframe
price=data_dict["Price"].dropna(axis=1)
new_column=[i.strftime("%d/%b/%Y") for i in price.columns.tolist() if type(i)!=str]
price.columns=new_column


## Signal 2 - based on last quarter's standing

In [204]:
## grab the date 
date=data_dict["Date"].dropna(axis=1,how='all')
adj_date=date.fillna(pd.Timestamp('1900-1-1'))

In [205]:
## form the signal 2: separate date range into a unit of 3 months, evaluate the the top and bottom quartile values, use the
# dataframe for the next quarter to generate signals, and put them into long and short buckets
start_date=pd.Timestamp(2015,12,31)
end_date=pd.Timestamp(2018,3,31)
per='Q'


def signal2_build(date_df,return_df,start_date,end_date,top_quartile,bottom_quartile):
    dt_rng=pd.date_range(start_date,end_date,freq=per)
    long_list=[]
    short_list=[]
    for s in range(len(dt_rng)-2):
        rank_list=[]
        rank=return_df[(date_df>=dt_rng[s])&(date_df<=dt_rng[s+1])]
        signal=date_df[(date_df>=dt_rng[s+1])&(date_df<=dt_rng[s+2])]
        signal_value=return_df[(date_df>=dt_rng[s+1])&(date_df<=dt_rng[s+2])]
        for index,row in rank.iterrows():
            adj_row=row.dropna()
            for val in adj_row.values:
                rank_list.append(val)
        rank_series=pd.Series(rank_list)
        top=signal[signal_value>=np.percentile(rank_series,top_quartile)].dropna(axis=1,how='all').dropna(axis=0,how='all')
        bottom=signal[signal_value<=np.percentile(rank_series,bottom_quartile)].dropna(axis=1,how='all').dropna(axis=0,how='all')
        long_list.append(top)
        short_list.append(bottom)
    long_signal=pd.concat(long_list,axis=1)
    short_signal=pd.concat(short_list,axis=1)
    return long_signal, short_signal
                
long_signal,short_signal=signal2_build(adj_date,data_dict["3d rel return"],start_date,end_date,80,20)   


In [206]:
#Stack signals into a list of tuples (stocks, dates), convert dates into a common format of datetime
def signal2_stacking(signal):
    signal2_list=[]
    for index, row in signal.iterrows():
        adj_row=row.dropna()
        for s in adj_row.values:
            ts=(s - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's') 
            adj_time=datetime.utcfromtimestamp(ts).strftime("%d/%b/%Y") #transform the datetime into a common format
            signal2_list.append((adj_row.name,adj_time))
    return signal2_list

signal2_long_list=signal2_stacking(long_signal)
signal2_short_list=signal2_stacking(short_signal)

  import sys


In [207]:
# grab the price time series and transform it into the same datetime format; also generate abs_return, rel_return dataframe
price=data_dict["Price"].dropna(axis=1)
new_column=[i.strftime("%d/%b/%Y") for i in price.columns.tolist() if type(i)!=str]
price.columns=new_column

abs_return=price.diff(1,axis=1)/price
abs_return=abs_return.dropna(how='all',axis=1)
rel_return=abs_return - abs_return.iloc[-1]
rel_return=rel_return.dropna(how='all',axis=1)

In [208]:
# create a dictionary to have signal information (tuple) as keys and return data time series as values to make it easier to check
#; replace datetime with number of days post announcement to make it more comparable 
def signal2_return_dict(signal2_list,return_dataframe,holding):
    signal2_dict={}
    for s in signal2_list:
        return_series=return_dataframe.loc[s[0]]
        day0=return_series.index.tolist().index(s[1]) #index of day 0
        target_series=return_series.iloc[day0:day0+min(holding, len(return_series[day0:]))]
        target_series.index=range(min(holding, len(return_series[day0:]))) #replace datetime with number of days post announcement
        target_series.name=s
        signal2_dict[s]=target_series
    return signal2_dict

In [209]:
signal2_return_dict(signal2_long_list,abs_return,60)

{('ABI BB', '01/Mar/2018'): 0    0.021805
 1   -0.007548
 2    0.015637
 3    0.004856
 4   -0.001215
 Name: (ABI BB, 01/Mar/2018), dtype: float64,
 ('ABI BB', '04/May/2017'): 0     0.049496
 1     0.001830
 2    -0.002752
 3     0.002745
 4    -0.000916
 5    -0.001376
 6     0.002743
 7    -0.005055
 8    -0.002765
 9    -0.006961
 10   -0.032088
 11    0.012299
 12   -0.003799
 13    0.002841
 14   -0.000474
 15    0.000000
 16    0.000000
 17   -0.004760
 18   -0.008641
 19   -0.002889
 20    0.002880
 21    0.007150
 22   -0.001910
 23    0.000954
 24   -0.007208
 25   -0.023610
 26   -0.000985
 27   -0.005944
 28    0.003455
 29   -0.000988
 30    0.003447
 31    0.000492
 32    0.005384
 33    0.006323
 34   -0.004888
 35   -0.000978
 36   -0.012382
 37    0.010779
 38   -0.011899
 39   -0.005985
 40   -0.027467
 41   -0.008893
 42    0.006064
 43    0.000719
 44   -0.002781
 45   -0.011248
 46    0.013763
 47    0.009563
 48   -0.018653
 49    0.020205
 50    0.006256
 51    0.

[('ABI BB', '29/Jul/2016'),
 ('ABI BB', '04/May/2017'),
 ('ABI BB', '27/Jul/2017'),
 ('ABI BB', '01/Mar/2018'),
 ('AD NA', '01/Jun/2016'),
 ('AD NA', '25/Aug/2016'),
 ('AD NA', '08/Nov/2017'),
 ('AD NA', '28/Feb/2018'),
 ('ADS GY', '08/Mar/2017'),
 ('ADS GY', '09/Nov/2017'),
 ('AIR FP', '15/Feb/2018'),
 ('ALV GY', '05/Aug/2016'),
 ('ALV GY', '16/Feb/2017'),
 ('ASML NA', '18/Jan/2017'),
 ('ASML NA', '19/Jul/2017'),
 ('ASML NA', '17/Jan/2018'),
 ('BAS GY', '24/Oct/2017'),
 ('BAYN GY', '27/Jul/2016'),
 ('BN FP', '28/Jul/2016'),
 ('CRH ID', '25/Aug/2016'),
 ('DG FP', '29/Jul/2016'),
 ('DG FP', '07/Feb/2017'),
 ('DPW GY', '11/May/2016'),
 ('DPW GY', '03/Aug/2016'),
 ('DTE GY', '11/May/2017'),
 ('DTE GY', '03/Aug/2017'),
 ('DTE GY', '09/Nov/2017'),
 ('EI FP', '01/Mar/2018'),
 ('ENEL IM', '28/Jul/2016'),
 ('ENGI FP', '02/Mar/2017'),
 ('ENGI FP', '08/Mar/2018'),
 ('ENI IM', '01/Mar/2017'),
 ('ENI IM', '28/Jul/2017'),
 ('EOAN GY', '09/Aug/2017'),
 ('FP FP', '27/Oct/2017'),
 ('FRE GY', '03/May/2