In [1]:
import pandas as pd
import numpy as np
import datetime
import plotly.express as px

## Whole Dataframe

In [2]:
# Load the combined dataset from disk; every subset frame further down is cut from `df`.
df = pd.read_csv(filepath_or_buffer="Dataframes/economic_story_dataframe.csv")
df

Unnamed: 0,Instrument,Date,Earnings Per Share - Actual,Earnings Per Share - Mean,Earnings Per Share - Actual Surprise,Revenue - Actual,Enterprise Value,Long Term Growth - Mean,Number of Analysts,Recommendation - Mean (1-5),Recommendation - Mean (1-5).1,"WACC Inflation Adjusted Risk Free Rate, (%)",Net Income after Tax,Price Target - Mean,Price Close
0,POOL.OQ,2022-10-01,4.78,4.58750,4.196,1.615339e+09,1.406160e+10,,10.0,2.27273,2.200,3.747421,189965000.0,360.11111,318.21
1,POOL.OQ,2022-07-01,7.63,7.51700,1.503,2.055818e+09,1.556187e+10,,9.0,2.20000,2.000,3.092855,307205000.0,435.37500,351.23
2,POOL.OQ,2022-04-01,4.23,3.14867,34.342,1.412650e+09,1.843682e+10,,9.0,2.00000,2.300,2.325202,179203000.0,519.88889,422.85
3,POOL.OQ,2022-01-01,2.63,1.87500,40.267,1.035557e+09,2.384882e+10,,8.0,2.30000,2.300,1.515266,107542000.0,571.00000,566.00
4,POOL.OQ,2021-10-01,4.51,3.84833,17.194,1.411448e+09,1.769954e+10,,8.0,2.30000,2.300,1.527139,184573000.0,545.28571,434.41
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20115,AVY.N,2014-01-01,0.69,0.68000,1.471,1.583900e+09,5.553068e+09,7.00,8.0,2.33333,2.625,,42100000.0,54.62500,50.19
20116,AVY.N,2013-10-01,0.69,0.63833,8.095,1.504900e+09,5.029514e+09,7.00,7.0,2.62500,2.875,,62000000.0,47.71429,43.52
20117,AVY.N,2013-07-01,0.71,0.70250,1.068,1.552300e+09,5.436229e+09,14.35,8.0,2.87500,2.875,,70800000.0,45.00000,42.76
20118,AVY.N,2013-04-01,0.59,0.57571,2.482,1.498900e+09,5.459128e+09,14.35,7.0,2.87500,3.000,,66800000.0,42.57143,43.07


# Creating subset dataframes for every factor

### Characteristics dataframe

In [3]:
# Firm-characteristics subset: the two identifier columns, the EPS surprise,
# revenue and net income.
characteristics = df.loc[
    :,
    [
        "Instrument",
        "Date",
        "Earnings Per Share - Actual Surprise",
        "Revenue - Actual",
        "Net Income after Tax",
        # "Enterprise Value",  # currently excluded
    ],
]
characteristics

Unnamed: 0,Instrument,Date,Earnings Per Share - Actual Surprise,Revenue - Actual,Net Income after Tax
0,POOL.OQ,2022-10-01,4.196,1.615339e+09,189965000.0
1,POOL.OQ,2022-07-01,1.503,2.055818e+09,307205000.0
2,POOL.OQ,2022-04-01,34.342,1.412650e+09,179203000.0
3,POOL.OQ,2022-01-01,40.267,1.035557e+09,107542000.0
4,POOL.OQ,2021-10-01,17.194,1.411448e+09,184573000.0
...,...,...,...,...,...
20115,AVY.N,2014-01-01,1.471,1.583900e+09,42100000.0
20116,AVY.N,2013-10-01,8.095,1.504900e+09,62000000.0
20117,AVY.N,2013-07-01,1.068,1.552300e+09,70800000.0
20118,AVY.N,2013-04-01,2.482,1.498900e+09,66800000.0


In [4]:
# Encode net income as an intermediate "loss firm status" code, then discard
# the raw figure:
#    1 -> net income is positive
#   -1 -> net income is zero or negative
#    0 -> net income is missing
# Missing values are detected directly with pd.isna() rather than by filling
# them with a sentinel of 1: with the sentinel, a genuine net income in the
# range (0, 1] was indistinguishable from "missing" and ended up coded as 0.
characteristics["loss firm status"] = [
    0 if pd.isna(x) else 1 if x > 0 else -1
    for x in characteristics["Net Income after Tax"]
]
characteristics = characteristics.drop(columns="Net Income after Tax")

In [5]:
# Drop every row that still has a missing value in any remaining column.
characteristics = characteristics.dropna()

# Flip the intermediate code into its final form (legend at the end of the cell).
# The loss-firm test must be `x < 0`: the incoming codes are only 1, -1 and 0,
# so the former `x < -1` never matched and loss firms were silently coded 0,
# making them indistinguishable from the "NA" category.
# NOTE: this cell is not idempotent - running it a second time flips the codes back.
characteristics["loss firm status"] = [
    -1 if x > 0 else 1 if x < 0 else 0
    for x in characteristics["loss firm status"]
]
characteristics

# loss firm status:
# -1 = not a loss firm (is profitable)
#  1 = loss firm (negative profit or profit is equal to 0!!)
#  0 = net income was NA; we could instead treat NA values as loss firms, or drop them altogether

Unnamed: 0,Instrument,Date,Earnings Per Share - Actual Surprise,Revenue - Actual,loss firm status
0,POOL.OQ,2022-10-01,4.196,1.615339e+09,-1
1,POOL.OQ,2022-07-01,1.503,2.055818e+09,-1
2,POOL.OQ,2022-04-01,34.342,1.412650e+09,-1
3,POOL.OQ,2022-01-01,40.267,1.035557e+09,-1
4,POOL.OQ,2021-10-01,17.194,1.411448e+09,-1
...,...,...,...,...,...
20115,AVY.N,2014-01-01,1.471,1.583900e+09,-1
20116,AVY.N,2013-10-01,8.095,1.504900e+09,-1
20117,AVY.N,2013-07-01,1.068,1.552300e+09,-1
20118,AVY.N,2013-04-01,2.482,1.498900e+09,-1


In [6]:
# Persist the characteristics subset as CSV; the row index is not written.
characteristics.to_csv(path_or_buf="Dataframes/characteristics.csv", index=False)

### Analysts dataframe

In [7]:
# Analyst subset: the two identifier columns, the EPS surprise, analyst
# coverage and both mean-recommendation columns.
analysts = df.loc[
    :,
    [
        "Instrument",
        "Date",
        "Earnings Per Share - Actual Surprise",
        "Number of Analysts",
        "Recommendation - Mean (1-5)",
        "Recommendation - Mean (1-5).1",
    ],
]
# analysts

In [8]:
# Add "Recommendation change" = first recommendation column minus the ".1"
# column, then remove the ".1" column. The new column is appended after the
# existing ones, so once ".1" is dropped it sits in the last position.
analysts = (
    analysts
    .assign(
        **{
            "Recommendation change": lambda frame: (
                frame["Recommendation - Mean (1-5)"]
                - frame["Recommendation - Mean (1-5).1"]
            )
        }
    )
    .drop(columns="Recommendation - Mean (1-5).1")
)
analysts

Unnamed: 0,Instrument,Date,Earnings Per Share - Actual Surprise,Number of Analysts,Recommendation - Mean (1-5),Recommendation change
0,POOL.OQ,2022-10-01,4.196,10.0,2.27273,0.07273
1,POOL.OQ,2022-07-01,1.503,9.0,2.20000,0.20000
2,POOL.OQ,2022-04-01,34.342,9.0,2.00000,-0.30000
3,POOL.OQ,2022-01-01,40.267,8.0,2.30000,0.00000
4,POOL.OQ,2021-10-01,17.194,8.0,2.30000,0.00000
...,...,...,...,...,...,...
20115,AVY.N,2014-01-01,1.471,8.0,2.33333,-0.29167
20116,AVY.N,2013-10-01,8.095,7.0,2.62500,-0.25000
20117,AVY.N,2013-07-01,1.068,8.0,2.87500,0.00000
20118,AVY.N,2013-04-01,2.482,7.0,2.87500,-0.12500


In [9]:
# Persist the analysts subset as CSV; the row index is not written.
analysts.to_csv(path_or_buf="Dataframes/analysts.csv", index=False)

### Macroeconomic factors dataframe

In [10]:
# Macroeconomic subset: the two identifier columns, the EPS surprise and the
# inflation-adjusted risk-free rate column.
macro = df.loc[
    :,
    [
        "Instrument",
        "Date",
        "Earnings Per Share - Actual Surprise",
        "WACC Inflation Adjusted Risk Free Rate, (%)",
    ],
]
macro

Unnamed: 0,Instrument,Date,Earnings Per Share - Actual Surprise,"WACC Inflation Adjusted Risk Free Rate, (%)"
0,POOL.OQ,2022-10-01,4.196,3.747421
1,POOL.OQ,2022-07-01,1.503,3.092855
2,POOL.OQ,2022-04-01,34.342,2.325202
3,POOL.OQ,2022-01-01,40.267,1.515266
4,POOL.OQ,2021-10-01,17.194,1.527139
...,...,...,...,...
20115,AVY.N,2014-01-01,1.471,
20116,AVY.N,2013-10-01,8.095,
20117,AVY.N,2013-07-01,1.068,
20118,AVY.N,2013-04-01,2.482,


In [11]:
# Persist the macro subset as CSV; the row index is not written.
macro.to_csv(path_or_buf="Dataframes/macro.csv", index=False)

In [12]:
# Disabled experiment: copy three ESG-related columns from `df2` into `df`
# wherever the instrument matches and the dates fall in the same year.
# NOTE(review): `df2` is not defined in any cell shown here, and the `.year`
# access assumes "Date" holds datetime values - confirm both before re-enabling.
#
# df['ESG Score'] = pd.Series()
# df['Governance Pillar Score'] = pd.Series()
# df['Independent Board Members'] = pd.Series()
#
# for i in range(len(df2)):
#     for j in range(len(df)):
#         if df.loc[j, "Instrument"] == df2.loc[i, "Instrument"] and df.loc[j, "Date"].year == df2.loc[i, "Date"].year:
#             df.loc[j, "ESG Score"] = df2.loc[i, "ESG Score"]
#             df.loc[j, "Governance Pillar Score"] = df2.loc[i, "Governance Pillar Score"]
#             df.loc[j, "Independent Board Members"] = df2.loc[i, "Independent Board Members"]
#
# df

# Comment: takes too much time - the nested loops compare every row of df2
# with every row of df (len(df2) * len(df) iterations).
# TODO: a single merge on instrument and year would probably avoid the row-by-row loop.