# Import Libraries

In [None]:
import pandas as pd # For DataFrame Manipulation

#Libraries to download data from Yahoo Finance
from yahoo_fin.stock_info import *
import yfinance as yf

#Libraries for System functions
import datetime
import time
import os, sys
import shutil

from tqdm.notebook import tqdm #Library to see progress of loop iterations

#Libraries for Treemap
from functools import partial
from d3IpyPlus import *
#d3IpyPlus was found in https://github.com/maclandrol/d3IpyPlus
## One just needs to include the file d3IpyPlus.py in the same path as the Jupyter Notebook's file

#Libraries for Table
from bokeh.io import show, save, output_file
from bokeh.models import ColumnDataSource
from bokeh.models.widgets import DataTable, TableColumn

# Initial Setup

In [None]:
#Create a Data Folder (next to the notebook's working directory)
Data_folder = os.path.abspath(os.path.join(os.getcwd(), 'Data'))
os.makedirs(Data_folder, exist_ok=True)

#Clean older files and folders in the Data Folder
#WARNING: everything inside ./Data is deleted each time this cell runs
for entry in os.listdir(Data_folder):
    entry_path = os.path.join(Data_folder, entry)
    if os.path.isdir(entry_path) and not os.path.islink(entry_path):
        #Real directory: remove it recursively and let a failure surface here
        #instead of being silently ignored
        shutil.rmtree(entry_path)
    else:
        #Plain file (or symbolic link): remove just that entry
        os.remove(entry_path)

#Create New Folder for EuroStoxx
eurostoxx_folder = os.path.join(Data_folder, 'EuroStoxx')
os.makedirs(eurostoxx_folder, exist_ok=True)

#Create New Folder for SP500
sp500_folder = os.path.join(Data_folder, 'SP500')
os.makedirs(sp500_folder, exist_ok=True)

#Create New Folder for Currencies
currencies_folder = os.path.join(Data_folder, 'Currencies')
os.makedirs(currencies_folder, exist_ok=True)

In [None]:
#CSV file describing the index constituents; the code below reads its columns
#Yahoo_Ticker, Ticker, Name, Sector, Country and Currency_Yahoo
file_name = "Stoxx600.csv"

In [None]:
#Obtain the Name, Sector, Country, Currency and Ticker ID for one Ticker based on the CSV File
def GICS(ticker, csv_path=None):
    """Return the descriptive fields stored for ``ticker`` in the constituents CSV.

    Parameters
    ----------
    ticker : str
        Value to look up in the ``Yahoo_Ticker`` column.
    csv_path : str, optional
        CSV file to read. Defaults to the notebook-level ``file_name``.

    Returns
    -------
    tuple
        ``(name, sector, country, currency, Ticker_ID)`` taken from the columns
        ``Name``, ``Sector``, ``Country``, ``Currency_Yahoo`` and ``Ticker`` of
        the first row whose ``Yahoo_Ticker`` equals ``ticker``.

    Raises
    ------
    IndexError
        If no row matches ``ticker``.
    KeyError
        If one of the expected columns is missing from the file.
    """
    if csv_path is None:
        csv_path = file_name

    GICS_table = pd.read_csv(csv_path, index_col='Yahoo_Ticker')

    #Filter once and reuse the result for every column
    matches = GICS_table[GICS_table.index == ticker]

    #.iloc[0] is explicit positional access; a bare [0] on a string-labelled
    #Series relies on a deprecated positional fallback
    name = matches['Name'].iloc[0]
    sector = matches['Sector'].iloc[0]
    country = matches['Country'].iloc[0]
    currency = matches['Currency_Yahoo'].iloc[0]
    Ticker_ID = matches['Ticker'].iloc[0]
    return name, sector, country, currency, Ticker_ID

In [None]:
#Formula to Convert variables to number values
def converter(variable):
    """Convert a Yahoo-style figure such as ``'1.5M'`` or ``'2,500'`` to a number.

    Parameters
    ----------
    variable : str, int, float or None
        Text with optional thousands separators and an optional one-character
        suffix (``%``, ``K``/``k``, ``M``, ``B``, ``T``), or an already numeric
        value.

    Returns
    -------
    float, int or str
        * the string ``'nan'`` when ``variable`` is missing (kept for backward
          compatibility: callers rely on the later arithmetic failing);
        * ``variable`` unchanged when it is already a real number;
        * otherwise the parsed value times the suffix multiplier, rounded to
          2 decimals. ``%`` has multiplier 1, i.e. ``'12.3%'`` gives ``12.3``.

    Raises
    ------
    ValueError
        If the text cannot be parsed as a number.
    """
    import numbers  #local import keeps the cell self-contained

    convert_matrix = {'%': 1, 'K': 1000, 'k': 1000, 'M': 1000000, 'B': 1000000000, 'T': 1000000000000}

    if pd.isnull(variable):
        return 'nan'
    if isinstance(variable, numbers.Real):
        #Already numeric (float, int, numpy scalar): nothing to parse
        return variable

    text = variable.replace(',', '').strip()
    #text[-1:] is '' for an empty string, so the lookup never raises
    multiplier = convert_matrix.get(text[-1:])
    if multiplier is None:
        return round(float(text), 2)
    return round(float(text[:-1]) * multiplier, 2)

# EuroStoxx

## Obtaining the Stock Market Data

In [None]:
#Load the constituents file, then build the de-duplicated lists of
#Yahoo tickers and Yahoo currency-pair codes (first occurrence order is kept)
Index_data = pd.read_csv(file_name)

tickers, currencies = (
    Index_data[column].drop_duplicates().tolist()
    for column in ('Yahoo_Ticker', 'Currency_Yahoo')
)

In [None]:
#Loop through the tickers' list (tqdm gives the progress bar)
#One csv file is written per ticker; a ticker whose data cannot be obtained is reported and skipped
for ticker in tqdm(tickers):
    try:
        ##Obtain information on the Name, Sector, Country, Currency and Ticker ID of each Ticker
        name, sector, country, currency, Ticker_ID = GICS(ticker)

        ##Get Data from Yahoo: Key Stats Table and the Historical Price Data
        key_stats = get_stats(ticker)
        ##Row 18, last column: presumably the "Shares Outstanding" value - verify against the current layout of get_stats
        Shares_Outs = key_stats.loc[18].iloc[-1]

        price = yf.download(ticker, start="2020-03-30", end="2020-03-31")['Adj Close']

        ##Work on the Data Received:
        Shares_Outs = converter(Shares_Outs) #Convert Shares Outstanding from text to number, using Formula defined earlier
        price = round(price.iloc[-1],2) #Last available Price value (explicit positional access), rounded to 2

        ##converter returns the text 'nan' for a missing value, so this line raises and the ticker is skipped
        Market_Cap = round(Shares_Outs*price,2)

        ##Construct Ticker Table to Print
        ticker_data_table = pd.DataFrame(data={
            'Ticker': [Ticker_ID], 'Name': [name], 'Sector': [sector], 'Country': [country], 'Currency':[currency],
            'Price':[price], 'Market_Cap': [Market_Cap]
        }).set_index('Ticker')

        ##Save Table
        ticker_data_table.to_csv(os.path.join(eurostoxx_folder,ticker)+'_Table.csv')

    except Exception as error:
        ##Exception (not a bare except) keeps Ctrl-C / kernel interrupt working; report which ticker failed and why
        print(f"Skipping {ticker}: {type(error).__name__}: {error}")
        continue

In [None]:
#Combine every per-ticker csv found in the EuroStoxx folder into one table indexed by Ticker
files = os.listdir(eurostoxx_folder)

dfList = [
    pd.read_csv(os.path.join(eurostoxx_folder, file)).set_index('Ticker', drop=True)
    for file in files
]

#Stack the one-row tables vertically
ticker_data_table = pd.concat(dfList, axis=0)

#Keep a copy of the combined table in the Data folder
ticker_data_table.to_csv(os.path.join(Data_folder,'Ticker_Data_Table.csv'))

In [None]:
#Loop through the Currencies' list (tqdm gives the progress bar)
#One csv file is written per currency pair; a pair that cannot be downloaded is reported and skipped
for currency in tqdm(currencies):
    try:
        currency_value_EUR = yf.download(currency, start="2020-03-30", end="2020-03-31")['Adj Close']

        ##Last available value (explicit positional access), rounded to 4
        currency_value_EUR = round(currency_value_EUR.iloc[-1],4)

        ##Construct Currency Table to Print
        currency_data_table = pd.DataFrame(data={
            'Currency': [currency], 'Currency_Value_EUR': [currency_value_EUR]
        }).set_index('Currency')

        currency_pair = currency[0:6] #Cannot Save file name with "="

        ##Save Table
        currency_data_table.to_csv(os.path.join(currencies_folder,currency_pair)+'_Table.csv')

    except Exception as error:
        ##Exception (not a bare except) keeps Ctrl-C / kernel interrupt working; report which pair failed and why
        print(f"Skipping {currency}: {type(error).__name__}: {error}")
        continue

In [None]:
#Join All the Currencies Data Files into a Table indexed by Currency
files = os.listdir(currencies_folder)

dfList = [
    pd.read_csv(os.path.join(currencies_folder, file), index_col='Currency')
    for file in files
]

#Add EUREUR to be 1 (one Euro is worth one Euro)
#The row is built explicitly instead of through a chained assignment, which does not
#write through to the table under pandas Copy-on-Write
eur_row = pd.DataFrame(
    {'Currency_Value_EUR': [1.0]},
    index=pd.Index(['EUREUR=X'], name='Currency'),
)

#Appending the EUR row also guarantees the list is never empty
currency_data_table = pd.concat(dfList + [eur_row], axis=0)

#If an EUREUR row was also downloaded, keep only the explicit value of 1 added last
currency_data_table = currency_data_table[~currency_data_table.index.duplicated(keep='last')]

currency_data_table.to_csv(os.path.join(Data_folder,'Currencies_Data_Table.csv'))

In [None]:
#Attach to every ticker the value of its currency pair (left join keeps tickers without a match)
ticker_data_table = ticker_data_table.reset_index()
full_table = ticker_data_table.merge(currency_data_table, on='Currency', how='left')

# Converting the Market Cap from Different currencies to Euro and adjusting it to Billions of Euros
full_table['Market Cap Bn EUR'] = (full_table['Market_Cap'] / full_table['Currency_Value_EUR']) / 1000000000

#Adjusting the value of Stocks traded in London since information is presented in pence (100 pence = 1 pound)
#A boolean mask with .loc updates the table itself; the chained form table[col].loc[row] = ...
#does not write through under pandas Copy-on-Write
london_rows = full_table['Currency'] == 'EURGBP=X'
full_table.loc[london_rows, 'Market Cap Bn EUR'] = full_table.loc[london_rows, 'Market Cap Bn EUR'] / 100

full_table = full_table.set_index('Name')
#Save Final Table to csv
full_table.to_csv('EuroStoxx600_Final_Table.csv')

In [None]:
#Reload the final table from disk; read back this way, 'Name' becomes a regular column
full_table = pd.read_csv(filepath_or_buffer='EuroStoxx600_Final_Table.csv')

#### Note: the information could not be obtained automatically for all the tickers, so the remaining ones were filled in manually

## Generating the TreeMap Figure

In [None]:
#Keep only the variables needed by the TreeMap graph by dropping the remaining columns
tmap_df = full_table.drop(
    columns=['Ticker','Currency', 'Price', 'Market_Cap','Currency_Value_EUR']
)

In [None]:
#Running the Treemap Code: Countries first, then Sectors, then Company Names,
#with both size and color driven by the Market Cap in Billions of Euros
tmap = TreeMap(
    id=["Country","Sector","Name"],
    size="Market Cap Bn EUR",
    color="Market Cap Bn EUR",
    legend=False,
    width=700,
)
tmap.draw(tmap_df)

In [None]:
#Print the HTML code returned by dump_html for the treemap above.
#Per the original author's note, the printed HTML still had to be edited by hand
#in this case, so it is printed here rather than used as-is.
print(tmap.dump_html(tmap_df))

In [None]:
#Generating the Treemap with Sectors first and Countries on the next level
#(size and color are again the Market Cap in Billions of Euros)
tmap = TreeMap(
    id=["Sector","Country","Name"],
    size="Market Cap Bn EUR",
    color="Market Cap Bn EUR",
    legend=False,
    width=700,
)
tmap.draw(tmap_df)

In [None]:
#Print the HTML code returned by dump_html for the Sector-first treemap.
#Per the original author's note, the printed HTML still had to be edited by hand
#in this case, so it is printed here rather than used as-is.
print(tmap.dump_html(tmap_df))

## Save the DataFrame to html format

In [None]:
#The table reloaded from csv already has 'Name' as a regular column; flatten the index only
#when 'Name' is still the index, so that no spurious 'index' column is created
if 'Name' not in full_table.columns:
    full_table = full_table.reset_index()

#Reshape the Currency Information, to have only the currency and not the currency pair
#(characters 3 to 5 of the pair code, e.g. 'EURGBP=X' -> 'GBP'); done on the whole column at once
full_table['Currency'] = full_table['Currency'].str[3:6]

#Drop unnecessary columns
full_table = full_table.drop(columns=['Market_Cap', 'Currency_Value_EUR'])

#Round Market Cap column
full_table['Market Cap Bn EUR'] = full_table['Market Cap Bn EUR'].round(4)

#Include column for rank (1 = largest Market Cap; ties share their average rank)
full_table['Rank'] = full_table['Market Cap Bn EUR'].rank(ascending=False)

#Sort Table by Ranking column
full_table = full_table.sort_values(by='Rank', ascending=True)

#Setting column's order by name, so the result does not depend on the positions of the columns
full_table = full_table[
    ['Rank', 'Name', 'Ticker', 'Sector', 'Price', 'Currency', 'Country', 'Market Cap Bn EUR']
]

#Remove rows with any missing value
full_table = full_table.dropna()

In [None]:
#Column names of the final table, in display order
full_table_columns = full_table.columns

#Wrap the DataFrame in a Bokeh data source and describe one table column per DataFrame column
source = ColumnDataSource(full_table)

columns = [
    TableColumn(field=Ci, title=Ci, width=20)
    for Ci in full_table_columns
]

#Create the Bokeh DataTable widget (index column hidden)
eurostoxx_data_table = DataTable(
    source=source,
    columns=columns,
    width=900,
    height=400,
    selectable = True,
    index_position = None,
)

#Save the widget as a standalone html file
output_file('EuroStoxx_Data_Table.html')
save(eurostoxx_data_table)

# SP500

### Same procedure as above; the difference is that there is no need for currency conversion nor for including the "Country" variable

In [None]:
#CSV file describing the SP500 constituents (replaces the EuroStoxx file name used above)
file_name = "SP500.csv"

In [None]:
#Load the constituents file and list each Yahoo ticker once (first occurrence order is kept)
Index_data = pd.read_csv(file_name)

tickers = Index_data.drop_duplicates(subset='Yahoo_Ticker')['Yahoo_Ticker'].tolist()

In [None]:
#Loop through the tickers' list (tqdm gives the progress bar)
#One csv file is written per ticker; a ticker whose data cannot be obtained is reported and skipped
for ticker in tqdm(tickers):
    try:
        ##Obtain information on the Name, Sector, Country, Currency and Ticker ID of each Ticker
        name, sector, country, currency, Ticker_ID = GICS(ticker)

        ##Get Data from Yahoo: Key Stats Table and the Historical Price Data
        key_stats = get_stats(ticker)
        ##Row 18, last column: presumably the "Shares Outstanding" value - verify against the current layout of get_stats
        Shares_Outs = key_stats.loc[18].iloc[-1]

        price = yf.download(ticker, start="2020-03-30", end="2020-03-31")['Adj Close']

        ##Work on the Data Received:
        Shares_Outs = converter(Shares_Outs) #Convert Shares Outstanding from text to number, using Formula defined earlier
        price = round(price.iloc[-1],2) #Last available Price value (explicit positional access), rounded to 2

        ##converter returns the text 'nan' for a missing value, so this line raises and the ticker is skipped
        Market_Cap = round(Shares_Outs*price,2)

        ##Construct Ticker Table to Print
        ticker_data_table = pd.DataFrame(data={
            'Ticker': [Ticker_ID], 'Name': [name], 'Sector': [sector], 'Country': [country], 'Currency':[currency],
            'Price':[price], 'Market_Cap': [Market_Cap]
        }).set_index('Ticker')

        ##Save Table
        ticker_data_table.to_csv(os.path.join(sp500_folder,ticker)+'_Table.csv')

    except Exception as error:
        ##Exception (not a bare except) keeps Ctrl-C / kernel interrupt working; report which ticker failed and why
        print(f"Skipping {ticker}: {type(error).__name__}: {error}")
        continue

In [None]:
#Combine every per-ticker csv found in the SP500 folder into one table indexed by Ticker
files = os.listdir(sp500_folder)

dfList = [
    pd.read_csv(os.path.join(sp500_folder, file)).set_index('Ticker', drop=True)
    for file in files
]

#Stack the one-row tables vertically
ticker_data_table = pd.concat(dfList, axis=0)

#Keep a copy of the combined table in the Data folder
ticker_data_table.to_csv(os.path.join(Data_folder,'SP500_Ticker_Data_Table.csv'))

In [None]:
#Turn the Ticker index back into a column; full_table is the same object as ticker_data_table here
ticker_data_table = ticker_data_table.reset_index()
full_table = ticker_data_table

# No currency conversion here: the Market Cap is only rescaled to Billions
full_table['Market Cap Bn USD'] = full_table['Market_Cap'].div(1000000000)

#Index the table by company Name and save the Final Table to csv
full_table = full_table.set_index('Name')
full_table.to_csv('SP500_Final_Table.csv')

In [None]:
#Reload the final table from disk; read back this way, 'Name' becomes a regular column
full_table = pd.read_csv(filepath_or_buffer='SP500_Final_Table.csv')

#### Note: the information could not be obtained automatically for all the tickers, so the remaining ones were filled in manually. Also, some tickers fail to download on the first try, so it is worth running the download a second time.

## Generating the TreeMap Figure

In [None]:
#Keep only the variables needed by the TreeMap graph by dropping the remaining columns
tmap_df = full_table.drop(
    columns=['Ticker','Currency', 'Price', 'Market_Cap']
)

In [None]:
#Running the Treemap Code: Countries first, then Sectors, then Company Names,
#with both size and color driven by the Market Cap in Billions of USD
tmap = TreeMap(
    id=["Country","Sector","Name"],
    size="Market Cap Bn USD",
    color="Market Cap Bn USD",
    legend=False,
    width=700,
)
tmap.draw(tmap_df)

In [None]:
#Print the HTML code returned by dump_html for the SP500 treemap.
#Per the original author's note, the printed HTML still had to be edited by hand
#in this case, so it is printed here rather than used as-is.
print(tmap.dump_html(tmap_df))

## Save the DataFrame to html format

In [None]:
#The table reloaded from csv already has 'Name' as a regular column; flatten the index only
#when 'Name' is still the index
if 'Name' not in full_table.columns:
    full_table = full_table.reset_index()

#Reshape the Currency Information, to have only the currency and not the currency pair
#(characters 3 to 5 of the pair code); done on the whole column at once, so a missing
#value stays missing instead of stopping the cell
full_table['Currency'] = full_table['Currency'].str[3:6]

#Drop unnecessary columns
full_table = full_table.drop(columns=['Market_Cap'])

#Round Market Cap column
full_table['Market Cap Bn USD'] = full_table['Market Cap Bn USD'].round(4)

#Include column for rank (1 = largest Market Cap; ties share their average rank)
full_table['Rank'] = full_table['Market Cap Bn USD'].rank(ascending=False)

#Sort Table by Ranking column
full_table = full_table.sort_values(by='Rank', ascending=True)

#Setting column's order by name, so the result does not depend on the positions of the columns
full_table = full_table[
    ['Rank', 'Name', 'Ticker', 'Sector', 'Price', 'Currency', 'Country', 'Market Cap Bn USD']
]

In [None]:
#Column names of the final table, in display order
full_table_columns = full_table.columns

#Wrap the DataFrame in a Bokeh data source and describe one table column per DataFrame column
source = ColumnDataSource(full_table)

columns = [
    TableColumn(field=Ci, title=Ci, width=20)
    for Ci in full_table_columns
]

#Create the Bokeh DataTable widget (index column hidden)
sp500_data_table = DataTable(
    source=source,
    columns=columns,
    width=900,
    height=400,
    selectable = True,
    index_position = None,
)

#Save the widget as a standalone html file
output_file('SP500_Data_Table.html')
save(sp500_data_table)