# Import Libraries

In [None]:
from yahoo_fin.stock_info import *

from bs4 import BeautifulSoup
import urllib
import requests
import re
from json import loads

import talib as tb

import pandas as pd
import numpy as np

import datetime
import time
import os, sys
import shutil

from tqdm import tqdm #Used in the for loops to track the progress of the loop

# Initial Setup

In [None]:
#Create the Data Folder next to the current working directory (no error if it already exists)
Data_folder = os.path.join(os.path.abspath(os.getcwd()), 'Data')
os.makedirs(Data_folder, exist_ok=True)

#Clean older files and folders in the Data Folder
##Real directories are removed recursively; plain files and symlinks are unlinked.
##Checking the entry type first avoids calling rmtree on files (and os.remove on folders).
for entry in os.listdir(Data_folder):
    entry_path = os.path.join(Data_folder, entry)
    if os.path.isdir(entry_path) and not os.path.islink(entry_path):
        shutil.rmtree(entry_path, ignore_errors=True)
    else:
        os.remove(entry_path)

#Create one sub-folder per analysis type
fundamental_analysis_folder = os.path.join(Data_folder, 'Fundamental Analysis')
analysts_recommendations_folder = os.path.join(Data_folder, 'Analysts Recommendations')
technical_analysis_folder = os.path.join(Data_folder, 'Technical Analysis')

for folder in (fundamental_analysis_folder, analysts_recommendations_folder, technical_analysis_folder):
    os.makedirs(folder, exist_ok=True)

In [None]:
#Convert variables to number values
def converter(variable):
    """Convert a formatted figure such as '1.5M' or '12.3%' into a number.

    Parameters:
        variable: a string with an optional one-letter magnitude suffix
            (K, M, B, T, case-insensitive) or a trailing '%', a number,
            or a missing value (None / NaN).

    Returns:
        The string 'nan' for missing or empty input (callers compare against
        this sentinel), the number unchanged for numeric input, otherwise the
        parsed value rounded to 2 decimals. Percentages keep their face value
        ('12.3%' -> 12.3).

    Raises:
        ValueError: if a non-empty string cannot be parsed as a number.
    """
    convert_matrix = {'%': 1, 'K': 1000, 'M': 1000000, 'B': 1000000000, 'T': 1000000000000}

    if pd.isnull(variable):
        return 'nan'

    #Numbers pass through untouched; this includes numpy scalars, which are
    #what pandas hands back when a column is fully numeric
    if isinstance(variable, (int, float, np.integer, np.floating)):
        return variable

    #Drop surrounding blanks and thousands separators before parsing
    text = variable.strip().replace(',', '')
    if not text:
        return 'nan'

    multiplier = convert_matrix.get(text[-1].upper())
    if multiplier is None:
        return round(float(text), 2)
    return round(float(text[:-1]) * multiplier, 2)

In [None]:
#Obtain the Name, Sector and Industry for one Ticker based on the CSV file: 'SP500_Index.csv'
def GICS(ticker):
    """Look up a ticker in 'SP500_Index.csv' (read from the working directory).

    Parameters:
        ticker: the symbol to look up in the file's 'Ticker' column.

    Returns:
        A (name, sector, industry) tuple taken from the first row whose
        'Ticker' equals `ticker`.

    Raises:
        IndexError: if the ticker is not present in the file.
    """
    GICS_table = pd.read_csv('SP500_Index.csv', index_col = 'Ticker')

    #Filter once and take the first match by position; `.iloc[0]` is explicit
    #about being positional, unlike `series[0]` on a string-labelled index
    matches = GICS_table.loc[GICS_table.index == ticker, ['Name', 'Sector', 'Industry']]
    name, sector, industry = matches.iloc[0]
    return name, sector, industry

## Define the Tickers (Company Investment Symbol) 

In [None]:
#Ticker symbols to analyse (hard-coded snapshot of the index constituents)
##The list is split across two lines between items; a line break inside a quoted symbol is a syntax error
SP500 = ['MMM','ABT','ABBV','ABMD','ACN','ATVI','ADBE','AMD','AAP','AES','AMG','AFL','A','APD','AKAM','ALK','ALB','ARE','ALXN','ALGN','ALLE','AGN','ADS','LNT','ALL','GOOGL','MO','AMZN','AMCR','AEE','AAL','AEP','AXP','AIG','AMT','AWK','AMP','ABC','AME','AMGN','APH','ADI','ANSS','ANTM','AON','AOS','APA','AIV','AAPL','AMAT','APTV','ADM','ARNC','ANET','AJG','AIZ','ATO','T','ADSK','ADP','AZO','AVB','AVY','BHGE','BLL','BAC','BK','BAX','BBT','BDX','BRK-B','BBY','BIIB','BLK','HRB','BA','BKNG','BWA','BXP','BSX','BMY','AVGO','BR','BF-B','CHRW','COG','CDNS','CPB','COF','CPRI','CAH','KMX','CCL','CAT','CBOE','CBRE','CBS','CDW','CE','CELG','CNC','CNP','CTL','CERN','CF','SCHW','CHTR','CVX','CMG','CB','CHD','CI','XEC','CINF','CTAS','CSCO','C','CFG','CTXS','CLX','CME','CMS','KO','CTSH','CL','CMCSA','CMA','CAG','CXO','COP','ED','STZ','COO','CPRT','GLW','CTVA','COST','COTY','CCI','CSX','CMI','CVS','DHI','DHR','DRI','DVA','DE','DAL','XRAY','DVN','FANG','DLR','DFS','DISCA','DISCK','DISH','DG','DLTR','D','DOV','DOW','DTE','DUK','DRE','DD','DXC','ETFC','EMN','ETN','EBAY','ECL','EIX','EW','EA','EMR','ETR','EOG','EFX','EQIX','EQR','ESS','EL','EVRG','ES','RE','EXC','EXPE','EXPD','EXR','XOM','FFIV','FB','FAST','FRT','FDX','FIS','FITB','FE','FRC','FISV','FLT','FLIR','FLS','FMC','F','FTNT','FTV','FBHS','FOXA','FOX','BEN','FCX','GPS','GRMN','IT','GD','GE','GIS','GM','GPC','GILD','GL','GPN','GS','GWW','HAL','HBI','HOG','HIG','HAS','HCA','HCP','HP','HSIC','HSY','HES','HPE','HLT','HFC','HOLX','HD','HON','HRL','HST','HPQ','HUM','HBAN','HII','IEX','IDXX','INFO','ITW','ILMN','IR','INTC','ICE','IBM','INCY','IP','IPG','IFF','INTU','ISRG','IVZ','IPGP','IQV','IRM','JKHY','JEC','JBHT','SJM','JNJ','JCI','JPM','JNPR','KSU','K','KEY','KEYS','KMB','KIM','KMI','KLAC','KSS','KHC','KR','LB','LHX','LH','LRCX','LW','LVS','LEG','LDOS','LEN','LLY','LNC','LIN','LKQ','LMT','L','LOW','LYB','MTB','MAC','M','MRO','MPC','MKTX','MAR','MMC','MLM','MAS','MA','MKC','MXIM','MCD','MCK','MDT','MRK','MET','MTD','MGM','MCHP','MU',
         'MSFT','MAA','MHK','TAP','MDLZ','MNST','MCO','MS','MOS','MSI','MSCI','MYL','NDAQ','NOV','NTAP','NFLX','NWL','NEM','NWSA','NWS','NEE','NLSN','NKE','NI','NBL','JWN','NSC','NTRS','NOC','NCLH','NRG','NUE','NVDA','NVR','ORLY','OXY','OMC','OKE','ORCL','PCAR','PKG','PH','PAYX','PYPL','PNR','PBCT','PEP','PKI','PRGO','PFE','PM','PSX','PNW','PXD','PNC','PPG','PPL','PFG','PG','PGR','PLD','PRU','PEG','PSA','PHM','PVH','QRVO','PWR','QCOM','DGX','RL','RJF','RTN','O','REG','REGN','RF','RSG','RMD','RHI','ROK','ROL','ROP','ROST','RCL','CRM','SBAC','SLB','STX','SEE','SRE','SHW','SPG','SWKS','SLG','SNA','SO','LUV','SPGI','SWK','SBUX','STT','SYK','STI','SIVB','SYMC','SYF','SNPS','SYY','TMUS','TROW','TTWO','TPR','TGT','TEL','FTI','TFX','TXN','TXT','TMO','TIF','TWTR','TJX','TSCO','TDG','TRV','TRIP','TSN','UDR','ULTA','USB','UAA','UA','UNP','UAL','UNH','UPS','URI','UTX','UHS','UNM','VFC','VLO','VAR','VTR','VRSN','VRSK','VZ','VRTX','VIAB','V','VNO','VMC','WAB','WMT','WBA','DIS','WM','WAT','WEC','WCG','WFC','WELL','WDC','WU','WRK','WY','WHR','WMB','WLTW','WYNN','XEL','XRX','XLNX','XYL','YUM','ZBH','ZION','ZTS']

tickers = SP500

# Fundamental Analysis

In [None]:
#Read one Key Stats figure by row position; return `default` when Yahoo has no value for it
def _key_stat(key_stats, row, default=0):
    value = converter(key_stats['Value'][row])
    return default if value == 'nan' else value


#Loop through the tickers' list (tqdm gives the progress bar)
##One CSV per ticker is written to the Fundamental Analysis folder. A ticker whose data
##cannot be retrieved is reported and skipped, so one failure does not stop the whole run.
for ticker in tqdm(tickers):
    try:
        #Obtain information on the Name, Sector and Industry of each Ticker
        name, sector, industry = GICS(ticker)

        #Get the Key Stats table from Yahoo
        ##NOTE(review): figures are read by row position (0-8), which assumes Yahoo's
        ##current row order for this table - confirm if the values look shifted
        key_stats = get_stats(ticker)

        #Current price: Quote Table first, live price as fallback
        try:
            Price = round(get_quote_table(ticker)['Quote Price'], 2)
        except Exception:
            Price = round(get_live_price(ticker), 2)

        # Getting the Market Cap: Identifying biggest companies
        Market_Cap = _key_stat(key_stats, 0)

        #Fundamental Analysis Multiples: Price Multiples
        P = Price
        ##Row 2 is used for PE when available; otherwise row 3 is taken as-is
        PE = _key_stat(key_stats, 2, default=None)
        if PE is None:
            PE = converter(key_stats['Value'][3])
        PEG = _key_stat(key_stats, 4)
        PB = _key_stat(key_stats, 6)

        #Fundamental Analysis Multiples: Enterprise Value (EV) Multiples
        EV_EBITDA = _key_stat(key_stats, 8)

        ##EV to Sales is not available in Key Stats table so we will need to construct it
        EV = _key_stat(key_stats, 1)
        P_Sales = _key_stat(key_stats, 5)
        Sales = Market_Cap / P_Sales if P_Sales != 0 else 0
        ##Without a Sales figure the multiple is reported as 0, like the other
        ##unavailable multiples, instead of dividing by zero and losing the ticker
        EV_Sales = round(EV / Sales, 2) if Sales != 0 else 0

        #Construct Ticker Table to Print
        fundamental_data_table = pd.DataFrame(data={
            'Ticker': [ticker], 'Name': [name], 'Sector': [sector], 'Industry': [industry], 'Price': [P], 'PE': [PE],
            'PEG': [PEG], 'PB': [PB], 'EV_EBITDA': [EV_EBITDA], 'EV_Sales': [EV_Sales]
        }).set_index('Ticker')
        ##Save Table
        fundamental_data_table.to_csv(os.path.join(fundamental_analysis_folder,ticker)+'_Table.csv')

    except Exception as error:
        #Keep going with the next ticker, but leave a trace of what went wrong
        tqdm.write('Fundamental Analysis: skipping {} ({!r})'.format(ticker, error))
        continue

In [None]:
#Join All the Ticker Data Files into a Table
##Every file in the Fundamental Analysis folder is loaded as a frame indexed by Ticker
files = os.listdir(fundamental_analysis_folder)

dfList = [
    pd.read_csv(os.path.join(fundamental_analysis_folder, file)).set_index('Ticker')
    for file in files
]

#Stack the per-ticker frames and store the combined table in the working directory
fundamental_data_table = pd.concat(dfList, axis=0)
fundamental_data_table.to_csv('Ticker_Data_Table.csv')

## Fundamental Analysis: Calculate the Industry Averages

In [None]:
#Import the complete Ticker Data to include Industry metrics
fundamental_data_table = pd.read_csv('Ticker_Data_Table.csv')

#Calculate Industry metrics: mean of each multiple per Industry, in a single pass
multiples = ['PE', 'PEG', 'PB', 'EV_EBITDA', 'EV_Sales']
industry_means = fundamental_data_table.groupby('Industry')[multiples].mean()

Ind_PE = industry_means['PE']
Ind_PEG = industry_means['PEG']
Ind_PB = industry_means['PB']
Ind_EV_EBITDA = industry_means['EV_EBITDA']
Ind_EV_Sales = industry_means['EV_Sales']

#Include Industry metrics in the Full table
##Each row receives the averages of its own Industry as 'Ind_<multiple>' columns;
##joining on the Industry key avoids relying on positional column selection
fundamental_data_table = fundamental_data_table.join(industry_means.add_prefix('Ind_'), on='Industry')

#Save Final Ticker Table to File
fundamental_data_table = fundamental_data_table.set_index('Ticker')
fundamental_data_table = fundamental_data_table[['Name', 'Sector', 'Industry', 'Price', 'PE', 'Ind_PE', 'PEG', 'Ind_PEG',
                                                 'PB', 'Ind_PB', 'EV_EBITDA', 'Ind_EV_EBITDA', 'EV_Sales','Ind_EV_Sales']]
fundamental_data_table = fundamental_data_table.round(2)
fundamental_data_table.to_csv(os.path.join(Data_folder,'Fundamental_Data_Table.csv'))

# Analyst Recommendation

In [None]:
#Loop through the tickers' list (tqdm gives the progress bar)
##One CSV per ticker is written to the Analysts Recommendations folder. A ticker whose
##data cannot be retrieved is reported and skipped.
for ticker in tqdm(tickers):
    try:
        #Obtain information on the Name, Sector and Industry of each Ticker
        name, sector, industry = GICS(ticker)

        #Obtain the Quote Table (used for the Target Price and the Current Price)
        quote_table = get_quote_table(ticker)

        #Web scraping the Data in Yahoo
        ##The timeout keeps one unresponsive request from blocking the whole loop
        r = requests.get('https://finance.yahoo.com/quote/'+ticker+'/analysis?p='+ticker, timeout=30)
        soup = BeautifulSoup(r.text, "lxml")
        script = soup.find("script",text=re.compile("root.App.main")).text
        data = loads(re.search(r"root.App.main\s+=\s+(\{.*\})", script).group(1))
        financial_data = data["context"]["dispatcher"]["stores"]['QuoteSummaryStore']['financialData']

        #Obtaining the Analysts' Recommendation, Number of Opinions and Price Target
        Analyst_recomm = financial_data['recommendationKey']
        Analysts_number = financial_data['numberOfAnalystOpinions']['raw']
        P_Target = quote_table['1y Target Est']

        #Current price: Quote Table first, live price as fallback
        try:
            P = round(quote_table['Quote Price'],2)
        except Exception:
            P = round(get_live_price(ticker), 2)

        #Obtain the Upside (in %) implied by the Target Price over the Current Price
        Prob_Up = round((P_Target/P-1)*100,2)

        #Construct Ticker Table to Print
        analysts_data_table = pd.DataFrame(data={
            'Ticker': [ticker], 'Name': [name], 'Sector': [sector], 'Industry': [industry],'Price': [P], 'P_Target': [P_Target],
            'Prob_Up %': [Prob_Up], 'Analyst_recomm': [Analyst_recomm], 'Analysts_number': [Analysts_number],
        }).set_index('Ticker')
        ##Save Table
        analysts_data_table.to_csv(os.path.join(analysts_recommendations_folder,ticker)+'_Table.csv')

    except Exception as error:
        #Keep going with the next ticker, but leave a trace of what went wrong
        tqdm.write('Analyst Recommendation: skipping {} ({!r})'.format(ticker, error))
        continue

In [None]:
#Join All the Ticker Data Files into a Table
##Every file in the Analysts Recommendations folder is loaded as a frame indexed by Ticker
files = os.listdir(analysts_recommendations_folder)

dfList = [
    pd.read_csv(os.path.join(analysts_recommendations_folder, file)).set_index('Ticker')
    for file in files
]

#Stack the per-ticker frames, round to 2 decimals and store the result in the Data Folder
analysts_data_table = pd.concat(dfList, axis=0).round(2)

analysts_data_table.to_csv(os.path.join(Data_folder,'Analysts_Data_Table.csv'))

# Technical Analysis

In [None]:
#Price Data: Defining the Start and End dates (month/day/year strings, computed once for all tickers)
finishing_period = time.strftime('%m/%d/%Y')

##Let's define the start date as 1 year ago
beginning_period_unformat = datetime.datetime.strptime(finishing_period, '%m/%d/%Y') - datetime.timedelta(days=366)
beginning_period = beginning_period_unformat.strftime('%m/%d/%Y')

#Loop through the tickers' list (tqdm gives the progress bar)
##One CSV per ticker (most recent date first) is written to the Technical Analysis folder.
##A ticker whose data cannot be retrieved is reported and skipped.
for ticker in tqdm(tickers):
    try:
        #Obtain information on the Name, Sector and Industry of each Ticker
        name, sector, industry = GICS(ticker)

        historical_data = get_data(ticker, start_date = beginning_period , end_date = finishing_period)

        #Let's Create a DataFrame with the downloaded Data
        df = pd.DataFrame(historical_data, columns=['open', 'high', 'low', 'close', 'adjclose', 'volume'])
        df['date'] = pd.to_datetime(df.index, format='%Y/%m/%d')

        #Let's define the Variables for the OHLCV metrics
        high_price = df.high.values
        low_price = df.low.values
        close_price = df.close.values
        ##Volume is converted to float64 before being passed to TA-Lib
        volume_moment = np.array(df.volume.values, dtype='f8')

        #Indicators Calculation
        ##NOTE(review): acceleration=0 and maximum=0 are passed explicitly here; with a zero
        ##acceleration factor the SAR does not move towards the price - confirm this is intended
        PSAR = tb.SAR(high= high_price, low= low_price, acceleration=0, maximum=0)

        MA_200d = tb.SMA(close_price, timeperiod=200)
        MA_50d = tb.SMA(close_price, timeperiod=50)
        Golden_Cross = MA_50d - MA_200d

        RSI = tb.RSI(close_price, timeperiod=14)
        CCI= tb.CCI(high= high_price, low= low_price, close= close_price, timeperiod=14)

        ADX = tb.ADX(high= high_price, low= low_price, close= close_price, timeperiod=14)
        DI_plus = tb.PLUS_DI(high= high_price, low= low_price, close= close_price, timeperiod=14)
        DI_minus = tb.MINUS_DI(high= high_price, low= low_price, close= close_price, timeperiod=14)
        DI_index = DI_plus - DI_minus

        ##Chaikin A/D Oscillator: the table below needs it, so it has to be computed here
        Chaikin_Osc = tb.ADOSC(high= high_price, low= low_price, close= close_price, volume= volume_moment,
                               fastperiod=3, slowperiod=10)

        #Construct Ticker Table to Print
        technical_data_table = pd.DataFrame(data={
            'date': df['date'],'Ticker': ticker, 'Name': name, 'Sector': sector, 'Industry': industry,
            'Price': close_price, 'PSAR': PSAR, 'Golden_Cross': Golden_Cross, 'RSI': RSI, 'CCI': CCI,
            'ADX': ADX, 'DI_index': DI_index, 'Chaikin_Osc': Chaikin_Osc,
        }).set_index('date')

        ##Most recent date first, so the first data row of the file is the latest reading
        technical_data_table = technical_data_table.sort_index(ascending=False)
        technical_data_table.to_csv(os.path.join(technical_analysis_folder,ticker)+'_Table.csv')
    except Exception as error:
        #Keep going with the next ticker, but leave a trace of what went wrong
        tqdm.write('Technical Analysis: skipping {} ({!r})'.format(ticker, error))
        continue

In [None]:
#Join All the Ticker Data Files into a Table
##Only the first data row of each per-ticker file is kept, without its date, indexed by Ticker
files = os.listdir(technical_analysis_folder)

dfList = []

for file in files:
    first_row = pd.read_csv(os.path.join(technical_analysis_folder, file)).head(1)
    dfList.append(first_row.drop(columns=['date']).set_index('Ticker'))

#Stack the per-ticker rows, round to 2 decimals and store the result in the Data Folder
technical_data_table = pd.concat(dfList, axis=0).round(2)

technical_data_table.to_csv(os.path.join(Data_folder,'Technical_Data_Table.csv'))

# Save the DataFrames to html format

In [None]:
from bokeh.plotting import figure, show, save
from bokeh.models import ColumnDataSource
from bokeh.models.widgets import TableColumn, DataTable
from bokeh.layouts import row, widgetbox

import holoviews as hv
from holoviews import opts

import panel as pn

#Load the 'bokeh' extension of HoloViews before any table is built
hv.extension('bokeh')

## Save the Fundamental Analysis table to html format

In [None]:
#Reload the final Fundamental Analysis table from the Data Folder
fundamental_data_table = pd.read_csv(os.path.join(Data_folder,'Fundamental_Data_Table.csv'))

#Widget choices: every Sector / Industry found in the table, preceded by an "All" entry
sectors = np.append('All', fundamental_data_table.Sector.unique())
industries = np.append('All', fundamental_data_table.Industry.unique())

#Build the Table shown for one (Sector, Industry) selection
##"All" is not a value any stock carries, so it simply means "do not filter on this field"
def load_stocks(Sector, Industry):
    """Return an hv.Table with the stocks matching the selected Sector and Industry."""
    selection = fundamental_data_table
    if Sector != 'All':
        selection = selection[selection.Sector.isin([Sector])]
    if Industry != 'All':
        selection = selection[selection.Industry.isin([Industry])]

    table_options = opts.Table(width=950, height=280, selectable = True, index_position = None)
    return hv.Table(selection).opts(table_options)

#DynamicMap regenerates the Table whenever the Sector or Industry widget changes
dmap = hv.DynamicMap(load_stocks, kdims=['Sector','Industry'])
dmap = dmap.redim.values(Sector=sectors, Industry=industries)
dmap = dmap.opts(framewise=True)

In [None]:
#Wrap the DynamicMap in a Panel object (widgets at the top left) and save it as a standalone html file
##max_states is set explicitly because, per the original note, the number of widget
##combinations here is over 1000; below that there is no need to include it
p = pn.panel(dmap, widget_location='top_left')
p.save('Fundamental_Analysis_table.html', embed=True, max_states=1536)

## Save the Analysts Recommendations table to html format

In [None]:
#Reload the final Analysts Recommendations table from the Data Folder
analysts_data_table = pd.read_csv(os.path.join(Data_folder,'Analysts_Data_Table.csv'))

#Preferred display order of the recommendation keys, most bullish first
##NOTE(review): assumes Yahoo's recommendationKey spellings - confirm against the data
_RECOMMENDATION_ORDER = ['strong_buy', 'buy', 'hold', 'underperform', 'sell']

#Get a list of all the Recommendations' types, and include one entry for "All"
##Known keys follow the preferred order and any other value found in the data is appended
##alphabetically, so the list works whatever number of distinct recommendations is present
_found = list(analysts_data_table.Analyst_recomm.dropna().unique())
recommendations = np.array(
    ['All']
    + [key for key in _RECOMMENDATION_ORDER if key in _found]
    + sorted(value for value in _found if value not in _RECOMMENDATION_ORDER)
)

#Build the Table shown for one Recommendation selection
##"All" is not a value any stock carries, so it means "show every stock"
def load_stocks(Recommendation):
    """Return an hv.Table with the stocks whose Analyst_recomm matches the selection."""
    if Recommendation == 'All':
        table = hv.Table(analysts_data_table)
    else:
        table = hv.Table(analysts_data_table[analysts_data_table.Analyst_recomm.isin([Recommendation])])
    table = table.opts(opts.Table(width=950, height=280, selectable = True, index_position = None))
    return table

#Instantiate the DynamicMap function, so to generate the Table defined with the Widget for Recommendation
dmap = hv.DynamicMap(load_stocks, kdims=['Recommendation']).redim.values(Recommendation=recommendations)
dmap = dmap.opts(framewise=True)

In [None]:
#Wrap the DynamicMap in a Panel object (widget at the top left) and save it as a standalone html file
p = pn.panel(dmap, widget_location='top_left')
p.save('Analysts_Recommendation_table.html', embed=True)

## Save the Technical Analysis table to html format

In [None]:
#Reload the final Technical Analysis table from the Data Folder
technical_data_table = pd.read_csv(os.path.join(Data_folder,'Technical_Data_Table.csv'))

#Slider bounds for the CCI and ADX: observed extremes widened by 5 and rounded to the nearest ten
cci_column = technical_data_table['CCI']
adx_column = technical_data_table['ADX']

CCI_min_abs = round(cci_column.min() - 5, -1)
CCI_max_abs = round(cci_column.max() + 5, -1)

ADX_min_abs = round(adx_column.min() - 5, -1)
ADX_max_abs = round(adx_column.max() + 5, -1)
#Note that the other Sliders will have the standard range:
    ## RSI: 0 to 100

#Build the Table shown for one combination of slider values
def load_stocks(RSI_min, RSI_max, CCI_min, CCI_max, ADX_value):
    """Return an hv.Table with the stocks whose RSI and CCI lie within the given
    bounds (inclusive) and whose ADX is strictly above ADX_value."""
    rsi_in_range = technical_data_table['RSI'].between(RSI_min, RSI_max)
    cci_in_range = technical_data_table['CCI'].between(CCI_min, CCI_max)
    adx_above = technical_data_table['ADX'] > ADX_value

    selection = technical_data_table[rsi_in_range & cci_in_range & adx_above]
    return hv.Table(selection).opts(opts.Table(width=850, index_position = None))

#DynamicMap regenerates the Table whenever one of the RSI, CCI or ADX widgets changes
dmap = hv.DynamicMap(load_stocks, kdims=['RSI_min','RSI_max', 'CCI_min','CCI_max', 'ADX_value'])
dmap = dmap.redim.range(
    RSI_min=(0, 100), RSI_max=(0, 100),
    CCI_min=(CCI_min_abs, CCI_max_abs), CCI_max=(CCI_min_abs, CCI_max_abs),
    ADX_value=(ADX_min_abs, ADX_max_abs))
dmap = dmap.opts(framewise=True)

In [None]:
#Let's use the Panel library to be able to adjust the Widgets and save the Table generated
##Discrete steps offered by each slider
rsi_steps = np.arange(0, 101, 25).tolist()
cci_steps = [CCI_min_abs, -100, 0, 100, CCI_max_abs]
adx_steps = np.arange(ADX_min_abs, ADX_max_abs, 10).tolist()

##One slider per key dimension of the DynamicMap; each one gets its own copy of the options list
slider_widgets = {
    'RSI_min': pn.widgets.DiscreteSlider(name='RSI_min', options=list(rsi_steps), value=0, width=150),
    'RSI_max': pn.widgets.DiscreteSlider(name='RSI_max', options=list(rsi_steps), value=100, width=150),
    'CCI_min': pn.widgets.DiscreteSlider(name='CCI_min', options=list(cci_steps), value=CCI_min_abs, width=150),
    'CCI_max': pn.widgets.DiscreteSlider(name='CCI_max', options=list(cci_steps), value=CCI_max_abs, width=150),
    'ADX_value': pn.widgets.DiscreteSlider(name='ADX_value', options=adx_steps, value=ADX_min_abs, width=150),
}

p = pn.pane.HoloViews(dmap, widgets=slider_widgets).layout
p.save('Technical_Analysis_table.html', embed=True, max_states=3750)