In [1]:
# imports
%reload_ext autoreload
%autoreload 2
%matplotlib inline 

from matplotlib import pyplot as plt

from utils.basic_utils import *
from utils.pricing import *
import utils.fundamental as fu

# Render floats in DataFrame output with thousands separators and two decimals.
# NOTE(review): `pd` is not imported explicitly in this cell; presumably it is
# brought in by one of the star imports above — confirm.
pd.options.display.float_format = '{:,.2f}'.format

Loading utils/config.json


In [2]:
# Work from the most recent stored snapshot for quotes and company summaries.
dates = read_dates('quote')
tgt_date = dates[-1]  # final entry returned by read_dates (original note: last date saved in S3)

# Every table below is indexed by ticker while keeping 'symbol' as a regular
# column too (drop=False). The load order is unchanged.
quotes = load_csvs('quote_consol', [tgt_date]).set_index('symbol', drop=False)

profile = load_csvs('summary_detail', ['assetProfile']).set_index('symbol', drop=False)

keystats = load_csvs(
    'summary_detail', [f'defaultKeyStatistics/{tgt_date}']
).set_index('symbol', drop=False)
finstats = load_csvs(
    'summary_detail', [f'financialData/{tgt_date}']
).set_index('symbol', drop=False)

Loading file quote/csv/2019-06-04
Loading file summary-categories/assetProfile
Loading file summary-categories/defaultKeyStatistics/2019-06-04
Loading file summary-categories/financialData/2019-06-04


In [3]:
# Load the valuation waterfall stored for the target date (original note: read from S3).
val_df = pd.read_csv(csv_load(f'valuation/waterfall/{tgt_date}'), parse_dates=True)
# storeDate is converted from epoch seconds to timestamps.
val_df['storeDate'] = pd.to_datetime(val_df['storeDate'], unit='s')
# Index by (storeDate, symbol) and keep only rows that have a premDisc value.
val_df = val_df.set_index(['storeDate', 'symbol']).dropna(subset=['premDisc'])
# Tickers in row order, on a fresh positional index.
tickers = val_df.reset_index()['symbol']

In [4]:
# Inspect which fields the valuation frame provides.
val_df.columns

Index(['AAT', 'AT', 'FX', 'ROA', 'ROE', 'baseMult', 'baseRate', 'currentValue',
       'discountRate', 'equityRiskPremium', 'equityValue', 'financialCurrency',
       'growthMult', 'growthRate', 'netDebt', 'npvGrowth', 'npvGrowthCF',
       'npvSteadyCF', 'premDisc', 'projCashROE', 'projFedFundsIncr',
       'steadyCF', 'totalReinvCapital'],
      dtype='object')

In [5]:
# Fields borrowed from each reference table; every table is keyed by symbol.
p_cols = ['sector', 'industry', 'country']
q_cols = ['forwardPE', 'trailingPE', 'marketCap', 'regularMarketPrice']
k_cols = ['pegRatio', 'shortPercentOfFloat']
f_cols = ['earningsGrowth', 'recommendationMean', 'targetMeanPrice', 'targetMedianPrice', 'numberOfAnalystOpinions']

# Walk the (columns, source table) pairs in the order profile, quotes,
# key statistics, financial data; this fixes the order of the new columns.
# Values are looked up by ticker and written positionally via .values.
for col_names, src_df in (
        (p_cols, profile),
        (q_cols, quotes),
        (k_cols, keystats),
        (f_cols, finstats)):
    for c in col_names:
        val_df.loc[:, c] = tickers.map(src_df[c].to_dict()).values

In [6]:
# Columns holding large values that get rescaled below. Each label appears
# exactly once: the list previously repeated 'projCashROE', which is redundant
# and makes the .loc assignment operate on non-unique column labels (newer
# pandas versions can reject such an assignment).
large_vals = [
    'netDebt', 'npvSteadyCF', 'equityValue',
    'currentValue', 'totalReinvCapital', 'steadyCF', 'projCashROE',
    'npvGrowth', 'npvGrowthCF', 'marketCap']
# convert large values to billions
# NOTE: this cell mutates val_df in place, so running it again without
# reloading val_df divides these columns (and the target prices below) again.
val_df.loc[:, large_vals] = val_df.loc[:, large_vals] / 10**9

# high level pre-processing / clean up
# pegRatio mapped in from keystats is overwritten here with
# forwardPE / (growthRate * 100).
val_df.loc[:, 'pegRatio'] = val_df.forwardPE / (val_df.growthRate * 100)
# Express the analyst target prices as a multiple of regularMarketPrice;
# the divisor is matched to rows positionally through `tickers`.
div_cols = ['targetMeanPrice', 'targetMedianPrice']
val_df.loc[:, div_cols] = val_df[div_cols].div(
    tickers.map(
        quotes.regularMarketPrice.to_dict()).values, axis=0)
# Divisions by zero produce +/-inf; treat those as missing values.
val_df.replace([np.inf, -np.inf], np.nan, inplace=True)
val_df.shape

(1099, 37)

In [7]:
# remove outliers from universe
# Metrics screened for outliers; the same list is reused as the display
# columns of the grouped summary further down.
show = ['premDisc', 'pegRatio', 'forwardPE', 'growthRate',
        'shortPercentOfFloat', 'targetMedianPrice']
# Cutoff handed to fu.excl_outliers.
# NOTE(review): its exact meaning (e.g. number of standard deviations) is
# defined inside fu.excl_outliers — confirm there.
threshold = 3
treshold = threshold  # misspelled legacy name, kept so cells that still reference it keep working
no_out_df = fu.excl_outliers(val_df, show, threshold)
no_out_df.shape

(1000, 37)

In [8]:
# mask results by valuation
mask_on = True
# Keep rows whose premDisc lies strictly between 0 and 3. The mask is built
# from no_out_df, the frame it filters, so the boolean Series and the selected
# rows always share one index. It was previously built from val_df, which has
# more rows, and relied on implicit index alignment inside .loc.
# NOTE(review): this selects the same rows as before provided
# fu.excl_outliers only drops rows and leaves premDisc values untouched — confirm.
mask = (no_out_df.premDisc > 0) & (no_out_df.premDisc < 3)
clean_df = no_out_df.loc[mask].copy() if mask_on else no_out_df.copy()
clean_df.shape

(614, 37)

In [9]:
# one company sampling
ticker = 'AAPL'
# First flag: ticker exists in the loaded valuation universe.
# Second flag: ticker survived the outlier / valuation filters.
# MultiIndex.levels keeps labels of rows that were filtered out, so testing
# against .levels[1] reports True even for removed tickers; test against the
# labels actually present in clean_df instead.
print(ticker in tickers.values, ticker in clean_df.index.get_level_values(1))
# Transposed view of every stored row for this ticker.
val_df.loc[(slice(None),ticker),:].T

True True


storeDate,2019-06-04
symbol,AAPL
AAT,1.25
AT,0.69
FX,1.00
ROA,0.17
ROE,0.30
baseMult,13.09
baseRate,0.02
currentValue,821.66
discountRate,0.08
equityRiskPremium,0.05


In [10]:
# aggregate / deep dive metrics
# Grouping keys: ['sector'] gives a sector-level summary; adding 'industry'
# and 'symbol' (as below) drills down to individual companies.
gby = ['sector', 'industry', 'symbol']
# Sectors kept when drilling down; use list(clean_df.sector.unique()) for all.
zoom_in = ['Healthcare']
agg_functions = 'median'
sort_by = ['premDisc']

# Select the metric columns BEFORE aggregating so the aggregation never runs
# over columns outside `show` (e.g. country, financialCurrency). Aggregating
# everything first is wasted work and fails on text columns in pandas versions
# that no longer drop them silently.
ind_sum_df = clean_df.groupby(by=gby)[show].agg(agg_functions)
# The zoom filter only applies in the drill-down view.
if 'symbol' in gby:
    ind_sum_df = ind_sum_df.loc[(zoom_in,), :]
ind_sum_df.sort_values(by=sort_by)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,premDisc,pegRatio,forwardPE,growthRate,shortPercentOfFloat,targetMedianPrice
sector,industry,symbol,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Healthcare,Biotechnology,EXEL,0.26,0.33,17.14,0.52,0.06,1.46
Healthcare,Drug Manufacturers - Specialty & Generic,SUPN,0.54,0.32,11.24,0.35,0.18,1.87
Healthcare,Drug Manufacturers - Major,BIIB,0.57,0.65,7.47,0.11,0.02,1.09
Healthcare,Biotechnology,UTHR,0.63,0.44,10.38,0.23,0.08,1.16
Healthcare,Medical Distribution,MCK,0.65,2.5,8.33,0.03,0.02,1.09
Healthcare,Medical Distribution,ABC,0.67,,11.19,0.0,0.05,1.05
Healthcare,Medical Distribution,CAH,0.75,,8.35,0.0,0.05,1.19
Healthcare,Drug Manufacturers - Major,ABBV,0.89,0.34,8.18,0.24,0.02,1.13
Healthcare,Drug Manufacturers - Major,BMY,0.93,0.67,10.6,0.16,0.06,1.18
Healthcare,Drug Manufacturers - Major,GILD,0.97,,9.31,0.0,0.01,1.27


In [None]:
# rank results relative to sector or entire universe
# Metrics where a lower value is preferable vs. those where higher is preferable.
low_better = ['premDisc', 'pegRatio', 'forwardPE', 'shortPercentOfFloat']
high_better = ['earningsGrowth', 'targetMedianPrice']
cols = low_better + high_better

by_sector_on = True

rank_df = clean_df.copy()
if by_sector_on:
    # relative to sector: each sector's rows are ranked among themselves.
    # groupby(sort=False) visits sectors in order of first appearance and
    # skips rows without a sector instead of producing an empty group.
    sector_ranks = [
        fu.rank_group(sector_rows[cols], low_better, high_better)
        for _, sector_rows in rank_df.groupby('sector', sort=False)
    ]
    ranked_df = pd.concat(sector_ranks, axis=0)
else:
    # relative to market: pass the same column subset as the per-sector
    # branch so both modes rank (and later average) identical inputs.
    # Previously the whole frame, including non-metric columns, was passed.
    ranked_df = fu.rank_group(rank_df[cols], low_better, high_better)

# Equal-weight composite: plain mean across the columns fu.rank_group returned.
ranked_df.loc[:, 'eq_wgt_mean_rank'] = ranked_df.mean(axis=1)
ranked_df.sort_values('eq_wgt_mean_rank')