In [None]:
# imports
%reload_ext autoreload
%autoreload 2
%matplotlib inline 

from matplotlib import pyplot as plt

from utils.basic_utils import *
from utils.pricing import *
import utils.fundamental as fu

# Render floats in displayed frames with thousands separators and two decimals.
# NOTE(review): `pd` is not imported explicitly in this cell; presumably it is
# re-exported by one of the star imports above -- confirm.
pd.options.display.float_format = '{:,.2f}'.format

In [None]:
# Load the latest saved snapshot of quote, profile and statistics tables.
# Each table is indexed by ticker while `symbol` is kept as a regular column.
dates = read_dates('quote')
tgt_date = dates[-1] # last date saved in S3

quotes = load_csvs(
    'quote_consol', [tgt_date]
).set_index('symbol', drop=False)

profile = load_csvs(
    'summary_detail', ['assetProfile']
).set_index('symbol', drop=False)

# key statistics and financial data are stored per date
keystats = load_csvs(
    'summary_detail', ['defaultKeyStatistics/' + str(tgt_date)]
).set_index('symbol', drop=False)
finstats = load_csvs(
    'summary_detail', ['financialData/' + str(tgt_date)]
).set_index('symbol', drop=False)

In [None]:
# Load the valuation waterfall for the target date (read from S3) and index it
# by (storeDate, symbol); rows without a premDisc value are discarded.
val_df = (
    pd.read_csv(csv_load(f'valuation/waterfall/{tgt_date}'), parse_dates=True)
    # storeDate is converted from epoch seconds
    .assign(storeDate=lambda frame: pd.to_datetime(frame.storeDate, unit='s'))
    .set_index(['storeDate', 'symbol'])
    .dropna(subset=['premDisc'])
)
# positional (0..n-1) Series of symbols, one entry per row of val_df
tickers = pd.Series(val_df.index.get_level_values('symbol'))

In [None]:
# Display the column labels of the valuation frame loaded in the previous cell.
val_df.columns

In [None]:
# Columns pulled from each per-symbol lookup table.
q_cols = ['forwardPE', 'trailingPE', 'marketCap', 'regularMarketPrice']
p_cols = ['sector', 'industry', 'country']
k_cols = ['pegRatio', 'shortPercentOfFloat']
f_cols = ['earningsGrowth', 'recommendationMean', 'targetMeanPrice', 'targetMedianPrice', 'numberOfAnalystOpinions']

# For every requested column, look up each row's ticker in the source table
# and write the result onto val_df positionally (hence `.values`).
column_sources = (
    (profile, p_cols),
    (quotes, q_cols),
    (keystats, k_cols),
    (finstats, f_cols),
)
for source_df, wanted in column_sources:
    for c in wanted:
        val_df.loc[:, c] = tickers.map(source_df[c].to_dict()).values

In [None]:
# Columns rescaled from raw units to billions.
# Each label appears exactly once: a repeated label ('projCashROE' used to be
# listed twice) makes the .loc selection/assignment below operate on duplicate
# column labels.
large_vals = [
    'netDebt', 'npvSteadyCF', 'equityValue',
    'currentValue', 'totalReinvCapital', 'steadyCF', 'projCashROE',
    'npvGrowth', 'npvGrowthCF', 'marketCap']

# NOTE: this cell mutates val_df in place and is therefore NOT idempotent --
# running it twice divides by 10**9 (and by the market price) a second time.
# Re-run the cells that build val_df before re-running this one.

# convert large values to billions
val_df.loc[:, large_vals] = val_df.loc[:, large_vals] / 10**9

# high level pre-processing / clean up
# pegRatio is recomputed from forwardPE and growthRate (growthRate is scaled
# by 100 here), replacing any pegRatio value already present in val_df.
val_df.loc[:, 'pegRatio'] = val_df.forwardPE / (val_df.growthRate * 100)

# express analyst price targets as a multiple of the regular market price
div_cols = ['targetMeanPrice', 'targetMedianPrice']
last_price = tickers.map(quotes.regularMarketPrice.to_dict()).values
val_df.loc[:, div_cols] = val_df[div_cols].div(last_price, axis=0)

# divisions by zero produce +/-inf; treat those as missing
val_df.replace([np.inf, -np.inf], np.nan, inplace=True)
val_df.shape

In [None]:
# Drop outliers from the universe, judged on the metrics listed in `show`.
# `show` is also the set of metrics reported by the aggregation cell.
show = [
    'premDisc',
    'pegRatio',
    'forwardPE',
    'growthRate',
    'shortPercentOfFloat',
    'targetMedianPrice',
]
# cut-off handed to fu.excl_outliers (variable name kept as-is for other cells)
treshold = 3
no_out_df = fu.excl_outliers(val_df, show, treshold)
no_out_df.shape

In [None]:
# mask results by valuation: keep rows with 0 < premDisc < 3
mask_on = True
# Build the mask on the frame it is applied to. Building it on val_df and
# applying it to no_out_df relies on implicit index alignment between two
# frames with different rows, which breaks if the indexes cannot be aligned.
# NOTE(review): assumes fu.excl_outliers only drops rows and leaves premDisc
# values untouched -- confirm.
mask = (no_out_df.premDisc > 0) & (no_out_df.premDisc < 3)
if mask_on:
    clean_df = no_out_df.loc[mask].copy()
else:
    clean_df = no_out_df.copy()
clean_df.shape

In [None]:
# one company sampling
ticker = 'AAPL'
# First flag: ticker is in the loaded universe; second: it survived cleaning.
# MultiIndex.levels keeps labels of rows that have since been filtered out, so
# membership is tested against the labels actually present on the rows.
in_universe = ticker in tickers.values
in_clean = ticker in clean_df.index.get_level_values(1)
print(in_universe, in_clean)
# all valuation fields for the ticker, transposed for readability
val_df.loc[(slice(None), ticker), :].T

In [None]:
# aggregate / deep dive metrics
# Grouping keys: the second assignment overrides the first. Comment it out to
# get a sector-level summary instead of the per-symbol deep dive.
gby = ['sector',]
gby = ['sector', 'industry', 'symbol'] # deep dive into the sectors selected below
# Sectors to show in the deep dive: the second assignment overrides the first.
zoom_in = list(clean_df.sector.unique()) # all sectors
zoom_in = ['Healthcare'] # or just a few selected
agg_functions = 'median'
sort_by = ['premDisc']

# Select the reported metrics BEFORE aggregating: clean_df also carries string
# columns (e.g. country), and aggregating those with a numeric function such as
# 'median' raises TypeError on pandas >= 2.0.
ind_sum_df = clean_df.groupby(by=gby)[show].agg(agg_functions)
if 'symbol' in gby:
    # deep dive: keep only the sectors listed in zoom_in (first index level)
    ind_sum_df = ind_sum_df.loc[(zoom_in,), :]
ind_sum_df.sort_values(by=sort_by)

In [None]:
# Rank every company on the selected metrics, either within its own sector or
# against the whole universe, then average the ranks with equal weights.
low_better = ['premDisc', 'pegRatio', 'forwardPE', 'shortPercentOfFloat']
high_better = ['earningsGrowth', 'targetMedianPrice']
cols = low_better + high_better

by_sector_on = True

rank_df = clean_df.copy()
if not by_sector_on:
    # relative to market
    # NOTE(review): the full frame is passed here, whereas the sector branch
    # passes only `cols` -- confirm fu.rank_group handles both the same way.
    ranked_df = fu.rank_group(rank_df, low_better, high_better)
else:
    # relative to sector: rank each sector's rows separately, then stack them
    super_list = [
        fu.rank_group(
            rank_df.loc[rank_df.sector == s, cols], low_better, high_better)
        for s in rank_df.sector.unique()
    ]
    ranked_df = pd.concat(super_list, axis=0)

# equal-weighted mean across all rank columns; lowest value is listed first
ranked_df['eq_wgt_mean_rank'] = ranked_df.mean(axis=1)
ranked_df.sort_values('eq_wgt_mean_rank')