In [1]:
# imports
%reload_ext autoreload
%autoreload 2
%matplotlib inline 

import pandas as pd
import numpy as np
import os

from utils.basic_utils import config, read_dates, load_csvs, csv_load, excl, csv_store, numeric_cols
from utils.pricing import roll_vol, load_px_close, discret_rets, get_ind_index
from utils.fundamental import filter_cols, filter_cols
from scipy.stats import linregress

import matplotlib as mpl
import matplotlib.pyplot as plt

pd.options.display.float_format = '{:,.2f}'.format

Loading utils/config.json


In [2]:
# utility functions

conf_slope = lambda y: linregress(range(len(y)), y).slope

def top_recomm_by_class(pred_df, labels, top_items):
    mask = pred_df.hard_pred_label == pred_df.soft_pred_label
    agree_df = pred_df.loc[mask].drop_duplicates()
    # should add sector and industries, group for allocation insights
    # should add marketcap, beta, etc, group for risk exposure insights
    label_mask = agree_df.soft_pred_label.isin(['bear', 'short', 'long', 'bull'])
    return agree_df.loc[label_mask]\
        .sort_values(by='soft_confidence', ascending=False)\
        .groupby(by='soft_pred_label').head(top_items)\
        .sort_values(by='soft_pred_class')

def pred_distrib(pred_df, count_col):
    dist = pd.value_counts(pred_df[count_col]).to_frame()
    dist['weights'] = dist / dist.sum()
    return dist

def add_desc_stats(df, descriptive_cols):
    
    tickers = df.index
    for k in descriptive_cols.keys():
        desc_df, cols = descriptive_cols[k]['df'], descriptive_cols[k]['columns']
        for c in cols: df.loc[:, c] = tickers.map(desc_df[c].to_dict()).values 

    df = clean_df(df, large_vals, div_cols)

    df.loc[:, 'pegRatio'] = df.forwardPE / (val_df.growthRate * 100)
    df.loc[:, 'size'] = discret_rets(df.marketCap, mkt_cap_cuts, mkt_cap_labels)
        
    return df

def clean_df(df, large_vals, div_cols):
    # convert large values to billions
    df.loc[:, large_vals] = df.loc[:, large_vals] / 10**9
    df.loc[:, div_cols] = df[div_cols].div(df.regularMarketPrice, axis=0)
    df.replace([np.inf, -np.inf], np.nan, inplace=True)
    
    return df

def ml_votes_by_class(fndm_dfs, group_mask):
    super_list = []
    for key in fndm_dfs.keys():
        df = fndm_dfs[key]
        super_list.append(
            fndm_dfs[key].loc[:, group_mask].set_index(re_index_col)\
            .rename(columns={group_mask[0]: key}))
    df = pd.concat(super_list, axis=1, sort=False)
    # df.apply(pd.value_counts).loc[fwd_ret_labels].style.background_gradient(cmap='RdYlGn')
    return df.T.apply(pd.value_counts).T

q_group = lambda x, q: x.quantile(q)

ticker_across_mls = lambda ticker, df: df.set_index('symbol').loc[ticker]

# equal weight ml_confidence
equal_wgt_conf = lambda x: ticker_across_mls(x[0]).loc[:, x[1]].mean()

# take index and class, and calculate a dot product, weighted confidence
score_wgt_conf = lambda x: ticker_across_mls(x[0], lf_df).loc[
    :, [x[1], 'model']].set_index('model').T.mul(model_weights).sum(1).iloc[0]

def load_s3_preds(s3_path, tgt_date, key_list, eqty_symbols, verbose=True):
    """ Read ML prediction files """
    print(f'Loading files for {tgt_date}')
    fndm_dfs = {k: pd.read_csv(csv_load(f'{s3_path[k]}{tgt_date}'), 
            index_col='pred_date', parse_dates=True) for k in key_list}

    super_list = []
    for key in fndm_dfs.keys(): 
        fndm_dfs[key].loc[:, 'model'] = key
        fndm_dfs[key] = fndm_dfs[key].loc[fndm_dfs[key].symbol.isin(eqty_symbols), :]
        df = fndm_dfs[key]
        if verbose: print(f'{key.upper()}, {df.symbol.shape[0]} unique symbols')
        super_list.append(df)
        
    return pd.concat(super_list, axis=0)

def get_wtd_ML_results(lf_df, model_weights):
    """ calculate model weighted class and confidence level for ranking / sorting """
    spc_df = lf_df.pivot_table(
        index=['symbol'], columns=['model'], 
        values=['pred_class'], aggfunc='mean')\
        ['pred_class'][list(model_weights.index)]
    wgt_class_df = (spc_df * model_weights.T).sum(axis=1)
    sc_df = lf_df.loc[:, ['symbol', 'confidence', 'model']]\
        .pivot(index='symbol', columns='model', values='confidence')\
                   .loc[:, model_weights.index]
    wgt_conf_df = (sc_df * model_weights.T).sum(axis=1)
    wtg_df = pd.concat([wgt_class_df, wgt_conf_df], axis=1, sort=False)
    wtg_df.columns = ['wtg_class', 'wtg_ML_prob']
    wtg_df[lf_df.index.name] = lf_df.index.unique()[0]
    
    return wtg_df

def visualize_MLs(lf_df, key_list):
    pvt_df = lf_df.pivot_table(
        index=['model'], columns=['soft_pred_label'], 
        values=['soft_confidence'], aggfunc='count')
    class_distrib = pvt_df.div(pvt_df.sum(axis=1), axis=0)['soft_confidence'][fwd_ret_labels]
    # plot mean class across models    
    class_distrib.mean(axis=0).plot.barh(title='Mean prediction by class',);
    plt.savefig('../images/ML_mean_weight.png', dpi=300, rot=0, bbox_inches='tight')
    # plot class distribution by model    
    class_distrib.plot(title='Prediction distribution by model', kind='barh', stacked=True, cmap='RdYlGn');
    plt.savefig('../images/ML_class_distrib.png', dpi=300, rot=0, bbox_inches='tight')
    return class_distrib.T[key_list]

def add_rank(df, show, lib_cols):
    lib_rank = df[lib_cols].rank(method='dense')
    # L_df[low_is_better]
    hib_rank = df[excl(show, lib_cols)].rank(numeric_only=True, ascending=False, method='dense')
    # L_df[excl(show, low_is_better)]
    df['rank'] = lib_rank.join(hib_rank).mean(1)
    return df[['rank'] + show].sort_values(by='rank')

In [3]:
# s3 paths
s3_path = {
    'macro': 'recommend/macro_ML/',
    'px_mom': 'recommend/micro_ML/',
    'bottom_up': 'recommend/bottomup_ML/',
    'fin_data': 'recommend/fdmn_ML-fin_data/',
    'key_statistics': 'recommend/fdmn_ML-key_statistics/',
    'eps_trend': 'recommend/fdmn_ML-eps_trend/',
    'eps_estimates': 'recommend/fdmn_ML-eps_estimates/',
    'day_quote': 'recommend/fdmn_ML-day_quote/',
    'iv_value': 'valuation/waterfall/'
}

In [4]:
# environment variables
bench = '^GSPC'
fwd_ret_labels = ["bear", "short", "neutral", "long", "bull"]
show_classes = ['bear', 'short', 'long', 'bull']
min_confidence = 0.5
max_rows = 20

key = 'fin_data'
agg_funcs = ['count', 'median', 'max']
mkt_cap_cuts = [0, 0.3, 2, 10, 300, 5000]
mkt_cap_labels = ['micro', 'small', 'mid', 'large', 'mega']

re_index_col = 'symbol'
value_col = 'soft_confidence'
show = ['symbol', 'soft_confidence']
mask_col, class_value = 'soft_pred_label', 'bull'
hard_vote, soft_vote = 'hard_pred_label', 'soft_pred_label'

large_vals = ['marketCap']
div_cols = ['targetMeanPrice', 'targetMedianPrice']

In [5]:
# context / descriptive data
dates = read_dates('quote')
tgt_date = dates[-1] # last date saved in S3

quotes = load_csvs('quote_consol', [tgt_date])

dollar_vol = ((quotes.averageDailyVolume10Day * quotes.regularMarketPrice) / 10**6)
quotes.loc[(dollar_vol > dollar_vol.quantile(0.2)) & (quotes['quoteType'] == 'EQUITY')] # the delta
mask = (dollar_vol > dollar_vol.quantile(0.1)) & (quotes['quoteType'] == 'EQUITY') & (quotes['regularMarketPrice'] > 5)
eqty_symbols = excl(quotes.loc[mask].symbol, ['GOOGL'])
# quotes.loc[quotes['quoteType'] == 'EQUITY'].index.difference(eqty_symbols) # the delta

profile = load_csvs('summary_detail', ['assetProfile'])
keystats = load_csvs('summary_detail', ['defaultKeyStatistics/' + str(tgt_date)])
finstats = load_csvs('summary_detail', ['financialData/' + str(tgt_date)])

for df in (quotes, profile, keystats, finstats):
    df.set_index('symbol', drop=False, inplace=True)
    
path = 'valuation/waterfall/'
print(f'Loading file {path}{tgt_date}')
val_df = pd.read_csv(csv_load(path+tgt_date), parse_dates=True)
val_df.set_index('symbol', inplace=True)
val_df.dropna(subset=['premDisc'], inplace=True)
val_df = val_df.loc[(np.abs(val_df.premDisc) < val_df.premDisc.median() * 3).values, :]

descriptive_cols = {
    'quotes': { 
        'df': quotes, 
        'columns': [
            'shortName', 'forwardPE', 'trailingPE', 
            'marketCap', 'regularMarketPrice', 
            'fiftyDayAverageChangePercent', 'fiftyTwoWeekHighChangePercent', 
            'fiftyTwoWeekLowChangePercent', 'twoHundredDayAverageChangePercent'
        ],},
    'profile': { 
        'df': profile, 
        'columns': ['sector', 'industry', 'country'],
    },
    'keystats': { 
        'df': keystats, 
        'columns': [
            'pegRatio', 'shortPercentOfFloat', 
            'beta', 'enterpriseToEbitda', 'enterpriseToRevenue'],
    },
    'finstats': { 
        'df': finstats, 
        'columns': [
            'earningsGrowth', 'recommendationMean', 
            'targetMeanPrice', 'targetMedianPrice', 
            'numberOfAnalystOpinions'],
    },
    'valuation': { 
        'df': val_df, 
        'columns': ['premDisc', 'growthRate'],
    },
}

Loading file quote/csv/2019-05-14
Loading file summary-categories/assetProfile
Loading file summary-categories/defaultKeyStatistics/2019-05-14
Loading file summary-categories/financialData/2019-05-14
Loading file valuation/waterfall/2019-05-14


In [6]:
# get latest pricing file from inferece server
px_close_ds = 'universe-px-ds'
tmp_path = '../tmp/'
os.makedirs(tmp_path, exist_ok=True)
!scp -i ~/.ssh/qc_infra.pem ubuntu@54.184.34.152:~/inception/tmp/{px_close_ds} {tmp_path}{px_close_ds}

universe-px-ds                                100%   18MB   8.2MB/s   00:02    


### Macro / Risk Exposure

In [12]:
# Read macro predictions
key = 'macro'
show = ['^GSPC', 'pred_label', 'pred_label', 'confidence',
 'bear', 'short', 'neutral', 'long', 'bull']
# show = ['hard_pred_label','soft_pred_label','soft_confidence','bear','short','neutral','long','bull']
macro_df = pd.read_csv(csv_load(f'{s3_path[key]}{tgt_date}'), index_col='pred_date', parse_dates=True)
macro_df.loc[:, show].round(3).tail()

Unnamed: 0_level_0,^GSPC,pred_label,pred_label,confidence,bear,short,neutral,long,bull
pred_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2019-05-08,2879.42,long,long,0.99,0.0,0.0,0.01,0.99,0.01
2019-05-09,2870.72,long,long,1.0,0.0,0.0,0.0,1.0,0.0
2019-05-10,2881.4,long,long,1.0,0.0,0.0,0.0,1.0,0.0
2019-05-13,2811.87,long,long,1.0,0.0,0.0,0.0,1.0,0.0
2019-05-14,2834.41,long,long,1.0,0.0,0.0,0.0,1.0,0.0


In [None]:
# Visualize macro predictions
pred_df = macro_df
pre_class_cols = filter_cols(pred_df.columns, "pred_class")
pred_df.loc[:,[bench] + pre_class_cols].plot(
    secondary_y=pre_class_cols, 
    figsize=(15, 5));
plt.savefig('../images/macro_chart.png', dpi=300, rot=0)
pred_df[fwd_ret_labels].plot.area(
        title='ML Model Probabilities',
        figsize=(15, 2), ylim=(0, 1), cmap='RdYlGn', rot=0);
f'Confidence Mean: {pred_df["soft_confidence"].mean().round(3)}, \
    Median {pred_df["soft_confidence"].median().round(3)}'
plt.savefig('../images/macro_prob.png', dpi=300)

### Bottom-up MLs

In [9]:
#
key_list = ['px_mom', 'bottom_up']
lf_df = load_s3_preds(s3_path, tgt_date, key_list, eqty_symbols)

# Model score weights 
ml_score_dict = {
    'px_mom': 0.90, 'bottom_up': 0.87,
}
cut_off_confidence = 0.6
model_scores = np.array(list(map(ml_score_dict.get, list(key_list))))
adj_weights = (model_scores - cut_off_confidence) / (model_scores - cut_off_confidence).sum()
model_weights = pd.Series({x:y for x,y in zip(list(key_list), adj_weights)})
model_weights

Loading files for 2019-05-14
PX_MOM, 1078 unique symbols
BOTTOM_UP, 1056 unique symbols


px_mom      0.53
bottom_up   0.47
dtype: float64

### Recomendations

#### Weighted models

In [10]:
long_list = ['BABA', 'FB', 'AAPL', 'TSLA']

In [11]:
# weighted appraoch of two models
confidence_treshold = 0.7
filter_lf_df = lf_df.loc[(lf_df.confidence > confidence_treshold)]
wtg_df = get_wtd_ML_results(filter_lf_df, model_weights)
show = [
    'wtg_class', 'wtg_ML_prob', 'targetMedianPrice', 
    'premDisc', 'forwardPE', 'pegRatio', 'enterpriseToEbitda', 'enterpriseToRevenue', 'growthRate', 
    'fiftyDayAverageChangePercent', 'fiftyTwoWeekHighChangePercent',
    'fiftyTwoWeekLowChangePercent', 'twoHundredDayAverageChangePercent',
    'beta', 'enterpriseToEbitda', 'enterpriseToRevenue',
    'shortName', 'country', 'sector', 'industry', 'size',]

In [12]:
wtg_df.loc[wtg_df.index.isin(long_list)].sort_values(by='wtg_class')

Unnamed: 0_level_0,wtg_class,wtg_ML_prob,pred_date
symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
BABA,0.95,0.43,2019-05-14
FB,0.95,0.41,2019-05-14
AAPL,2.0,0.94,2019-05-14
TSLA,2.0,0.94,2019-05-14


In [13]:
print(f'Long recommendations for {tgt_date}')

L_df = wtg_df.loc[(wtg_df['wtg_class'] > 3) & (wtg_df['wtg_ML_prob'] > confidence_treshold)]\
    .sort_values(by='wtg_ML_prob', ascending=False)

long_low_is_better = [
    'premDisc', 'forwardPE', 'pegRatio', 
    'fiftyDayAverageChangePercent', 'fiftyTwoWeekHighChangePercent',
    'fiftyTwoWeekLowChangePercent', 'twoHundredDayAverageChangePercent', 
    'beta', 'enterpriseToEbitda', 'enterpriseToRevenue',
]

L_df = add_desc_stats(L_df, descriptive_cols).sort_values(['wtg_class'], ascending=False)
L_df = add_rank(L_df, show, long_low_is_better)
L_df.T

Long recommendations for 2019-05-14


symbol,RLGY,STT,HPQ,RGNX,TTM,MYL,BBVA,ING,ITUB,SNV,...,CHD,ALLE,CINF,MKC,TREE,VMC,LMT,APD,QCOM,OLLI
rank,20.71,22.30,23.71,23.93,26.44,26.46,26.67,26.78,27.67,30.00,...,67.36,67.93,68.00,68.29,69.29,69.82,70.85,71.79,76.45,79.45
wtg_class,3.05,3.05,3.05,3.05,3.05,3.05,3.05,3.05,3.05,3.05,...,3.05,3.05,3.05,3.05,3.05,3.05,3.05,3.05,3.05,3.05
wtg_ML_prob,0.98,1.00,0.93,0.87,0.98,0.78,0.98,0.97,0.98,0.99,...,0.94,0.92,0.86,0.89,0.98,0.96,0.88,0.86,0.87,0.98
targetMedianPrice,1.34,1.19,1.26,1.90,1.18,1.57,1.33,1.67,1.19,1.27,...,0.99,1.02,0.93,0.96,1.12,1.08,1.10,1.07,1.14,0.91
premDisc,-0.74,,0.56,0.96,,-0.65,,,,0.44,...,2.97,2.41,0.47,2.75,2.00,,3.01,1.85,,
forwardPE,6.85,8.61,8.06,-15.34,2.83,4.49,9.33,11.54,9.21,8.25,...,26.94,18.72,26.02,27.25,38.41,22.46,13.51,21.96,16.19,39.39
pegRatio,3.94,,0.74,-0.24,,inf,,,,0.45,...,3.76,2.00,4.82,2.07,1.24,,inf,10.92,,
enterpriseToEbitda,8.56,,6.32,-29.50,,7.09,,,,,...,20.94,17.57,12.30,23.21,49.84,17.75,12.32,15.38,16.49,34.55
enterpriseToRevenue,0.89,-1.61,0.52,15.28,,2.23,,,,5.72,...,4.79,3.89,2.49,4.67,6.39,4.51,1.94,5.27,5.07,4.91
growthRate,0.02,,0.11,0.63,,0.00,,,,0.18,...,0.07,0.09,0.05,0.13,0.31,,0.00,0.02,,


In [91]:
print(f'Short recommendations for {tgt_date}')

S_df = wtg_df.loc[(wtg_df['wtg_class'] < 1) & (wtg_df['wtg_ML_prob'] > confidence_treshold)]\
    .sort_values(by='wtg_ML_prob', ascending=False)

short_low_is_better = ['wtg_class', 'targetMedianPrice', ]

S_df = add_desc_stats(S_df, descriptive_cols).sort_values(['wtg_class'], ascending=False)
S_df = add_rank(S_df, show, short_low_is_better)
S_df.T

Short recommendations for 2019-05-13


symbol,RGEN,SBUX,MSI,ACM,PDCO,NWSA,ATVI,VLO,FTI,TOT,ENR,BIDU,X,MIK
rank,2.27,4.50,4.64,4.73,4.79,6.50,6.79,6.86,7.21,8.07,8.36,9.00,9.21,9.79
wtg_class,0.95,0.95,0.95,0.95,0.95,0.95,0.95,0.95,0.95,0.95,0.95,0.95,0.95,0.95
wtg_ML_prob,0.99,0.82,0.86,1.00,0.95,0.94,0.78,1.00,0.88,0.86,0.91,0.80,0.98,0.84
targetMedianPrice,1.04,1.01,1.08,1.12,0.99,1.22,1.19,1.27,1.31,1.34,1.30,1.37,1.23,1.57
premDisc,,0.57,1.65,,2.02,1.52,1.86,1.07,-2.04,0.77,,0.82,0.47,0.71
forwardPE,65.01,24.97,16.87,10.33,14.47,26.08,17.01,7.33,14.01,8.36,12.45,13.96,7.57,4.18
pegRatio,,0.83,1.87,,14.21,8.62,inf,1.80,0.93,0.41,,0.80,0.45,2.13
enterpriseToEbitda,67.08,19.85,15.25,10.23,12.92,7.12,13.51,6.86,7.43,5.29,15.63,,3.21,5.61
enterpriseToRevenue,15.25,3.99,3.93,0.42,0.50,0.78,4.53,0.39,0.84,0.92,3.18,,0.32,0.81
growthRate,,0.30,0.09,,0.01,0.03,0.00,0.04,0.15,0.21,,0.18,0.17,0.02


#### Single model

In [64]:
# single model
active_model = key_list[0]
one_model = lf_df.loc[(lf_df.model == active_model)]
one_model = one_model.infer_objects()
print(f'Recommendatiosn for {active_model.upper()}')
one_model.loc[one_model.symbol.isin(long_list)].sort_values(by='pred_class')

Recommendatiosn for PX_MOM


Unnamed: 0_level_0,symbol,pred_class,pred_label,confidence,bear,short,neutral,long,bull,model
pred_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2019-05-10,WB,0,bear,1.0,1.0,0.0,0.0,0.0,0.0,px_mom
2019-05-10,YY,0,bear,0.6,0.59,0.04,0.17,0.0,0.19,px_mom
2019-05-10,MOMO,1,short,0.58,0.27,0.58,0.15,0.0,0.0,px_mom
2019-05-10,BABA,2,neutral,0.97,0.0,0.0,0.97,0.03,0.0,px_mom
2019-05-10,FB,2,neutral,0.57,0.06,0.03,0.57,0.28,0.06,px_mom
2019-05-10,TSLA,2,neutral,0.86,0.0,0.14,0.86,0.0,0.0,px_mom
2019-05-10,CTRP,4,bull,0.53,0.0,0.0,0.0,0.47,0.53,px_mom
2019-05-10,VIPS,4,bull,0.99,0.0,0.0,0.0,0.01,0.99,px_mom


In [None]:
print(f'Long recommendations for {tgt_date}')
mask = (one_model['pred_class'] > 2) & (one_model['confidence'] > confidence_treshold)
L_df = one_model.loc[mask].sort_values(by='confidence', ascending=False)
L_df

In [None]:
print(f'Short recommendations for {tgt_date}')
mask = (one_model['pred_class'] < 2) & (one_model['confidence'] > confidence_treshold)
S_df = one_model.loc[mask].sort_values(by='confidence', ascending=False)
S_df

#### Daily recommendations

In [84]:
lf_df = load_s3_preds(s3_path, tgt_date, key_list, eqty_symbols)
wtg_df = get_wtd_ML_results(lf_df, model_weights)
wtg_df = add_desc_stats(wtg_df, descriptive_cols)

Loading files for 2019-05-08
PX_MOM, 592 unique symbols
BOTTOM_UP, 25 unique symbols


In [85]:
# todays long positions
show = ['wtg_class','wtg_ML_prob','forwardPE', 'industry', 'targetMedianPrice', 'size']
L_df = wtg_df.loc[wtg_df['wtg_class'] > 3].sort_values(by='wtg_ML_prob', ascending=False).head(max_rows)
print(f'Long recommendations for {tgt_date}')
L_df.index

Long recommendations for 2019-05-08


Index([], dtype='object', name='symbol')

In [86]:
# todays short positions
S_df = wtg_df.loc[wtg_df['wtg_class'] < 1].sort_values(by='wtg_ML_prob', ascending=False).head(max_rows)
print(f'Short recommendations for {tgt_date}')
S_df

Short recommendations for 2019-05-08


Unnamed: 0_level_0,wtg_class,wtg_ML_prob,pred_date,shortName,forwardPE,trailingPE,marketCap,regularMarketPrice,fiftyDayAverageChangePercent,fiftyTwoWeekHighChangePercent,...,enterpriseToEbitda,enterpriseToRevenue,earningsGrowth,recommendationMean,targetMeanPrice,targetMedianPrice,numberOfAnalystOpinions,premDisc,growthRate,size
symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ANET,0.77,0.77,2019-05-08,"Arista Networks, Inc.",25.03,56.72,20.63,269.33,-0.14,-0.19,...,25.22,8.47,0.38,2.2,1.15,1.11,27.0,2.18,0.47,large
BILI,0.46,0.73,2019-05-08,Bilibili Inc.,288.17,,5.64,17.29,-0.04,-0.24,...,,,,1.8,1.24,1.3,12.0,-0.74,1.25,mid
MAT,0.91,0.23,2019-05-08,"Mattel, Inc.",115.25,,3.98,11.53,-0.11,-0.36,...,26.75,1.54,,2.7,1.25,1.21,12.0,-0.73,0.0,mid
YUM,0.46,0.23,2019-05-08,"Yum! Brands, Inc.",23.76,23.75,30.9,101.0,-0.0,-0.03,...,21.45,7.38,-0.35,2.5,1.03,1.02,17.0,4.24,0.0,large
IRDM,0.91,0.23,2019-05-08,Iridium Communications Inc,-142.36,,3.06,27.05,0.0,-0.04,...,17.65,8.97,,2.4,0.98,1.11,5.0,-2.36,0.05,mid
TM,0.46,0.23,2019-05-08,Toyota Motor Corporation,10.64,7.45,173.79,121.83,-0.0,-0.12,...,,,-0.8,1.0,1.21,1.21,2.0,0.39,0.0,large
SON,0.46,0.23,2019-05-08,Sonoco Products Company,17.03,20.44,6.34,63.35,0.03,-0.02,...,10.79,1.46,0.0,2.7,0.94,0.94,10.0,2.5,0.14,mid
MKC,0.46,0.23,2019-05-08,"McCormick & Company, Incorporat",27.27,31.34,20.41,154.64,0.05,-0.01,...,22.78,4.58,-0.65,2.7,0.96,0.96,10.0,2.78,0.13,large
ZTS,0.46,0.23,2019-05-08,Zoetis Inc.,26.31,35.7,48.87,102.1,0.01,-0.02,...,24.88,9.18,-0.1,1.9,1.07,1.09,11.0,3.4,0.37,large
DVA,0.0,0.23,2019-05-08,DaVita Inc.,9.9,56.18,8.6,51.69,-0.05,-0.35,...,9.96,1.82,,2.0,1.29,1.32,9.0,2.79,0.02,mid


#### Company details

In [87]:
ticker = 'TSLA'
show = [
    'hard_pred_label','soft_pred_label','soft_confidence',
    'bear', 'short', 'neutral', 'long', 'bull', 'model']
wgt_input = (ticker, 'soft_confidence')
f'{quotes.loc[ticker].shortName}, {profile.loc[ticker].sector}, {profile.loc[ticker].industry}'

'Tesla, Inc., Consumer Cyclical, Auto Manufacturers'

In [88]:
# Company view across models
print(f'Model predictions for {ticker} on {tgt_date}')
print(f'Weighted ML confidence level: {score_wgt_conf(wgt_input)}')
lf_df.loc[lf_df['symbol'] == ticker, show]

Model predictions for TSLA on 2019-05-08


IndexingError: Too many indexers

In [None]:
quotes.loc[ticker]

### Universe Intrinsic Value Analysis

In [207]:
# Value companies in the universe
iv_show = ['premDisc', 'forwardPE', 'targetMedianPrice', 'pegRatio', 'growthRate']
iv_sizes = ['mega', 'large', 'mid']

iv_mask = (val_df.premDisc < 0.95) & (val_df.premDisc > 0)
iv_mapper = val_df.loc[iv_mask, 'premDisc']
iv_df = add_desc_stats(iv_mapper.to_frame(), descriptive_cols)
iv_df = iv_df.loc[iv_df['size'].isin(iv_sizes) & (iv_df.targetMedianPrice > 1.1), :]

iv_df.groupby(by=['sector', ]).agg(['count', 'median'])\
    .sort_values(by=(iv_show[0], 'median'))[iv_show]

Unnamed: 0_level_0,premDisc,premDisc,forwardPE,forwardPE,targetMedianPrice,targetMedianPrice,pegRatio,pegRatio,growthRate,growthRate
Unnamed: 0_level_1,count,median,count,median,count,median,count,median,count,median
sector,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
Financial Services,19,0.35,19,9.66,19,1.12,19,1.04,19,0.07
Basic Materials,6,0.57,6,8.11,6,1.22,6,0.4,6,0.2
Industrials,9,0.58,9,10.28,9,1.15,9,0.82,9,0.21
Healthcare,4,0.59,4,11.55,4,1.49,4,0.27,4,0.53
Energy,10,0.64,9,8.54,10,1.28,9,0.47,10,0.18
Utilities,2,0.73,2,3.86,2,419.01,2,inf,2,0.04
Consumer Cyclical,18,0.74,18,10.3,18,1.19,18,2.26,18,0.04
Technology,13,0.78,13,14.13,13,1.14,13,0.85,13,0.16
Communication Services,4,0.8,4,8.86,4,1.25,4,6.07,4,0.01
Consumer Defensive,3,0.85,3,9.55,3,1.11,3,0.48,3,0.25


In [219]:
show = [
    'targetMedianPrice', 
    'premDisc', 'forwardPE', 'pegRatio', 'enterpriseToEbitda', 'enterpriseToRevenue', 'growthRate', 
    'fiftyDayAverageChangePercent', 'fiftyTwoWeekHighChangePercent',
    'fiftyTwoWeekLowChangePercent', 'twoHundredDayAverageChangePercent',
    'beta', 'enterpriseToEbitda', 'enterpriseToRevenue',
    'shortName', 'country', 'sector', 'industry', 'size',]
summary = add_rank(iv_df, show, long_low_is_better)
# add_rank(iv_df, show, long_low_is_better).tail(10)

Unnamed: 0_level_0,rank,targetMedianPrice,premDisc,forwardPE,pegRatio,enterpriseToEbitda,enterpriseToRevenue,growthRate,fiftyDayAverageChangePercent,fiftyTwoWeekHighChangePercent,fiftyTwoWeekLowChangePercent,twoHundredDayAverageChangePercent,beta,enterpriseToEbitda,enterpriseToRevenue,shortName,country,sector,industry,size
symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
TX,13.75,1.35,0.29,5.75,0.26,3.15,0.70,0.22,-0.03,-0.38,0.10,-0.10,0.19,3.15,0.70,Ternium S.A. Ternium S.A.,Luxembourg,Basic Materials,Steel,mid
HFC,18.75,1.27,0.60,7.65,0.47,3.84,0.45,0.16,-0.07,-0.45,0.01,-0.16,1.08,3.84,0.45,HollyFrontier Corporation,United States,Energy,Oil & Gas Refining & Marketing,mid
PSX,21.08,1.31,0.49,8.08,0.25,9.02,0.49,0.32,-0.09,-0.29,0.12,-0.08,0.78,9.02,0.49,Phillips 66,United States,Energy,Oil & Gas Refining & Marketing,large
X,22.00,1.33,0.54,7.02,0.42,3.31,0.33,0.17,-0.07,-0.58,0.17,-0.23,2.37,3.31,0.33,United States Steel Corporation,United States,Basic Materials,Steel,mid
JWN,25.91,1.21,0.81,10.15,inf,5.05,0.50,0.00,-0.07,-0.40,0.03,-0.18,0.28,5.05,0.50,"Nordstrom, Inc.",United States,Consumer Cyclical,Department Stores,mid
NKTR,25.92,2.30,0.71,-13.06,-0.21,6.47,3.79,0.63,-0.02,-0.65,0.12,-0.13,3.42,6.47,3.79,Nektar Therapeutics,United States,Healthcare,Biotechnology,mid
ENIC,26.30,836.85,0.69,0.02,0.00,,,0.08,-0.05,-0.19,0.16,-0.03,0.48,,,Enel Chile S.A.,Chile,Utilities,Utilities - Regulated Electric,mid
STLD,26.33,1.23,0.59,8.91,0.38,4.41,0.74,0.23,-0.04,-0.38,0.13,-0.08,1.49,4.41,0.74,"Steel Dynamics, Inc.",United States,Basic Materials,Steel,mid
M,26.64,1.10,0.71,7.94,inf,4.97,0.42,0.00,-0.05,-0.45,0.02,-0.17,0.27,4.97,0.42,Macy's Inc,United States,Consumer Cyclical,Department Stores,mid
JHG,27.09,1.12,0.53,8.48,0.18,5.42,1.63,0.48,-0.10,-0.35,0.18,-0.04,,5.42,1.63,Janus Henderson Group plc,United Kingdom,Financial Services,Asset Management,mid


In [209]:
iv_df.loc[:, iv_show + ['sector', 'size']].sort_values(
    by=['size', 'premDisc', 'sector'], ascending=False).head(20)

Unnamed: 0_level_0,premDisc,forwardPE,targetMedianPrice,pegRatio,growthRate,sector,size
symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
GOOGL,0.91,22.25,1.13,1.37,0.16,Technology,mega
GOOG,0.91,22.08,1.14,1.36,0.16,Technology,mega
FB,0.72,20.84,1.16,0.62,0.34,Technology,mega
REPYY,0.95,,2.29,,0.17,Energy,large
BKNG,0.95,16.1,1.11,0.92,0.17,Consumer Cyclical,large
EQNR,0.94,10.06,1.25,0.63,0.16,Energy,large
WBA,0.93,8.97,1.1,inf,0.0,Consumer Defensive,large
NTAP,0.91,14.13,1.13,2.9,0.05,Technology,large
INTC,0.91,11.17,1.11,1.11,0.1,Technology,large
CCL,0.91,10.82,1.18,2.27,0.05,Consumer Cyclical,large


### Other Experiments

In [197]:
# correlation to FXI (China)
px_close[['FXI', 'BABA', 'FB', 'GOOG', 'CTRP'] + list(L_df.index)].tail(60).corr()['FXI'].sort_values()

SQ     -0.04
OMCL    0.09
FFIV    0.11
TYL     0.30
RNG     0.40
GLW     0.45
FB      0.46
WB      0.59
FTNT    0.63
GOOG    0.70
CTRP    0.74
MOMO    0.77
NVDA    0.78
LITE    0.78
INTU    0.79
INTC    0.79
GRMN    0.80
FTV     0.82
YY      0.83
BABA    0.84
FXI     1.00
Name: FXI, dtype: float64