In [1]:
import sys
import os
sys.path.append('../') # root directory

In [2]:
from constants import constant
import pandas as pd
import numpy as np
import constants.constant as constant
import utils.commonutils as utils
import utils.scraperutils as scraperutils
from constants.namedtuples import Column
from write.JsonFileWriter import write
from read.JsonFileReader import parse
from pathlib import Path
from tqdm import tqdm
from itertools import chain

In [3]:
# Show which files are available in the local data directory
os.listdir('data')

['MorningStarStat.json', '.ipynb_checkpoints']

In [4]:
# Parse the MorningStar stats JSON file into a collection of per-stock records
stocks = parse('data/MorningStarStat.json')
# Keys used to index into each stock record
KEY_STATS = 'keyStats'
FIN_STATS = 'financeStats'
TICKER = 'symbol'
# Sentinel period that get_value() treats as "return every value"
PERIOD_ALL = 100
# Placeholder string that the screener swaps for NaN before casting to float
NULL = '—'
len(stocks)

530

In [5]:
# Keys of a single stat entry and of the value entries inside it
KEY_STAT_NAME = 'name'      # compared against the requested name in get_stat()
KEY_STAT_VALUES = 'values'  # the sequence that get_value() returns or indexes into
KEY_STAT_PERIOD = 'period'  # NOTE(review): not referenced in the visible cells; presumably labels each value's period
KEY_STAT_VALUE = 'value'    # the reading stored in each value entry

In [6]:
# Example stat and ticker used by the lookup demonstration cells below
stat_name = 'Operating Margin'
stock_ticker = 'TAP'

In [7]:
# Ticker symbol of every loaded stock, collected into a pandas Series
tickers = pd.Series([stock[TICKER] for stock in stocks])
tickers.shape

(530,)

## Retrieve Stock, Stat & Values from Raw Data

In [8]:
def get_stock(stocks, ticker):
    '''
    Look up a single stock record by its ticker symbol.

    Scans `stocks` in order and returns the first record whose TICKER field
    equals `ticker`. Returns None when no record matches.
    '''
    matching = (candidate for candidate in stocks if candidate[TICKER] == ticker)
    return next(matching, None)

In [9]:
def get_stat(stats, name):
    '''
    Find a stat entry by name.

    Scans `stats` in order and returns the first entry whose KEY_STAT_NAME
    field equals `name`. Returns None when no entry matches.
    '''
    matching = (entry for entry in stats if entry[KEY_STAT_NAME] == name)
    return next(matching, None)

In [10]:
def get_value(stat, period=-1):
    '''
    Return value(s) of `stat`, with `period` used as an index into its values.

    period == PERIOD_ALL (100) returns the whole values sequence; any other
    period returns the single entry at that index. The default -1 picks the
    last entry, which the original author notes is TTM.
    '''
    all_values = stat[KEY_STAT_VALUES]
    return all_values if period == PERIOD_ALL else all_values[period]

In [11]:
def get_stat_value(stock, stat_type, stat_name, period=-1):
    '''
    Fetch the value(s) of one named stat directly from a stock record.

    `stat_type` is the key of the stat group inside `stock` (e.g. KEY_STATS),
    `stat_name` is matched by get_stat(), and `period` is forwarded to
    get_value().
    '''
    return get_value(get_stat(stock[stat_type], stat_name), period=period)

In [None]:
# Walk the lookup chain for the example selection: stock -> stat -> all of its values
stock = get_stock(stocks, ticker=stock_ticker)
stat = get_stat(stats=stock[KEY_STATS], name=stat_name)
values = get_value(stat, PERIOD_ALL)
values

In [None]:
# get_stat_value needs the stock record, the stat group key and the stat name
# (the previous call passed only a stat entry and raised TypeError).
get_stat_value(stock, KEY_STATS, stat_name)

In [None]:
# Swap the NULL placeholder for NaN before casting, as the screener does;
# a direct float64 conversion fails if any period holds the placeholder.
pd.Series([v[KEY_STAT_VALUE] for v in values]).replace({NULL: float('nan')}).astype('float64')

## Screener

In [12]:
# Names of all stats (key + finance groups), read from the first stock record
stat_names = [
    entry[KEY_STAT_NAME]
    for entry in chain.from_iterable(stocks[0][group] for group in (KEY_STATS, FIN_STATS))
]
# Ticker symbols of every stock, as a plain list
tickers = [stock[TICKER] for stock in stocks]
len(tickers), len(stat_names)

(530, 85)

In [13]:
# Efficiency
ROIC = 'Return on Invested Capital %'
OPERATING_MARGIN = 'Operating Margin'
GROSS_MARGIN = 'Gross Margin'
NET_MARGIN = 'Net Margin %'

# Growth
OPERATING_CF_GROWTH = 'Operating Cash Flow Growth % YOY'
REVENUE_GROWTH_3 = 'Revenue % 3-Year Average'
REVENUE_GROWTH_5 = 'Revenue % 5-Year Average'
OPERATING_INCOME_GROWTH_3 = 'Operating Income % 3-Year Average'
OPERATING_INCOME_GROWTH_5 = 'Operating Income % 5-Year Average'

# Financial Risks
DEBT_EQUITY = 'Debt/Equity'

## High Growth & Efficiency

In [41]:
def _stat_column(stat_name, period=-1, stat_type=KEY_STATS):
    '''
    Build a float64 Series holding one stat for every stock in `stocks`.

    For each stock, reads the value entry of `stat_name` at index `period`
    from the `stat_type` group, takes its KEY_STAT_VALUE field, replaces the
    NULL placeholder with NaN and casts the result to float64.
    '''
    raw = pd.Series(
        [get_stat_value(s, stat_type, stat_name, period=period)[KEY_STAT_VALUE] for s in stocks]
    )
    return raw.replace({NULL: float('nan')}).astype('float64')


# One row per stock: ticker plus the metrics used by the screen
screen = pd.DataFrame()
screen['ticker'] = tickers
screen['roic'] = _stat_column(ROIC)
# period=-2 reads the second-to-last entry instead of the default last one
# NOTE(review): presumably the trailing entry is not populated for this stat; confirm
screen['op_inc_3_yr_growth'] = _stat_column(OPERATING_INCOME_GROWTH_3, period=-2)
screen['op_margin'] = _stat_column(OPERATING_MARGIN)

In [34]:
# Boolean masks for the screen; both cut-offs sit above the 75th percentile
high_roic = screen['roic'] > 25 # 18% is 75th percentile
high_op_inc_growth = screen['op_inc_3_yr_growth'] > 20 # 14.84% is 75th percentile
# Count the stocks passing each filter (the old name `roic_gt_25` was never defined)
high_roic.sum(), high_op_inc_growth.sum()

(61, 76)

In [42]:
# Stocks passing both the ROIC and the operating-income-growth filters
screen[high_roic & high_op_inc_growth]

Unnamed: 0,ticker,roic,op_inc_3_yr_growth,op_margin
11,WST,27.11,21.96,25.86
63,REGN,45.25,52.26,55.67
64,GOOG,28.36,35.86,30.55
95,ASML,35.6,30.14,35.12
103,AMD,42.92,100.74,22.2
122,FB,28.08,23.35,39.65
146,BIO,35.28,37.28,16.75
160,INCY,29.35,56.91,20.11
215,MKTX,26.14,22.92,50.53
228,QCOM,41.44,136.29,30.88


In [33]:
# Distribution of ROIC across the screen; its 75th percentile is the reference for the ROIC cut-off
screen['roic'].describe()

count    449.000000
mean      12.742183
std       13.751915
min      -80.700000
25%        5.920000
50%       10.730000
75%       18.030000
max      103.250000
Name: roic, dtype: float64

In [53]:
# Load the ARKK holdings CSV and rename its 'ticker' column to 'Symbol'
arkk = (
    pd.read_csv('../output/20220220_Tickers/ARK_INNOVATION_ETF_ARKK_HOLDINGS.csv')
    .rename(columns={'ticker': 'Symbol'})
)
# Write the non-missing symbols to the stock list workbook, without the index
arkk['Symbol'].dropna().to_excel('../input/StockList.xlsx', index=False)

In [64]:
# Display the holdings table after the column rename
arkk

Unnamed: 0,date,fund,company,Symbol,cusip,shares,market value ($),weight (%)
0,02/22/2022,ARKK,TESLA INC,TSLA,88160R101,1168067.0,"$1,001,010,057.66",9.01%
1,02/22/2022,ARKK,TELADOC HEALTH INC,TDOC,87918A105,11023811.0,"$716,547,715.00",6.45%
2,02/22/2022,ARKK,ZOOM VIDEO COMMUNICATIONS-A,ZM,98980L101,5524693.0,"$701,415,023.28",6.31%
3,02/22/2022,ARKK,COINBASE GLOBAL INC -CLASS A,COIN,19260Q107,3438115.0,"$650,353,833.40",5.85%
4,02/22/2022,ARKK,ROKU INC,ROKU,77543R102,5603429.0,"$630,161,625.34",5.67%
5,02/22/2022,ARKK,EXACT SCIENCES CORP,EXAS,30063P105,7981035.0,"$591,634,124.55",5.32%
6,02/22/2022,ARKK,UNITY SOFTWARE INC,U,91332U101,5542691.0,"$549,225,251.19",4.94%
7,02/22/2022,ARKK,UIPATH INC - CLASS A,PATH,90364P105,14303928.0,"$542,834,067.60",4.88%
8,02/22/2022,ARKK,INTELLIA THERAPEUTICS INC,NTLA,45826J105,5678892.0,"$491,337,735.84",4.42%
9,02/22/2022,ARKK,BLOCK INC,SQ,852234103,4795228.0,"$468,589,680.16",4.22%
