In [1]:
import sys
import os
sys.path.append('../') # root directory

In [2]:
from constants import constant
import pandas as pd
import numpy as np
import constants.constant as constant
import utils.commonutils as utils
import utils.scraperutils as scraperutils
from constants.namedtuples import Column
from write.JsonFileWriter import write
from read.JsonFileReader import parse
from pathlib import Path
from tqdm import tqdm
from itertools import chain

In [3]:
# List the files currently present in the local 'data' directory.
os.listdir('data')

['MorningStarStat.json',
 'ARK_INNOVATION_ETF_ARKK_HOLDINGS.csv',
 '.ipynb_checkpoints',
 'ARKK.json']

In [4]:
# Keys and sentinels used to navigate the parsed stock records.
KEY_STATS = 'keyStats'      # stat group looked up for margins / growth figures
FIN_STATS = 'financeStats'  # stat group looked up for absolute figures
TICKER = 'symbol'           # field holding a stock's ticker
PERIOD_ALL = 100            # sentinel period: get_value returns every period
NULL = '—'                  # placeholder string the raw data uses for "no value"

# Load the scraped stock records and show how many there are.
stocks = parse('data/ARKK.json')
len(stocks)

36

In [5]:
# Field names inside a stat record and inside each of its per-period entries.
KEY_STAT_NAME = 'name'  # stat record: name compared against in get_stat
KEY_STAT_VALUES = 'values'  # stat record: list of per-period entries
KEY_STAT_PERIOD = 'period'  # entry: period label such as '2020-12' or 'TTM'
KEY_STAT_VALUE = 'value'  # entry: raw value, stored as a string

In [28]:
# Example stat and ticker used by the exploratory lookup cells that follow.
stat_name = 'Operating Margin'
stock_ticker = 'NTLA'

## Retrieve Stock, Stat & Values from Raw Data

In [29]:
def get_stock(stocks, ticker):
    '''
    Return the first record in `stocks` whose TICKER field equals `ticker`.

    The result is None when no record matches.
    '''
    matches = (candidate for candidate in stocks if candidate[TICKER] == ticker)
    return next(matches, None)

In [30]:
def get_stat(stats, name):
    '''
    Return the first stat record in `stats` whose KEY_STAT_NAME field equals `name`.

    The result is None when no stat carries that name.
    '''
    matches = (candidate for candidate in stats if candidate[KEY_STAT_NAME] == name)
    return next(matches, None)

In [31]:
def get_value(stat, period=-1):
    '''
    Return entries from the KEY_STAT_VALUES list of `stat`.

    `period` is used as a list index, so the default -1 selects the last entry
    (the 'TTM' entry for the key stats shown in this notebook). Passing
    PERIOD_ALL (100) returns the whole list instead of a single entry.
    '''
    entries = stat[KEY_STAT_VALUES]
    return entries if period == PERIOD_ALL else entries[period]

In [32]:
def get_stat_value(stock, stat_type, stat_name, period=-1):
    '''
    Look up the stat called `stat_name` in `stock[stat_type]` and return its
    value(s) for `period`, using the same period rules as get_value.
    '''
    return get_value(get_stat(stock[stat_type], stat_name), period=period)

In [33]:
# Look the example stock up in this cell so that displaying it does not depend
# on a later cell having been run first (fresh-kernel safe).
stock = get_stock(stocks, stock_ticker)
stock

In [34]:
# Walk the raw data step by step: stock record -> named key stat -> all periods.
stock = get_stock(stocks, stock_ticker)
stat = get_stat(stock[KEY_STATS], stat_name)
# PERIOD_ALL makes get_value return the full list of per-period entries.
values = get_value(stat, period=PERIOD_ALL)
values

[{'period': '2011-12', 'value': '—'},
 {'period': '2012-12', 'value': '—'},
 {'period': '2013-12', 'value': '—'},
 {'period': '2014-12', 'value': '—'},
 {'period': '2015-12', 'value': '-221.86'},
 {'period': '2016-12', 'value': '-195.15'},
 {'period': '2017-12', 'value': '-266.32'},
 {'period': '2018-12', 'value': '-298.58'},
 {'period': '2019-12', 'value': '-246.78'},
 {'period': '2020-12', 'value': '-235.51'},
 {'period': 'TTM', 'value': '-857.78'}]

In [None]:
# get_stat_value takes the stock record, the stat group and the stat name;
# it cannot be called with an already-resolved stat record alone.
get_stat_value(stock, KEY_STATS, stat_name, period=PERIOD_ALL)

In [12]:
# Periods without data hold the NULL placeholder, which cannot be parsed as
# float64, so map it to NaN before converting the column.
pd.Series(
    [v[KEY_STAT_VALUE] for v in values]
).replace({NULL: float('nan')}).astype('float64')

NameError: name 'values' is not defined

## ARKK Analysis

In [29]:
# Load the ARKK holdings CSV into a DataFrame.
arkk = pd.read_csv('data/ARK_INNOVATION_ETF_ARKK_HOLDINGS.csv')

In [30]:
# Reorder (and restrict) the holdings to match the stocks loaded from JSON.
# The ticker becomes the index but is also kept as a regular column.
tickers = pd.Series([record[TICKER] for record in stocks])
arkk = arkk.set_index('ticker', drop=False)
print(tickers.shape, arkk.shape)
arkk = arkk.loc[tickers]
arkk.shape

(36,) (39, 8)


(36, 8)

In [112]:
# Stat names passed to get_stat_value; get_stat compares them against each
# stat record's 'name' field.

# Efficiency
ROIC = 'Return on Invested Capital %'
OPERATING_MARGIN = 'Operating Margin'
GROSS_MARGIN = 'Gross Margin'
NET_MARGIN = 'Net Margin %'

# Growth
OPERATING_CF_GROWTH = 'Operating Cash Flow Growth % YOY'
REVENUE_GROWTH_1 = 'Revenue % Year over Year'
REVENUE_GROWTH_3 = 'Revenue % 3-Year Average'
REVENUE_GROWTH_5 = 'Revenue % 5-Year Average'
OPERATING_INCOME_GROWTH_1 = 'Operating Income % Year over Year'
OPERATING_INCOME_GROWTH_3 = 'Operating Income % 3-Year Average'
OPERATING_INCOME_GROWTH_5 = 'Operating Income % 5-Year Average'

# Financial Risks
DEBT_EQUITY = 'Debt/Equity'

# Absolute figures (looked up under FIN_STATS rather than KEY_STATS below)
OPERATING_CF = 'Operating Cash Flow'
OPERATING_INC = 'Operating Income'

In [47]:
# Efficiency metrics: take the default (last) period of each key stat and map
# the NULL placeholder to NaN so every column can be stored as float64.
for column, stat_label in (
    ('roic', ROIC),
    ('op_margin', OPERATING_MARGIN),
    ('gross_margin', GROSS_MARGIN),
    ('net_margin', NET_MARGIN),
):
    raw_values = [get_stat_value(s, KEY_STATS, stat_label)['value'] for s in stocks]
    arkk[column] = (
        pd.Series(raw_values)
        .replace({NULL: float('nan')})
        .astype('float64')
        .values
    )

In [60]:
# Growth metrics: period=-2 selects the second-to-last entry of each key stat
# (the entry just before the last one). NULL placeholders become NaN so the
# columns can be stored as float64.
for column, stat_label in (
    ('op_inc_1_yr_growth', OPERATING_INCOME_GROWTH_1),
    ('rev_3_yr_growth', REVENUE_GROWTH_3),
    ('rev_1_yr_growth', REVENUE_GROWTH_1),
    ('ocf_yr_growth', OPERATING_CF_GROWTH),
):
    raw_values = [
        get_stat_value(s, KEY_STATS, stat_label, period=-2)['value'] for s in stocks
    ]
    arkk[column] = (
        pd.Series(raw_values)
        .replace({NULL: float('nan')})
        .astype('float64')
        .values
    )

In [113]:
# Absolute figures from the finance stats (last period). The raw values are
# strings, so without conversion these columns stay as text and cannot be
# described or compared numerically. Thousands separators are stripped per
# value before the float cast; Series.replace({',': ''}) would only match whole
# cells, not substrings.
# NOTE(review): assumes every non-NULL value is a plain decimal string,
# optionally with ',' separators -- confirm against the raw data.
for column, stat_label in (('ocf', OPERATING_CF), ('op_income', OPERATING_INC)):
    raw_values = [
        get_stat_value(s, FIN_STATS, stat_label, period=-1)['value'] for s in stocks
    ]
    arkk[column] = pd.Series(
        [
            float('nan') if raw == NULL else float(raw.replace(',', ''))
            for raw in raw_values
        ],
        dtype='float64',
    ).values

In [119]:
# to_csv does not create missing parent directories, so make sure the export
# folder exists before writing (a no-op when it is already there).
export_path = Path('export/arkk/arkk_analysis_20220305.csv')
export_path.parent.mkdir(parents=True, exist_ok=True)
arkk.to_csv(export_path, index=False)

In [62]:
# Number of missing values in each column.
arkk.isnull().sum()

date                   0
fund                   0
company                0
ticker                 0
cusip                  0
shares                 0
market value ($)       0
weight (%)             0
roic                   0
op_margin              1
gross_margin           5
net_margin             2
op_inc_3_yr_growth    35
rev_3_yr_growth       11
ocf_yr_growth         23
op_inc_1_yr_growth    31
rev_1_yr_growth        2
dtype: int64

In [85]:
# Derived metric columns to summarise (all of them are stored as float64).
arkk_stat_names = [
    'rev_1_yr_growth', 'rev_3_yr_growth', 'ocf_yr_growth', 'op_inc_1_yr_growth',
    'gross_margin', 'op_margin', 'net_margin', 'roic',
]
# One describe() column per metric: count, mean, std, min, quartiles, max.
arkk_stats = arkk[arkk_stat_names].describe()

In [97]:
# Show the describe() summary computed for the derived metric columns.
arkk_stats

Unnamed: 0,rev_1_yr_growth,rev_3_yr_growth,ocf_yr_growth,op_inc_1_yr_growth,gross_margin,op_margin,net_margin,roic
count,34.0,25.0,13.0,5.0,31.0,35.0,34.0,36.0
mean,68.736765,72.206,183.563077,1119.942,58.71129,-54.608857,-74.767353,-12.756944
std,74.100075,109.434002,291.273947,2225.192632,19.785143,214.531771,170.678476,27.197398
min,-18.12,-7.98,-18.06,10.65,24.14,-857.78,-854.1,-98.59
25%,24.1125,30.46,14.87,68.0,43.71,-60.28,-69.1425,-26.41
50%,51.225,54.99,48.9,197.99,60.72,-32.08,-31.64,-9.125
75%,93.715,73.3,132.45,225.78,74.76,4.65,2.32,3.815
max,325.81,564.1,868.57,5097.29,90.75,714.5,63.2,33.77


In [120]:
# Frame dimensions (rows, columns) after the derived columns were added.
arkk.shape

(36, 19)

In [121]:
# Holdings with no 1-year revenue growth figure.
arkk.loc[arkk['rev_1_yr_growth'].isnull()]

Unnamed: 0_level_0,date,fund,company,ticker,cusip,shares,market value ($),weight (%),roic,op_margin,gross_margin,net_margin,op_inc_3_yr_growth,rev_3_yr_growth,ocf_yr_growth,op_inc_1_yr_growth,rev_1_yr_growth,ocf,op_income
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
CGEN,02/22/2022,ARKK,COMPUGEN LTD,CGEN,M25722105,5651408,"$16,276,055.04",0.15%,-30.47,-442.31,90.75,-426.51,,,,,,-30,-35
CRSP,02/22/2022,ARKK,CRISPR THERAPEUTICS AG,CRSP,H17182108,6095582,"$355,311,474.78",3.20%,17.36,40.82,,41.28,,564.1,,,,539,374


In [98]:
# The holdings frame keeps 'ticker' as a regular column, so its index is
# redundant and can be dropped on export.
arkk.to_csv('export/arkk_analysis_20220301.csv', index=False)
# The summary frame's index holds the statistic labels (count, mean, std, ...);
# it must be written out, otherwise the exported rows are unlabelled numbers.
arkk_stats.to_csv('export/arkk_stats_20220301.csv', index_label='statistic')

In [99]:
# Holdings whose 3-year average revenue growth is negative.
arkk.loc[arkk['rev_3_yr_growth'].lt(0)]

Unnamed: 0_level_0,date,fund,company,ticker,cusip,shares,market value ($),weight (%),roic,op_margin,gross_margin,net_margin,op_inc_3_yr_growth,rev_3_yr_growth,ocf_yr_growth,op_inc_1_yr_growth,rev_1_yr_growth
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
PACB,02/22/2022,ARKK,PACIFIC BIOSCIENCES OF CALIF,PACB,69404D108,11863154,"$136,070,376.38",1.22%,-2.84,-114.29,43.98,-30.39,,-5.49,,,-13.2
SSYS,02/22/2022,ARKK,STRATASYS LTD,SSYS,M85548101,4591664,"$118,694,514.40",1.07%,-5.42,-11.25,43.44,-7.92,,-7.98,,,-18.12


In [100]:
# Preview the enriched holdings without dumping the whole frame into the output.
arkk.head(10)

Unnamed: 0_level_0,date,fund,company,ticker,cusip,shares,market value ($),weight (%),roic,op_margin,gross_margin,net_margin,op_inc_3_yr_growth,rev_3_yr_growth,ocf_yr_growth,op_inc_1_yr_growth,rev_1_yr_growth
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
U,02/22/2022,ARKK,UNITY SOFTWARE INC,U,91332U101,5542691,"$549,225,251.19",4.94%,-21.09,-46.08,77.1,-44.77,,,,,42.58
BEAM,02/22/2022,ARKK,BEAM THERAPEUTICS INC,BEAM,07373V105,6026795,"$403,915,800.90",3.63%,-65.07,,,,,,,,33.33
TWOU,02/22/2022,ARKK,2U INC,TWOU,90214J101,5745188,"$55,383,612.32",0.50%,-8.27,-16.11,71.19,-18.0,,39.27,,,34.78
MTLS,02/22/2022,ARKK,MATERIALISE NV-ADR,MTLS,57667T100,3751513,"$74,430,017.92",0.67%,2.32,3.47,56.9,3.25,,6.13,5.55,,-13.34
TWST,02/22/2022,ARKK,TWIST BIOSCIENCE CORP,TWST,90184D100,2178861,"$115,937,193.81",1.04%,-24.06,-122.6,38.77,-112.7,,73.3,,,46.87
FATE,02/22/2022,ARKK,FATE THERAPEUTICS INC,FATE,31189P102,5913842,"$195,984,723.88",1.76%,-29.35,-331.08,,-359.67,,97.09,,,194.33
CERS,02/22/2022,ARKK,CERUS CORP,CERS,157085101,9861026,"$52,559,268.58",0.47%,-33.59,-39.38,60.72,-41.7,,30.57,,,21.83
CGEN,02/22/2022,ARKK,COMPUGEN LTD,CGEN,M25722105,5651408,"$16,276,055.04",0.15%,-30.47,-442.31,90.75,-426.51,,,,,
TDOC,02/22/2022,ARKK,TELADOC HEALTH INC,TDOC,87918A105,11023811,"$716,547,715.00",6.45%,-7.34,-32.39,67.87,-43.6,,67.38,,,97.71
NVTA,02/22/2022,ARKK,INVITAE CORP,NVTA,46185L103,17320190,"$161,770,574.60",1.46%,-13.36,-206.95,26.21,-93.9,,60.03,,,28.95
