In [1]:
import sys
import os
# Add the parent directory to the import path; the local packages imported in
# the next cell (constants, utils, read, write) are presumably resolved from there.
sys.path.append('../') # root directory

In [13]:
from constants import constant
import pandas as pd
import numpy as np
import constants.constant as constant
import utils.commonutils as utils
import utils.scraperutils as scraperutils
from constants.namedtuples import Column
from write.JsonFileWriter import write
from read.JsonFileReader import parse
from pathlib import Path
from tqdm import tqdm
from itertools import chain

In [3]:
# Show the contents of the local data directory before loading from it.
os.listdir('data')

['MorningStarStat.json', '.ipynb_checkpoints']

In [19]:
# Load the stock statistics file with the project's JSON reader.
stocks = parse('data/MorningStarStat.json')
# Keys used to index into each stock record.
KEY_STATS = 'keyStats'
FIN_STATS = 'financeStats'
TICKER = 'symbol'
# Sentinel period: get_value returns the whole list of values when given this.
PERIOD_ALL = 100
# Placeholder string standing in for a missing value; the screener cells map it to NaN.
NULL = '—'
# Number of stock records loaded.
len(stocks)

530

In [20]:
# Keys of a single stat entry: its name and its list of per-period records.
KEY_STAT_NAME = 'name'
KEY_STAT_VALUES = 'values'
# Keys of one per-period record inside a stat's values.
# NOTE(review): KEY_STAT_PERIOD is not read anywhere in the visible cells — presumably
# it labels the period of each record; confirm against the data.
KEY_STAT_PERIOD = 'period'
KEY_STAT_VALUE = 'value'

In [21]:
# Example stat name and ticker used by the single-stock lookup cells further down.
stat_name = 'Operating Margin'
stock_ticker = 'TAP'

In [22]:
# One ticker symbol per loaded stock record; the shape confirms the count.
# Note: `tickers` is rebound to a plain list in the Screener section.
tickers = pd.Series([record[TICKER] for record in stocks])
tickers.shape

(530,)

## Retrieve Stock, Stat & Values from Raw Data

In [8]:
def get_stock(stocks, ticker):
    '''
    find the first stock record whose ticker symbol equals `ticker`;
    returns None when no record matches
    '''
    matches = (candidate for candidate in stocks if candidate[TICKER] == ticker)
    return next(matches, None)

In [9]:
def get_stat(stats, name):
    '''
    look up a stat entry by name in the passed stats;
    the first entry whose name matches is returned, or None if there is none
    '''
    return next((entry for entry in stats if entry[KEY_STAT_NAME] == name), None)

In [10]:
def get_value(stat, period=-1):
    '''
    return the value record(s) of the passed stat, with period used as a list index.
    -1 (the default) returns TTM. 100 (PERIOD_ALL) returns the full list of records.
    '''
    records = stat[KEY_STAT_VALUES]
    return records if period == PERIOD_ALL else records[period]

In [11]:
def get_stat_value(stock, stat_type, stat_name, period=-1):
    '''
    return the value record(s) of one named stat of a stock.
    stat_type is the key of the stat list inside the stock record (e.g. KEY_STATS or FIN_STATS);
    period is forwarded to get_value (-1 returns TTM, 100 returns all).
    if no stat has that name, get_stat yields None and the lookup fails with a TypeError.
    '''
    return get_value(get_stat(stock[stat_type], stat_name), period=period)

In [None]:
# Walk the lookup chain by hand for the example ticker and stat:
# stock record -> stat entry -> the records of every period.
stock = get_stock(stocks, stock_ticker)
stat = get_stat(stock[KEY_STATS], stat_name)
values = get_value(stat, period=PERIOD_ALL)
values

In [None]:
# Same lookup through the convenience wrapper. get_stat_value needs the stock record,
# the stat list key and the stat name; the default period (-1) returns the TTM record.
get_stat_value(stock, KEY_STATS, stat_name)

In [None]:
# Convert the example stat's per-period values to floats. The NULL placeholder is
# mapped to NaN first (as the screener cells do) so that a missing period does not
# make the float cast raise.
pd.Series([v[KEY_STAT_VALUE] for v in values]).replace({NULL: float('nan')}).astype('float64')

## Screener

In [23]:
# Stat names are read from the first stock only (key stats first, then finance stats);
# ticker symbols are collected from every stock. The tuple shows both counts.
stat_names = [
    entry[KEY_STAT_NAME]
    for stat_group in (KEY_STATS, FIN_STATS)
    for entry in stocks[0][stat_group]
]
tickers = [record[TICKER] for record in stocks]
len(tickers), len(stat_names)

(530, 85)

In [24]:
# Efficiency
ROIC = 'Return on Invested Capital %'
OPERATING_MARGIN = 'Operating Margin'
GROSS_MARGIN = 'Gross Margin'
NET_MARGIN = 'Net Margin %'

# Growth
OPERATING_CF_GROWTH = 'Operating Cash Flow Growth % YOY'
REVENUE_GROWTH_3 = 'Revenue % 3-Year Average'
REVENUE_GROWTH_5 = 'Revenue % 5-Year Average'
OPERATING_INCOME_GROWTH_3 = 'Operating Income % 3-Year Average'
OPERATING_INCOME_GROWTH_5 = 'Operating Income % 5-Year Average'

# Financial Risks
DEBT_EQUITY = 'Debt/Equity'

## High Growth & Efficiency

In [36]:
def stat_series(stocks, stat_type, stat_name, period=-1):
    '''
    return a float64 Series with one value per stock for the named stat at `period`
    (forwarded to get_stat_value; -1 returns TTM), with the NULL placeholder converted to NaN
    '''
    raw_values = [
        get_stat_value(s, stat_type, stat_name, period=period)[KEY_STAT_VALUE]
        for s in stocks
    ]
    return pd.Series(raw_values).replace({NULL: float('nan')}).astype('float64')


# Screening table: one row per stock, one numeric column per stat of interest.
screen = pd.DataFrame()
screen['ticker'] = tickers

screen['roic'] = stat_series(stocks, KEY_STATS, ROIC)

# NOTE(review): evaluating this same expression on its own (see the cells below) shows
# NaN in every displayed row of the saved output — confirm that the latest period (-1)
# actually carries a value for this stat before screening on the column.
screen['op_inc_3_yr_growth'] = stat_series(stocks, KEY_STATS, OPERATING_INCOME_GROWTH_3)

In [34]:
# Boolean mask of stocks whose ROIC exceeds 25, then the number of stocks that pass.
# (18% is 75th percentile)
roic_gt_25 = screen['roic'].gt(25)
roic_gt_25.sum()

In [38]:
# Evaluate the 3-year operating-income growth column on its own to inspect it.
# NOTE(review): the saved output shows NaN in every displayed row — confirm that the
# latest period (-1) actually carries a value for this stat.
pd.Series(
    [
        get_stat_value(record, KEY_STATS, OPERATING_INCOME_GROWTH_3)['value']
        for record in stocks
    ]
).replace({NULL: np.nan}).astype(np.float64)

0     NaN
1     NaN
2     NaN
3     NaN
4     NaN
       ..
525   NaN
526   NaN
527   NaN
528   NaN
529   NaN
Length: 530, dtype: float64

In [None]:
# Unexecuted repeat of the preceding cell's expression (3-year operating-income
# growth per stock, NULL placeholder -> NaN, cast to float).
pd.Series(
    [
        get_stat_value(record, KEY_STATS, OPERATING_INCOME_GROWTH_3)['value']
        for record in stocks
    ]
).replace({NULL: np.nan}).astype(np.float64)