In [327]:
# imports
%reload_ext autoreload
%autoreload 2

from basic_utils import *
import matplotlib.pyplot as plt

In [330]:
dates = read_dates('summary')

In [382]:
symbol_col = 'symbol'
symbol = 'CRM'

In [353]:
res = get_grouped_ds(symbol, 'summary')

In [361]:
path = get_path('summary', dates[-1])
fileList = list_files('summary', dates[-1])

In [383]:
path + json_ext.format(symbol)

'summary/2019-02-22/CRM.json'

In [384]:
summary = json_load(path + json_ext.format(symbol))[0]

#### processing functions

In [183]:
print('Found {} keys for {}'.format(len(summary.keys()), symbol))
summary.keys()

Found 5 keys for SPY


dict_keys(['assetProfile', 'fundProfile', 'defaultKeyStatistics', 'topHoldings', 'fundPerformance'])

In [9]:
# utility functions
# Lookup table used by parse_finstmt: for each statement code (CF, BS, IS) it
# gives the summary keys to walk. "A" and "Q" name the top-level sections that
# parse_finstmt labels with period 'A' and 'Q' (the "Q" keys are the
# "...Quarterly" sections); "B" names the list nested inside either section.
fin_stmt_mappings = {
    "CF": {
        "A":"cashflowStatementHistory",
        "Q":"cashflowStatementHistoryQuarterly",
        "B":"cashflowStatements"},
    "BS": {
        "A":"balanceSheetHistory",
        "Q":"balanceSheetHistoryQuarterly",
        "B":"balanceSheetStatements"},
    "IS": {
        "A":"incomeStatementHistory",
        "Q":"incomeStatementHistoryQuarterly",
        "B":"incomeStatementHistory"}}

def show_structure(dict_struct):
    """Map every key of ``dict_struct`` to the type of its value."""
    return {key: type(value) for key, value in dict_struct.items()}


def remove_empty_keys(dict_struct):
    """Return a new dict holding only the entries of ``dict_struct`` whose value is truthy."""
    kept = {}
    for key, value in dict_struct.items():
        if value:
            kept[key] = value
    return kept


def get_column_order(route):
    """List the keys of ``route`` that carry a truthy value, in their original order."""
    return list(remove_empty_keys(route))


def get_symbol_index(route, indexKey):
    """Return a list that repeats ``indexKey`` once per element of ``route``."""
    return [indexKey] * len(route)

def create_df(route, indexKey):
    """Build a symbol-tagged DataFrame from a list of flat records.

    The columns kept, and their order, are the keys of the LAST record in
    ``route`` that hold a truthy value. ``indexKey`` is then written to the
    symbol column via ``set_symbol``.

    Raises IndexError when ``route`` is empty (callers check for that first).
    """
    columns = get_column_order(route[-1])
    return set_symbol(pd.DataFrame(route)[columns], indexKey)
def create_normalized_df(route, indexKey):
    """Build a symbol-tagged DataFrame from a list of (possibly nested) records.

    The records are flattened with ``json_normalize`` and passed through
    ``clean_up_fmt``; the result is restricted to the truthy keys of the LAST
    record in ``route`` and tagged with ``indexKey`` via ``set_symbol``.

    Raises IndexError when ``route`` is empty (callers check for that first).
    """
    columns = get_column_order(route[-1])
    flattened = clean_up_fmt(json_normalize(route))
    return set_symbol(flattened[columns], indexKey)
def clean_single_row_df(route):
    """Turn a single record (dict) into a DataFrame without a symbol column.

    The record is flattened with ``json_normalize`` and passed through
    ``clean_up_fmt``; only the keys of ``route`` that hold a truthy value are
    kept as columns, in their original order.
    """
    columns = get_column_order(route)
    flattened = clean_up_fmt(json_normalize(route))
    return flattened[columns]
def single_row_df(route, indexKey):
    """Turn a single record into a DataFrame and tag it with ``indexKey`` as its symbol."""
    return set_symbol(clean_single_row_df(route), indexKey)

def set_storeDate(df, date):
    """Stamp ``df`` (in place) with a ``storeDate`` column and return it.

    ``date`` is converted with ``str()`` and parsed as ``YYYY-MM-DD``; the value
    stored is the POSIX timestamp of that day at midnight. The parsed datetime
    is naive, so ``timestamp()`` interprets it in the machine's local timezone.
    """
    parsed = datetime.strptime(str(date), '%Y-%m-%d')
    df['storeDate'] = parsed.timestamp()
    return df
def set_symbol(df, symbol):
    """Write ``symbol`` into the column named by the global ``symbol_col`` and return ``df``.

    The frame is modified in place; the return value is the same object.
    """
    df[symbol_col] = symbol
    return df

def parse_finstmt(summary, stmt, symbol):
    """Collect the 'A' and 'Q' statements of one type into a single DataFrame.

    Parameters
    ----------
    summary : dict
        Summary payload for one symbol.
    stmt : str
        Key into ``fin_stmt_mappings`` ('CF', 'BS' or 'IS').
    symbol : str
        Value written to the symbol column of every row.

    Returns
    -------
    pandas.DataFrame
        Rows from the 'A' section followed by rows from the 'Q' section, each
        tagged in a ``period`` column. Empty when neither section has records.

    Raises
    ------
    KeyError
        If ``stmt`` is not a key of ``fin_stmt_mappings``.
    """
    mapping = fin_stmt_mappings[stmt]
    frames = []
    for period in ('A', 'Q'):
        # A missing section or missing nested list is treated like an empty one
        # instead of raising KeyError.
        section = summary.get(mapping[period]) or {}
        records = section.get(mapping['B'])
        if records:
            frame = create_normalized_df(records, symbol)
            frame['period'] = period
            frames.append(frame)
    # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
    return pd.concat(frames, sort=False) if frames else pd.DataFrame()

def parse_earnings_trend(summary, symbol):
    """Split ``summary['earningsTrend']['trend']`` into four per-period tables.

    Every trend item contributes one row to each table. The period-level fields
    of the item (everything except the four nested sections) are placed in front
    of the section's own fields.

    ``summary`` is left untouched: the nested sections are read, not popped, so
    callers no longer need to pass a deep copy.

    Parameters
    ----------
    summary : dict
        Summary payload containing ``earningsTrend.trend``.
    symbol : str
        Value written to the symbol column of every section row.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(epsEst_df, revEst_df, epsTrend_df, epsRev_df)``.

    Raises
    ------
    KeyError
        If a trend item lacks one of the four nested sections.
    """
    sections = ('earningsEstimate', 'revenueEstimate', 'epsTrend', 'epsRevisions')
    section_frames = {name: [] for name in sections}
    period_frames = []

    for item in summary['earningsTrend']['trend']:
        for name in sections:
            section_frames[name].append(single_row_df(item[name], symbol))
        # Period-level fields are taken from a filtered copy so the caller's
        # dict keeps its nested sections.
        period_fields = {k: v for k, v in item.items() if k not in sections}
        period_frames.append(clean_single_row_df(period_fields))

    def _stack(frames):
        # One concat per table instead of growing a frame with append per item.
        return pd.concat(frames, sort=False) if frames else pd.DataFrame()

    period_df = _stack(period_frames)
    epsEst_df = _stack(section_frames['earningsEstimate'])
    revEst_df = _stack(section_frames['revenueEstimate'])
    epsTrend_df = _stack(section_frames['epsTrend'])
    epsRev_df = _stack(section_frames['epsRevisions'])

    # The estimate sections carry their own 'growth'; drop it so it does not
    # collide with the period-level 'growth' column joined in below.
    if 'growth' in epsEst_df.columns:
        epsEst_df = epsEst_df.drop(labels='growth', axis=1)
    if 'growth' in revEst_df.columns:
        revEst_df = revEst_df.drop(labels='growth', axis=1)

    epsEst_df = pd.concat([period_df, epsEst_df], axis=1)
    revEst_df = pd.concat([period_df, revEst_df], axis=1)
    epsTrend_df = pd.concat([period_df, epsTrend_df], axis=1)
    epsRev_df = pd.concat([period_df, epsRev_df], axis=1)

    return epsEst_df, revEst_df, epsTrend_df, epsRev_df

def get_mult_rows(key, summary, symbol):
    """Return ``summary[key]`` (a list of records) as a symbol-tagged DataFrame.

    Returns None when ``key`` is not present in ``summary``.
    """
    if key not in summary:
        return None
    return create_normalized_df(summary[key], symbol)
def get_single_row(key, summary, symbol):
    """Return ``summary[key]`` (a single record) as a symbol-tagged DataFrame.

    Returns None when ``key`` is not present in ``summary``.
    """
    if key not in summary:
        return None
    return single_row_df(summary[key], symbol)

def direct_row(summary, symbol):
    """Convert a single record that is already in hand into a symbol-tagged DataFrame."""
    return single_row_df(summary, symbol)
def direct_rows(summary, symbol):
    """Convert a list of records that is already in hand into a symbol-tagged DataFrame."""
    return create_normalized_df(summary, symbol)

#### Unpack the summaries

In [10]:
# Pull a specific security
# index = 55
# print(fileList[index])
# summary = json_load(fileList[index])[0]

In [409]:
# unpack daily summary
def _stack_frames(frames):
    """Concatenate the collected frames row-wise; no frames gives an empty DataFrame."""
    return pd.concat(frames, sort=False) if frames else pd.DataFrame()


def _raw_or_nan(section, key):
    """Return ``section[key]['raw']``, or NaN when the key or its 'raw' entry is absent."""
    return (section.get(key) or {}).get('raw', float('nan'))


def _index_trend_df(index_trend):
    """Pivot the index trend estimates into one row with a column per period, plus the ratios."""
    df = clean_up_fmt(json_normalize(index_trend['estimates']))
    df = df.T.rename(columns=df.period).drop(['period'])
    df['peRatio'] = _raw_or_nan(index_trend, 'peRatio')
    df['pegRatio'] = _raw_or_nan(index_trend, 'pegRatio')
    df['symbol'] = index_trend['symbol']
    return df


def unpack_summaries(dates):
    """Unpack every stored summary file of each date into per-category CSV files.

    For each date in ``dates`` the files returned by ``list_files('summary', d)``
    are loaded and their sections are collected into one table per category.
    Each table gets a ``storeDate`` column and is written with ``csv_store``:

    * categories in ``overwritten`` go to ``summary-categories/<name>.csv`` and
      are therefore replaced on every date processed;
    * categories in ``per_day`` go to ``summary-categories/<name>/<date>.csv``.

    Progress is printed per date and per file. A profile without a
    'companyOfficers' entry is accepted (no officers rows are added), and index
    ratios without a 'raw' value are stored as NaN.

    Parameters
    ----------
    dates : iterable
        Dates to process; each must render via ``str()`` as ``YYYY-MM-DD``.
    """
    # Sections holding one record per symbol; stored under their own name.
    single_row_keys = ('defaultKeyStatistics', 'financialData',
                       'netSharePurchaseActivity', 'majorHoldersBreakdown')
    # Sections holding a list of records at summary[root][sub]; stored under root.
    nested_list_keys = (
        ('institutionOwnership', 'ownershipList'),
        ('fundOwnership', 'ownershipList'),
        ('upgradeDowngradeHistory', 'history'),
        ('insiderHolders', 'holders'),
        ('recommendationTrend', 'trend'),
        ('earningsHistory', 'history'),
    )
    # Order matches the tuple returned by parse_earnings_trend.
    trend_names = ('earningsEstimate', 'revenueEstimate', 'epsTrend', 'epsRevisions')
    # Overwritten on every date.
    overwritten = ('assetProfile', 'companyOfficers',
                   'financials-BS', 'financials-IS', 'financials-CF',
                   'majorHoldersBreakdown', 'institutionOwnership', 'fundOwnership',
                   'upgradeDowngradeHistory', 'insiderHolders', 'earningsHistory')
    # One file saved per date.
    per_day = ('defaultKeyStatistics', 'financialData', 'earningsEstimate',
               'revenueEstimate', 'epsTrend', 'epsRevisions',
               'netSharePurchaseActivity', 'indexTrend', 'recommendationTrend')

    for d in dates:
        # Frames are collected in lists and concatenated once per table, rather
        # than re-copying a growing DataFrame for every file.
        frames = {name: [] for name in overwritten + per_day}
        indexTrend_df = pd.DataFrame()

        print('Unpacking summary for {}'.format(d))
        for i, f in enumerate(list_files('summary', d)):
            symbol = f.split('/')[2].split('.json')[0]
            consol_summary = json_load(f)

            if consol_summary:
                summary = consol_summary[0]

                # profile: officers are split off into their own table first
                key = 'assetProfile'
                if key in summary:
                    officers = summary[key].pop('companyOfficers', None)
                    if officers:
                        frames['companyOfficers'].append(direct_rows(officers, symbol))
                    frames[key].append(get_single_row(key, summary, symbol))

                # stats and other single-record sections
                for key in single_row_keys:
                    if key in summary:
                        frames[key].append(get_single_row(key, summary, symbol))

                # financials: both the 'A' and 'Q' sections must be present
                for stmt in ('CF', 'BS', 'IS'):
                    mapping = fin_stmt_mappings[stmt]
                    if mapping['A'] in summary and mapping['Q'] in summary:
                        frames['financials-' + stmt].append(
                            parse_finstmt(summary, stmt, symbol))

                # earningsTrend
                if 'earningsTrend' in summary:
                    parsed = parse_earnings_trend(summary, symbol)
                    for name, frame in zip(trend_names, parsed):
                        frames[name].append(frame)

                # list-valued sections
                for root, sub in nested_list_keys:
                    if root in summary and sub in summary[root] and len(summary[root][sub]):
                        frames[root].append(
                            create_normalized_df(summary[root][sub], symbol))

                # indexTrend is not accumulated: the frame of the last file
                # that has estimates is the one stored.
                root, sub = 'indexTrend', 'estimates'
                if root in summary and sub in summary[root] and len(summary[root][sub]):
                    indexTrend_df = _index_trend_df(summary[root])

            print('{} Full unpack for {}'.format(i, symbol))

        tables = {name: _stack_frames(parts) for name, parts in frames.items()}
        tables['indexTrend'] = indexTrend_df

        # overwritten daily
        for name in overwritten:
            csv_store(set_storeDate(tables[name], d),
                      'summary-categories/', csv_ext.format(name))

        # saved a record per day
        fname = csv_ext.format(str(d))
        for name in per_day:
            csv_store(set_storeDate(tables[name], d),
                      'summary-categories/{}/'.format(name), fname)

In [379]:
root, sub = 'institutionOwnership', 'ownershipList'
if root in summary and sub in summary[root] and len(summary[root][sub]):
    print(create_normalized_df(summary[root][sub], symbol))

In [None]:
# dates = read_dates('summary')
tgt_dates = dates
tgt_dates

In [412]:
unpack_summaries(tgt_dates)

Unpacking summary for 2018-09-07
0 Full unpack for A
1 Full unpack for AAPL
2 Full unpack for ABMD
3 Full unpack for ADBE
4 Full unpack for ADP
5 Full unpack for AEIS
6 Full unpack for AEO
7 Full unpack for AKAM
8 Full unpack for ALGN
9 Full unpack for ALRM
10 Full unpack for AMAT
11 Full unpack for ANET
12 Full unpack for ANSS
13 Full unpack for ANTM
14 Full unpack for APPF
15 Full unpack for ATGE
16 Full unpack for ATVI
17 Full unpack for AVX
18 Full unpack for AYX
19 Full unpack for BBY
20 Full unpack for BC
21 Full unpack for BKNG
22 Full unpack for BL
23 Full unpack for BMY
24 Full unpack for BOX
25 Full unpack for BRKR
26 Full unpack for BRKS
27 Full unpack for CA
28 Full unpack for CAKE
29 Full unpack for CBM
30 Full unpack for CCMP
31 Full unpack for CDNS
32 Full unpack for CERN
33 Full unpack for CGNX
34 Full unpack for CHGG
35 Full unpack for CI
36 Full unpack for CMG
37 Full unpack for CNC
38 Full unpack for COLM
39 Full unpack for COUP
40 Full unpack for CREE
41 Full unpack

142 Full unpack for PSTG
143 Full unpack for QCOM
144 Full unpack for QLYS
145 Full unpack for REGN
146 Full unpack for RGEN
147 Full unpack for RHT
148 Full unpack for RL
149 Full unpack for RNG
150 Full unpack for ROG
151 Full unpack for ROST
152 Full unpack for SHAK
153 Full unpack for SHOO
154 Full unpack for SKX
155 Full unpack for SLAB
156 Full unpack for SMTC
157 Full unpack for SNPS
158 Full unpack for SPLK
159 Full unpack for SQ
160 Full unpack for STMP
161 Full unpack for SUPN
162 Full unpack for SWKS
163 Full unpack for TDC
164 Full unpack for TER
165 Full unpack for THO
166 Full unpack for TIF
167 Full unpack for TJX
168 Full unpack for TRIP
169 Full unpack for TTD
170 Full unpack for TTWO
171 Full unpack for TWTR
172 Full unpack for TXN
173 Full unpack for TXRH
174 Full unpack for TYL
175 Full unpack for UBNT
176 Full unpack for ULTA
177 Full unpack for ULTI
178 Full unpack for URBN
179 Full unpack for UTHR
180 Full unpack for VAC
181 Full unpack for VAR
182 Full unpack fo

83 Full unpack for GRPN
84 Full unpack for GRUB
85 Full unpack for GSKY
86 Full unpack for GWRE
87 Full unpack for HALO
88 Full unpack for HQY
89 Full unpack for HRB
90 Full unpack for HUBS
91 Full unpack for HUM
92 Full unpack for ICUI
93 Full unpack for IDCC
94 Full unpack for ILMN
95 Full unpack for INTU
96 Full unpack for IONS
97 Full unpack for IPGP
98 Full unpack for IRBT
99 Full unpack for ISRG
100 Full unpack for JNPR
101 Full unpack for KLAC
102 Full unpack for LGND
103 Full unpack for LOGM
104 Full unpack for LOPE
105 Full unpack for LRCX
106 Full unpack for LULU
107 Full unpack for MA
108 Full unpack for MANH
109 Full unpack for MASI
110 Full unpack for MDSO
111 Full unpack for MIME
112 Full unpack for MKSI
113 Full unpack for MOH
114 Full unpack for MPWR
115 Full unpack for MRVL
116 Full unpack for MSFT
117 Full unpack for MSG
118 Full unpack for MU
119 Full unpack for MXIM
120 Full unpack for MYGN
121 Full unpack for NATI
122 Full unpack for NEOG
123 Full unpack for NEWR
1

KeyboardInterrupt: 

#### assetProfile

In [14]:
officers = summary['assetProfile'].pop('companyOfficers')

In [15]:
single_row_df(summary['assetProfile'], symbol)

Unnamed: 0,address1,city,state,zip,country,phone,website,industry,sector,longBusinessSummary,fullTimeEmployees,auditRisk,boardRisk,compensationRisk,shareHolderRightsRisk,overallRisk,governanceEpochDate,compensationAsOfEpochDate,maxAge,symbol
0,1600 Amphitheatre Parkway,Mountain View,CA,94043,United States,650-253-0000,http://www.abc.xyz,Internet Content & Information,Technology,Alphabet Inc. provides online advertising serv...,98771,2,8,10,10,10,1548979200,1514678400,86400,GOOGL


In [16]:
create_normalized_df(officers, symbol)

Unnamed: 0,maxAge,name,age,title,yearBorn,exercisedValue,unexercisedValue,symbol
0,1,Mr. Lawrence Edward Page,45,"Co-Founder, CEO & Director",1973,0,0,GOOGL
1,1,Mr. Sergey Brin,44,"Co-Founder, Pres & Director",1974,0,0,GOOGL
2,1,Ms. Ruth Myrna Porat,60,Sr. VP & CFO,1958,0,0,GOOGL
3,1,Mr. David C. Drummond,55,"Sr. VP of Corp. Devel., Chief Legal Officer & ...",1963,0,0,GOOGL
4,1,Ms. Amie Thuener O'Toole,42,Chief Accounting Officer & VP,1976,0,0,GOOGL


#### majorHoldersBreakdown

In [385]:
get_single_row('majorHoldersBreakdown', summary, symbol)

Unnamed: 0,maxAge,insidersPercentHeld,institutionsPercentHeld,institutionsFloatPercentHeld,institutionsCount,symbol
0,1,0.04657,0.85664,0.89848,1705,CRM


#### institutionOwnership

In [246]:
create_normalized_df(summary['institutionOwnership']['ownershipList'], symbol)

Unnamed: 0,maxAge,reportDate,organization,pctHeld,position,value,symbol
0,1,1538265600,"Vanguard Group, Inc. (The)",0.0714,21342069,25761584648,GOOGL
1,1,1546214400,Blackrock Inc.,0.0628,18767367,19611147820,GOOGL
2,1,1538265600,"FMR, LLC",0.055,16434628,19837910766,GOOGL
3,1,1546214400,State Street Corporation,0.0359,10742476,11225457720,GOOGL
4,1,1538265600,Price (T.Rowe) Associates Inc,0.022,6580585,7943292541,GOOGL
5,1,1538265600,Capital Research Global Investors,0.0177,5299665,6397119628,GOOGL
6,1,1546214400,Northern Trust Corporation,0.0127,3782363,3952418040,GOOGL
7,1,1538265600,Invesco Ltd.,0.0125,3722910,4493850202,GOOGL
8,1,1538265600,"Geode Capital Management, LLC",0.012,3572834,4312696464,GOOGL
9,1,1538265600,"Wellington Management Company, LLP",0.0119,3544079,4277986879,GOOGL


#### recommendationTrend

In [247]:
create_df(summary['recommendationTrend']['trend'], symbol)

Unnamed: 0,period,strongBuy,buy,hold,symbol
0,0m,13,25,5,GOOGL
1,-1m,16,26,3,GOOGL
2,-2m,16,26,3,GOOGL
3,-3m,13,25,5,GOOGL


#### earningsHistory

In [72]:
eh = create_normalized_df(summary['earningsHistory']['history'], symbol)
# eh.quarter = eh.quarter.apply(lambda x: date.fromtimestamp(x))
# eh = eh.sort_values(by='period', ascending=True).T
# eh.rename(columns=eh.loc['period']).loc[]
eh

Unnamed: 0,maxAge,epsActual,epsEstimate,epsDifference,surprisePercent,quarter,period,symbol
0,1,-3.35,-3.58,0.23,0.064,1522454400,-4q,TSLA
1,1,-3.06,-2.92,-0.14,-0.048,1530316800,-3q,TSLA
2,1,2.9,-0.19,3.09,16.263,1538265600,-2q,TSLA
3,1,1.93,2.2,-0.27,-0.123,1546214400,-1q,TSLA


#### indexTrend

In [250]:
# same for all securities, can save historically
# only need one symbol
summary['indexTrend']

{'maxAge': 1,
 'symbol': 'SP5',
 'peRatio': {'raw': 16.7537, 'fmt': '16.75'},
 'pegRatio': {'raw': 1.9006, 'fmt': '1.90'},
 'estimates': [{'period': '0q', 'growth': {'raw': 0.024, 'fmt': '0.02'}},
  {'period': '+1q', 'growth': {'raw': 0.029000001, 'fmt': '0.03'}},
  {'period': '0y', 'growth': {'raw': 0.065, 'fmt': '0.06'}},
  {'period': '+1y', 'growth': {'raw': 0.101, 'fmt': '0.10'}},
  {'period': '+5y', 'growth': {'raw': 0.109753996, 'fmt': '0.11'}},
  {'period': '-5y', 'growth': {}}]}

In [420]:
# Pivot the index estimates into a single row (one column per period) and
# attach the index-level ratios and symbol.
index_trend = summary['indexTrend']
route = index_trend['estimates']
df = clean_up_fmt(json_normalize(route))
df = df.T.rename(columns=df.period).drop(['period'])
for ratio in ('peRatio', 'pegRatio'):
    df[ratio] = index_trend[ratio]['raw']
df['symbol'] = index_trend['symbol']
df

Unnamed: 0,0q,+1q,0y,+1y,+5y,-5y,peRatio,pegRatio,symbol
growth,0.024,0.029,0.065,0.101,0.109754,,16.7537,1.9006,SP5


#### defaultKeyStatistics

In [251]:
single_row_df(summary['defaultKeyStatistics'], symbol)

Unnamed: 0,maxAge,priceHint,enterpriseValue,forwardPE,profitMargins,floatShares,sharesOutstanding,sharesShort,sharesShortPriorMonth,sharesShortPreviousMonthDate,...,trailingEps,forwardEps,pegRatio,lastSplitFactor,lastSplitDate,enterpriseToRevenue,enterpriseToEbitda,52WeekChange,SandP52WeekChange,symbol
0,1,2,678750257152,24.02042,0.22465,603776337,299360000,1606498,2621656,1546214400,...,43.703,47.01,1.65,1000/1998,1396483200,4.961,16.792,0.03415,0.007993,GOOGL


#### netSharePurchaseActivity

In [192]:
single_row_df(summary['netSharePurchaseActivity'], symbol)

Unnamed: 0,maxAge,period,buyInfoCount,buyInfoShares,sellInfoCount,sellInfoShares,netInfoCount,netInfoShares,netPercentInsiderShares,totalInsiderShares,symbol
0,1,6m,35,391694,30,146473,65,245221,-1.293,27943,GOOG


#### fundOwnership

In [193]:
create_normalized_df(summary['fundOwnership']['ownershipList'], symbol)

Unnamed: 0,maxAge,reportDate,organization,pctHeld,position,value,symbol
0,1,1538265600,Vanguard Total Stock Market Index Fund,0.0217,7600886,9071429414,GOOG
1,1,1538265600,Vanguard 500 Index Fund,0.0164,5733009,6842174251,GOOG
2,1,1538265600,Growth Fund Of America Inc,0.0154,5375762,6415810674,GOOG
3,1,1543536000,SPDR S&P 500 ETF Trust,0.01,3497893,3828199035,GOOG
4,1,1543536000,Fidelity Contrafund Inc,0.0085,2981841,3263416245,GOOG
5,1,1538265600,Vanguard Institutional Index Fund-Institutiona...,0.0084,2937383,3505678489,GOOG
6,1,1543536000,"Invesco ETF Tr-Invesco QQQ Tr, Series 1 ETF",0.0082,2878965,3150825664,GOOG
7,1,1543536000,Fidelity 500 Index Fund,0.0062,2153268,2356601097,GOOG
8,1,1543536000,iShares Core S&P 500 ETF,0.0061,2143684,2346112080,GOOG
9,1,1538265600,Vanguard Growth Index Fund,0.006,2103274,2510194420,GOOG


#### insiderHolders

In [50]:
insiders = create_normalized_df(summary['insiderHolders']['holders'], symbol)
# insiders['latestTransDate'].apply(lambda x: datetime.fromtimestamp(x))
insiders

0   2018-12-04 16:00:00
1   2018-03-06 16:00:00
2   2018-05-06 17:00:00
3   2018-06-14 17:00:00
4   2019-02-13 16:00:00
5   2019-01-31 16:00:00
6   2018-11-08 16:00:00
7   2018-09-30 17:00:00
8   2019-02-04 16:00:00
9   2019-01-27 16:00:00
Name: latestTransDate, dtype: datetime64[ns]

#### earnings

In [None]:
# Yearly and quarterly revenue/earnings stacked into one table.
# pd.concat replaces DataFrame.append, which pandas 2.0 removed.
financialChart = summary['earnings']['financialsChart']
finChart_df = pd.concat(
    [create_normalized_df(financialChart['yearly'], symbol),
     create_normalized_df(financialChart['quarterly'], symbol)],
    sort=False)

# 'quarterly' is popped so that the rest of earningsChart flattens to one row.
# NOTE: this mutates `summary`, so the cell raises KeyError if run a second time.
earningsChart = summary['earnings']['earningsChart']
qearnChart_df = create_normalized_df(earningsChart.pop('quarterly'), symbol)
earnInfo_df = clean_up_fmt(json_normalize(earningsChart))
earnings_dates = earningsChart['earningsDate']
earnInfo_df['earningsDate'] = earnings_dates[0]['raw'] if earnings_dates else np.nan

In [201]:
earningsChart

{'currentQuarterEstimate': {'raw': 10.82, 'fmt': '10.82'},
 'currentQuarterEstimateDate': '4Q',
 'currentQuarterEstimateYear': 2018,
 'earningsDate': []}

In [202]:
finChart_df

Unnamed: 0,date,revenue,earnings,symbol
0,2015,74989000000,16348000000,GOOG
1,2016,90272000000,19478000000,GOOG
2,2017,110855000000,12662000000,GOOG
3,2018,136819000000,30736000000,GOOG
0,1Q2018,31146000000,9401000000,GOOG
1,2Q2018,32657000000,3195000000,GOOG
2,3Q2018,33740000000,9192000000,GOOG
3,4Q2018,39276000000,8948000000,GOOG


In [203]:
qearnChart_df

Unnamed: 0,date,actual,estimate,symbol
0,4Q2017,9.7,9.96,GOOG
1,1Q2018,13.33,9.32,GOOG
2,2Q2018,4.54,9.54,GOOG
3,3Q2018,13.06,10.4,GOOG


In [206]:
earnInfo_df

Unnamed: 0,currentQuarterEstimate,currentQuarterEstimateDate,currentQuarterEstimateYear,earningsDate
0,10.82,4Q,2018,


#### calendarEvents

In [209]:
# Earnings calendar as one row; the first earnings date (if any) is added as a column.
route = summary['calendarEvents']['earnings']
cal_df = single_row_df(route, symbol)
if len(route['earningsDate']) > 0:
    cal_df['earningsDate'] = route['earningsDate'][0]['raw']
else:
    cal_df['earningsDate'] = np.nan
cal_df

Unnamed: 0,earningsAverage,earningsLow,earningsHigh,revenueAverage,revenueLow,revenueHigh,symbol,earningsDate
0,10.82,9.28,12.13,38910500000,38029000000,39455000000,GOOG,


#### upgradeDowngradeHistory

In [210]:
route = summary['upgradeDowngradeHistory']['history']
create_normalized_df(route, symbol)

Unnamed: 0,epochGradeDate,firm,toGrade,action,symbol
0,1443744000,Oppenheimer,Outperform,up,GOOG
1,1441670400,Moffett Nathanson,Buy,init,GOOG
2,1439942400,Atlantic Equities,Overweight,up,GOOG
3,1439251200,Monness Crespi Hardt,Buy,up,GOOG
4,1439251200,Mizuho,Buy,up,GOOG
5,1439251200,Stifel Nicolaus,Buy,up,GOOG
6,1439251200,Morgan Stanley,Overweight,up,GOOG
7,1439164800,Susquehanna,Positive,init,GOOG
8,1439164800,Stifel Nicolaus,Buy,up,GOOG
9,1435276800,Mizuho,Neutral,init,GOOG


#### earningsTrend

In [211]:
import copy
summ2 = copy.deepcopy(summary)
epsEst_df, revEst_df, epsTrend_df, epsRev_df = parse_earnings_trend(summ2, symbol)

In [212]:
epsEst_df

Unnamed: 0,maxAge,period,endDate,growth,avg,low,high,yearAgoEps,numberOfAnalysts,symbol
0,1,0q,2018-12-31,0.115,10.82,9.28,12.13,9.7,31.0,GOOG
0,1,+1q,2019-03-31,-0.213,10.49,9.71,11.85,13.33,24.0,GOOG
0,1,0y,2018-12-31,0.296,41.81,37.94,43.88,32.25,30.0,GOOG
0,1,+1y,2019-12-31,0.126,47.06,43.42,53.3,41.81,37.0,GOOG
0,1,+5y,,0.1636,,,,,,GOOG
0,1,-5y,,,,,,,,GOOG


In [213]:
revEst_df

Unnamed: 0,maxAge,period,endDate,growth,avg,low,high,numberOfAnalysts,yearAgoRevenue,symbol
0,1,0q,2018-12-31,0.115,38910500000.0,38029000000.0,39455000000.0,28.0,32323000000.0,GOOG
0,1,+1q,2019-03-31,-0.213,37065100000.0,34326000000.0,38190800000.0,23.0,31146000000.0,GOOG
0,1,0y,2018-12-31,0.296,136473000000.0,135572000000.0,136998000000.0,32.0,110855000000.0,GOOG
0,1,+1y,2019-12-31,0.126,162557000000.0,158247000000.0,167038000000.0,32.0,136473000000.0,GOOG
0,1,+5y,,0.1636,,,,,,GOOG
0,1,-5y,,,,,,,,GOOG


In [214]:
epsTrend_df

Unnamed: 0,maxAge,period,endDate,growth,current,7daysAgo,30daysAgo,60daysAgo,90daysAgo,symbol
0,1,0q,2018-12-31,0.115,10.82,10.86,10.98,10.88,10.88,GOOG
0,1,+1q,2019-03-31,-0.213,10.49,10.45,10.49,10.48,10.48,GOOG
0,1,0y,2018-12-31,0.296,41.81,41.83,41.93,41.78,41.78,GOOG
0,1,+1y,2019-12-31,0.126,47.06,47.16,47.28,47.19,47.02,GOOG
0,1,+5y,,0.1636,,,,,,GOOG
0,1,-5y,,,,,,,,GOOG


In [215]:
epsRev_df

Unnamed: 0,maxAge,period,endDate,growth,upLast7days,upLast30days,downLast30days,symbol
0,1,0q,2018-12-31,0.115,0.0,1.0,4.0,GOOG
0,1,+1q,2019-03-31,-0.213,1.0,3.0,1.0,GOOG
0,1,0y,2018-12-31,0.296,0.0,2.0,4.0,GOOG
0,1,+1y,2019-12-31,0.126,0.0,2.0,5.0,GOOG
0,1,+5y,,0.1636,,,,GOOG
0,1,-5y,,,,,,GOOG


#### financialData

In [216]:
single_row_df(summary['financialData'], symbol)

Unnamed: 0,maxAge,currentPrice,targetHighPrice,targetLowPrice,targetMeanPrice,targetMedianPrice,recommendationMean,recommendationKey,numberOfAnalystOpinions,totalCash,...,grossProfits,freeCashflow,operatingCashflow,revenueGrowth,grossMargins,ebitdaMargins,operatingMargins,profitMargins,financialCurrency,symbol
0,86400,1121.67,1400.0,1240.0,1328.0,1350.0,1.7,buy,5,109140000768,...,77270000000,19776749568,47971000320,0.215,0.56476,0.29543,0.22944,0.22465,USD,GOOG


#### financials

In [218]:
route = summary['cashflowStatementHistoryQuarterly']['cashflowStatements']

In [219]:
parse_finstmt(summary, 'CF', symbol)

Unnamed: 0,maxAge,endDate,netIncome,depreciation,changeToNetincome,changeToAccountReceivables,changeToLiabilities,changeToOperatingActivities,totalCashFromOperatingActivities,capitalExpenditures,...,otherCashflowsFromInvestingActivities,totalCashflowsFromInvestingActivities,netBorrowings,otherCashflowsFromFinancingActivities,totalCashFromFinancingActivities,effectOfExchangeRate,changeInCash,repurchaseOfStock,symbol,period
0,1,1546214400,30736000000,9029000000,3298000000,-2169000000,1438000000,7890000000,47971000000,-25139000000,...,,-28504000000,-61000000,-4043000000,-13179000000,-302000000,5986000000,-9075000000,GOOG,A
1,1,1514678400,12662000000,6899000000,8284000000,-3768000000,1121000000,3682000000,37091000000,-13184000000,...,1419000000.0,-31401000000,-86000000,-3366000000,-8298000000,405000000,-2203000000,-4846000000,GOOG,A
2,1,1483142400,19478000000,6100000000,7158000000,-2578000000,333000000,2420000000,36036000000,-10212000000,...,-1978000000.0,-31165000000,-1335000000,-3304000000,-8332000000,-170000000,-3631000000,-3693000000,GOOG,A
3,1,1451520000,16348000000,5024000000,5609000000,-2094000000,246000000,1618000000,26572000000,-9950000000,...,75000000.0,-23711000000,-23000000,-2422000000,-4225000000,-434000000,-1798000000,-1780000000,GOOG,A
0,1,1546214400,8948000000,2575000000,969000000,-2887000000,873000000,2869000000,12987000000,-7081000000,...,,-6876000000,-5000000,-91000000,-2746000000,-107000000,3258000000,-2650000000,GOOG,Q
1,1,1538265600,9192000000,2356000000,1794000000,-670000000,293000000,1480000000,13210000000,-5282000000,...,,-10408000000,-25000000,-1253000000,-3478000000,-29000000,-705000000,-2200000000,GOOG,Q
2,1,1530316800,3195000000,2112000000,1345000000,-312000000,20000000,5210000000,10132000000,-5477000000,...,,-3374000000,-1344000000,-1541000000,-4937000000,-331000000,1490000000,-2052000000,GOOG,Q
3,1,1522454400,9401000000,1986000000,-810000000,1700000000,252000000,-1669000000,11642000000,-7299000000,...,,-7846000000,1313000000,-1158000000,-2018000000,165000000,1943000000,-2173000000,GOOG,Q


In [220]:
parse_finstmt(summary, 'BS', symbol)

Unnamed: 0,maxAge,endDate,cash,shortTermInvestments,netReceivables,inventory,otherCurrentAssets,totalCurrentAssets,longTermInvestments,propertyPlantEquipment,...,totalCurrentLiabilities,totalLiab,commonStock,retainedEarnings,treasuryStock,otherStockholderEquity,totalStockholderEquity,netTangibleAssets,symbol,period
0,1,1546214400,16701000000,92439000000,21193000000,1107000000,4236000000,135676000000,13859000000,59719000000,...,34620000000,55164000000,45049000000,134885000000,-2306000000,-2306000000,177628000000,157520000000,GOOG,A
1,1,1514678400,10715000000,91156000000,18705000000,749000000,2983000000,124308000000,7813000000,42383000000,...,24183000000,44793000000,40247000000,113247000000,-992000000,-992000000,152502000000,133063000000,GOOG,A
2,1,1483142400,12918000000,73415000000,15632000000,268000000,3175000000,105408000000,5878000000,34234000000,...,16756000000,28461000000,36307000000,105131000000,-2402000000,-2402000000,139036000000,119261000000,GOOG,A
3,1,1451520000,15409000000,56517000000,13459000000,491000000,1590000000,90114000000,5183000000,29016000000,...,19310000000,27130000000,32982000000,89223000000,-1874000000,-1874000000,120331000000,100615000000,GOOG,A
0,1,1546214400,16701000000,92439000000,21193000000,1107000000,4236000000,135676000000,13859000000,59719000000,...,34620000000,55164000000,45049000000,134885000000,-2306000000,-2306000000,177628000000,157520000000,GOOG,Q
1,1,1538265600,13443000000,92973000000,18067000000,1212000000,4007000000,129702000000,12673000000,55300000000,...,31301000000,51698000000,43111000000,128405000000,-1676000000,-1676000000,169840000000,149497000000,GOOG,Q
2,1,1530316800,14148000000,88106000000,17244000000,698000000,3961000000,124157000000,11487000000,51672000000,...,29903000000,49610000000,42243000000,121282000000,-1525000000,-1525000000,162000000000,141443000000,GOOG,Q
3,1,1522454400,12658000000,90227000000,16814000000,636000000,3426000000,123761000000,10976000000,48845000000,...,25394000000,46110000000,41487000000,120008000000,-670000000,-670000000,160825000000,140154000000,GOOG,Q


In [221]:
parse_finstmt(summary, 'IS', symbol)

Unnamed: 0,maxAge,endDate,totalRevenue,costOfRevenue,grossProfit,researchDevelopment,sellingGeneralAdministrative,totalOperatingExpenses,operatingIncome,totalOtherIncomeExpenseNet,ebit,interestExpense,incomeBeforeTax,incomeTaxExpense,netIncomeFromContinuingOps,netIncome,netIncomeApplicableToCommonShares,symbol,period
0,1,1546214400,136819000000,59549000000,77270000000,21419000000,24459000000,105427000000,31392000000,3521000000,31392000000,-114000000,34913000000,4177000000,30736000000,30736000000,30736000000,GOOG,A
1,1,1514678400,110855000000,45583000000,65272000000,16625000000,19765000000,81973000000,28882000000,-1689000000,28882000000,-109000000,27193000000,14531000000,12662000000,12662000000,12662000000,GOOG,A
2,1,1483142400,90272000000,35138000000,55134000000,13948000000,17470000000,66556000000,23716000000,434000000,23716000000,-124000000,24150000000,4672000000,19478000000,19478000000,19478000000,GOOG,A
3,1,1451520000,74989000000,28164000000,46825000000,12282000000,15183000000,55629000000,19360000000,291000000,19360000000,-104000000,19651000000,3303000000,16348000000,16348000000,16348000000,GOOG,A
0,1,1546214400,39276000000,17918000000,21358000000,6034000000,7121000000,31073000000,8203000000,1869000000,8203000000,-29000000,10072000000,1124000000,8948000000,8948000000,8948000000,GOOG,Q
1,1,1538265600,33740000000,14281000000,19459000000,5232000000,5917000000,25430000000,8310000000,1773000000,8310000000,-28000000,10083000000,891000000,9192000000,9192000000,9192000000,GOOG,Q
2,1,1530316800,32657000000,13883000000,18774000000,5114000000,5782000000,24779000000,7878000000,-3663000000,7878000000,-27000000,4215000000,1020000000,3195000000,3195000000,3195000000,GOOG,Q
3,1,1522454400,31146000000,13467000000,17679000000,5039000000,5639000000,24145000000,7001000000,3542000000,7001000000,-30000000,10543000000,1142000000,9401000000,9401000000,9401000000,GOOG,Q


In [222]:
# Scratch check: compare a statement endDate epoch with a year-end datetime
# built locally, to see the hour offset between the two.
from datetime import datetime, timedelta, timezone
rpt_ts = 1546214400  # endDate of the latest GOOG income-statement rows
rpt = datetime.fromtimestamp(rpt_ts)
ye = datetime(2018, 12, 31)  # naive datetime: timestamp() treats it as local time
ye_ts = datetime.timestamp(ye)  # fixed: was `dt`, which this cell never defines

# Only the last expression of a cell is rendered, so this tuple is evaluated
# but not displayed; the last element is the offset in hours.
ye_ts, rpt_ts, (ye_ts - rpt_ts) / (60*60)
60*60*8  # seconds in 8 hours

28800

#### ETF securities

In [193]:
# Section names of the ETF summary, echoed for reference; each one is
# explored in the cells that follow.
['assetProfile', 'fundProfile', 'defaultKeyStatistics', 'topHoldings', 'fundPerformance']

['assetProfile',
 'fundProfile',
 'defaultKeyStatistics',
 'topHoldings',
 'fundPerformance']

In [194]:
# 'assetProfile' section rendered as a single row tagged with the symbol.
single_row_df(summary['assetProfile'], symbol)

Unnamed: 0,phone,longBusinessSummary,maxAge,symbol
0,866-732-8673,The investment seeks to provide investment res...,86400,SPY


In [195]:
# 'fundProfile' section rendered as a single row tagged with the symbol.
single_row_df(summary['fundProfile'], symbol)

Unnamed: 0,maxAge,styleBoxUrl,family,categoryName,legalType,feesExpensesInvestment,feesExpensesInvestment.1,feesExpensesInvestment.2,feesExpensesInvestmentCat,feesExpensesInvestmentCat.1,feesExpensesInvestmentCat.2,symbol
0,1,http://us.i1.yimg.com/us.yimg.com/i/fi/3_0styl...,SPDR State Street Global Advisors,Large Blend,Exchange Traded Fund,0.03,0.0009,99389.72,50.76,0.0036,99389.72,SPY


In [197]:
# 'defaultKeyStatistics' section rendered as a single row tagged with the symbol.
single_row_df(summary['defaultKeyStatistics'], symbol)

Unnamed: 0,maxAge,priceHint,category,ytdReturn,beta3Year,totalAssets,yield,fundFamily,fundInceptionDate,legalType,threeYearAverageReturn,fiveYearAverageReturn,symbol
0,1,2,Large Blend,0.1116,1.0,250220085248,0.0189,SPDR State Street Global Advisors,727660800,Exchange Traded Fund,0.1536,0.1088,SPY


In [202]:
# 'holdings' is nested under 'topHoldings'; the output shows one row per holding.
route = summary['topHoldings']['holdings']
create_normalized_df(route, symbol)

Unnamed: 0,symbol,holdingName,holdingPercent
0,SPY,Microsoft Corp,0.0355
1,SPY,Apple Inc,0.0331
2,SPY,Amazon.com Inc,0.0311
3,SPY,Berkshire Hathaway Inc B,0.0176
4,SPY,Facebook Inc A,0.0176
5,SPY,Johnson & Johnson,0.0157
6,SPY,JPMorgan Chase & Co,0.0152
7,SPY,Alphabet Inc Class C,0.0151
8,SPY,Alphabet Inc A,0.0148
9,SPY,Exxon Mobil Corp,0.0137


In [205]:
# List the sub-sections available under 'fundPerformance'.
summary['fundPerformance'].keys()

dict_keys(['maxAge', 'performanceOverview', 'performanceOverviewCat', 'trailingReturns', 'trailingReturnsNav', 'trailingReturnsCat', 'annualTotalReturns', 'pastQuarterlyReturns', 'riskOverviewStatistics', 'riskOverviewStatisticsCat'])

In [223]:
# 'performanceOverview' sub-section of 'fundPerformance' as a single row.
single_row_df(summary['fundPerformance']['performanceOverview'], symbol)

Unnamed: 0,asOfDate,ytdReturnPct,oneYearTotalReturn,threeYearTotalReturn,symbol
0,1548892800,0.0801,-0.0242,0.1393,SPY


In [241]:
# 'performanceOverviewCat' sub-section of 'fundPerformance' as a single row.
single_row_df(summary['fundPerformance']['performanceOverviewCat'], symbol)

Unnamed: 0,ytdReturnPct,fiveYrAvgReturnPct,symbol
0,0.075,0.1576,SPY


In [224]:
# 'trailingReturns' sub-section of 'fundPerformance' as a single row.
single_row_df(summary['fundPerformance']['trailingReturns'], symbol)

Unnamed: 0,ytd,oneMonth,threeMonth,oneYear,threeYear,fiveYear,tenYear,lastBullMkt,lastBearMkt,symbol
0,0.0801,0.0801,0.0034,-0.0242,0.1393,0.1084,0.1486,0.0,0.0,SPY


In [240]:
# 'riskStatistics' sits under 'riskOverviewStatistics'; the output shows one
# row per look-back period ('5y', '3y', '10y').
route = summary['fundPerformance']['riskOverviewStatistics']['riskStatistics']
create_normalized_df(route, symbol)

Unnamed: 0,year,alpha,beta,meanAnnualReturn,rSquared,stdDev,sharpeRatio,treynorRatio,symbol
0,5y,-0.08,1.0,0.91,100.0,11.22,0.91,10.15,SPY
1,3y,-0.08,1.0,1.14,100.0,11.14,1.12,12.76,SPY
2,10y,-0.07,1.0,1.24,100.0,13.4,1.08,14.51,SPY


#### Cleanups

In [None]:
# finStmtIS_df.columns
# cs_cols = ['totalRevenue', 'costOfRevenue', 'grossProfit',
#        'researchDevelopment', 'sellingGeneralAdministrative',
#        'totalOperatingExpenses', 'operatingIncome',
#        'totalOtherIncomeExpenseNet', 'ebit', 'interestExpense',
#        'incomeBeforeTax', 'incomeTaxExpense', 'minorityInterest',
#        'netIncomeFromContinuingOps', 'discontinuedOperations', 'netIncome',
#        'netIncomeApplicableToCommonShares']
# cs_hist_cols = ['costOfRevenue', 'grossProfit', 'researchDevelopment', 
#                 'sellingGeneralAdministrative', 'operatingIncome',
#                 'ebit', 'interestExpense','incomeTaxExpense',
#                 'netIncomeFromContinuingOps','netIncome']
# finStmtCF_df.info(), finStmtBS_df.info(), finStmtIS_df.info()
# is_df = finStmtIS_df[(finStmtIS_df.period == 'A') & (finStmtIS_df.endDate == 1514678400)][cs_cols].copy()
# cs_df = (is_df.T / is_df.totalRevenue.values).T
# cs_df[cs_df.columns[1:6]].plot.hist(range=(0, 1), alpha=0.5)
# cs_df.sort_values(by='netIncomeApplicableToCommonShares', ascending=False)

In [None]:
# NOTE(review): `d` is not assigned anywhere in the visible part of this
# notebook — confirm it exists before running this cell.
path = get_path('summary', d)
# Scratch expression: the symbol's JSON file name under a 'temp/' prefix.
'temp/' + json_ext.format(symbol)

In [None]:
# First entry of `dates` alongside the path get_path builds for it.
dates[0], get_path('summary', dates[0])

In [None]:
# NOTE(review): the following cell starts with this same assignment; one of
# the two looks redundant.
fileList = list_files('summary', dates[0])

In [None]:
# Cleanup pass over the summary files for dates[0]: any file that still loads
# as a dict is unwrapped to its ['quoteSummary']['result'] payload and stored
# back under the same date path. Files that do not load as a dict are skipped.
fileList = list_files('summary', dates[0])
path = get_path('summary', dates[0])  # identical for every file, so resolve it once
for f in fileList:
    # Assumes keys shaped like 'summary/2019-02-22/CRM.json' — TODO confirm.
    # rsplit drops only the extension, so a symbol containing a dot is kept whole.
    symbol = f.split('/')[2].rsplit('.', 1)[0]
    consol_summary = json_load(f)
    if isinstance(consol_summary, dict):
        print('processing:', f)
        l = consol_summary['quoteSummary']['result']
        data = json.dumps(l)
        target = path + json_ext.format(symbol)  # build the destination key once
        store_s3(data, target)
        print('saving:', target)