In [1]:
# imports
%reload_ext autoreload
%autoreload 2

from basic_utils import *
import matplotlib.pyplot as plt

dates = read_dates('summary')

Loading config.json


In [32]:
# Name of the column that set_symbol() writes the ticker into.
symbol_col = 'symbol'
# Ticker used by the exploratory cells in this notebook.
symbol = 'BND'
# Summary folder for the third-from-last entry of `dates`.
path = get_path('summary', dates[-3])
# Summary files listed for the first entry of `dates`.
fileList = list_files('summary', dates[0])

In [33]:
path + json_ext.format(symbol)

'summary/2019-02-06/BND.json'

In [34]:
summary = json_load(path + json_ext.format(symbol))[0]

#### processing functions

In [37]:
summary.keys()
# summary['assetProfile']['companyOfficers']

dict_keys(['assetProfile', 'fundProfile', 'defaultKeyStatistics', 'topHoldings', 'fundPerformance'])

In [10]:
# dict/list helpers (plain functions instead of name-bound lambdas, per PEP 8)
def show_structure(dict_struct):
    """Map each key of ``dict_struct`` to the type of its value."""
    return {k: type(v) for k, v in dict_struct.items()}


def remove_empty_keys(dict_struct):
    """Return a new dict holding only the entries of ``dict_struct`` whose value is truthy."""
    return {k: v for k, v in dict_struct.items() if v}


def get_column_order(route):
    """List the keys of ``route`` that carry a truthy value, in the dict's own order."""
    return list(remove_empty_keys(route))


def get_symbol_index(route, indexKey):
    """Return a list containing ``indexKey`` once per element of ``route``."""
    return [indexKey] * len(route)

def create_df(route, indexKey):
    """Build a DataFrame from a list of flat records and tag it with a symbol.

    The columns kept, and their order, are the keys of the LAST record in
    ``route`` that hold a truthy value. ``indexKey`` is written into the
    symbol column by ``set_symbol``.
    """
    columns = get_column_order(route[-1])
    frame = pd.DataFrame(route)[columns]
    return set_symbol(frame, indexKey)
def create_normalized_df(route, indexKey):
    """Flatten a list of nested records into a DataFrame and tag it with a symbol.

    ``route`` is passed through ``json_normalize`` and ``clean_up_fmt``
    (presumably collapsing the raw/fmt value pairs -- confirm in basic_utils).
    The result is then restricted to the truthy-valued keys of the last record,
    in that record's order, and ``indexKey`` is stored in the symbol column.
    """
    columns = get_column_order(route[-1])
    flattened = clean_up_fmt(json_normalize(route))
    return set_symbol(flattened[columns], indexKey)
def clean_single_row_df(route):
    """Flatten one nested record (a dict) into a DataFrame without a symbol column.

    Only the keys of ``route`` that hold a truthy value are kept, in the
    dict's own order.
    """
    columns = get_column_order(route)
    return clean_up_fmt(json_normalize(route))[columns]
def single_row_df(route, indexKey):
    """Flatten one nested record via ``clean_single_row_df`` and tag it with ``indexKey``."""
    return set_symbol(clean_single_row_df(route), indexKey)

def set_storeDate(df, date):
    """Stamp ``df`` in place with a ``storeDate`` column and return it.

    ``date`` is converted with ``str()`` and parsed as ``YYYY-MM-DD``; the
    column holds the POSIX timestamp of that (naive, local-time) datetime.
    """
    parsed = datetime.strptime(str(date), '%Y-%m-%d')
    df['storeDate'] = parsed.timestamp()
    return df
def set_symbol(df, symbol):
    """Write ``symbol`` into the ``symbol_col`` column of ``df`` (in place) and return ``df``."""
    # symbol_col is the module-level column name defined in an earlier cell.
    df[symbol_col] = symbol
    return df

# Summary keys for each financial statement type (CF, BS, IS):
#   "A" -> key of the annual history entry
#   "Q" -> key of the quarterly history entry
#   "B" -> key, inside either entry, that holds the list of statements
fin_stmt_mappings = {
    "CF": {
        "A": "cashflowStatementHistory",
        "Q": "cashflowStatementHistoryQuarterly",
        "B": "cashflowStatements",
    },
    "BS": {
        "A": "balanceSheetHistory",
        "Q": "balanceSheetHistoryQuarterly",
        "B": "balanceSheetStatements",
    },
    "IS": {
        "A": "incomeStatementHistory",
        "Q": "incomeStatementHistoryQuarterly",
        "B": "incomeStatementHistory",
    },
}
def parse_finstmt(summary, stmt, symbol):
    """Combine the annual and quarterly statements of one type into a single frame.

    Args:
        summary: per-symbol summary dict; must contain both the annual and the
            quarterly entry named in ``fin_stmt_mappings[stmt]``.
        stmt: statement type key of ``fin_stmt_mappings`` ('CF', 'BS' or 'IS').
        symbol: ticker written into the symbol column of every row.

    Returns:
        DataFrame with the annual rows (``period == 'A'``) followed by the
        quarterly rows (``period == 'Q'``); an empty DataFrame when both
        statement lists are empty.

    Raises:
        KeyError: if ``stmt`` is unknown or an expected key is missing from
            ``summary``.
    """
    mapping = fin_stmt_mappings[stmt]
    frames = []
    for period in ('A', 'Q'):
        statements = summary[mapping[period]][mapping['B']]
        if statements:
            frame = create_normalized_df(statements, symbol)
            frame['period'] = period
            frames.append(frame)
    if not frames:
        return pd.DataFrame()
    # One concat instead of DataFrame.append, which pandas 2.0 removed.
    return pd.concat(frames, sort=False)

def parse_earnings_trend(summary, symbol):
    """Split ``summary['earningsTrend']['trend']`` into four per-period frames.

    Each trend item contributes one row to each result. A row is the item's
    own top-level fields followed by the fields of one nested section and the
    symbol column. The nested ``growth`` field is dropped from the two
    estimate sections so that only the item-level ``growth`` remains.

    Unlike popping directly from the items, this works on a shallow copy of
    each item, so the caller's ``summary`` is left unmodified.

    Args:
        summary: per-symbol summary dict containing ``earningsTrend``.
        symbol: ticker written into the symbol column of every row.

    Returns:
        Tuple ``(epsEst_df, revEst_df, epsTrend_df, epsRev_df)``.

    Raises:
        KeyError: if ``earningsTrend``/``trend`` or one of the four sections
            is missing from an item.
    """
    sections = ('earningsEstimate', 'revenueEstimate', 'epsTrend', 'epsRevisions')
    section_frames = {name: [] for name in sections}
    period_frames = []

    for item in summary['earningsTrend']['trend']:
        # Pop from a copy: the sections must be removed before the remaining
        # item-level fields are flattened, but the caller's dict stays intact.
        remaining = dict(item)
        for name in sections:
            section_frames[name].append(single_row_df(remaining.pop(name), symbol))
        period_frames.append(clean_single_row_df(remaining))

    def stack(frames):
        # Single concat per result instead of growing a frame row by row.
        return pd.concat(frames, sort=False) if frames else pd.DataFrame()

    period_df = stack(period_frames)
    combined = []
    for name in sections:
        section_df = stack(section_frames[name])
        if name in ('earningsEstimate', 'revenueEstimate') and 'growth' in section_df.columns:
            section_df = section_df.drop(labels='growth', axis=1)
        combined.append(pd.concat([period_df, section_df], axis=1))
    return tuple(combined)

In [11]:
def get_mult_rows(key, summary, symbol):
    """Return ``summary[key]`` (a list of records) as a symbol-tagged frame, or None if the key is absent."""
    if key not in summary:
        return None
    return create_normalized_df(summary[key], symbol)


def get_single_row(key, summary, symbol):
    """Return ``summary[key]`` (one record) as a symbol-tagged one-row frame, or None if the key is absent."""
    if key not in summary:
        return None
    return single_row_df(summary[key], symbol)

def direct_row(summary, symbol):
    """Alias of ``single_row_df``: flatten one record and tag it with ``symbol``."""
    return single_row_df(summary, symbol)


def direct_rows(summary, symbol):
    """Alias of ``create_normalized_df``: flatten a list of records and tag it with ``symbol``."""
    return create_normalized_df(summary, symbol)

#### Unpack the summaries

In [29]:
index = 55
print(fileList[index])
summary = json_load(fileList[index])[0]

summary/2019-02-08/CCL.json


In [66]:
# unpack daily summary
def _stack_frames(frames):
    """Row-wise concatenate ``frames``, skipping ``None``; no frames gives an empty DataFrame."""
    present = [frame for frame in frames if frame is not None]
    if not present:
        return pd.DataFrame()
    return pd.concat(present, sort=False)


def unpack_summaries(dates):
    """Split each day's per-symbol summary files into per-category CSV files.

    For every date in ``dates`` the files returned by
    ``list_files('summary', date)`` are loaded, the first element of each
    non-empty file is split by category, and one frame per category is
    written through ``csv_store`` under ``summary-categories/``.

    Every output is stamped with the date being unpacked, and the daily
    outputs are named after it. Stamping with ``today_date`` would make every
    date in ``dates`` write to the same daily file.

    Args:
        dates: iterable of 'YYYY-MM-DD' strings naming the summary folders.

    Returns:
        None. Progress is printed; results are written by ``csv_store``.
    """
    # (category key, output file stem) written directly under summary-categories/.
    # NOTE: these files are rewritten for every date (appending is still a TODO).
    static_outputs = (
        ('assetProfile', 'assetProfile'),
        ('companyOfficers', 'companyOfficers'),
        ('BS', 'financials-BS'),
        ('IS', 'financials-IS'),
        ('CF', 'financials-CF'),
    )
    # Categories written to summary-categories/<category>/<date>.csv.
    daily_outputs = (
        'defaultKeyStatistics',
        'financialData',
        'earningsEstimate',
        'revenueEstimate',
        'epsTrend',
        'epsRevisions',
        'netSharePurchaseActivity',
    )

    for d in dates:
        # Collect per-symbol frames in lists and concatenate once per category;
        # growing a DataFrame with append() per symbol is quadratic.
        collected = {name: [] for name, _ in static_outputs}
        collected.update({name: [] for name in daily_outputs})

        print('Unpacking summary for {}'.format(d))
        for i, f in enumerate(list_files('summary', d)):
            symbol = f.split('/')[2].split('.json')[0]
            consol_summary = json_load(f)

            if consol_summary:
                summary = consol_summary[0]

                # profile: officers are removed first so they do not end up
                # inside the single-row profile frame
                key = 'assetProfile'
                if key in summary:
                    officers = summary[key].pop('companyOfficers', None)
                    if officers:
                        collected['companyOfficers'].append(direct_rows(officers, symbol))
                    collected[key].append(get_single_row(key, summary, symbol))

                # stats and netSharePurchaseActivity: get_single_row returns
                # None for a missing key, which _stack_frames skips
                for key in ('defaultKeyStatistics', 'financialData', 'netSharePurchaseActivity'):
                    collected[key].append(get_single_row(key, summary, symbol))

                # financials: only when both annual and quarterly entries exist
                for stmt in ('CF', 'BS', 'IS'):
                    mapping = fin_stmt_mappings[stmt]
                    if mapping['A'] in summary and mapping['Q'] in summary:
                        collected[stmt].append(parse_finstmt(summary, stmt, symbol))

                # earningsTrend
                if 'earningsTrend' in summary:
                    eps_est, rev_est, eps_trend, eps_rev = parse_earnings_trend(summary, symbol)
                    collected['earningsEstimate'].append(eps_est)
                    collected['revenueEstimate'].append(rev_est)
                    collected['epsTrend'].append(eps_trend)
                    collected['epsRevisions'].append(eps_rev)

            print('{} Full unpack for {}'.format(i, symbol))

        # static info: profile and financial statements
        for name, stem in static_outputs:
            frame = set_storeDate(_stack_frames(collected[name]), d)
            csv_store(frame, 'summary-categories/', csv_ext.format(stem))

        # daily categories: one file per unpacked date
        fname = csv_ext.format(str(d))
        for name in daily_outputs:
            frame = set_storeDate(_stack_frames(collected[name]), d)
            csv_store(frame, 'summary-categories/' + name + '/', fname)

In [68]:
dates[:-3:-1]

['2019-02-08', '2019-02-07']

In [69]:
unpack_summaries(dates[:-3:-1])

Unpacking summary for 2019-02-08
0 Full unpack for 0700.HK
1 Full unpack for 1810.HK
2 Full unpack for 3690.HK
3 Full unpack for 9984.T
4 Full unpack for A
5 Full unpack for AAPL
6 Full unpack for ABBV
7 Full unpack for ABC
8 Full unpack for ABT
9 Full unpack for ADBE
10 Full unpack for ADI
11 Full unpack for ADSK
12 Full unpack for AEIS
13 Full unpack for AEO
14 Full unpack for AGN
15 Full unpack for AKAM
16 Full unpack for ALGN
17 Full unpack for ALLO
18 Full unpack for ALRM
19 Full unpack for ALXN
20 Full unpack for AMAT
21 Full unpack for AMD
22 Full unpack for AMGN
23 Full unpack for AMZN
24 Full unpack for ANET
25 Full unpack for ANSS
26 Full unpack for ANTM
27 Full unpack for APH
28 Full unpack for APTV
29 Full unpack for ATHN
30 Full unpack for ATVI
31 Full unpack for AVGO
32 Full unpack for AVX
33 Full unpack for AYX
34 Full unpack for BABA
35 Full unpack for BAX
36 Full unpack for BBY
37 Full unpack for BC
38 Full unpack for BDX
39 Full unpack for BGNE
40 Full unpack for BIDU

338 Full unpack for XLV
339 Full unpack for XLY
340 Full unpack for XME
341 Full unpack for XOP
342 Full unpack for XRAY
343 Full unpack for XRT
344 Full unpack for YELP
345 Full unpack for YY
346 Full unpack for ZBH
347 Full unpack for ZEN
348 Full unpack for ZTS
349 Full unpack for ZUO
Saved summary-categories/assetProfile.csv
Saved summary-categories/companyOfficers.csv
Saved summary-categories/financials-BS.csv
Saved summary-categories/financials-IS.csv
Saved summary-categories/financials-CF.csv
Saved summary-categories/defaultKeyStatistics/2019-02-08.csv
Saved summary-categories/financialData/2019-02-08.csv
Saved summary-categories/earningsEstimate/2019-02-08.csv
Saved summary-categories/revenueEstimate/2019-02-08.csv
Saved summary-categories/epsTrend/2019-02-08.csv
Saved summary-categories/epsRevisions/2019-02-08.csv
Saved summary-categories/netSharePurchaseActivity/2019-02-08.csv
Unpacking summary for 2019-02-07
0 Full unpack for 0700.HK
1 Full unpack for 1810.HK
2 Full unpack f

300 Full unpack for UHS
301 Full unpack for ULTA
302 Full unpack for ULTI
303 Full unpack for UNH
304 Full unpack for URBN
305 Full unpack for URGN
306 Full unpack for UTHR
307 Full unpack for V
308 Full unpack for VAR
309 Full unpack for VC
310 Full unpack for VCYT
311 Full unpack for VEEV
312 Full unpack for VIPS
313 Full unpack for VRSK
314 Full unpack for VRSN
315 Full unpack for VRTX
316 Full unpack for VSH
317 Full unpack for WAT
318 Full unpack for WB
319 Full unpack for WCG
320 Full unpack for WDAY
321 Full unpack for WDC
322 Full unpack for WIX
323 Full unpack for WST
324 Full unpack for WUBA
325 Full unpack for WWE
326 Full unpack for XAR
327 Full unpack for XBI
328 Full unpack for XES
329 Full unpack for XHB
330 Full unpack for XHE
331 Full unpack for XHS
332 Full unpack for XITK
333 Full unpack for XLB
334 Full unpack for XLC
335 Full unpack for XLE
336 Full unpack for XLF
337 Full unpack for XLI
338 Full unpack for XLK
339 Full unpack for XLNX
340 Full unpack for XLP
341 F

#### assetProfile

In [20]:
officers = summary['assetProfile'].pop('companyOfficers')

In [21]:
single_row_df(summary['assetProfile'], symbol)

Unnamed: 0,address1,city,state,zip,country,phone,website,industry,industrySymbol,sector,...,fullTimeEmployees,auditRisk,boardRisk,compensationRisk,shareHolderRightsRisk,overallRisk,governanceEpochDate,compensationAsOfEpochDate,maxAge,symbol
0,86 Morris Avenue,Summit,NJ,7901,United States,908-673-9000,http://www.celgene.com,Biotechnology,h788,Healthcare,...,7467,2,7,2,3,2,1535760000,1483142400,86400,CELG


In [22]:
create_normalized_df(officers, symbol)

Unnamed: 0,maxAge,name,title,exercisedValue,unexercisedValue,symbol
0,1,Mr. Mark J. Alles,Chairman & CEO,1597230,0,CELG
1,1,Mr. Peter N. Kellogg,Exec. VP & Chief Corp. Strategy Officer,0,0,CELG
2,1,Dr. S. J. Rupert Vessey,EVP and Pres of Research & Early Devel.,0,0,CELG
3,1,Mr. David V. Elkins,Exec. VP & CFO,0,0,CELG
4,1,Mr. Patrick E. Flanigan III,Corp. VP of Investor Relations,0,0,CELG


#### majorHoldersBreakdown

In [23]:
pd.DataFrame(summary['majorHoldersBreakdown']).loc['raw']

maxAge                                1
insidersPercentHeld             0.00381
institutionsPercentHeld         0.75954
institutionsFloatPercentHeld    0.76245
institutionsCount                  1848
Name: raw, dtype: object

#### institutionOwnership

In [24]:
create_normalized_df(summary['institutionOwnership']['ownershipList'], symbol)

Unnamed: 0,maxAge,reportDate,organization,pctHeld,position,value,symbol
0,1,1530316800,Blackrock Inc.,0.0765,53793364,4155537369,CELG
1,1,1530316800,"Vanguard Group, Inc. (The)",0.074,52056328,4021351338,CELG
2,1,1530316800,State Street Corporation,0.0406,28588600,2208469350,CELG
3,1,1530316800,Edgewood Management Company,0.0198,13924544,1075671024,CELG
4,1,1530316800,Invesco Ltd.,0.0169,11891214,918596281,CELG
5,1,1530316800,Orbis Allan Gray Ltd,0.0146,10255794,792260086,CELG
6,1,1530316800,"Geode Capital Management, LLC",0.0131,9189754,709908496,CELG
7,1,1530316800,"Franklin Resources, Inc",0.0131,9179327,709103010,CELG
8,1,1530316800,Northern Trust Corporation,0.013,9138429,705943640,CELG
9,1,1530316800,Janus Henderson Group PLC,0.0129,9070041,700660667,CELG


#### recommendationTrend

In [25]:
create_df(summary['recommendationTrend']['trend'], symbol)

Unnamed: 0,period,strongBuy,buy,hold,sell,symbol
0,0m,11,8,11,1,CELG
1,-1m,14,10,2,2,CELG
2,-2m,12,10,3,1,CELG
3,-3m,12,10,4,1,CELG


#### earningsHistory

In [26]:
create_normalized_df(summary['earningsHistory']['history'], symbol)

Unnamed: 0,maxAge,epsActual,epsEstimate,epsDifference,surprisePercent,quarter,period,symbol
0,1,1.91,1.87,0.04,0.021,1506729600,-4q,CELG
1,1,2.0,1.97,0.03,0.015,1514678400,-3q,CELG
2,1,2.05,1.96,0.09,0.046,1522454400,-2q,CELG
3,1,2.16,2.11,0.05,0.024,1530316800,-1q,CELG


#### indexTrend

In [29]:
summary['indexTrend']

{'maxAge': 1,
 'symbol': 'SP5',
 'peRatio': {'raw': 16.0209, 'fmt': '16.02'},
 'pegRatio': {'raw': 1.45273, 'fmt': '1.45'},
 'estimates': [{'period': '0q', 'growth': {'raw': 0.317, 'fmt': '0.32'}},
  {'period': '+1q', 'growth': {'raw': 0.43400002, 'fmt': '0.43'}},
  {'period': '0y', 'growth': {'raw': 0.24, 'fmt': '0.24'}},
  {'period': '+1y', 'growth': {'raw': 0.269, 'fmt': '0.27'}},
  {'period': '+5y', 'growth': {'raw': 0.109052, 'fmt': '0.11'}},
  {'period': '-5y', 'growth': {}}]}

In [30]:
route = summary['indexTrend']['estimates']
df = clean_up_fmt(json_normalize(route))
df.index = get_symbol_index(df, symbol)
df

Unnamed: 0,growth,period
CELG,0.317,0q
CELG,0.434,+1q
CELG,0.24,0y
CELG,0.269,+1y
CELG,0.109052,+5y
CELG,,-5y


#### defaultKeyStatistics

In [32]:
single_row_df(summary['defaultKeyStatistics'], symbol)

Unnamed: 0,maxAge,priceHint,enterpriseValue,forwardPE,profitMargins,floatShares,sharesOutstanding,sharesShort,sharesShortPriorMonth,sharesShortPreviousMonthDate,...,trailingEps,forwardEps,pegRatio,lastSplitFactor,lastSplitDate,enterpriseToRevenue,enterpriseToEbitda,52WeekChange,SandP52WeekChange,symbol
0,1,2,79976955904,8.334901,0.19813,701492064,703363008,10788473,15805908,1534291200,...,3.596,10.62,0.53,2/1,1403740800,5.663,15.803,-0.343833,0.146328,CELG


#### netSharePurchaseActivity

In [33]:
single_row_df(summary['netSharePurchaseActivity'], symbol)

Unnamed: 0,maxAge,period,buyInfoCount,buyInfoShares,buyPercentInsiderShares,sellInfoCount,sellInfoShares,sellPercentInsiderShares,netInfoCount,netInfoShares,netPercentInsiderShares,totalInsiderShares,symbol
0,1,6m,38,243436,0.097,4,70500,0.028,42,172936,0.069,2679813,CELG


#### fundOwnership

In [35]:
create_normalized_df(summary['fundOwnership']['ownershipList'], symbol)

Unnamed: 0,maxAge,reportDate,organization,pctHeld,position,value,symbol
0,1,1514678400,Vanguard Total Stock Market Index Fund,0.027,18975558,1987689700,CELG
1,1,1514678400,Vanguard 500 Index Fund,0.0191,13453235,1409226366,CELG
2,1,1530316800,iShares NASDAQ Biotechnology ETF,0.0117,8232189,635936600,CELG
3,1,1514678400,Vanguard Institutional Index Fund-Institutiona...,0.0113,7973718,835246960,CELG
4,1,1530316800,SPDR S&P 500 ETF Trust,0.0112,7891849,609645335,CELG
5,1,1530316800,"Invesco ETF Tr-Invesco QQQ Tr, Series 1 ETF",0.0085,5990779,462787677,CELG
6,1,1517356800,Advisors Inner Circle Fund-Edgewood Growth Fd,0.008,5629547,581307035,CELG
7,1,1514678400,Vanguard Growth Index Fund,0.0075,5301058,555285825,CELG
8,1,1530316800,Fidelity 500 Index Fund,0.0066,4652857,359433203,CELG
9,1,1530316800,iShares Core S&P 500 ETF,0.0065,4548250,351352312,CELG


#### insiderHolders

In [24]:
create_normalized_df(summary['insiderHolders']['holders'], symbol)

Unnamed: 0,maxAge,name,relation,transactionDescription,latestTransDate,positionIndirect,positionIndirectDate
ADBE,1,BANSE AMY L,Director,Conversion of Exercise of derivative security,1523404800,,
ADBE,1,BARNHOLT EDWARD W,Director,Conversion of Exercise of derivative security,1523404800,,
ADBE,1,BURGESS ROBERT K,Director,Conversion of Exercise of derivative security,1523404800,16030.0,1523405000.0
ADBE,1,CALDERONI FRANK A,Director,Conversion of Exercise of derivative security,1523404800,,
ADBE,1,DALEY JAMES ERNEST,Director,Conversion of Exercise of derivative security,1523404800,,
ADBE,1,DESMOND LAURA,Director,Conversion of Exercise of derivative security,1523404800,,
ADBE,1,GESCHKE CHARLES M,Director,Sale,1524182400,158059.0,1524182000.0
ADBE,1,LEWNES ANN,Officer,Sale,1524009600,,
ADBE,1,ROSENSWEIG DANIEL L,Director,Conversion of Exercise of derivative security,1523404800,,
ADBE,1,WARNOCK JOHN E,Director,Sale,1523404800,470344.0,1523405000.0


#### earnings

In [37]:
# Yearly and quarterly chart rows stacked into one frame (single concat
# instead of DataFrame.append, which pandas 2.0 removed).
financialChart = summary['earnings']['financialsChart']
finChart_df = pd.concat(
    [
        create_normalized_df(financialChart['yearly'], symbol),
        create_normalized_df(financialChart['quarterly'], symbol),
    ],
    sort=False,
)

# Pop 'quarterly' from a shallow copy: `summary` stays intact, so this cell
# can be re-run without a KeyError.
earningsChart = dict(summary['earnings']['earningsChart'])
qearnChart_df = create_normalized_df(earningsChart.pop('quarterly'), symbol)
earnInfo_df = clean_up_fmt(json_normalize(earningsChart))
# Overwrite with the raw value of the first listed earnings date.
earnInfo_df['earningsDate'] = earningsChart['earningsDate'][0]['raw']

In [47]:
finChart_df

Unnamed: 0,date,revenue,earnings,symbol
0,2014,7670400000,1999900000,CELG
1,2015,9256000000,1602000000,CELG
2,2016,11229000000,1999000000,CELG
3,2017,13003000000,2940000000,CELG
0,3Q2017,3287000000,988000000,CELG
1,4Q2017,3483000000,-81000000,CELG
2,1Q2018,3538000000,846000000,CELG
3,2Q2018,3814000000,1045000000,CELG


In [48]:
qearnChart_df

Unnamed: 0,date,actual,estimate,symbol
0,3Q2017,1.91,1.87,CELG
1,4Q2017,2.0,1.97,CELG
2,1Q2018,2.05,1.96,CELG
3,2Q2018,2.16,2.11,CELG


In [49]:
earnInfo_df

Unnamed: 0,currentQuarterEstimate,currentQuarterEstimateDate,currentQuarterEstimateYear,earningsDate
0,2.23,3Q,2018,1540425600


#### calendarEvents

In [50]:
# Single-row frame of the calendar earnings entry; earningsDate is replaced
# by the raw value of the first listed date.
route = summary['calendarEvents']['earnings']
cal_df = single_row_df(route, symbol)
first_earnings_date = route['earningsDate'][0]
cal_df['earningsDate'] = first_earnings_date['raw']
cal_df

Unnamed: 0,earningsDate,earningsAverage,earningsLow,earningsHigh,revenueAverage,revenueLow,revenueHigh,symbol
0,1540425600,2.23,2.16,2.35,3835350000,3725790000,3934200000,CELG


#### upgradeDowngradeHistory

In [79]:
route = summary['upgradeDowngradeHistory']['history']
create_normalized_df(route, symbol)

Unnamed: 0,epochGradeDate,firm,toGrade,action
ADBE,1522022400,JMP Securities,Market Perform,down
ADBE,1521504000,DZ Bank,Buy,up
ADBE,1513036800,JP Morgan,Neutral,down
ADBE,1510185600,UBS,Buy,init
ADBE,1508371200,Pivotal Research,Hold,up
ADBE,1508112000,Deutsche Bank,Hold,down
ADBE,1504656000,Moffett Nathanson,Buy,init
ADBE,1503619200,Jefferies,Buy,init
ADBE,1497225600,Pivotal Research,Sell,down
ADBE,1493683200,Barclays,Overweight,init


#### earningsTrend

In [68]:
import copy
summ2 = copy.deepcopy(summary)
epsEst_df, revEst_df, epsTrend_df, epsRev_df = parse_earnings_trend(summ2, symbol)

In [69]:
epsEst_df

Unnamed: 0,maxAge,period,endDate,growth,avg,low,high,yearAgoEps,numberOfAnalysts,symbol
0,1,0q,2018-09-30,0.168,2.23,2.16,2.35,1.91,23.0,CELG
0,1,+1q,2018-12-31,0.18,2.36,2.2,2.51,2.0,23.0,CELG
0,1,0y,2018-12-31,0.179,8.77,8.68,9.01,7.44,24.0,CELG
0,1,+1y,2019-12-31,0.212,10.63,9.24,14.2,8.77,25.0,CELG
0,1,+5y,,0.1962,,,,,,CELG
0,1,-5y,,0.25107,,,,,,CELG


In [70]:
revEst_df

Unnamed: 0,maxAge,period,endDate,growth,avg,low,high,numberOfAnalysts,yearAgoRevenue,symbol
0,1,0q,2018-09-30,0.168,3835350000.0,3725790000.0,3934200000.0,21.0,3287000000.0,CELG
0,1,+1q,2018-12-31,0.18,3960520000.0,3900000000.0,4083900000.0,21.0,3483000000.0,CELG
0,1,0y,2018-12-31,0.179,15151200000.0,14989500000.0,15370100000.0,24.0,13003000000.0,CELG
0,1,+1y,2019-12-31,0.212,16909800000.0,16000000000.0,17729600000.0,24.0,15151200000.0,CELG
0,1,+5y,,0.1962,,,,,,CELG
0,1,-5y,,0.25107,,,,,,CELG


In [71]:
epsTrend_df

Unnamed: 0,maxAge,period,endDate,growth,current,7daysAgo,30daysAgo,60daysAgo,90daysAgo,symbol
0,1,0q,2018-09-30,0.168,2.23,2.22,2.22,2.22,2.17,CELG
0,1,+1q,2018-12-31,0.18,2.36,2.35,2.35,2.35,2.23,CELG
0,1,0y,2018-12-31,0.179,8.77,8.75,8.75,8.75,8.53,CELG
0,1,+1y,2019-12-31,0.212,10.63,10.6,10.6,10.55,10.21,CELG
0,1,+5y,,0.1962,,,,,,CELG
0,1,-5y,,0.25107,,,,,,CELG


In [72]:
epsRev_df

Unnamed: 0,maxAge,period,endDate,growth,upLast7days,upLast30days,downLast30days,symbol
0,1,0q,2018-09-30,0.168,2.0,2.0,0.0,CELG
0,1,+1q,2018-12-31,0.18,2.0,2.0,0.0,CELG
0,1,0y,2018-12-31,0.179,2.0,2.0,0.0,CELG
0,1,+1y,2019-12-31,0.212,2.0,2.0,1.0,CELG
0,1,+5y,,0.1962,,,,CELG
0,1,-5y,,0.25107,,,,CELG


#### financialData

In [57]:
single_row_df(summary['financialData'], symbol)

Unnamed: 0,maxAge,currentPrice,targetHighPrice,targetLowPrice,targetMeanPrice,targetMedianPrice,recommendationMean,recommendationKey,numberOfAnalystOpinions,totalCash,...,freeCashflow,operatingCashflow,earningsGrowth,revenueGrowth,grossMargins,ebitdaMargins,operatingMargins,profitMargins,financialCurrency,symbol
0,86400,88.6,163.0,82.0,111.65,105.5,2.2,buy,24,3416999936,...,3493874944,3680999936,0.051,0.166,0.90575,0.35838,0.32106,0.19813,USD,CELG


#### financials

In [58]:
summary.keys()

dict_keys(['assetProfile', 'recommendationTrend', 'cashflowStatementHistory', 'institutionOwnership', 'majorHoldersBreakdown', 'balanceSheetHistoryQuarterly', 'earningsHistory', 'indexTrend', 'majorDirectHolders', 'defaultKeyStatistics', 'netSharePurchaseActivity', 'incomeStatementHistory', 'fundOwnership', 'incomeStatementHistoryQuarterly', 'cashflowStatementHistoryQuarterly', 'insiderHolders', 'earnings', 'calendarEvents', 'upgradeDowngradeHistory', 'balanceSheetHistory', 'earningsTrend', 'financialData'])

In [59]:
route = summary['cashflowStatementHistoryQuarterly']['cashflowStatements']

In [60]:
parse_finstmt(summary, 'CF', symbol)

Unnamed: 0,maxAge,endDate,netIncome,depreciation,changeToNetincome,changeToAccountReceivables,changeToLiabilities,changeToInventory,changeToOperatingActivities,totalCashFromOperatingActivities,...,totalCashflowsFromInvestingActivities,netBorrowings,otherCashflowsFromFinancingActivities,totalCashFromFinancingActivities,effectOfExchangeRate,changeInCash,repurchaseOfStock,issuanceOfStock,symbol,period
0,1,1514678400,2940000000,471000000,-336000000,-236000000,293000000,-42000000,-73000000,5246000000,...,-2891000000,1564000000,,-1584000000,72000000,843000000,-3833000000,685000000,CELG,A
1,1,1483142400,1999000000,505000000,926000000,-222000000,626000000,-55000000,85000000,4165000000,...,-1002000000,1564000000,-33000000.0,-1834000000,-39000000,1290000000,-2160000000,359000000,CELG,A
2,1,1451520000,1602000000,402000000,537000000,-305000000,564000000,-51000000,-326000000,2785000000,...,-6259000000,7297000000,-9000000.0,4283000000,-51000000,758000000,-3257000000,252000000,CELG,A
3,1,1419984000,1999900000,369400000,339900000,-166300000,242500000,-56500000,38300000,2806300000,...,-1438000000,2025300000,235200000.0,-417400000,-63700000,887200000,-2975100000,297200000,CELG,A
0,1,1530316800,1045000000,170000000,361000000,-110000000,103000000,-20000000,-152000000,1229000000,...,-128000000,995000000,,-2399000000,-18000000,-1316000000,-3396000000,2000000,CELG,Q
1,1,1522454400,846000000,126000000,-826000000,-47000000,-227000000,6000000,-193000000,-325000000,...,-5658000000,4452000000,-40000000.0,1756000000,33000000,-4194000000,-2700000000,44000000,CELG,Q
2,1,1514678400,-81000000,116000000,-762000000,-97000000,242000000,-5000000,212000000,1689000000,...,1098000000,1568000000,-40000000.0,-1292000000,7000000,1502000000,-2908000000,48000000,CELG,Q
3,1,1506729600,988000000,115000000,161000000,-64000000,-169000000,-5000000,-101000000,1088000000,...,-2374000000,-4000000,-40000000.0,132000000,5000000,-1149000000,-114000000,250000000,CELG,Q


In [61]:
parse_finstmt(summary, 'BS', symbol)

Unnamed: 0,maxAge,endDate,cash,shortTermInvestments,netReceivables,inventory,otherCurrentAssets,totalCurrentAssets,longTermInvestments,propertyPlantEquipment,...,totalLiab,commonStock,retainedEarnings,treasuryStock,capitalSurplus,otherStockholderEquity,totalStockholderEquity,netTangibleAssets,symbol,period
0,1,1514678400,7013000000,5029000000,2001000000,541000000,161000000,14892000000,1000000.0,1070000000,...,23220000000,10000000,13061000000,-19956000000,13806000000,287000000,6921000000,-6381000000,CELG,A
1,1,1483142400,6170000000,1800000000,1693000000,498000000,548000000,10868000000,32000000.0,930000000,...,21486000000,10000000,10074000000,-15862000000,12378000000,419000000,6600000000,-8658000000,CELG,A
2,1,1451520000,4880300000,1671600000,1420900000,443400000,754000000,9400900000,22400000.0,814100000,...,21045400000,9400000,8074400000,-13284100000,11119300000,767700000,5919000000,-9818100000,CELG,A
3,1,1419984000,4121600000,3425100000,1166700000,393100000,588100000,9712600000,1300000.0,642600000,...,10815300000,9200000,6472400000,-9784000000,9827200000,914800000,6524800000,266000000,CELG,A
0,1,1530316800,1503000000,1907000000,2089000000,555000000,792000000,6853000000,,1292000000,...,30014000000,10000000,15404000000,-25819000000,13835000000,-119000000,3430000000,-21044000000,CELG,Q
1,1,1522454400,2819000000,1921000000,1991000000,536000000,598000000,7872000000,,1251000000,...,29384000000,10000000,14359000000,-23274000000,14077000000,-328000000,5172000000,-19445000000,CELG,Q
2,1,1514678400,7013000000,5029000000,2001000000,541000000,161000000,14892000000,,1070000000,...,23220000000,10000000,13061000000,-19956000000,13806000000,287000000,6921000000,-6381000000,CELG,Q
3,1,1506729600,5511000000,6248000000,1816000000,537000000,643000000,14783000000,,1002000000,...,21886000000,10000000,13142000000,-16906000000,13604000000,337000000,9850000000,-5153000000,CELG,Q


In [62]:
parse_finstmt(summary, 'IS', symbol)

Unnamed: 0,maxAge,endDate,totalRevenue,costOfRevenue,grossProfit,researchDevelopment,sellingGeneralAdministrative,totalOperatingExpenses,operatingIncome,totalOtherIncomeExpenseNet,ebit,interestExpense,incomeBeforeTax,incomeTaxExpense,netIncomeFromContinuingOps,netIncome,netIncomeApplicableToCommonShares,symbol,period
0,1,1514678400,13003000000,1294000000,11709000000,3274000000,2626000000,7523000000,5480000000,-1166000000,5480000000,-522000000,4314000000,1374000000,2940000000,2940000000,2940000000,CELG,A
1,1,1483142400,11229000000,1365000000,9864000000,3543000000,2459000000,7725000000,3504000000,-1132000000,3504000000,-500000000,2372000000,373000000,1999000000,1999000000,1999000000,CELG,A
2,1,1451520000,9256000000,1949000000,7307000000,2089000000,2300000000,6617000000,2639000000,-616000000,2639000000,-311000000,2023000000,421000000,1602000000,1602000000,1602000000,CELG,A
3,1,1419984000,7670400000,891000000,6779400000,1796300000,2027900000,4973500000,2696900000,-369500000,2696900000,-176100000,2327400000,327500000,1999900000,1999900000,1999900000,CELG,A
0,1,1530316800,3814000000,126000000,3688000000,1251000000,790000000,2244000000,1570000000,-263000000,1570000000,-192000000,1307000000,262000000,1045000000,1045000000,1045000000,CELG,Q
1,1,1522454400,3538000000,382000000,3156000000,1956000000,864000000,3289000000,249000000,781000000,249000000,-166000000,1030000000,184000000,846000000,846000000,846000000,CELG,Q
2,1,1514678400,3483000000,242000000,3241000000,807000000,774000000,1902000000,1581000000,-450000000,1581000000,-142000000,1131000000,1212000000,-81000000,-81000000,-81000000,CELG,Q
3,1,1506729600,3287000000,729000000,2558000000,736000000,608000000,2153000000,1134000000,-143000000,1134000000,-127000000,991000000,3000000,988000000,988000000,988000000,CELG,Q


#### Cleanups

In [None]:
# finStmtIS_df.columns
# cs_cols = ['totalRevenue', 'costOfRevenue', 'grossProfit',
#        'researchDevelopment', 'sellingGeneralAdministrative',
#        'totalOperatingExpenses', 'operatingIncome',
#        'totalOtherIncomeExpenseNet', 'ebit', 'interestExpense',
#        'incomeBeforeTax', 'incomeTaxExpense', 'minorityInterest',
#        'netIncomeFromContinuingOps', 'discontinuedOperations', 'netIncome',
#        'netIncomeApplicableToCommonShares']
# cs_hist_cols = ['costOfRevenue', 'grossProfit', 'researchDevelopment', 
#                 'sellingGeneralAdministrative', 'operatingIncome',
#                 'ebit', 'interestExpense','incomeTaxExpense',
#                 'netIncomeFromContinuingOps','netIncome']
# finStmtCF_df.info(), finStmtBS_df.info(), finStmtIS_df.info()
# is_df = finStmtIS_df[(finStmtIS_df.period == 'A') & (finStmtIS_df.endDate == 1514678400)][cs_cols].copy()
# cs_df = (is_df.T / is_df.totalRevenue.values).T
# cs_df[cs_df.columns[1:6]].plot.hist(range=(0, 1), alpha=0.5)
# cs_df.sort_values(by='netIncomeApplicableToCommonShares', ascending=False)

In [43]:
# NOTE(review): `d` is not assigned at top level in any visible cell (inside
# unpack_summaries it is only a local loop variable), so this cell appears to
# rely on leftover kernel state -- confirm before re-running on a fresh kernel.
path = get_path('summary', d)
'temp/' + json_ext.format(symbol)

'temp/A.json'

In [67]:
dates[0], get_path('summary', dates[0])

('2018-09-07', 'summary/2018-09-07/')

In [68]:
fileList = list_files('summary', dates[0])

In [73]:
# Rewrite files for dates[0] whose JSON is still a dict wrapping
# ['quoteSummary']['result'], storing just that inner result under the same
# symbol path. Files that do not load as a dict are left untouched.
path = get_path('summary', dates[0])  # same for every file, so resolved once
fileList = list_files('summary', dates[0])
for f in fileList:
    # Strip only the '.json' extension so dotted tickers (e.g. 0700.HK) keep
    # their suffix, matching how unpack_summaries derives the symbol.
    symbol = f.split('/')[2].split('.json')[0]
    consol_summary = json_load(f)
    if isinstance(consol_summary, dict):
        print('processing:', f)
        result = consol_summary['quoteSummary']['result']
        data = json.dumps(result)
        target = path + json_ext.format(symbol)
        store_s3(data, target)
        print('saving:', target)

processing: summary/2018-09-07/ATVI.json
saving: summary/2018-09-07/ATVI.json
processing: summary/2018-09-07/AVX.json
saving: summary/2018-09-07/AVX.json
processing: summary/2018-09-07/AYX.json
saving: summary/2018-09-07/AYX.json
processing: summary/2018-09-07/BBY.json
saving: summary/2018-09-07/BBY.json
processing: summary/2018-09-07/BC.json
saving: summary/2018-09-07/BC.json
processing: summary/2018-09-07/BKNG.json
saving: summary/2018-09-07/BKNG.json
processing: summary/2018-09-07/BL.json
saving: summary/2018-09-07/BL.json
processing: summary/2018-09-07/BMY.json
saving: summary/2018-09-07/BMY.json
processing: summary/2018-09-07/BOX.json
saving: summary/2018-09-07/BOX.json
processing: summary/2018-09-07/BRKR.json
saving: summary/2018-09-07/BRKR.json
processing: summary/2018-09-07/BRKS.json
saving: summary/2018-09-07/BRKS.json
processing: summary/2018-09-07/CA.json
saving: summary/2018-09-07/CA.json
processing: summary/2018-09-07/CAKE.json
saving: summary/2018-09-07/CAKE.json
processi

saving: summary/2018-09-07/NEOG.json
processing: summary/2018-09-07/NEWR.json
saving: summary/2018-09-07/NEWR.json
processing: summary/2018-09-07/NKE.json
saving: summary/2018-09-07/NKE.json
processing: summary/2018-09-07/NOW.json
saving: summary/2018-09-07/NOW.json
processing: summary/2018-09-07/NTAP.json
saving: summary/2018-09-07/NTAP.json
processing: summary/2018-09-07/NTGR.json
saving: summary/2018-09-07/NTGR.json
processing: summary/2018-09-07/NVDA.json
saving: summary/2018-09-07/NVDA.json
processing: summary/2018-09-07/NWS.json
saving: summary/2018-09-07/NWS.json
processing: summary/2018-09-07/NWSA.json
saving: summary/2018-09-07/NWSA.json
processing: summary/2018-09-07/OLED.json
saving: summary/2018-09-07/OLED.json
processing: summary/2018-09-07/OLLI.json
saving: summary/2018-09-07/OLLI.json
processing: summary/2018-09-07/ORCL.json
saving: summary/2018-09-07/ORCL.json
processing: summary/2018-09-07/PANW.json
saving: summary/2018-09-07/PANW.json
processing: summary/2018-09-07/PA