In [133]:
# imports
%reload_ext autoreload
%autoreload 2

from basic_utils import *
import matplotlib.pyplot as plt

In [225]:
dates = read_dates('summary')

In [236]:
symbol_col = 'symbol'
symbol = 'GOOGL'

In [237]:
res = get_grouped_ds(symbol, 'summary')

In [238]:
path = get_path('summary', dates[-1])
fileList = list_files('summary', dates[0])

In [239]:
path + json_ext.format(symbol)

'summary/2019-02-14/GOOGL.json'

In [240]:
summary = json_load(path + json_ext.format(symbol))[0]

#### processing functions

In [241]:
print('Found {} keys for {}'.format(len(summary.keys()), symbol))
summary.keys()

Found 22 keys for GOOGL


dict_keys(['assetProfile', 'recommendationTrend', 'cashflowStatementHistory', 'institutionOwnership', 'majorHoldersBreakdown', 'balanceSheetHistoryQuarterly', 'earningsHistory', 'indexTrend', 'majorDirectHolders', 'defaultKeyStatistics', 'netSharePurchaseActivity', 'incomeStatementHistory', 'fundOwnership', 'incomeStatementHistoryQuarterly', 'cashflowStatementHistoryQuarterly', 'insiderHolders', 'earnings', 'calendarEvents', 'upgradeDowngradeHistory', 'balanceSheetHistory', 'earningsTrend', 'financialData'])

In [177]:
# utility fuctions
fin_stmt_mappings = {
    "CF": {
        "A":"cashflowStatementHistory",
        "Q":"cashflowStatementHistoryQuarterly",
        "B":"cashflowStatements"},
    "BS": {
        "A":"balanceSheetHistory",
        "Q":"balanceSheetHistoryQuarterly",
        "B":"balanceSheetStatements"},
    "IS": {
        "A":"incomeStatementHistory",
        "Q":"incomeStatementHistoryQuarterly",
        "B":"incomeStatementHistory"}}

show_structure = lambda dict_struct: {k: type(v) for k, v in dict_struct.items()}
remove_empty_keys = lambda dict_struct: {k: v for k, v in dict_struct.items() if dict_struct[k]}
get_column_order = lambda route: list(remove_empty_keys(route))
get_symbol_index = lambda route, indexKey: [indexKey for x in range(len(route))]

def create_df(route, indexKey):
    order = get_column_order(route[-1])
    df = pd.DataFrame(route)[order]
    df = set_symbol(df, indexKey)
    return df
def create_normalized_df(route, indexKey):
    order = get_column_order(route[-1])
    df = clean_up_fmt(json_normalize(route))[order]
    df = set_symbol(df, indexKey)
    return df
def clean_single_row_df(route):
    order = get_column_order(route)
    df = clean_up_fmt(json_normalize(route))[order]
    return df
def single_row_df(route, indexKey):
    df = clean_single_row_df(route)
    df = set_symbol(df, indexKey)
    return df

def set_storeDate(df, date):
    df['storeDate'] = datetime.strptime(str(date), '%Y-%m-%d').timestamp()
    return df
def set_symbol(df, symbol):
    df[symbol_col] = symbol
    return df

def parse_finstmt(summary, stmt, symbol):
    df = pd.DataFrame()
    mapping = fin_stmt_mappings[stmt]
    A = summary[mapping['A']][mapping['B']]
    if A: 
        df = create_normalized_df(A, symbol)
        df['period'] = 'A'
    Q = summary[mapping['Q']][mapping['B']]
    if Q: 
        q_df = create_normalized_df(Q, symbol)
        q_df['period'] = 'Q'
        df = df.append(q_df, sort=False)
    return df

def parse_earnings_trend(summary, symbol):
    route = summary['earningsTrend']['trend']
    epsEst_df = pd.DataFrame()
    revEst_df = pd.DataFrame()
    epsTrend_df = pd.DataFrame()
    epsRev_df = pd.DataFrame()
    period_df = pd.DataFrame()

    for item in route:
        epsEst_df = epsEst_df.append(single_row_df(item.pop('earningsEstimate'), symbol), sort=False)
        revEst_df = revEst_df.append(single_row_df(item.pop('revenueEstimate'), symbol), sort=False)
        epsTrend_df = epsTrend_df.append(single_row_df(item.pop('epsTrend'), symbol), sort=False)
        epsRev_df = epsRev_df.append(single_row_df(item.pop('epsRevisions'), symbol), sort=False)
        period_df = period_df.append(clean_single_row_df(item), sort=False)

    if 'growth' in epsEst_df.columns: epsEst_df.drop(labels='growth', axis=1, inplace=True)
    if 'growth' in revEst_df.columns: revEst_df.drop(labels='growth', axis=1, inplace=True)
        
    epsEst_df = pd.concat([period_df, epsEst_df], axis=1)
    revEst_df = pd.concat([period_df, revEst_df], axis=1)
    epsTrend_df = pd.concat([period_df, epsTrend_df], axis=1)
    epsRev_df = pd.concat([period_df, epsRev_df], axis=1)
    
    return epsEst_df, revEst_df, epsTrend_df, epsRev_df

def get_mult_rows(key, summary, symbol): 
    if key in summary: return create_normalized_df(summary[key], symbol)
def get_single_row(key, summary, symbol): 
    if key in summary: return single_row_df(summary[key], symbol)

def direct_row(summary, symbol): return single_row_df(summary, symbol)
def direct_rows(summary, symbol): return create_normalized_df(summary, symbol)

#### Unpack the summaries

In [178]:
# Pull a specific security
# index = 55
# print(fileList[index])
# summary = json_load(fileList[index])[0]

In [179]:
# unpack daily summary
def unpack_summaries(dates):

    for d in dates:
        
        profile_df = pd.DataFrame()
        officers_df = pd.DataFrame()
        keyStats_df = pd.DataFrame()
        finStats_df = pd.DataFrame()
        finStmtBS_df = pd.DataFrame()
        finStmtIS_df = pd.DataFrame()
        finStmtCF_df = pd.DataFrame()
        earningsEst_df = pd.DataFrame()
        revenueEst_df = pd.DataFrame()
        epsTrend_df = pd.DataFrame()
        epsRevisions_df = pd.DataFrame()
        netSharePA_df = pd.DataFrame()
        
        print('Unpacking summary for {}'.format(d))
        fileList = list_files('summary', d)
        i = 0
        for f in fileList:
            symbol = f.split('/')[2].split('.json')[0]
            consol_summary = json_load(f)

            if consol_summary:
                summary = consol_summary[0]

                # profile
                key = 'assetProfile'
                if key in summary:
                    officers = summary[key].pop('companyOfficers')
                    if officers:
                        officers_df = officers_df.append(direct_rows(officers, symbol), sort=False)
                    profile_df = profile_df.append(get_single_row(key, summary, symbol), sort=False)

                # stats
                key = 'defaultKeyStatistics'
                if key in summary:
                    keyStats_df = keyStats_df.append(get_single_row(key, summary, symbol), sort=False)
                
                key = 'financialData'
                if key in summary:
                    finStats_df = finStats_df.append(get_single_row(key, summary, symbol), sort=False)

                # financials
                A, Q = fin_stmt_mappings['CF']['A'], fin_stmt_mappings['CF']['Q']
                if A in summary and Q in summary:
                    finStmtCF_df = finStmtCF_df.append(parse_finstmt(summary, 'CF', symbol), sort=False)
                A, Q = fin_stmt_mappings['BS']['A'], fin_stmt_mappings['BS']['Q']
                if A in summary and Q in summary:
                    finStmtBS_df = finStmtBS_df.append(parse_finstmt(summary, 'BS', symbol), sort=False)
                A, Q = fin_stmt_mappings['IS']['A'], fin_stmt_mappings['IS']['Q']
                if A in summary and Q in summary:
                    finStmtIS_df = finStmtIS_df.append(parse_finstmt(summary, 'IS', symbol), sort=False)

                # earningsTrend
                key = 'earningsTrend'
                if key in summary:
                    eps_est, rev_est, eps_trend, eps_rev = parse_earnings_trend(summary, symbol)
                    earningsEst_df = earningsEst_df.append(eps_est, sort=False)
                    revenueEst_df = revenueEst_df.append(rev_est, sort=False)
                    epsTrend_df = epsTrend_df.append(eps_trend, sort=False)
                    epsRevisions_df = epsRevisions_df.append(eps_rev, sort=False)

                # netSharePurchaseActivity
                key = 'netSharePurchaseActivity'
                netSharePA_df = netSharePA_df.append(get_single_row(key, summary, symbol), sort=False)

            print('{} Full unpack for {}'.format(i, symbol))
            i += 1

        # static info
        # profile
        csv_store(set_storeDate(profile_df, today_date), 'summary-categories/', csv_ext.format('assetProfile'))
        csv_store(set_storeDate(officers_df, today_date), 'summary-categories/', csv_ext.format('companyOfficers'))

        # financials -> need to find a way to append to this file
        csv_store(set_storeDate(finStmtBS_df, today_date), 'summary-categories/', csv_ext.format('financials-BS'))
        csv_store(set_storeDate(finStmtIS_df, today_date), 'summary-categories/', csv_ext.format('financials-IS'))
        csv_store(set_storeDate(finStmtCF_df, today_date), 'summary-categories/', csv_ext.format('financials-CF'))

        # should be updated daily
        fname = csv_ext.format(str(today_date))
        csv_store(set_storeDate(keyStats_df, today_date), 'summary-categories/defaultKeyStatistics/', fname)
        csv_store(set_storeDate(finStats_df, today_date), 'summary-categories/financialData/', fname)
        csv_store(set_storeDate(earningsEst_df, today_date), 'summary-categories/earningsEstimate/', fname)
        csv_store(set_storeDate(revenueEst_df, today_date), 'summary-categories/revenueEstimate/', fname)
        csv_store(set_storeDate(epsTrend_df, today_date), 'summary-categories/epsTrend/', fname)
        csv_store(set_storeDate(epsRevisions_df, today_date), 'summary-categories/epsRevisions/', fname)
        csv_store(set_storeDate(netSharePA_df, today_date), 'summary-categories/netSharePurchaseActivity/', fname)

In [180]:
# tgt_dates = dates[:-3:-1]
tgt_dates = [dates[-1]]
tgt_dates

['2019-02-14']

In [181]:
# unpack_summaries(tgt_dates)

#### assetProfile

In [242]:
officers = summary['assetProfile'].pop('companyOfficers')

In [243]:
single_row_df(summary['assetProfile'], symbol)

Unnamed: 0,address1,city,state,zip,country,phone,website,industry,sector,longBusinessSummary,fullTimeEmployees,auditRisk,boardRisk,compensationRisk,shareHolderRightsRisk,overallRisk,governanceEpochDate,compensationAsOfEpochDate,maxAge,symbol
0,1600 Amphitheatre Parkway,Mountain View,CA,94043,United States,650-253-0000,http://www.abc.xyz,Internet Content & Information,Technology,Alphabet Inc. provides online advertising serv...,98771,2,8,10,10,10,1548979200,1514678400,86400,GOOGL


In [244]:
create_normalized_df(officers, symbol)

Unnamed: 0,maxAge,name,age,title,yearBorn,exercisedValue,unexercisedValue,symbol
0,1,Mr. Lawrence Edward Page,45,"Co-Founder, CEO & Director",1973,0,0,GOOGL
1,1,Mr. Sergey Brin,44,"Co-Founder, Pres & Director",1974,0,0,GOOGL
2,1,Ms. Ruth Myrna Porat,60,Sr. VP & CFO,1958,0,0,GOOGL
3,1,Mr. David C. Drummond,55,"Sr. VP of Corp. Devel., Chief Legal Officer & ...",1963,0,0,GOOGL
4,1,Ms. Amie Thuener O'Toole,42,Chief Accounting Officer & VP,1976,0,0,GOOGL


#### majorHoldersBreakdown

In [245]:
pd.DataFrame(summary['majorHoldersBreakdown']).loc['raw']

maxAge                                1
insidersPercentHeld             0.00161
institutionsPercentHeld         0.80354
institutionsFloatPercentHeld    0.80484
institutionsCount                  3167
Name: raw, dtype: object

#### institutionOwnership

In [246]:
create_normalized_df(summary['institutionOwnership']['ownershipList'], symbol)

Unnamed: 0,maxAge,reportDate,organization,pctHeld,position,value,symbol
0,1,1538265600,"Vanguard Group, Inc. (The)",0.0714,21342069,25761584648,GOOGL
1,1,1546214400,Blackrock Inc.,0.0628,18767367,19611147820,GOOGL
2,1,1538265600,"FMR, LLC",0.055,16434628,19837910766,GOOGL
3,1,1546214400,State Street Corporation,0.0359,10742476,11225457720,GOOGL
4,1,1538265600,Price (T.Rowe) Associates Inc,0.022,6580585,7943292541,GOOGL
5,1,1538265600,Capital Research Global Investors,0.0177,5299665,6397119628,GOOGL
6,1,1546214400,Northern Trust Corporation,0.0127,3782363,3952418040,GOOGL
7,1,1538265600,Invesco Ltd.,0.0125,3722910,4493850202,GOOGL
8,1,1538265600,"Geode Capital Management, LLC",0.012,3572834,4312696464,GOOGL
9,1,1538265600,"Wellington Management Company, LLP",0.0119,3544079,4277986879,GOOGL


#### recommendationTrend

In [247]:
create_df(summary['recommendationTrend']['trend'], symbol)

Unnamed: 0,period,strongBuy,buy,hold,symbol
0,0m,13,25,5,GOOGL
1,-1m,16,26,3,GOOGL
2,-2m,16,26,3,GOOGL
3,-3m,13,25,5,GOOGL


#### earningsHistory

In [248]:
eh = create_normalized_df(summary['earningsHistory']['history'], symbol)
eh

Unnamed: 0,maxAge,epsActual,epsEstimate,epsDifference,surprisePercent,quarter,period,symbol
0,1,9.7,9.98,-0.28,-0.028,1514678400,-4q,GOOGL
1,1,13.33,9.28,4.05,0.436,1522454400,-3q,GOOGL
2,1,4.54,9.59,-5.05,-0.527,1530316800,-2q,GOOGL
3,1,13.06,10.42,2.64,0.253,1538265600,-1q,GOOGL


#### indexTrend

In [249]:
summary['indexTrend']

{'maxAge': 1,
 'symbol': 'SP5',
 'peRatio': {'raw': 13.0587, 'fmt': '13.06'},
 'pegRatio': {'raw': 1.55072, 'fmt': '1.55'},
 'estimates': [{'period': '0q', 'growth': {'raw': 0.045, 'fmt': '0.05'}},
  {'period': '+1q', 'growth': {'raw': 0.039, 'fmt': '0.04'}},
  {'period': '0y', 'growth': {'raw': 0.099, 'fmt': '0.10'}},
  {'period': '+1y', 'growth': {'raw': 0.094, 'fmt': '0.09'}},
  {'period': '+5y', 'growth': {'raw': 0.117256, 'fmt': '0.12'}},
  {'period': '-5y', 'growth': {}}]}

In [250]:
route = summary['indexTrend']['estimates']
df = clean_up_fmt(json_normalize(route))
df.index = get_symbol_index(df, symbol)
df

Unnamed: 0,growth,period
GOOGL,0.045,0q
GOOGL,0.039,+1q
GOOGL,0.099,0y
GOOGL,0.094,+1y
GOOGL,0.117256,+5y
GOOGL,,-5y


#### defaultKeyStatistics

In [251]:
single_row_df(summary['defaultKeyStatistics'], symbol)

Unnamed: 0,maxAge,priceHint,enterpriseValue,forwardPE,profitMargins,floatShares,sharesOutstanding,sharesShort,sharesShortPriorMonth,sharesShortPreviousMonthDate,...,trailingEps,forwardEps,pegRatio,lastSplitFactor,lastSplitDate,enterpriseToRevenue,enterpriseToEbitda,52WeekChange,SandP52WeekChange,symbol
0,1,2,678750257152,24.02042,0.22465,603776337,299360000,1606498,2621656,1546214400,...,43.703,47.01,1.65,1000/1998,1396483200,4.961,16.792,0.03415,0.007993,GOOGL


#### netSharePurchaseActivity

In [192]:
single_row_df(summary['netSharePurchaseActivity'], symbol)

Unnamed: 0,maxAge,period,buyInfoCount,buyInfoShares,sellInfoCount,sellInfoShares,netInfoCount,netInfoShares,netPercentInsiderShares,totalInsiderShares,symbol
0,1,6m,35,391694,30,146473,65,245221,-1.293,27943,GOOG


#### fundOwnership

In [193]:
create_normalized_df(summary['fundOwnership']['ownershipList'], symbol)

Unnamed: 0,maxAge,reportDate,organization,pctHeld,position,value,symbol
0,1,1538265600,Vanguard Total Stock Market Index Fund,0.0217,7600886,9071429414,GOOG
1,1,1538265600,Vanguard 500 Index Fund,0.0164,5733009,6842174251,GOOG
2,1,1538265600,Growth Fund Of America Inc,0.0154,5375762,6415810674,GOOG
3,1,1543536000,SPDR S&P 500 ETF Trust,0.01,3497893,3828199035,GOOG
4,1,1543536000,Fidelity Contrafund Inc,0.0085,2981841,3263416245,GOOG
5,1,1538265600,Vanguard Institutional Index Fund-Institutiona...,0.0084,2937383,3505678489,GOOG
6,1,1543536000,"Invesco ETF Tr-Invesco QQQ Tr, Series 1 ETF",0.0082,2878965,3150825664,GOOG
7,1,1543536000,Fidelity 500 Index Fund,0.0062,2153268,2356601097,GOOG
8,1,1543536000,iShares Core S&P 500 ETF,0.0061,2143684,2346112080,GOOG
9,1,1538265600,Vanguard Growth Index Fund,0.006,2103274,2510194420,GOOG


#### insiderHolders

In [194]:
create_normalized_df(summary['insiderHolders']['holders'], symbol)

Unnamed: 0,maxAge,name,relation,transactionDescription,latestTransDate,positionIndirect,positionIndirectDate,symbol
0,1,BRIN SERGEY,President,Stock Gift,1543449600,,,GOOG
1,1,DOERR L JOHN,Director,Sale,1542240000,898374.0,1542240000.0,GOOG
2,1,DRUMMOND DAVID C,Officer,Sale,1547424000,,,GOOG
3,1,GREENE DIANE B,Director,Conversion of Exercise of derivative security,1548374400,,,GOOG
4,1,HENNESSY JOHN L,Director,Sale,1532476800,,,GOOG
5,1,MATHER ANN,Director,Sale,1548979200,,,GOOG
6,1,O TOOLE AMIE THUENER,Officer,Sale,1549238400,,,GOOG
7,1,PICHAI SUNDAR,Officer and Director,Conversion of Exercise of derivative security,1548374400,,,GOOG
8,1,PORAT RUTH M,Chief Financial Officer,Conversion of Exercise of derivative security,1545782400,,,GOOG
9,1,SCHMIDT ERIC E,Director,Stock Gift,1546387200,3996310.0,1546387000.0,GOOG


#### earnings

In [None]:
financialChart = summary['earnings']['financialsChart']
finChart_df = create_normalized_df(financialChart['yearly'], symbol)
finChart_df = finChart_df.append(create_normalized_df(financialChart['quarterly'], symbol), sort=False)

earningsChart = summary['earnings']['earningsChart']
qearnChart_df = create_normalized_df(earningsChart.pop('quarterly'), symbol)
earnInfo_df = clean_up_fmt(json_normalize(earningsChart))
earnInfo_df['earningsDate'] = earningsChart['earningsDate'][0]['raw'] if len(earningsChart['earningsDate']) > 0 else np.nan

In [201]:
earningsChart

{'currentQuarterEstimate': {'raw': 10.82, 'fmt': '10.82'},
 'currentQuarterEstimateDate': '4Q',
 'currentQuarterEstimateYear': 2018,
 'earningsDate': []}

In [202]:
finChart_df

Unnamed: 0,date,revenue,earnings,symbol
0,2015,74989000000,16348000000,GOOG
1,2016,90272000000,19478000000,GOOG
2,2017,110855000000,12662000000,GOOG
3,2018,136819000000,30736000000,GOOG
0,1Q2018,31146000000,9401000000,GOOG
1,2Q2018,32657000000,3195000000,GOOG
2,3Q2018,33740000000,9192000000,GOOG
3,4Q2018,39276000000,8948000000,GOOG


In [203]:
qearnChart_df

Unnamed: 0,date,actual,estimate,symbol
0,4Q2017,9.7,9.96,GOOG
1,1Q2018,13.33,9.32,GOOG
2,2Q2018,4.54,9.54,GOOG
3,3Q2018,13.06,10.4,GOOG


In [206]:
earnInfo_df

Unnamed: 0,currentQuarterEstimate,currentQuarterEstimateDate,currentQuarterEstimateYear,earningsDate
0,10.82,4Q,2018,


#### calendarEvents

In [209]:
route = summary['calendarEvents']['earnings']
cal_df = single_row_df(summary['calendarEvents']['earnings'], symbol)
cal_df['earningsDate'] = route['earningsDate'][0]['raw'] if len(route['earningsDate']) > 0 else np.nan
cal_df

Unnamed: 0,earningsAverage,earningsLow,earningsHigh,revenueAverage,revenueLow,revenueHigh,symbol,earningsDate
0,10.82,9.28,12.13,38910500000,38029000000,39455000000,GOOG,


#### upgradeDowngradeHistory

In [210]:
route = summary['upgradeDowngradeHistory']['history']
create_normalized_df(route, symbol)

Unnamed: 0,epochGradeDate,firm,toGrade,action,symbol
0,1443744000,Oppenheimer,Outperform,up,GOOG
1,1441670400,Moffett Nathanson,Buy,init,GOOG
2,1439942400,Atlantic Equities,Overweight,up,GOOG
3,1439251200,Monness Crespi Hardt,Buy,up,GOOG
4,1439251200,Mizuho,Buy,up,GOOG
5,1439251200,Stifel Nicolaus,Buy,up,GOOG
6,1439251200,Morgan Stanley,Overweight,up,GOOG
7,1439164800,Susquehanna,Positive,init,GOOG
8,1439164800,Stifel Nicolaus,Buy,up,GOOG
9,1435276800,Mizuho,Neutral,init,GOOG


#### earningsTrend

In [211]:
import copy
summ2 = copy.deepcopy(summary)
epsEst_df, revEst_df, epsTrend_df, epsRev_df = parse_earnings_trend(summ2, symbol)

In [212]:
epsEst_df

Unnamed: 0,maxAge,period,endDate,growth,avg,low,high,yearAgoEps,numberOfAnalysts,symbol
0,1,0q,2018-12-31,0.115,10.82,9.28,12.13,9.7,31.0,GOOG
0,1,+1q,2019-03-31,-0.213,10.49,9.71,11.85,13.33,24.0,GOOG
0,1,0y,2018-12-31,0.296,41.81,37.94,43.88,32.25,30.0,GOOG
0,1,+1y,2019-12-31,0.126,47.06,43.42,53.3,41.81,37.0,GOOG
0,1,+5y,,0.1636,,,,,,GOOG
0,1,-5y,,,,,,,,GOOG


In [213]:
revEst_df

Unnamed: 0,maxAge,period,endDate,growth,avg,low,high,numberOfAnalysts,yearAgoRevenue,symbol
0,1,0q,2018-12-31,0.115,38910500000.0,38029000000.0,39455000000.0,28.0,32323000000.0,GOOG
0,1,+1q,2019-03-31,-0.213,37065100000.0,34326000000.0,38190800000.0,23.0,31146000000.0,GOOG
0,1,0y,2018-12-31,0.296,136473000000.0,135572000000.0,136998000000.0,32.0,110855000000.0,GOOG
0,1,+1y,2019-12-31,0.126,162557000000.0,158247000000.0,167038000000.0,32.0,136473000000.0,GOOG
0,1,+5y,,0.1636,,,,,,GOOG
0,1,-5y,,,,,,,,GOOG


In [214]:
epsTrend_df

Unnamed: 0,maxAge,period,endDate,growth,current,7daysAgo,30daysAgo,60daysAgo,90daysAgo,symbol
0,1,0q,2018-12-31,0.115,10.82,10.86,10.98,10.88,10.88,GOOG
0,1,+1q,2019-03-31,-0.213,10.49,10.45,10.49,10.48,10.48,GOOG
0,1,0y,2018-12-31,0.296,41.81,41.83,41.93,41.78,41.78,GOOG
0,1,+1y,2019-12-31,0.126,47.06,47.16,47.28,47.19,47.02,GOOG
0,1,+5y,,0.1636,,,,,,GOOG
0,1,-5y,,,,,,,,GOOG


In [215]:
epsRev_df

Unnamed: 0,maxAge,period,endDate,growth,upLast7days,upLast30days,downLast30days,symbol
0,1,0q,2018-12-31,0.115,0.0,1.0,4.0,GOOG
0,1,+1q,2019-03-31,-0.213,1.0,3.0,1.0,GOOG
0,1,0y,2018-12-31,0.296,0.0,2.0,4.0,GOOG
0,1,+1y,2019-12-31,0.126,0.0,2.0,5.0,GOOG
0,1,+5y,,0.1636,,,,GOOG
0,1,-5y,,,,,,GOOG


#### financialData

In [216]:
single_row_df(summary['financialData'], symbol)

Unnamed: 0,maxAge,currentPrice,targetHighPrice,targetLowPrice,targetMeanPrice,targetMedianPrice,recommendationMean,recommendationKey,numberOfAnalystOpinions,totalCash,...,grossProfits,freeCashflow,operatingCashflow,revenueGrowth,grossMargins,ebitdaMargins,operatingMargins,profitMargins,financialCurrency,symbol
0,86400,1121.67,1400.0,1240.0,1328.0,1350.0,1.7,buy,5,109140000768,...,77270000000,19776749568,47971000320,0.215,0.56476,0.29543,0.22944,0.22465,USD,GOOG


#### financials

In [218]:
route = summary['cashflowStatementHistoryQuarterly']['cashflowStatements']

In [219]:
parse_finstmt(summary, 'CF', symbol)

Unnamed: 0,maxAge,endDate,netIncome,depreciation,changeToNetincome,changeToAccountReceivables,changeToLiabilities,changeToOperatingActivities,totalCashFromOperatingActivities,capitalExpenditures,...,otherCashflowsFromInvestingActivities,totalCashflowsFromInvestingActivities,netBorrowings,otherCashflowsFromFinancingActivities,totalCashFromFinancingActivities,effectOfExchangeRate,changeInCash,repurchaseOfStock,symbol,period
0,1,1546214400,30736000000,9029000000,3298000000,-2169000000,1438000000,7890000000,47971000000,-25139000000,...,,-28504000000,-61000000,-4043000000,-13179000000,-302000000,5986000000,-9075000000,GOOG,A
1,1,1514678400,12662000000,6899000000,8284000000,-3768000000,1121000000,3682000000,37091000000,-13184000000,...,1419000000.0,-31401000000,-86000000,-3366000000,-8298000000,405000000,-2203000000,-4846000000,GOOG,A
2,1,1483142400,19478000000,6100000000,7158000000,-2578000000,333000000,2420000000,36036000000,-10212000000,...,-1978000000.0,-31165000000,-1335000000,-3304000000,-8332000000,-170000000,-3631000000,-3693000000,GOOG,A
3,1,1451520000,16348000000,5024000000,5609000000,-2094000000,246000000,1618000000,26572000000,-9950000000,...,75000000.0,-23711000000,-23000000,-2422000000,-4225000000,-434000000,-1798000000,-1780000000,GOOG,A
0,1,1546214400,8948000000,2575000000,969000000,-2887000000,873000000,2869000000,12987000000,-7081000000,...,,-6876000000,-5000000,-91000000,-2746000000,-107000000,3258000000,-2650000000,GOOG,Q
1,1,1538265600,9192000000,2356000000,1794000000,-670000000,293000000,1480000000,13210000000,-5282000000,...,,-10408000000,-25000000,-1253000000,-3478000000,-29000000,-705000000,-2200000000,GOOG,Q
2,1,1530316800,3195000000,2112000000,1345000000,-312000000,20000000,5210000000,10132000000,-5477000000,...,,-3374000000,-1344000000,-1541000000,-4937000000,-331000000,1490000000,-2052000000,GOOG,Q
3,1,1522454400,9401000000,1986000000,-810000000,1700000000,252000000,-1669000000,11642000000,-7299000000,...,,-7846000000,1313000000,-1158000000,-2018000000,165000000,1943000000,-2173000000,GOOG,Q


In [220]:
parse_finstmt(summary, 'BS', symbol)

Unnamed: 0,maxAge,endDate,cash,shortTermInvestments,netReceivables,inventory,otherCurrentAssets,totalCurrentAssets,longTermInvestments,propertyPlantEquipment,...,totalCurrentLiabilities,totalLiab,commonStock,retainedEarnings,treasuryStock,otherStockholderEquity,totalStockholderEquity,netTangibleAssets,symbol,period
0,1,1546214400,16701000000,92439000000,21193000000,1107000000,4236000000,135676000000,13859000000,59719000000,...,34620000000,55164000000,45049000000,134885000000,-2306000000,-2306000000,177628000000,157520000000,GOOG,A
1,1,1514678400,10715000000,91156000000,18705000000,749000000,2983000000,124308000000,7813000000,42383000000,...,24183000000,44793000000,40247000000,113247000000,-992000000,-992000000,152502000000,133063000000,GOOG,A
2,1,1483142400,12918000000,73415000000,15632000000,268000000,3175000000,105408000000,5878000000,34234000000,...,16756000000,28461000000,36307000000,105131000000,-2402000000,-2402000000,139036000000,119261000000,GOOG,A
3,1,1451520000,15409000000,56517000000,13459000000,491000000,1590000000,90114000000,5183000000,29016000000,...,19310000000,27130000000,32982000000,89223000000,-1874000000,-1874000000,120331000000,100615000000,GOOG,A
0,1,1546214400,16701000000,92439000000,21193000000,1107000000,4236000000,135676000000,13859000000,59719000000,...,34620000000,55164000000,45049000000,134885000000,-2306000000,-2306000000,177628000000,157520000000,GOOG,Q
1,1,1538265600,13443000000,92973000000,18067000000,1212000000,4007000000,129702000000,12673000000,55300000000,...,31301000000,51698000000,43111000000,128405000000,-1676000000,-1676000000,169840000000,149497000000,GOOG,Q
2,1,1530316800,14148000000,88106000000,17244000000,698000000,3961000000,124157000000,11487000000,51672000000,...,29903000000,49610000000,42243000000,121282000000,-1525000000,-1525000000,162000000000,141443000000,GOOG,Q
3,1,1522454400,12658000000,90227000000,16814000000,636000000,3426000000,123761000000,10976000000,48845000000,...,25394000000,46110000000,41487000000,120008000000,-670000000,-670000000,160825000000,140154000000,GOOG,Q


In [221]:
parse_finstmt(summary, 'IS', symbol)

Unnamed: 0,maxAge,endDate,totalRevenue,costOfRevenue,grossProfit,researchDevelopment,sellingGeneralAdministrative,totalOperatingExpenses,operatingIncome,totalOtherIncomeExpenseNet,ebit,interestExpense,incomeBeforeTax,incomeTaxExpense,netIncomeFromContinuingOps,netIncome,netIncomeApplicableToCommonShares,symbol,period
0,1,1546214400,136819000000,59549000000,77270000000,21419000000,24459000000,105427000000,31392000000,3521000000,31392000000,-114000000,34913000000,4177000000,30736000000,30736000000,30736000000,GOOG,A
1,1,1514678400,110855000000,45583000000,65272000000,16625000000,19765000000,81973000000,28882000000,-1689000000,28882000000,-109000000,27193000000,14531000000,12662000000,12662000000,12662000000,GOOG,A
2,1,1483142400,90272000000,35138000000,55134000000,13948000000,17470000000,66556000000,23716000000,434000000,23716000000,-124000000,24150000000,4672000000,19478000000,19478000000,19478000000,GOOG,A
3,1,1451520000,74989000000,28164000000,46825000000,12282000000,15183000000,55629000000,19360000000,291000000,19360000000,-104000000,19651000000,3303000000,16348000000,16348000000,16348000000,GOOG,A
0,1,1546214400,39276000000,17918000000,21358000000,6034000000,7121000000,31073000000,8203000000,1869000000,8203000000,-29000000,10072000000,1124000000,8948000000,8948000000,8948000000,GOOG,Q
1,1,1538265600,33740000000,14281000000,19459000000,5232000000,5917000000,25430000000,8310000000,1773000000,8310000000,-28000000,10083000000,891000000,9192000000,9192000000,9192000000,GOOG,Q
2,1,1530316800,32657000000,13883000000,18774000000,5114000000,5782000000,24779000000,7878000000,-3663000000,7878000000,-27000000,4215000000,1020000000,3195000000,3195000000,3195000000,GOOG,Q
3,1,1522454400,31146000000,13467000000,17679000000,5039000000,5639000000,24145000000,7001000000,3542000000,7001000000,-30000000,10543000000,1142000000,9401000000,9401000000,9401000000,GOOG,Q


In [222]:
from datetime import datetime, timedelta, timezone
rpt_ts = 1546214400
rpt = datetime.fromtimestamp(rpt_ts)
ye = datetime(2018, 12, 31)
ye_ts = datetime.timestamp(dt)

ye_ts, rpt_ts, (ye_ts - rpt_ts) / (60*60)
60*60*8

28800

#### Cleanups

In [None]:
# finStmtIS_df.columns
# cs_cols = ['totalRevenue', 'costOfRevenue', 'grossProfit',
#        'researchDevelopment', 'sellingGeneralAdministrative',
#        'totalOperatingExpenses', 'operatingIncome',
#        'totalOtherIncomeExpenseNet', 'ebit', 'interestExpense',
#        'incomeBeforeTax', 'incomeTaxExpense', 'minorityInterest',
#        'netIncomeFromContinuingOps', 'discontinuedOperations', 'netIncome',
#        'netIncomeApplicableToCommonShares']
# cs_hist_cols = ['costOfRevenue', 'grossProfit', 'researchDevelopment', 
#                 'sellingGeneralAdministrative', 'operatingIncome',
#                 'ebit', 'interestExpense','incomeTaxExpense',
#                 'netIncomeFromContinuingOps','netIncome']
# finStmtCF_df.info(), finStmtBS_df.info(), finStmtIS_df.info()
# is_df = finStmtIS_df[(finStmtIS_df.period == 'A') & (finStmtIS_df.endDate == 1514678400)][cs_cols].copy()
# cs_df = (is_df.T / is_df.totalRevenue.values).T
# cs_df[cs_df.columns[1:6]].plot.hist(range=(0, 1), alpha=0.5)
# cs_df.sort_values(by='netIncomeApplicableToCommonShares', ascending=False)

In [None]:
path = get_path('summary', d)
'temp/' + json_ext.format(symbol)

In [None]:
dates[0], get_path('summary', dates[0])

In [None]:
fileList = list_files('summary', dates[0])

In [None]:
fileList = list_files('summary', dates[0])
for f in fileList:
    symbol = f.split('/')[2].split('.')[0]
    consol_summary = json_load(f)
    if type(consol_summary) is dict:
        print('processing:', f)
        l = consol_summary['quoteSummary']['result']
        data = json.dumps(l)
        path = get_path('summary', dates[0])
        store_s3(data, path + json_ext.format(symbol))
        print('saving:', path + json_ext.format(symbol))