In [1]:
import json
from urllib.request import urlopen

def fetch_json(path):
  """Fetch the resource at `path` and return its body parsed as JSON.

  `path` is passed unchanged to `urllib.request.urlopen`. The response
  bytes are decoded with the default codec of `bytes.decode()` (UTF-8)
  before being handed to the JSON parser.
  """
  with urlopen(path) as response:
    payload = response.read()
  return json.loads(payload.decode())

In [79]:
%%time

# Baseline approach: fill the three report tables one (date, metric) cell
# at a time through `.loc`, letting pandas enlarge the frames as needed.
annual = pd.DataFrame()
quarter = pd.DataFrame()
trailing = pd.DataFrame()

for record in data:
  metric = record['meta']['type'][0]
  # A metric may be announced in the metadata without any payload under
  # its own name; such a metric contributes no cells.
  try:
    points = record[metric]
  except KeyError:
    points = []
  for point in points:
    try:
      as_of = point['asOfDate']
      raw_value = point['reportedValue']['raw']
      # The metric-name prefix selects the destination table; a name that
      # matches none of the prefixes is not stored anywhere.
      for prefix, table in (('annual', annual),
                            ('quarter', quarter),
                            ('trailing', trailing)):
        if metric.startswith(prefix):
          table.loc[as_of, metric] = raw_value
          break
    except TypeError:
      # Entries that cannot be subscripted (e.g. None) are skipped.
      continue

CPU times: user 2.09 s, sys: 0 ns, total: 2.09 s
Wall time: 2.1 s


In [111]:
%%time

# Faster variant: turn each metric into a single Series and attach it to the
# matching report table as one whole column.
annual = pd.DataFrame()
quarter = pd.DataFrame()
trailing = pd.DataFrame()

for record in data:
  metric = record['meta']['type'][0]
  try:
    # None entries carry no value and are dropped before extraction.
    points = [point for point in record[metric] if point is not None]
    raw_values = [point['reportedValue']['raw'] for point in points]
    as_of_dates = [point['asOfDate'] for point in points]
  except KeyError:
    # Missing payload or missing field: the metric becomes an empty column.
    raw_values, as_of_dates = [], []
  column = pd.Series(raw_values, index=as_of_dates, name=metric)
  # axis=1 adds the Series as a new column; sort=True orders the row labels.
  if metric.startswith('annual'):
    annual = pd.concat([annual, column], axis=1, sort=True)
  elif metric.startswith('quarter'):
    quarter = pd.concat([quarter, column], axis=1, sort=True)
  elif metric.startswith('trailing'):
    trailing = pd.concat([trailing, column], axis=1, sort=True)

CPU times: user 624 ms, sys: 0 ns, total: 624 ms
Wall time: 627 ms


In [102]:
# Demo: concatenating an empty frame with named Series along axis=1 yields one
# column per Series, with rows aligned on the sorted union of their labels.
df = pd.DataFrame()
s1 = pd.Series({'a': 1, 'b': 2, 'c': 3}, name='hello')
s2 = pd.Series({'b': 4, 'd': 5}, name='world')
pd.concat([df, s1, s2], axis=1, sort=True)

Unnamed: 0,hello,world
a,1.0,
b,2.0,4.0
c,3.0,
d,,5.0


In [5]:
import pandas as pd

def _metric_series(dat, datatype):
  """Return the data points of one metric as a Series indexed by 'asOfDate'."""
  try:
    # None entries carry no value and are dropped before extraction.
    items = [val for val in dat[datatype] if val is not None]
    values = [val['reportedValue']['raw'] for val in items]
    dates = [val['asOfDate'] for val in items]
  except KeyError:
    # Missing payload or missing field: the metric becomes an empty column.
    values, dates = [], []
  return pd.Series(values, index=dates, name=datatype)


def rawdata_to_reports(data, symbol):
  """Split raw metric records into annual/quarter/trailing tables and save them.

  Parameters
  ----------
  data : iterable of dict
    Each record names its metric in ``record['meta']['type'][0]`` and may hold
    a list under that same name. List entries are either ``None`` or dicts
    providing ``'asOfDate'`` and ``['reportedValue']['raw']``.
  symbol : str
    Used as the file stem of the three CSV files that are written.

  Side effects
  ------------
  Writes ``./financials/{annual,quarter,trailing}/<symbol>.csv``, creating the
  directories when they do not exist. Each file has a ``date`` column followed
  by one column per metric whose name starts with that period prefix; metrics
  matching no prefix are ignored. Returns ``None``.
  """
  import os  # function-scope import keeps this cell self-contained

  # One bucket of columns per report, keyed by the metric-name prefix.
  columns = {'annual': [], 'quarter': [], 'trailing': []}
  for dat in data:
    datatype = dat['meta']['type'][0]
    s = _metric_series(dat, datatype)
    for period, bucket in columns.items():
      if datatype.startswith(period):
        bucket.append(s)
        break

  for period, bucket in columns.items():
    # Concatenate once per report instead of once per metric. The leading
    # empty frame keeps the call valid when a report has no metrics at all.
    report = pd.concat([pd.DataFrame(), *bucket], axis=1, sort=True)
    report = report.reset_index().rename(columns={'index': 'date'})
    out_dir = f'./financials/{period}'
    os.makedirs(out_dir, exist_ok=True)
    report.to_csv(f'{out_dir}/{symbol}.csv', index=False)

In [3]:
# Location of the list of ticker symbols to process.
SYMBOLS_URL = 'https://github.com/varianze/varianze/raw/master/symbols.json'
symbols = fetch_json(SYMBOLS_URL)

In [11]:
%%time

# Convert the locally stored raw JSON of every symbol into its CSV reports.
for symbol in symbols:
  print(symbol)  # progress trace: one line per processed symbol
  raw_path = f'./rawdata/stocks/{symbol}.json'
  with open(raw_path) as raw_file:
    data = json.loads(raw_file.read())
  rawdata_to_reports(data, symbol)

0001.HK
0002.HK
0003.HK
0004.HK
0005.HK
0006.HK
0008.HK
0011.HK
0012.HK
0016.HK
0017.HK
0019.HK
0020.H
0023.HK
0027.HK
0038.HK
0066.HK
0083.HK
0101.HK
0107.HK
0135.HK
0144.HK
0148.HK
0151.HK
0168.HK
0175.HK
0178.HK
0179.HK
0187.HK
0198.HK
0200.HK
0215.HK
0220.HK
0241.HK
0257.HK
0267.HK
0268.HK
0282.HK
0285.HK
0288.HK
0291.HK
0293.HK
0303.HK
0315.HK
0316.HK
0317.HK
0322.HK
0323.HK
0327.HK
0330.HK
0336.HK
0338.HK
0341.HK
0345.HK
0347.HK
0354.HK
0358.HK
0368.HK
0371.HK
0386.HK
0388.HK
0390.HK
0392.HK
0400.HK
0419.HK
0434.HK
0439.HK
0451.HK
0460.HK
0484.HK
0489.HK
0493.HK
0494.HK
0506.HK
0511.HK
0522.HK
0525.HK
0548.HK
0551.HK
0553.HK
0553.HK
0570.HK
0576.HK
0579.HK
0586.HK
0590.HK
0598.HK
0606.HK
0636.HK
0656.HK
0659.HK
0669.HK
0670.HK
0688.HK
0694.HK
0696.HK
0698.HK
0700.HK
0719.HK
0728.HK
0732.HK
0735.HK
0737.HK
0751.HK
0753.HK
0762.HK
0775.HK
0777.HK
0799.HK
0813.HK
0823.HK
0836.HK
0855.HK
0857.HK
0867.HK
0868.HK
0874.HK
0883.HK
0902.HK
0911.HK
0914.HK
0916.HK
0921.HK
0931.HK
0933.HK
0