In [30]:
# imports
%reload_ext autoreload
%autoreload 2
%matplotlib inline

# imports
import json, time, random
import os.path, urllib
from io import StringIO
from urllib.request import urlopen
from datetime import datetime, date

import numpy as np
import pandas as pd
from pandas.io.json import json_normalize
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.pipeline import make_pipeline
from sklearn.externals import joblib
from sklearn.metrics import mean_squared_error

import boto3
from basic_utils import *

# Cap DataFrame display at 50 rows and apply seaborn's default plot styling.
pd.set_option("display.max_rows",50)
sns.set()

In [22]:
# --- configuration -------------------------------------------------------
conf_file = 'config.json'
config = load_config(conf_file)

# Storage backend: S3 bucket, or a local folder named after the bucket.
S3_store = config['s3_store']
BUCKET_NAME = config['bucket_name']
if S3_store:
    top_path = BUCKET_NAME
else:
    top_path = './' + BUCKET_NAME + '/'

# Dataset paths and formats read from the config file.
dataset = 'option'
prefix = config[dataset + '_path']
ds_prefix = config['training_path']
option_train_fmt = config['option_train_fmt']
consol_quotes = config['quote_consol_path']
consol_options = config['option_consol_path']

# Notebook-level constants.
FIG_SIZE = (20,20)
fname = '{}.csv'

# AWS handles (S3 resource, Machine Learning client, and the data bucket).
s3 = boto3.resource('s3', 'us-west-2')
ml = boto3.client('machinelearning', 'us-east-1')
bucket = s3.Bucket(BUCKET_NAME)

Loading config.json


In [23]:
# clean up functions
def clean_up_fmt(df):
    """Drop formatted-value columns and shorten raw-value column names.

    A column is treated as "formatted" when its lower-cased name contains
    ``'fmt'`` and as "raw" when it contains ``'raw'``; in both cases a match
    at position 0 does not count (the checks are ``find(...) > 0``).
    Formatted columns are dropped. Raw columns are renamed to the part of the
    name before the first ``'.'`` (e.g. ``'ask.raw'`` -> ``'ask'``).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with string column names. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, after the drop and rename.
    """
    names = df.columns.tolist()
    fmt_cols = [x for x in names if str.lower(x).find('fmt') > 0]
    raw_cols = [x for x in names if str.lower(x).find('raw') > 0]
    # partition() returns the whole name when there is no '.', so a dot-less
    # column that merely contains "raw" (e.g. 'drawdown') is left unchanged
    # instead of raising ValueError as str.index('.') would.
    rename_map = {x: x.partition('.')[0] for x in raw_cols}
    df.drop(fmt_cols, axis=1, inplace=True)
    df.rename(rename_map, axis=1, inplace=True)
    return df

In [24]:
# reading functions
def json_load_s3(f):
    """Fetch the object stored under key ``f`` in ``bucket`` and parse its body as JSON."""
    body = bucket.Object(key=f).get()["Body"]
    return json.load(body)

def csv_load_s3(f):
    """Fetch the object stored under key ``f`` in ``bucket`` and return its UTF-8 text as a StringIO."""
    payload = bucket.Object(key=f).get()["Body"].read()
    return StringIO(payload.decode('utf-8'))

def read_dates(dataset='option'):
    """Return the sorted list of date folder names available for ``dataset``.

    Parameters
    ----------
    dataset : str, optional
        Dataset name. Defaults to ``'option'``, which reproduces the previous
        zero-argument behaviour; accepting it also makes calls of the form
        ``read_dates('option')`` valid.

    Returns
    -------
    list of str
        Sorted folder names. In S3 mode these are the second path component
        of every key under ``'<dataset>/'`` that contains ``'.json'``. In
        local mode these are the entries without a ``'.'`` in the root folder
        of ``config['<dataset>_path']`` below ``./<BUCKET_NAME>/``.
    """
    if S3_store:
        keys = [obj.key for obj in bucket.objects.filter(Prefix=dataset + '/')]
        found = {k.split('/')[1] for k in keys if k.find('.json') > 0}
    else:
        ds_path = config[dataset + '_path']
        root = ds_path[:ds_path.index('/')]
        # NOTE(review): any dot-less entry is kept, so a sub-folder that is not
        # a date would also be returned here - confirm the local layout.
        found = [x for x in os.listdir('./' + BUCKET_NAME + '/' + root)
                 if x.find('.') < 0]
    # Sorted in both modes: callers slice this list by position, and
    # os.listdir makes no ordering guarantee.
    return sorted(found)

def get_path(dataset, d=None):
    """Build the storage path for ``dataset``.

    The template is ``config['<dataset>_path']``, formatted with ``d``.
    In S3 mode the formatted template is returned as is; otherwise it is
    prefixed with ``top_path``.
    """
    relative = config[dataset + '_path'].format(d)
    if S3_store:
        return relative
    return top_path + relative

def list_files(dataset, date=None):
    """List the files stored for ``dataset`` (optionally for one ``date``).

    Parameters
    ----------
    dataset : str
        Dataset name, resolved to a path through ``get_path``.
    date : str, optional
        Value substituted into the dataset's path template.

    Returns
    -------
    list of str
        In S3 mode, the keys of all objects under the dataset path.
        In local mode, the directory entries joined onto the dataset path,
        so that each item can be passed to ``load_file(item, '')`` in either
        mode.
    """
    # Both branches resolve the location the same way; the local branch
    # previously passed the date as the dataset name.
    path = get_path(dataset, date)
    if S3_store:
        return [obj.key for obj in bucket.objects.filter(Prefix=path)]
    return [os.path.join(path, entry) for entry in os.listdir(path)]

def load_file(path, fname):
    """Load the JSON document at ``path + fname``.

    Prints the location being loaded, then reads it with ``json_load_s3``
    in S3 mode or with ``load_config`` otherwise.
    """
    target = path + fname
    print('Loading S3 file:', target)
    loader = json_load_s3 if S3_store else load_config
    return loader(target)

In [25]:
# storage functions
def _dump_json_s3(obj, prefix):
    """Serialize ``obj`` to JSON and upload it to ``bucket`` under key ``prefix``."""
    # The object handle is obtained via bucket.Object(key=...), the same
    # access pattern the S3 loaders use; the bucket itself is not called.
    return bucket.Object(key=prefix).put(Body=json.dumps(obj))

# Exposed as an attribute on the json module so `json.dump_s3(obj, key)` keeps working.
json.dump_s3 = _dump_json_s3

def store_file(df, path, fname, headings=True):
    """Write ``df`` as CSV (without the index) to ``path + fname``.

    In S3 mode the CSV text is uploaded to ``bucket`` under that key;
    otherwise it is written to ``top_path + path + fname`` on disk.
    ``headings`` controls whether the header row is written.
    """
    prefix = path + fname
    if not S3_store:
        df.to_csv(top_path + prefix, index=False, header=headings)
    else:
        csv_text = StringIO()
        df.to_csv(csv_text, index=False, encoding='utf-8', header=headings)
        bucket.put_object(Body=csv_text.getvalue(), Key=prefix)
    print('Saved', prefix)

In [26]:
def flatten_quotes(dates):
    """Flatten the per-date quote JSON files into a single DataFrame.

    For each date ``d`` the file ``<quote path> + d + '.json'`` is loaded;
    every element of the loaded result is normalized, cleaned with
    ``clean_up_fmt`` and tagged with a ``storeDate`` column.

    Parameters
    ----------
    dates : iterable of str
        Dates formatted as ``'%Y-%m-%d'``.

    Returns
    -------
    pandas.DataFrame
        All quote frames concatenated (original row indices are kept).
        An empty DataFrame when nothing was loaded.

    Notes
    -----
    ``storeDate`` is ``datetime.timestamp()`` of a naive datetime, so the
    value depends on the machine's local time zone.
    """
    frames = []
    for d in dates:
        storeDate = datetime.strptime(d, '%Y-%m-%d').timestamp()
        path = get_path('quote')
        result = load_file(path, d + '.json')
        for q in result:
            q_clean = clean_up_fmt(json_normalize(q))
            q_clean['storeDate'] = storeDate
            frames.append(q_clean)
    # Concatenate once at the end: appending to a DataFrame inside the loop
    # copies all accumulated rows on every iteration.
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, sort=False)

def flatten_options(dates):
    """Flatten the per-date option JSON files into a single DataFrame.

    For each date, every file returned by ``list_files('option', d)`` is
    loaded. Each element of a loaded file contributes the ``calls`` and
    ``puts`` lists of its first ``options`` entry; both are normalized,
    cleaned with ``clean_up_fmt`` and tagged with ``underlyingSymbol`` and
    ``storeDate``. The underlying symbol is taken from the first element of
    each file and reused for the remaining elements of that file.

    Parameters
    ----------
    dates : iterable of str
        Dates formatted as ``'%Y-%m-%d'``.

    Returns
    -------
    pandas.DataFrame
        Calls followed by puts, with a ``type`` column of ``'call'`` or
        ``'put'``. Original row indices are kept.

    Notes
    -----
    ``storeDate`` is ``datetime.timestamp()`` of a naive datetime, so the
    value depends on the machine's local time zone.
    """
    call_frames, put_frames = [], []
    for d in dates:
        storeDate = datetime.strptime(d, '%Y-%m-%d').timestamp()
        for f in list_files('option', d):
            option_expirations = load_file(f, '')
            underlyingSymbol = None
            for position, expiration in enumerate(option_expirations):
                if position == 0:
                    # Only the first element's symbol is read per file.
                    underlyingSymbol = expiration['underlyingSymbol']
                options = expiration['options'][0]
                call_df = clean_up_fmt(json_normalize(options['calls']))
                put_df = clean_up_fmt(json_normalize(options['puts']))
                for frame in (call_df, put_df):
                    frame['underlyingSymbol'] = underlyingSymbol
                    frame['storeDate'] = storeDate
                call_frames.append(call_df)
                put_frames.append(put_df)
    # Concatenate once per side: appending to a DataFrame inside the loop
    # copies all accumulated rows on every iteration.
    calls_df = pd.concat(call_frames, sort=False) if call_frames else pd.DataFrame()
    puts_frame = pd.concat(put_frames, sort=False) if put_frames else pd.DataFrame()
    calls_df['type'] = 'call'
    puts_frame['type'] = 'put'
    return pd.concat([calls_df, puts_frame])

In [27]:
# Sanity check: epoch seconds for a date string, computed the same way as `storeDate`.
datetime.strptime('2018-09-10', '%Y-%m-%d').timestamp()

1536562800.0

In [33]:
# Date ranges for analysis
# read_dates as defined in this notebook needs no argument; the last
# abs(predict_days) dates are held out for prediction, the rest are for training.
dates = read_dates()
predict_days = -1
predict_dates, train_dates = dates[predict_days:], dates[:predict_days]
train_dates, predict_dates

(['2018-09-07',
  '2018-09-10',
  '2018-09-11',
  '2018-09-12',
  '2018-09-13',
  '2018-09-14',
  '2018-09-17',
  '2018-09-18',
  '2018-09-19'],
 ['2018-09-20'])

In [None]:
# for each date save full_set as consolidated csv file
# S3 store inside the options/consolidated
# write function to load consolidated dataset (much faster)
# write function to load quotes from folder instead of parsing from inside option files
# externalize the functions into basic_utils.py to re-use from other files

In [None]:
# NOTE(review): load_option_files is not defined in this notebook - presumably it comes from basic_utils; confirm.
%time full_set, quote_frame = load_option_files(train_dates)

In [None]:
# Saves large dataset to drive
# index=False: the row index is not written, so reading the files back with
# pd.read_csv does not produce an extra 'Unnamed: 0' column.
full_set.to_csv('full_set.csv', index=False)
quote_frame.to_csv('quote_frame.csv', index=False)

In [None]:
# Reload the cached CSVs from the working directory and report their sizes
full_set = pd.read_csv('full_set.csv')
quote_frame = pd.read_csv('quote_frame.csv')
(
    '{0} options'.format(full_set.underlyingSymbol.count()),
    '{0} quotes'.format(quote_frame.symbol.count()),
)

### Testing / iteration

In [202]:
# Load a single option file (AAPL) for the first available date.
name = 'AAPL.json'
path = get_path('option', dates[0])
result = load_file(path, name)

Loading S3 file: option/2018-09-07/AAPL.json


In [181]:
# List the option files stored for the first available date.
list_files('option', dates[0])

['option/2018-09-07/A.json',
 'option/2018-09-07/AAPL.json',
 'option/2018-09-07/ABMD.json',
 'option/2018-09-07/ADBE.json',
 'option/2018-09-07/ADP.json',
 'option/2018-09-07/AEIS.json',
 'option/2018-09-07/AEO.json',
 'option/2018-09-07/AKAM.json',
 'option/2018-09-07/ALGN.json',
 'option/2018-09-07/ALRM.json',
 'option/2018-09-07/AMAT.json',
 'option/2018-09-07/ANET.json',
 'option/2018-09-07/ANSS.json',
 'option/2018-09-07/ANTM.json',
 'option/2018-09-07/APPF.json',
 'option/2018-09-07/ATGE.json',
 'option/2018-09-07/ATVI.json',
 'option/2018-09-07/AVX.json',
 'option/2018-09-07/AYX.json',
 'option/2018-09-07/BBY.json',
 'option/2018-09-07/BC.json',
 'option/2018-09-07/BKNG.json',
 'option/2018-09-07/BL.json',
 'option/2018-09-07/BMY.json',
 'option/2018-09-07/BOX.json',
 'option/2018-09-07/BRKR.json',
 'option/2018-09-07/BRKS.json',
 'option/2018-09-07/CA.json',
 'option/2018-09-07/CAKE.json',
 'option/2018-09-07/CBM.json',
 'option/2018-09-07/CCMP.json',
 'option/2018-09-07/CDNS.

In [184]:
# Scratch version of the flatten_quotes loop for a single date; here
# storeDate is a date object rather than an epoch timestamp.
quote_frame = pd.DataFrame()
storeDate = datetime.strptime(dates[0], '%Y-%m-%d').date()
for q in result:
    # normalize and clean one element, tag it with the store date, accumulate
    q_clean = clean_up_fmt(json_normalize(q))
    q_clean['storeDate'] = storeDate
    quote_frame = quote_frame.append(q_clean, sort=True)

In [186]:
# Time flattening quotes for two dates: the first available date and 2018-09-18.
%time quotes = flatten_quotes([dates[0], '2018-09-18'])

Loading S3 file: 2018-09-07.json
Loading S3 file: 2018-09-18.json
CPU times: user 8.66 s, sys: 54.7 ms, total: 8.71 s
Wall time: 9.4 s


In [205]:
# Number of distinct store dates and distinct symbols in the flattened quotes.
print(quotes.storeDate.unique().shape, quotes.symbol.unique().shape)

(1,) (334,)


In [189]:
# Inspect the resolved option path for the first date alongside the prediction dates.
path = get_path('option', dates[0])
predict_dates, path

(['2018-09-19'], 'option/2018-09-07/')

In [None]:
# Time flattening every option file for the prediction dates.
%time options = flatten_options(predict_dates)

In [145]:
# Store the flattened options as '<first prediction date>.csv' under the consolidated-options path.
%time store_file(options, consol_options, fname.format(predict_dates[0]), True)

Saved option/consolidated/2018-09-19.csv


In [198]:
# Refresh the list of available dates.
dates = read_dates()

In [34]:
# consolidates all quotes by date: flatten each date's quote file and store it as one CSV
for d in dates:
    %time quotes = flatten_quotes([d])
    %time store_file(quotes, consol_quotes, fname.format(d), True)

CPU times: user 5.58 s, sys: 80 ms, total: 5.66 s
Wall time: 6.7 s
Saved quote/csv/2018-09-07.json
CPU times: user 24.8 ms, sys: 4.51 ms, total: 29.3 ms
Wall time: 643 ms
CPU times: user 4.97 s, sys: 32.1 ms, total: 5.01 s
Wall time: 5.3 s
Saved quote/csv/2018-09-10.json
CPU times: user 21.9 ms, sys: 1.12 ms, total: 23.1 ms
Wall time: 311 ms
CPU times: user 6.09 s, sys: 43.3 ms, total: 6.14 s
Wall time: 6.42 s
Saved quote/csv/2018-09-11.json
CPU times: user 52.3 ms, sys: 3.9 ms, total: 56.2 ms
Wall time: 999 ms


KeyboardInterrupt: 

Saved quote/csv/2018-09-12.json
CPU times: user 36 ms, sys: 1.94 ms, total: 38 ms
Wall time: 352 ms
CPU times: user 7.04 s, sys: 81.7 ms, total: 7.12 s
Wall time: 8.26 s
Saved quote/csv/2018-09-13.json
CPU times: user 38.6 ms, sys: 5.36 ms, total: 44 ms
Wall time: 879 ms
CPU times: user 6.61 s, sys: 58 ms, total: 6.67 s
Wall time: 8.09 s
Saved quote/csv/2018-09-14.json
CPU times: user 38.5 ms, sys: 3.02 ms, total: 41.5 ms
Wall time: 2.32 s
CPU times: user 5.56 s, sys: 71.9 ms, total: 5.63 s
Wall time: 6.49 s
Saved quote/csv/2018-09-17.json
CPU times: user 48.1 ms, sys: 5.38 ms, total: 53.5 ms
Wall time: 2.47 s
CPU times: user 4.9 s, sys: 48.7 ms, total: 4.95 s
Wall time: 6.12 s
Saved quote/csv/2018-09-18.json
CPU times: user 25.9 ms, sys: 1.81 ms, total: 27.7 ms
Wall time: 615 ms
CPU times: user 7.72 s, sys: 62.7 ms, total: 7.78 s
Wall time: 8.89 s
Saved quote/csv/2018-09-19.json
CPU times: user 51.8 ms, sys: 4.37 ms, total: 56.1 ms
Wall time: 4.12 s
CPU times: user 9.05 s, sys: 89 ms,

In [153]:
# consolidates all options in files by date
for d in dates:
    time% options = flatten_options([d])
    time% store_file(options, consol_options, fname.format(d), True)

Loading S3 file: option/2018-09-07/A.json
Loading S3 file: option/2018-09-07/AAPL.json
Loading S3 file: option/2018-09-07/ABMD.json
Loading S3 file: option/2018-09-07/ADBE.json
Loading S3 file: option/2018-09-07/ADP.json
Loading S3 file: option/2018-09-07/AEIS.json
Loading S3 file: option/2018-09-07/AEO.json
Loading S3 file: option/2018-09-07/AKAM.json
Loading S3 file: option/2018-09-07/ALGN.json
Loading S3 file: option/2018-09-07/ALRM.json
Loading S3 file: option/2018-09-07/AMAT.json
Loading S3 file: option/2018-09-07/ANET.json
Loading S3 file: option/2018-09-07/ANSS.json
Loading S3 file: option/2018-09-07/ANTM.json
Loading S3 file: option/2018-09-07/APPF.json
Loading S3 file: option/2018-09-07/ATGE.json
Loading S3 file: option/2018-09-07/ATVI.json
Loading S3 file: option/2018-09-07/AVX.json
Loading S3 file: option/2018-09-07/AYX.json
Loading S3 file: option/2018-09-07/BBY.json
Loading S3 file: option/2018-09-07/BC.json
Loading S3 file: option/2018-09-07/BKNG.json
Loading S3 file: opt

Loading S3 file: option/2018-09-07/VRTX.json
Loading S3 file: option/2018-09-07/VSH.json
Loading S3 file: option/2018-09-07/WAT.json
Loading S3 file: option/2018-09-07/WCG.json
Loading S3 file: option/2018-09-07/WDAY.json
Loading S3 file: option/2018-09-07/WST.json
Loading S3 file: option/2018-09-07/WWE.json
Loading S3 file: option/2018-09-07/XLNX.json
Loading S3 file: option/2018-09-07/YELP.json
Loading S3 file: option/2018-09-07/Z.json
Loading S3 file: option/2018-09-07/ZEN.json
Loading S3 file: option/2018-09-07/ZG.json
Loading S3 file: option/2018-09-07/ZNGA.json
Saved option/consolidated/2018-09-07.csv
Loading S3 file: option/2018-09-10/A.json
Loading S3 file: option/2018-09-10/AAPL.json
Loading S3 file: option/2018-09-10/ABMD.json
Loading S3 file: option/2018-09-10/ADBE.json
Loading S3 file: option/2018-09-10/ADP.json
Loading S3 file: option/2018-09-10/AEIS.json
Loading S3 file: option/2018-09-10/AEO.json
Loading S3 file: option/2018-09-10/AKAM.json
Loading S3 file: option/2018-0

Loading S3 file: option/2018-09-10/TTWO.json
Loading S3 file: option/2018-09-10/TWTR.json
Loading S3 file: option/2018-09-10/TXN.json
Loading S3 file: option/2018-09-10/TXRH.json
Loading S3 file: option/2018-09-10/TYL.json
Loading S3 file: option/2018-09-10/UBNT.json
Loading S3 file: option/2018-09-10/ULTA.json
Loading S3 file: option/2018-09-10/ULTI.json
Loading S3 file: option/2018-09-10/URBN.json
Loading S3 file: option/2018-09-10/UTHR.json
Loading S3 file: option/2018-09-10/VAC.json
Loading S3 file: option/2018-09-10/VAR.json
Loading S3 file: option/2018-09-10/VC.json
Loading S3 file: option/2018-09-10/VEEV.json
Loading S3 file: option/2018-09-10/VRTX.json
Loading S3 file: option/2018-09-10/VSH.json
Loading S3 file: option/2018-09-10/WAT.json
Loading S3 file: option/2018-09-10/WCG.json
Loading S3 file: option/2018-09-10/WDAY.json
Loading S3 file: option/2018-09-10/WST.json
Loading S3 file: option/2018-09-10/WWE.json
Loading S3 file: option/2018-09-10/XLNX.json
Loading S3 file: opti

Loading S3 file: option/2018-09-11/SMTC.json
Loading S3 file: option/2018-09-11/SNPS.json
Loading S3 file: option/2018-09-11/SPLK.json
Loading S3 file: option/2018-09-11/SQ.json
Loading S3 file: option/2018-09-11/STMP.json
Loading S3 file: option/2018-09-11/SUPN.json
Loading S3 file: option/2018-09-11/SWKS.json
Loading S3 file: option/2018-09-11/TDC.json
Loading S3 file: option/2018-09-11/TER.json
Loading S3 file: option/2018-09-11/THO.json
Loading S3 file: option/2018-09-11/TIF.json
Loading S3 file: option/2018-09-11/TJX.json
Loading S3 file: option/2018-09-11/TRIP.json
Loading S3 file: option/2018-09-11/TTD.json
Loading S3 file: option/2018-09-11/TTWO.json
Loading S3 file: option/2018-09-11/TWTR.json
Loading S3 file: option/2018-09-11/TXN.json
Loading S3 file: option/2018-09-11/TXRH.json
Loading S3 file: option/2018-09-11/TYL.json
Loading S3 file: option/2018-09-11/UBNT.json
Loading S3 file: option/2018-09-11/ULTA.json
Loading S3 file: option/2018-09-11/ULTI.json
Loading S3 file: opt

Loading S3 file: option/2018-09-12/PSTG.json
Loading S3 file: option/2018-09-12/QCOM.json
Loading S3 file: option/2018-09-12/QLYS.json
Loading S3 file: option/2018-09-12/REGN.json
Loading S3 file: option/2018-09-12/RGEN.json
Loading S3 file: option/2018-09-12/RHT.json
Loading S3 file: option/2018-09-12/RL.json
Loading S3 file: option/2018-09-12/RNG.json
Loading S3 file: option/2018-09-12/ROG.json
Loading S3 file: option/2018-09-12/ROST.json
Loading S3 file: option/2018-09-12/SHAK.json
Loading S3 file: option/2018-09-12/SHOO.json
Loading S3 file: option/2018-09-12/SKX.json
Loading S3 file: option/2018-09-12/SLAB.json
Loading S3 file: option/2018-09-12/SMTC.json
Loading S3 file: option/2018-09-12/SNPS.json
Loading S3 file: option/2018-09-12/SPLK.json
Loading S3 file: option/2018-09-12/SQ.json
Loading S3 file: option/2018-09-12/STMP.json
Loading S3 file: option/2018-09-12/SUPN.json
Loading S3 file: option/2018-09-12/SWKS.json
Loading S3 file: option/2018-09-12/TDC.json
Loading S3 file: op

Loading S3 file: option/2018-09-13/NWS.json
Loading S3 file: option/2018-09-13/NWSA.json
Loading S3 file: option/2018-09-13/OLED.json
Loading S3 file: option/2018-09-13/OLLI.json
Loading S3 file: option/2018-09-13/ORCL.json
Loading S3 file: option/2018-09-13/PANW.json
Loading S3 file: option/2018-09-13/PAYC.json
Loading S3 file: option/2018-09-13/PAYX.json
Loading S3 file: option/2018-09-13/PCTY.json
Loading S3 file: option/2018-09-13/PEGA.json
Loading S3 file: option/2018-09-13/PLCE.json
Loading S3 file: option/2018-09-13/PLT.json
Loading S3 file: option/2018-09-13/POWI.json
Loading S3 file: option/2018-09-13/PSTG.json
Loading S3 file: option/2018-09-13/QCOM.json
Loading S3 file: option/2018-09-13/QLYS.json
Loading S3 file: option/2018-09-13/REGN.json
Loading S3 file: option/2018-09-13/RGEN.json
Loading S3 file: option/2018-09-13/RHT.json
Loading S3 file: option/2018-09-13/RL.json
Loading S3 file: option/2018-09-13/RNG.json
Loading S3 file: option/2018-09-13/ROG.json
Loading S3 file: 

Loading S3 file: option/2018-09-14/MRVL.json
Loading S3 file: option/2018-09-14/MSFT.json
Loading S3 file: option/2018-09-14/MSG.json
Loading S3 file: option/2018-09-14/MU.json
Loading S3 file: option/2018-09-14/MXIM.json
Loading S3 file: option/2018-09-14/MYGN.json
Loading S3 file: option/2018-09-14/NATI.json
Loading S3 file: option/2018-09-14/NEOG.json
Loading S3 file: option/2018-09-14/NEWR.json
Loading S3 file: option/2018-09-14/NKE.json
Loading S3 file: option/2018-09-14/NOW.json
Loading S3 file: option/2018-09-14/NTAP.json
Loading S3 file: option/2018-09-14/NTGR.json
Loading S3 file: option/2018-09-14/NVDA.json
Loading S3 file: option/2018-09-14/NWS.json
Loading S3 file: option/2018-09-14/NWSA.json
Loading S3 file: option/2018-09-14/OLED.json
Loading S3 file: option/2018-09-14/OLLI.json
Loading S3 file: option/2018-09-14/ORCL.json
Loading S3 file: option/2018-09-14/PANW.json
Loading S3 file: option/2018-09-14/PAYC.json
Loading S3 file: option/2018-09-14/PAYX.json
Loading S3 file:

Loading S3 file: option/2018-09-17/LGND.json
Loading S3 file: option/2018-09-17/LOGM.json
Loading S3 file: option/2018-09-17/LOPE.json
Loading S3 file: option/2018-09-17/LRCX.json
Loading S3 file: option/2018-09-17/LULU.json
Loading S3 file: option/2018-09-17/MA.json
Loading S3 file: option/2018-09-17/MANH.json
Loading S3 file: option/2018-09-17/MASI.json
Loading S3 file: option/2018-09-17/MDSO.json
Loading S3 file: option/2018-09-17/MIME.json
Loading S3 file: option/2018-09-17/MKSI.json
Loading S3 file: option/2018-09-17/MOH.json
Loading S3 file: option/2018-09-17/MPWR.json
Loading S3 file: option/2018-09-17/MRVL.json
Loading S3 file: option/2018-09-17/MSFT.json
Loading S3 file: option/2018-09-17/MSG.json
Loading S3 file: option/2018-09-17/MU.json
Loading S3 file: option/2018-09-17/MXIM.json
Loading S3 file: option/2018-09-17/MYGN.json
Loading S3 file: option/2018-09-17/NATI.json
Loading S3 file: option/2018-09-17/NEOG.json
Loading S3 file: option/2018-09-17/NEWR.json
Loading S3 file:

Loading S3 file: option/2018-09-18/HRB.json
Loading S3 file: option/2018-09-18/HUBS.json
Loading S3 file: option/2018-09-18/HUM.json
Loading S3 file: option/2018-09-18/ICUI.json
Loading S3 file: option/2018-09-18/IDCC.json
Loading S3 file: option/2018-09-18/ILMN.json
Loading S3 file: option/2018-09-18/INTU.json
Loading S3 file: option/2018-09-18/IONS.json
Loading S3 file: option/2018-09-18/IPGP.json
Loading S3 file: option/2018-09-18/IRBT.json
Loading S3 file: option/2018-09-18/ISRG.json
Loading S3 file: option/2018-09-18/JNPR.json
Loading S3 file: option/2018-09-18/KLAC.json
Loading S3 file: option/2018-09-18/LGND.json
Loading S3 file: option/2018-09-18/LOGM.json
Loading S3 file: option/2018-09-18/LOPE.json
Loading S3 file: option/2018-09-18/LRCX.json
Loading S3 file: option/2018-09-18/LULU.json
Loading S3 file: option/2018-09-18/MA.json
Loading S3 file: option/2018-09-18/MANH.json
Loading S3 file: option/2018-09-18/MASI.json
Loading S3 file: option/2018-09-18/MDSO.json
Loading S3 fil

Loading S3 file: option/2018-09-19/CSU.json
Loading S3 file: option/2018-09-19/CTSH.json
Loading S3 file: option/2018-09-19/CVCO.json
Loading S3 file: option/2018-09-19/CVLT.json
Loading S3 file: option/2018-09-19/CYBR.json
Loading S3 file: option/2018-09-19/DANE.json
Loading S3 file: option/2018-09-19/DATA.json
Loading S3 file: option/2018-09-19/DBX.json
Loading S3 file: option/2018-09-19/DDD.json
Loading S3 file: option/2018-09-19/DE.json
Loading S3 file: option/2018-09-19/DECK.json
Loading S3 file: option/2018-09-19/DLB.json
Loading S3 file: option/2018-09-19/DMLRY.json
Loading S3 file: option/2018-09-19/DORM.json
Loading S3 file: option/2018-09-19/DOX.json
Loading S3 file: option/2018-09-19/DSW.json
Loading S3 file: option/2018-09-19/DWDP.json
Loading S3 file: option/2018-09-19/DXCM.json
Loading S3 file: option/2018-09-19/EA.json
Loading S3 file: option/2018-09-19/EBS.json
Loading S3 file: option/2018-09-19/EDIT.json
Loading S3 file: option/2018-09-19/EEFT.json
Loading S3 file: opt

Loading S3 file: option/2018-09-19/SLAB.json
Loading S3 file: option/2018-09-19/SMTC.json
Loading S3 file: option/2018-09-19/SNPS.json
Loading S3 file: option/2018-09-19/SPLK.json
Loading S3 file: option/2018-09-19/SPNS.json
Loading S3 file: option/2018-09-19/SPOT.json
Loading S3 file: option/2018-09-19/SQ.json
Loading S3 file: option/2018-09-19/SSYS.json
Loading S3 file: option/2018-09-19/STMP.json
Loading S3 file: option/2018-09-19/SUPN.json
Loading S3 file: option/2018-09-19/SWKS.json
Loading S3 file: option/2018-09-19/SYK.json
Loading S3 file: option/2018-09-19/SYRS.json
Loading S3 file: option/2018-09-19/TARO.json
Loading S3 file: option/2018-09-19/TCEHY.json
Loading S3 file: option/2018-09-19/TDC.json
Loading S3 file: option/2018-09-19/TDOC.json
Loading S3 file: option/2018-09-19/TER.json
Loading S3 file: option/2018-09-19/TEVA.json
Loading S3 file: option/2018-09-19/THO.json
Loading S3 file: option/2018-09-19/TIF.json
Loading S3 file: option/2018-09-19/TJX.json
Loading S3 file: 

In [259]:
# load flattened quotes
def load_consol_quotes(dates):
    """Load the consolidated quote CSVs for ``dates`` into one DataFrame.

    Each date ``d`` is read from ``get_path('quote_consol') + d + '.csv'``:
    through ``csv_load_s3`` in S3 mode, or directly from that path on disk
    otherwise (the location ``store_file`` writes to in local mode).

    Parameters
    ----------
    dates : iterable of str
        Date strings used as CSV file names.

    Returns
    -------
    pandas.DataFrame
        All per-date frames concatenated (original row indices are kept).
        An empty DataFrame when ``dates`` is empty.
    """
    path = get_path('quote_consol')
    frames = []
    for d in dates:
        location = path + d + '.csv'
        source = csv_load_s3(location) if S3_store else location
        frames.append(pd.read_csv(source))
    # Concatenate once: appending inside the loop re-copies all previous rows.
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, sort=False)

# Load every date (dates[:] is a copy of the full list), then show the
# distinct store dates and the number of distinct symbols.
%time quote_frame = load_consol_quotes(dates[:])
quote_frame.storeDate.unique(), quote_frame.symbol.unique().shape

CPU times: user 253 ms, sys: 30.3 ms, total: 283 ms
Wall time: 4.48 s


(array(['2018-09-07', '2018-09-10', '2018-09-11', '2018-09-12',
        '2018-09-13', '2018-09-14', '2018-09-17', '2018-09-18',
        '2018-09-19'], dtype=object), (334,))

In [263]:
# load flattened options
def load_consol_options(dates):
    """Load the consolidated option CSVs for ``dates`` into one DataFrame.

    Each date ``d`` is read from ``get_path('option_consol') + d + '.csv'``:
    through ``csv_load_s3`` in S3 mode, or directly from that path on disk
    otherwise (the location ``store_file`` writes to in local mode).

    Parameters
    ----------
    dates : iterable of str
        Date strings used as CSV file names.

    Returns
    -------
    pandas.DataFrame
        All per-date frames concatenated (original row indices are kept).
        An empty DataFrame when ``dates`` is empty.
    """
    path = get_path('option_consol')
    frames = []
    for d in dates:
        location = path + d + '.csv'
        source = csv_load_s3(location) if S3_store else location
        frames.append(pd.read_csv(source))
    # Concatenate once: appending inside the loop re-copies all previous rows.
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, sort=False)

# Time loading the consolidated options for every available date.
%time option_frame = load_consol_options(dates[:])

CPU times: user 3.98 s, sys: 1.67 s, total: 5.65 s
Wall time: 22 s


In [265]:
# Overall shape plus the number of distinct lastTradeDate and contractSymbol values.
option_frame.shape, \
option_frame.lastTradeDate.unique().shape, \
option_frame.contractSymbol.unique().shape

(581078, 18)