In [1]:
import pandas as pd
import numpy as np
import sys, os

import seaborn as sns
import matplotlib.pyplot as plt

### Raw Options 

In [2]:
raw = pd.read_csv("data/tsla_options.csv")

### Filter by Zero Bid/Ask

In [3]:
raw_zba = raw[raw.bid_price + raw.bid_price != 0]
raw_zba.option_id.nunique()

4114

### Filter by regular expirations

In [4]:
start = "2020-07-13"
end = f"{int(start[:4]) + 4}-{start[4:]}"
fridays = pd.date_range(start, end, freq="WOM-3FRI").astype(str)
thursdays = pd.date_range(start, end, freq="WOM-3THU").astype(str)
regulars = list(fridays) + list(thursdays)
raw_zbar = raw_zba[raw_zba.expiration_date.isin(regulars)]
raw_zbar.option_id.nunique()

2553

### Filter by OptionID persistence

In [5]:
num_days = raw_zbar.date_current.nunique()
oids = raw_zbar.option_id.value_counts()
oids = oids[oids == num_days]
raw_zbarp = raw_zbar[raw_zbar.option_id.isin(oids.index)]
raw_zbarp.option_id.nunique()

1261

### Filter by volume cutoff

In [6]:
before = raw_zbarp.option_id.nunique()

In [7]:
cutoff = raw_zbarp.volume.quantile(0.30)
def byid(oid):
    med = oid.volume.median()
    if med >= cutoff:
        return oid
raw_zbarpv = raw_zbarp.groupby('option_id').apply(byid).dropna()
before, raw_zbarpv.option_id.nunique()

(1261, 934)

### Filter by regular expirations

In [8]:
start = "2020-07-13"
end = f"{int(start[:4]) + 4}-{start[4:]}"
fridays = pd.date_range(start, end, freq="WOM-3FRI").astype(str)
thursdays = pd.date_range(start, end, freq="WOM-3THU").astype(str)
regulars = list(fridays) + list(thursdays)

In [9]:
raw_zbarpvr = raw_zbarpv[raw_zbarpv.expiration_date.isin(regulars)]
raw_zbarpv.option_id.nunique(), raw_zbarpvr.option_id.nunique()

(934, 934)

### Save

In [10]:
raw_zbarpvr.to_csv("data/filtered_tsla_options.csv", index=False)