<h2>Initialise files</h2>

In [None]:
import csv
import datetime as dt
import math
import os
from functools import reduce

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from IPython.display import display

Declare constants

In [None]:
# Folder scanned for the raw UOA CSV files.
# (The identifier is misspelled, but other cells reference it under this name.)
FOLER_PATH = 'UOA_raw'
# Base name, without extension, of the combined CSV file.
OUTPUT_FILE_PATH = 'uoa_gen'
# Today's date as an ISO 'YYYY-MM-DD' string.
TODAY = dt.date.today().isoformat()

Parse, combine and output data in all UOA files

In [None]:
def parse_data_df_to_excel(df):
    """Convert one raw UOA frame into a list of row lists ready for csv.writer.

    'Exp Date' and 'Trade date' are parsed and reduced to ``datetime.date``
    objects, every row with a missing value in any column is dropped, and
    'DTE' is cast to ``int``.

    The caller's frame is not modified; all work happens on a copy.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that already has 'Exp Date', 'Trade date' and 'DTE' columns
        (``parse_data_headers`` renames 'Time' to 'Trade date').

    Returns
    -------
    list of list
        One inner list per surviving row, values in column order.
    """
    parsed = df.copy()
    for date_column in ('Exp Date', 'Trade date'):
        parsed[date_column] = pd.to_datetime(parsed[date_column]).dt.date
    # Drop incomplete rows before the cast: NaN cannot be converted to int.
    parsed = parsed.dropna().astype({'DTE': int})
    return parsed.values.tolist()

In [None]:
def parse_data_headers(df):
    """Rename the 'Time' column to 'Trade date' in place and return the header list.

    The rename mutates ``df`` itself; the returned list holds the column
    labels after the rename, in column order.
    """
    renamed_columns = {'Time': 'Trade date'}
    df.rename(columns=renamed_columns, inplace=True)
    return df.columns.tolist()

In [None]:
# Read every file in FOLER_PATH, normalise it, and write all rows to one CSV.
column_headers = []
data = []

# sorted(): os.listdir() returns entries in arbitrary order; sorting keeps the
# combined row order reproducible between runs.
for entry in sorted(os.listdir(FOLER_PATH)):
    file_path = os.path.join(FOLER_PATH, entry)
    print(file_path)
    if os.path.isfile(file_path):
        df = pd.read_csv(file_path)
        # parse_data_headers renames 'Time' -> 'Trade date' in place and
        # parse_data_df_to_excel reads 'Trade date', so the call order matters.
        column_headers = parse_data_headers(df)
        data.extend(parse_data_df_to_excel(df))

if not column_headers:
    # Fail with a clear message instead of writing a header-less file.
    raise FileNotFoundError(f'no input files found in {FOLER_PATH!r}')

# Header row first, then every parsed data row.
all_data = [column_headers, *data]

print(all_data[0])
if data:
    # Guarded: every input row may have been dropped as incomplete.
    print(all_data[1])

# The context manager closes the file even if writing fails part-way.
with open(f'{OUTPUT_FILE_PATH}.csv', 'w', newline='') as output_file:
    writer = csv.writer(output_file)
    writer.writerows(all_data)

<h2>ETL data</h2>

Display some data for visual check 

In [None]:
# Reload the combined CSV written above and show a sample for a visual check.
df = pd.read_csv(f'{OUTPUT_FILE_PATH}.csv')
display(df.head())
# DataFrame.info() prints its report itself and returns None, so wrapping it
# in display() would only add a stray "None" to the output.
df.info()

<h3>Sort and add useful columns</h3>

"""
- Add an `Amount` column: `Volume` × `Midpoint`.
- Sort the rows by ticker (`Symbol`), `DTE` and option `Type`.
- Calculate the call and put totals.
"""

In [None]:
# Order the rows by Symbol, DTE and Type, then add Amount = Volume * Midpoint.
df = (
    df.sort_values(by=['Symbol', 'DTE', 'Type'])
      .assign(Amount=lambda frame: frame['Volume'] * frame['Midpoint'])
)
display(df)

Overwrite the combined CSV file with the sorted data, including the new `Amount` column

In [None]:
# index=False: the row index only reflects the pre-sort row positions; writing
# it would add a stray 'Unnamed: 0' column the next time this file is read
# back with pd.read_csv.
df.to_csv(f'{OUTPUT_FILE_PATH}.csv', index=False)

<h3>Daily trade put-call charts</h3>

In [None]:
# Sum Volume and Amount per trade date and option type, then turn the group
# keys back into ordinary columns so they can be used as plot axes.
daily_totals = df.groupby(["Trade date", "Type"])[["Volume", "Amount"]].sum()
daily_group = daily_totals.reset_index()
daily_group

In [None]:
def line_plot(data): 
    fig = plt.figure(figsize=(20, 6))
    sns.lineplot(data=data, x="Trade date", y="Volume", hue="Type")
    plt.title('Volume')
    plt.show()
    
line_plot(daily_group)

In [None]:
def line_plot(data): 
    fig = plt.figure(figsize=(20, 6))
    sns.lineplot(data=data, x="Trade date", y="Amount", hue="Type")
    plt.title('Amount')
    plt.show()
    
line_plot(daily_group)

<h3>Most active symbol</h3> 

In [None]:
def group_and_sum(_df):
    """Return total 'Amount' per (Symbol, Exp Date, Type, Trade date) group.

    The result is a one-column frame indexed by the four grouping keys.
    """
    group_keys = ['Symbol', 'Exp Date', 'Type', 'Trade date']
    return _df.groupby(group_keys)[['Amount']].sum()

def sort_and_display(_df):
    """Move the index levels back into columns and order rows by 'Amount', largest first."""
    flattened = _df.reset_index()
    return flattened.sort_values('Amount', ascending=False)

def sort_vol_oi_ratio(_df):
    """Return the rows of ``_df`` ordered by the 'Vol/OI' column, highest first."""
    ratio_column = 'Vol/OI'
    return _df.sort_values(ratio_column, ascending=False)

In [None]:
# Total Amount per (Symbol, Exp Date, Type, Trade date), over every row currently in df.
df_symbols = group_and_sum(df)
df_symbols

Keep only contracts that expire after today

In [None]:
# Keep only rows whose 'Exp Date' is strictly after TODAY. Both sides are
# 'YYYY-MM-DD' strings here, so the string comparison follows date order.
expires_after_today = df['Exp Date'] > TODAY
df = df.loc[expires_after_today].reset_index(drop=True)
# Split the remaining rows by option type.
df_calls = df.loc[df['Type'] == 'Call']
df_puts = df.loc[df['Type'] == 'Put']
df_calls

In [None]:
# Top 20 groups by total Amount.
# NOTE(review): df_symbols was built before df was filtered on 'Exp Date', so this
# ranking still includes rows expiring on or before TODAY — confirm that is intended.
sort_and_display(df_symbols).head(20)

<h3>Most active calls</h3>

In [None]:
# Total Amount per (Symbol, Exp Date, Type, Trade date), for the call rows only.
df_call_symbols = group_and_sum(df_calls)
df_call_symbols

In [None]:
# Call groups ranked by total Amount, largest first; show the top 10.
sorted_calls = sort_and_display(df_call_symbols)
sorted_calls.head(10)

In [None]:
# One row per Symbol: sorted_calls is ordered by Amount descending, so the row
# kept for each symbol is its largest group.
top_tickers = sorted_calls.drop_duplicates(subset='Symbol')
# Symbols of the first 30 of those rows.
top_call_ticker_list = top_tickers['Symbol'].head(30).tolist()
print(top_call_ticker_list)

In [None]:
# First 30 rows of top_tickers (one row per Symbol, largest Amount first).
top_tickers.head(30)

<h4>Top Vol/OI</h4>

In [None]:
# Individual call rows ranked by 'Vol/OI', highest first; show the top 10.
top_call_vol_oi = sort_vol_oi_ratio(df_calls)
top_call_vol_oi.head(10)

<h3>Most active puts</h3>

In [None]:
# Total Amount per (Symbol, Exp Date, Type, Trade date), for the put rows only.
df_puts_symbols = group_and_sum(df_puts)
df_puts_symbols

In [None]:
# Put groups ranked by total Amount, largest first; show the top 10.
sorted_puts = sort_and_display(df_puts_symbols)
sorted_puts.head(10)

In [None]:
# One row per Symbol: sorted_puts is ordered by Amount descending, so the row
# kept for each symbol is its largest group.
top_tickers = sorted_puts.drop_duplicates(subset=['Symbol'])
# Stored under a put-specific name so the call list built earlier
# (top_call_ticker_list) is not overwritten with put symbols.
top_put_ticker_list = top_tickers.head(30)['Symbol'].tolist()
print(top_put_ticker_list)

In [None]:
# First 30 rows of top_tickers (one row per Symbol, largest Amount first).
top_tickers.head(30)

<h4>Top Vol/OI</h4>

In [None]:
# Individual put rows ranked by 'Vol/OI', highest first; show the top 10.
# Stored under a put-specific name so the call ranking (top_call_vol_oi)
# is not overwritten with put rows.
top_put_vol_oi = sort_vol_oi_ratio(df_puts)
top_put_vol_oi.head(10)