In [1]:
import os
import re

import pandas as pd


In [4]:
def extract_info_from_filename(file_path):
    """Parse the ticker, expiration date and updated date out of a file name.

    The file name (directory and final extension are ignored) is expected to
    have the form ``<ticker>-...-exp-YYYY-MM-DD-...-MM-DD-YYYY``, for example
    ``spy-...-exp-2024-02-16-...-02-07-2024.csv``.

    Parameters
    ----------
    file_path : str
        Path or bare name of the file. Only the base name is inspected.

    Returns
    -------
    tuple of (str, str, str)
        ``(ticker, expiration_date, updated_date)`` where both dates are
        formatted as ``yyyymmdd``.

    Raises
    ------
    ValueError
        If the name has no ``exp-YYYY-MM-DD`` part or no ``MM-DD-YYYY`` date.
        Failing loudly avoids tagging rows with text sliced from the wrong
        place in the name.
    """
    # splitext drops only the final extension, so dots elsewhere in the name
    # do not truncate it.
    file_name = os.path.splitext(os.path.basename(file_path))[0]

    # The ticker is everything before the first hyphen.
    ticker = file_name.split('-')[0]

    # Require digits after "exp-" so that a ticker (or any other part) that
    # happens to end in "exp" is not mistaken for the expiration marker.
    expiration = re.search(r"exp-(\d{4})-(\d{2})-(\d{2})(?!\d)", file_name)
    if expiration is None:
        raise ValueError(
            "No 'exp-YYYY-MM-DD' expiration date in file name: %r" % file_name
        )
    expiration_date = "".join(expiration.groups())

    # The updated date is the last MM-DD-YYYY date in the name; searching for
    # the pattern tolerates trailing text after the date.
    dates = re.findall(r"(?<!\d)(\d{2})-(\d{2})-(\d{4})(?!\d)", file_name)
    if not dates:
        raise ValueError(
            "No 'MM-DD-YYYY' updated date in file name: %r" % file_name
        )
    month, day, year = dates[-1]
    updated_date = year + month + day  # Reformatting to yyyymmdd

    return ticker, expiration_date, updated_date

def _drop_barchart_rows(data):
    """Return ``data`` without rows carrying the "Downloaded from Barchart" text.

    Rows are removed when their ``Symbol`` or ``Strike`` cell contains the
    text (presumably the export's trailing attribution line). Rows with a
    missing ``Strike`` are removed as well; rows with a missing ``Symbol``
    are kept. Columns that are absent are skipped.
    """
    marker = "Downloaded from Barchart"
    for column, drop_missing in (("Symbol", False), ("Strike", True)):
        if column not in data.columns:
            continue
        values = data[column]
        # Compare on a string view so the check also works when pandas parsed
        # the column as numeric (the .str accessor only exists for strings).
        has_marker = values.astype(str).str.contains(marker, regex=False, na=False)
        if drop_missing:
            has_marker = has_marker | values.isna()
        data = data[~has_marker]
    return data


def process_file(file_path):
    """Load one CSV from the ``csv`` folder and tag it with file-name metadata.

    Parameters
    ----------
    file_path : str
        Name of the file inside ``<current working directory>/csv``.

    Returns
    -------
    pandas.DataFrame
        The file's rows, minus those removed by ``_drop_barchart_rows``, with
        three leading columns added: ``Updated Date``, ``Expiration Date``
        and ``Ticker`` (all taken from the file name).
    """
    data = pd.read_csv(os.path.join(os.getcwd(), "csv", file_path))

    ticker, expiration_date, updated_date = extract_info_from_filename(file_path)

    data = _drop_barchart_rows(data)

    # Prepending the columns
    data.insert(0, 'Updated Date', updated_date)
    data.insert(1, 'Expiration Date', expiration_date)
    data.insert(2, 'Ticker', ticker)

    return data

In [9]:
# Assumes process_file is defined in an earlier cell.

# List everything in the "csv" folder under the current working directory
csv_dir = os.path.join(os.getcwd(), "csv")
all_files = os.listdir(csv_dir)

# Keep only the CSV files. os.listdir returns entries in arbitrary order, so
# sort them to make the row order of the combined file reproducible.
csv_files = sorted(file for file in all_files if file.endswith('.csv'))

# Fail with a clear message instead of pd.concat's "No objects to concatenate"
if not csv_files:
    raise FileNotFoundError("No .csv files found in " + csv_dir)

# List to collect DataFrames from each processed file
dfs = []

# Process each file and append its DataFrame to the dfs list
for csv_file in csv_files:
    temp_df = process_file(csv_file)
    dfs.append(temp_df)

# Concatenate all DataFrames in the list into a master DataFrame
master_df = pd.concat(dfs, ignore_index=True)

# Print the head of the master DataFrame
print(master_df.head())

# Save the master DataFrame to CSV (written to the working directory, not the
# "csv" folder, so it is not picked up as an input on the next run)
master_df.to_csv("options_chain_data_combined.csv", index=False)

  Updated Date Expiration Date Ticker  Strike   Last  Theor.      IV    Delta  \
0     20240207        20240216    spy  484.00  10.47   11.15  12.39%  0.86748   
1     20240207        20240216    spy  485.00  10.35   10.28  12.26%  0.84599   
2     20240207        20240216    spy  486.00   9.80    9.42  12.12%  0.82189   
3     20240207        20240216    spy  487.00   8.95    8.59  11.97%  0.79526   
4     20240207        20240216    spy  488.00   8.05    7.79  11.83%  0.76536   

     Gamma    Theta     Vega      Rho Volume Open Int  Vol/OI  Type  \
0  0.02228 -0.16507  0.16609  0.10291  1,545   16,034    0.10  Call   
1  0.02493 -0.17478  0.18388  0.10051    308   27,631    0.01  Call   
2  0.02771 -0.18426  0.20202  0.09779  1,210    6,613    0.18  Call   
3  0.03057 -0.19305  0.22007  0.09475     87    7,571    0.01  Call   
4  0.03344 -0.20152  0.23802  0.09131    172   10,538    0.02  Call   

  Last Trade  Avg IV  
0   02/06/24  11.55%  
1   02/06/24  11.55%  
2   02/06/24  11.