In [None]:
import os
import re


def parse_option_filenames(output_folder):
    option_data = {}

    # Get all CSV filenames in the output folder
    filenames = [f for f in os.listdir(output_folder) if f.endswith(".csv")]

    for filename in filenames:
        # Extract the option symbol from the filename
        option_symbol = os.path.splitext(filename)[0]

        # Parse the option symbol
        match = re.match(r"^([A-Z]+)(\d{6})([CP])(\d{8})$", option_symbol)

        if match:
            underlying_stock = match.group(1)
            expiry_date = match.group(2)
            option_type = "Call" if match.group(3) == "C" else "Put"
            strike_price = int(match.group(4)) / 1000

            # Format the expiry date
            year = "20" + expiry_date[:2]
            month = expiry_date[2:4]
            day = expiry_date[4:]
            formatted_expiry_date = f"{year}-{month}-{day}"

            # Check if the underlying stock exists in the option_data dictionary
            if underlying_stock not in option_data:
                option_data[underlying_stock] = {
                    "Call": {"strikes": [], "expiry_dates": []},
                    "Put": {"strikes": [], "expiry_dates": []},
                }

            # Append the strike price and expiry date to the respective lists
            option_data[underlying_stock][option_type]["strikes"].append(strike_price)
            option_data[underlying_stock][option_type]["expiry_dates"].append(
                formatted_expiry_date
            )

    return option_data


output_path = "generated/XYZ"
option_data = parse_option_filenames(output_path)
option_data

In [69]:
from typing import List, Dict, Tuple
from dataclasses import dataclass
from datetime import datetime
import pandas as pd




def prepare_market_data(
    options: List[Option],
    stock_prices: pd.DataFrame,
    r: float,
    sigma: float,
) -> Dict[str, pd.DataFrame]:
    """
    Prepare market data for options pricing.

    Parameters:
    - options: List of Option objects representing options
    - stock_prices: DataFrame with columns ['date', 'price'] representing underlying stock prices
    - r: Risk-free interest rate (float)
    - sigma: Volatility of the underlying asset (float)

    Returns:
    - Dictionary where the key is the asset id and the value is a DataFrame of the asset's prices
    """
    market_data = {}

    for option in options:
        option_prices = calculate_option_prices(
            stock_prices,
            option.expiry_date.strftime("%Y-%m-%d"),
            option.strike,
            option.option_type,
            r,
            sigma,
        )
        # option_prices["asset_id"] = option.id
        market_data[option.id] = option_prices

    return market_data


# Sample options data
options = [
    Option("OPTION1", 100.0, datetime(2023, 3, 1), "call"),
    Option("OPTION2", 110.0, datetime(2023, 4, 30), "put"),
    Option("OPTION3", 95.0, datetime(2023, 5, 31), "call"),
]
stock_prices = pd.DataFrame(
    {
        "date": ["2023-01-01", "2023-02-01", "2023-03-01", "2023-04-01"],
        "price": [100, 105, 110, 95],
    }
)
# Risk-free rate and volatility
r = 0.05
sigma = 0.2

market_data = prepare_market_data(options, stock_prices, r, sigma)
for asset_id, asset_data in market_data.items():
    print(f"Asset ID: {asset_id}")
    print(asset_data)
    print()

Asset ID: OPTION1
        date  option_price
0 2023-01-01      3.612713
1 2023-02-01      5.901369
2 2023-03-01     10.000000
3 2023-04-01           NaN

Asset ID: OPTION2
        date  option_price
0 2023-01-01      9.940723
1 2023-02-01      6.279861
2 2023-03-01      3.110896
3 2023-04-01     14.574375

Asset ID: OPTION3
        date  option_price
0 2023-01-01      9.153847
1 2023-02-01     12.443792
2 2023-03-01     16.416457
3 2023-04-01      3.464305



  d1 = (np.log(S / K) + (r + 0.5 * sigma**2) * T) / (sigma * np.sqrt(T))
  d1 = (np.log(S / K) + (r + 0.5 * sigma**2) * T) / (sigma * np.sqrt(T))
  d2 = d1 - sigma * np.sqrt(T)


In [83]:
from collections import OrderedDict
import numpy as np
import pandas as pd


def rollovers_to_signals(rollovers, start_date, end_date):
    date_range = pd.date_range(start=start_date, end=end_date, freq="D")
    option_ids = list(OrderedDict.fromkeys(rollover.id for rollover in rollovers))
    signals_df = pd.DataFrame(0, index=date_range, columns=option_ids)

    for option_id, entry_date, exit_date in rollovers:

        signals_df.loc[entry_date, option_id] = 1  # Buy signal
        signals_df.loc[exit_date, option_id] = -1  # Sell signal

    return signals_df


def generate_positions(signals_df):
    """
    Generate positions DataFrame from signals DataFrame using cumulative sum.
    Positions take effect one day after the signal, considering the cumulative effect.
    """
    # Use cumulative sum to accumulate signals
    positions_df = signals_df.cumsum()

    # Shift the positions to reflect that they take effect one day after the signal
    positions_df = positions_df.shift(1).fillna(0).astype(int)

    return positions_df


def calculate_returns(positions_df, market_data):
    """
    Calculate returns DataFrame from positions DataFrame and market data.
    """
    returns_df = pd.DataFrame(index=positions_df.index, columns=positions_df.columns)

    for asset_id in positions_df.columns:
        asset_data = market_data[market_data["asset_id"] == asset_id].copy()
        asset_data["date"] = pd.to_datetime(asset_data["date"])
        asset_data = asset_data.set_index("date")

        asset_positions = positions_df[asset_id]
        asset_returns = asset_positions * asset_data["price"].pct_change()
        returns_df[asset_id] = asset_returns

    return returns_df


def backtest(market_data, rollovers, start_date, end_date):
    """
    Perform backtest using market data and rollovers.
    """

    signals_df = rollovers_to_signals(rollovers, start_date, end_date)
    positions_df = generate_positions(signals_df)
    returns_df = calculate_returns(positions_df, market_data)

    return signals_df, positions_df, returns_df


# Prepare market data and rollovers
stock_prices_df = pd.read_csv(f"{output_path}/XYZ_stock.csv", parse_dates=["date"])
# stock_prices_df.info()
rollovers = [
    Option(
        id="XYZ230131C00100000",
        strike=100.0,
        expiry_date=datetime(2023, 1, 31, 0, 0),
        option_type="call",
    ),
    Option(
        id="XYZ230302C00105000",
        strike=105.0,
        expiry_date=datetime(2023, 3, 2, 0, 0),
        option_type="call",
    ),
    Option(
        id="XYZ230401C00105000",
        strike=105.0,
        expiry_date=datetime(2023, 4, 1, 0, 0),
        option_type="call",
    ),
]

# Risk-free rate and volatility
r = 0.05
sigma = 0.2

market_data = prepare_market_data(
    rollovers,
    stock_prices_df,
    r,
    sigma,
)
market_data["XYZ"] = stock_prices_df
market_data["XYZ230131C00100000"]


start_date = market_data["XYZ"]["date"].min()
end_date = market_data["XYZ"]["date"].max()

rollovers_to_signals(rollovers, start_date, end_date)

# Perform backtest
# start_date = market_data["XYZ"]["date"].min()
# end_date = market_data["XYZ"]["date"].max()
# signals_df, positions_df, returns_df = backtest(
#     stock_prices_df, rollovers, start_date, end_date
# )

# Analyze results
# print("Signals:")
# print(signals_df)
# print("\nPositions:")
# print(positions_df)
# print("\nReturns:")
# print(returns_df)

  d1 = (np.log(S / K) + (r + 0.5 * sigma**2) * T) / (sigma * np.sqrt(T))
  d1 = (np.log(S / K) + (r + 0.5 * sigma**2) * T) / (sigma * np.sqrt(T))
  d2 = d1 - sigma * np.sqrt(T)
  d1 = (np.log(S / K) + (r + 0.5 * sigma**2) * T) / (sigma * np.sqrt(T))
  d1 = (np.log(S / K) + (r + 0.5 * sigma**2) * T) / (sigma * np.sqrt(T))
  d2 = d1 - sigma * np.sqrt(T)
  d1 = (np.log(S / K) + (r + 0.5 * sigma**2) * T) / (sigma * np.sqrt(T))
  d1 = (np.log(S / K) + (r + 0.5 * sigma**2) * T) / (sigma * np.sqrt(T))
  d2 = d1 - sigma * np.sqrt(T)


TypeError: 'Option' object is not subscriptable