In [None]:
# Account name -> integer account ID.
accounts_id_map = {
    "NETS012_OLD": 4766,
    "NECS008OP_OLD": 6973,
    "NEL004_OLD": 4003,
    "NEO006MS_OLD": 5190,
    "NEO004OP_OLD": 3976,
}

# Account names, in the mapping's insertion order (dicts preserve it).
accounts_list = list(accounts_id_map)

In [None]:
# Resolve one account name to its ID and stop the notebook if it is unknown.
account = "NETS012_OLD"
account_id = accounts_id_map.get(account, None)
if account_id is None:
    raise AssertionError(f"Account ID for {account} not found.")
print(f"Account ID for {account} is {account_id}.")

In [None]:
# Account analysed by the cells below.
# NOTE(review): these two values repeat the "NECS008OP_OLD" entry of
# accounts_id_map by hand (as a string here) - keep them in sync when switching
# accounts.
ACCOUNT_NAME = "NECS008OP_OLD"
ACCOUNT_ID = "6973"
# Folder the fill CSV exports are read from, keyed by account ID.
PATH_DIR = "../data/raw/fills/{}/".format(ACCOUNT_ID)

In [None]:
import glob
import os
import re
from io import StringIO

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns


def read_csv_files_in_directory(directory):
    """Read every ``*.csv`` file directly inside ``directory`` into one DataFrame.

    Files are read in sorted path order so that the row order of the combined
    frame is the same on every run and every filesystem. The number of files
    found is printed as a quick sanity check.

    Args:
        directory: Path of the folder to scan. Sub-folders are not searched.

    Returns:
        A pandas DataFrame with the rows of all files stacked on top of each
        other and a fresh ``0..n-1`` index.

    Raises:
        ValueError: If the folder contains no ``.csv`` files.
    """
    path_pattern = os.path.join(directory, "*.csv")

    # glob gives no ordering guarantee, so sort for reproducible row order.
    csv_files = sorted(glob.glob(path_pattern))
    print(f"Found {len(csv_files)} files.")

    # pd.concat fails with an opaque "No objects to concatenate" on an empty
    # list; raise the same exception type with a message that names the folder.
    if not csv_files:
        raise ValueError(f"No CSV files found in directory: {directory!r}")

    frames = [pd.read_csv(csv_path) for csv_path in csv_files]
    return pd.concat(frames, ignore_index=True)


# Load every fill export for the configured account into a single frame.
fills = read_csv_files_in_directory(PATH_DIR)

# Preview a random selection of rows. The sample size is capped at the row
# count because DataFrame.sample raises ValueError when asked for more rows
# than exist (sampling without replacement), and the fixed seed keeps the
# preview identical across Restart & Run All.
fills.sample(n=min(20, len(fills)), random_state=0)

In [None]:
# Remove these eight export columns from the fills frame before further processing.
fills = fills.drop(
    [
        "Route",
        "Liq",
        "Fill Id",
        "Currency",
        "ISIN",
        "CUSIP",
        "Status",
        "PropReports Id",
    ],
    axis="columns",
)

In [None]:
# Parse the timestamp strings so the fills can be ordered and later grouped by date.
fills["Date/Time"] = pd.to_datetime(fills["Date/Time"])
# sort_values returns a new frame, so it must be assigned back - calling it
# without assignment leaves the frame unsorted. A stable sort keeps fills that
# share a timestamp in their original relative order, and the index is
# renumbered to follow the chronological order.
fills = fills.sort_values(by="Date/Time", kind="mergesort", ignore_index=True)

# Individual charge columns that are collapsed into one per-fill "fee" column.
fee_columns = [
    "Comm",
    "Ecn Fee",
    "SEC",
    "ORF",
    "CAT",
    "TAF",
    "FTT",
    "NSCC",
    "Acc",
    "Clr",
    "Misc",
]
# Row-wise total of all charges; sum() skips missing values by default.
fills["fee"] = fills[fee_columns].sum(axis=1)
# The individual charge columns are no longer needed once totalled.
fills = fills.drop(columns=fee_columns)
display(fills.head())

In [None]:
# Replace the export's column headers with snake_case names.
fills = fills.rename(
    columns={
        "Date/Time": "date_time",
        "Account": "account",
        "B/S": "trade_side",
        "Qty": "quantity",
        "Symbol": "symbol",
        "Price": "price",
        "Order Id": "order_id",
    }
)
fills.head()

In [None]:
# Calendar date of each fill, used as a grouping key.
fills["date"] = fills["date_time"].dt.date


def _quantity_weighted_price(prices):
    """Average the prices of one group, weighting each fill by its quantity.

    The weights are looked up in the global ``fills`` frame through the
    group's index labels, so those labels must still exist in ``fills``.
    """
    weights = fills.loc[prices.index, "quantity"]
    return np.average(prices, weights=weights)


# Per-column reductions applied to every (date, symbol, trade_side) group.
agg_map_day = {
    "quantity": "sum",
    "price": _quantity_weighted_price,  # becomes "average_price" below
    "fee": "sum",
}

# One row per date / symbol / side: total quantity, weighted price, total fee.
df_daily = (
    fills.groupby(["date", "symbol", "trade_side"])
    .agg(agg_map_day)
    .reset_index()
    .rename(columns={"price": "average_price"})
)

In [None]:
# Signed cash flow of each fill: rows whose trade_side is "B" count as money
# out (negative), every other side as money in (positive).
fills["value"] = (
    np.where(fills["trade_side"] == "B", -1, 1) * fills["quantity"] * fills["price"]
)

# Per date and symbol: number of fills, total quantity, summed signed cash
# flow, total fees, and the cash flow net of fees.
# NOTE(review): "gross" is a realised P/L only when the position in that symbol
# is flat by the end of the day - confirm for this account.
group = (
    fills.groupby(["date", "symbol"])
    .agg(
        fills=pd.NamedAgg(column="quantity", aggfunc="count"),
        qty=pd.NamedAgg(column="quantity", aggfunc="sum"),
        gross=pd.NamedAgg(column="value", aggfunc="sum"),
        comm=pd.NamedAgg(column="fee", aggfunc="sum"),
    )
    .reset_index()
    .assign(net=lambda summary: summary["gross"] - summary["comm"])
)

display(group.tail(15))