In [None]:
import os

import numpy as np
import pandas as pd
from dotenv import load_dotenv

# Load environment variables from the api-helpers .env file.
# The file location is machine-specific, so it can be overridden with the
# API_HELPERS_DOTENV_PATH environment variable; when that variable is unset the
# original hard-coded location is used, so existing behaviour is unchanged.
load_dotenv(
    dotenv_path=os.environ.get(
        "API_HELPERS_DOTENV_PATH",
        "/Users/tomwattley/App/racing-api-project/racing-api-project/libraries/api-helpers/src/api_helpers/.env",
    )
)

In [None]:
from trader.fetch_requests import *
from trader.prepare_requests import *
from api_helpers.clients import get_betfair_client, get_s3_client
from api_helpers.helpers.data_utils import print_dataframe_for_testing
from api_helpers.helpers.file_utils import S3FilePaths
from api_helpers.helpers.data_utils import deduplicate_dataframe, combine_dataframes

# Instantiate the S3 path helper (presumably builds S3 keys — confirm; it is not
# referenced again in the cells visible here).
paths = S3FilePaths()

# Create the Betfair and S3 clients through the api_helpers factory functions.
betfair_client = get_betfair_client()
s3_client = get_s3_client()
# Fetch the betting data using both clients, then derive the request data from it.
# NOTE(review): fetch_betting_data / prepare_request_data are not imported by name;
# presumably they arrive via the star-imports from trader.fetch_requests and
# trader.prepare_requests — confirm.
betting_data = fetch_betting_data(s3_client, betfair_client)
requests_data = prepare_request_data(betting_data)

In [None]:
betting_data.betting_data.selections

In [None]:
requests_data

In [None]:
p = betfair_client.get_past_orders_by_market_id(["1.244208713", "1.244208667"])
p

In [None]:
q = betfair_client.get_past_orders_by_date_range("2023-05-20", "2025-05-28")

In [None]:
pd.merge(p, requests_data, how="left", on=["selection_id", "market_id"])

In [None]:
p[
    [
        "bet_outcome",
        "customer_strategy_ref",
        "market_id",
        "order_type",
        "placed_date",
        "price_matched",
        "selection_id",
        "settled_date",
        "side",
        "size_settled",
    ]
].head(2)

In [None]:
print_dataframe_for_testing(requests_data)

In [None]:
tf = pd.DataFrame(
    {
        "id": [
            "a420210f-f017-4e5a-a7d6-25efe4c2ce18",
            "a420210f-f017-4e5a-a7d6-25efe4c2ce18",
        ],
        "timestamp": [
            pd.Timestamp("2025-05-27 12:19:01.350000+00:00"),
            pd.Timestamp("2025-05-27 12:19:01.350000+00:00"),
        ],
        "race_id": [893932, 893932],
        "horse_id": [169155, 169155],
        "horse_name": ["Devasboy", "Devasboy"],
        "selection_type": ["BACK", "LAY"],
        "market_type": ["WIN", "WIN"],
        "market_id": ["1.244208713", "1.244208713"],
        "selection_id": [40379042, 40379042],
        "requested_odds": [5.2, 5.2],
        "race_time": [
            pd.Timestamp("2025-05-27 17:10:00+01:00"),
            pd.Timestamp("2025-05-27 17:10:00+01:00"),
        ],
        "minutes_to_race": [53, 53],
        "back_price_1": [5.4, 5.4],
        "back_price_1_depth": [23.0, 23.0],
        "back_price_2": [5.3, 5.3],
        "back_price_2_depth": [46.0, 46.0],
        "lay_price_1": [5.5, 5.5],
        "lay_price_1_depth": [43.0, 43.0],
        "lay_price_2": [5.6, 5.6],
        "lay_price_2_depth": [66.0, 66.0],
        "eight_to_seven_runners": [False, False],
        "short_price_removed_runners": [False, False],
        "average_price_matched": [5.2, 5.4],
        "size_matched": [5.0, 4.81],
        "customer_strategy_ref": ["mvp", "cash_out"],
    }
)

In [None]:
def identify_cashed_out_bets(df: pd.DataFrame) -> pd.DataFrame:
    """Flag every bet row that belongs to a cashed-out runner.

    Rows are grouped by (race_id, horse_id, market_id, selection_id). A group is
    treated as cashed out when it contains at least one "BACK" row, at least one
    "LAY" row, and at least one row whose customer_strategy_ref is "cash_out".

    Args:
        df: Bets frame. Must contain the four grouping columns plus
            "selection_type" and "customer_strategy_ref". No other column is
            read, so extra columns are optional and passed through untouched.

    Returns:
        A new frame holding the rows of ``df`` in their original order with a
        fresh 0..n-1 index and an added nullable-boolean "is_cashed_out" column:
        True for every row of a cashed-out group, False otherwise.
    """
    group_cols = ["race_id", "horse_id", "market_id", "selection_id"]

    if len(df) == 0:
        # Nothing to group: hand back the same columns plus an empty flag column.
        return df.reset_index(drop=True).assign(
            is_cashed_out=pd.array([], dtype="boolean")
        )

    # One boolean flag per row for each ingredient of a cash-out; built on a
    # key-only frame so the flag names cannot collide with columns of ``df``.
    flags = df[group_cols].assign(
        has_back=df["selection_type"].eq("BACK"),
        has_lay=df["selection_type"].eq("LAY"),
        has_cash_out=df["customer_strategy_ref"].eq("cash_out"),
    )

    # A group qualifies only if every ingredient occurs somewhere within it.
    # (groupby drops groups with a missing key, so such rows end up False.)
    group_is_cashed_out = flags.groupby(group_cols).any().all(axis=1)
    cashed_out_groups = group_is_cashed_out[group_is_cashed_out].index.to_frame(
        index=False
    )

    # Left-merge keeps every input row; rows without a matching cashed-out group
    # come back with a missing flag, which is then filled with False.
    flagged = df.merge(
        cashed_out_groups.assign(is_cashed_out=True), on=group_cols, how="left"
    )
    flagged["is_cashed_out"] = flagged["is_cashed_out"].astype("boolean").fillna(False)

    return flagged


def split_cashed_out_bets(df: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Split bets into cashed-out rows and the remaining rows.

    Args:
        df: Bets frame accepted by ``identify_cashed_out_bets``.

    Returns:
        A ``(cashed_out, not_cashed_out)`` pair of frames. Both carry the columns
        of the flagged frame without the helper "is_cashed_out" column.
    """
    flagged = identify_cashed_out_bets(df)
    # The flag column is filled with False for unmatched rows, so it holds no
    # missing values and converts cleanly to a plain boolean mask.
    is_cashed_out = flagged["is_cashed_out"].astype(bool)

    cashed_out = flagged[is_cashed_out].drop(columns=["is_cashed_out"])
    not_cashed_out = flagged[~is_cashed_out].drop(columns=["is_cashed_out"])
    return cashed_out, not_cashed_out


cf, nf = split_cashed_out_bets(tf)
cf

In [None]:
nf

In [None]:
# Separate the cashed-out bets by side, then pair each BACK row with the LAY
# row(s) on the same race / horse / market / selection. Columns present on both
# sides are disambiguated with the "_back" / "_lay" suffixes.
cashout_back = cf.loc[cf["selection_type"].eq("BACK")]
cashout_lay = cf.loc[cf["selection_type"].eq("LAY")]
merged_cashout = cashout_back.merge(
    cashout_lay,
    on=["race_id", "horse_id", "market_id", "selection_id"],
    suffixes=("_back", "_lay"),
)

In [None]:
# One summary row per cashed-out runner: identifying columns, a constant
# is_cashed_out flag, and the absolute difference between the lay and back
# matched stakes as "cash_out_liability".
(
    merged_cashout.rename(columns={"horse_name_back": "horse_name"})
    .assign(
        is_cashed_out=True,
        cash_out_liability=lambda paired: (
            paired["size_matched_lay"] - paired["size_matched_back"]
        ).abs(),
    )[
        [
            "race_id",
            "horse_id",
            "horse_name",
            "market_id",
            "selection_id",
            "is_cashed_out",
            "cash_out_liability",
        ]
    ]
    .drop_duplicates()
)

In [None]:
p = betfair_client.get_current_orders()
p["customer_strategy_ref"]

In [None]:
5.4 * 4.81

In [None]:
betting_data.cashed_out_bets

In [None]:
updated_requests_data = deduplicate_dataframe(
    combine_dataframes(
        requests_data,
        betting_data.current_requests_data,
    ),
    betting_data.current_requests_data,
    unique_columns=["id", "race_id", "horse_id", "selection_type", "market_id"],
    timestamp_column="timestamp",
)

updated_requests_data

In [None]:
print_dataframe_for_testing(updated_requests_data)

In [None]:
rd = pd.DataFrame(
    {
        "id": ["a420210f-f017-4e5a-a7d6-25efe4c2ce18"],
        "timestamp": [
            pd.Timestamp("2025-05-27 12:19:01.350000+00:00"),
        ],
        "race_id": [893932],
        "horse_id": [169155],
        "horse_name": ["Devasboy"],
        "selection_type": ["BACK"],
        "market_type": ["WIN"],
        "market_id": ["1.244208713"],
        "selection_id": [40379042],
        "requested_odds": [5.2],
        "race_time": [
            pd.Timestamp("2025-05-27 17:10:00+01:00"),
        ],
        "minutes_to_race": [122],
        "back_price_1": [5.3],
        "back_price_1_depth": [64.0],
        "back_price_2": [5.2],
        "back_price_2_depth": [46.0],
        "lay_price_1": [5.4],
        "lay_price_1_depth": [22.0],
        "lay_price_2": [5.5],
        "lay_price_2_depth": [43.0],
        "eight_to_seven_runners": [False],
        "short_price_removed_runners": [False],
        "average_price_matched": [5.2],
        "size_matched": [5.0],
    }
)

In [None]:
updated_requests_data = deduplicate_dataframe(
    combine_dataframes(
        requests_data,
        betting_data.current_requests_data,
    ),
    betting_data.current_requests_data,
    unique_columns=["id", "race_id", "horse_id", "selection_type", "market_id"],
    timestamp_column="timestamp",
)

In [None]:
updated_requests_data

In [None]:
s3_client.store_data(
    requests_data, "today/2025_05_27/trader_data/current_requests_data.parquet"
)

In [None]:
# Overwrite the entire "status_win" column of the market data: the row(s) whose
# horse_name is "Dashing Donkey" become "REMOVED" and every other row becomes
# "ACTIVE", regardless of whatever status the column held before.
# NOTE(review): this assigns into betting_data.betfair_market_data directly, so
# later cells see the edited statuses — presumably a manual scenario for a
# removed runner; confirm before relying on this frame elsewhere.
betting_data.betfair_market_data["status_win"] = np.where(
    betting_data.betfair_market_data["horse_name"] == "Dashing Donkey",
    "REMOVED",
    "ACTIVE",
)

In [None]:
betting_data.betfair_market_data

In [None]:
s3_client.fetch_data("today/2025_05_27/trader_data/selections.parquet")

In [None]:
pd.DataFrame(
    {
        "bet_id": ["389581590544", "389581654562", "389582010825", "389582024559"],
        "market_id": ["1.243895856", "1.243895856", "1.243895856", "1.243895856"],
        "selection_id": [71662002, 71662002, 71662002, 71662002],
        "selection_type": ["BACK", "BACK", "BACK", "BACK"],
        "execution_status": [
            "EXECUTION_COMPLETE",
            "EXECUTABLE",
            "EXECUTION_COMPLETE",
            "EXECUTABLE",
        ],
        "placed_date": [
            pd.Timestamp("2025-05-19T19:09:42.000Z"),
            pd.Timestamp("2025-05-19T19:10:18.000Z"),
            pd.Timestamp("2025-05-19T19:13:32.000Z"),
            pd.Timestamp("2025-05-19T19:13:40.000Z"),
        ],
        "matched_date": [
            pd.Timestamp("2025-05-19T19:09:42.000Z"),
            pd.Timestamp("nan"),
            pd.Timestamp("2025-05-19T19:13:32.000Z"),
            pd.Timestamp("nan"),
        ],
        "average_price_matched": [4.2, 0.0, 4.2, 0.0],
        "customer_strategy_ref": ["trader", "UI", "UI", "UI"],
        "size_matched": [5.0, 0.0, 5.0, 0.0],
        "size_remaining": [0.0, 5.0, 0.0, 6.0],
        "size_lapsed": [0.0, 0.0, 0.0, 0.0],
        "size_cancelled": [0.0, 0.0, 0.0, 0.0],
        "size_voided": [0.0, 0.0, 0.0, 0.0],
        "price": [4.1, 5.0, 4.2, 5.0],
        "size": [5.0, 5.0, 5.0, 6.0],
    }
)

In [None]:
sf = pd.DataFrame(
    {
        "race_id": [1, 2],
        "race_time": [
            pd.Timestamp("2020-01-01 12:00:00"),
            pd.Timestamp("2020-01-01 13:00:00"),
        ],
        "race_date": [
            pd.Timestamp("2020-01-01"),
            pd.Timestamp("2020-01-01"),
        ],
        "horse_id": [1, 2],
        "horse_name": ["Horse A", "Horse B"],
        "selection_type": ["BACK", "LAY"],
        "market_type": [
            "WIN",
            "WIN",
        ],
        "market_id": ["1", "2"],
        "selection_id": [1, 2],
        "requested_odds": [3, 4],
    }
)

In [None]:
co = pd.DataFrame(
    {
        "bet_id": [1, 2, 3, 4],
        "market_id": ["1", "1", "2", "1"],
        "selection_id": [1, 1, 2, 1],
        "selection_type": ["BACK", "BACK", "LAY", "BACK"],
        "execution_status": [
            "EXECUTION_COMPLETE",
            "EXECUTABLE",
            "EXECUTABLE",
            "EXECUTION_COMPLETE",
        ],
        "placed_date": [pd.Timestamp("2020-01-01")] * 4,
        "matched_date": [pd.Timestamp("2020-01-01")] * 4,
        "average_price_matched": [3, np.nan, 2, 3.5],
        "size_matched": [5, 0, 2, 10],
        "size_remaining": [0, 5, 3, 0],
        "customer_strategy_ref": ["mvp"] * 4,
    }
)

In [None]:
df = pd.merge(
    sf,
    co,
    on=["market_id", "selection_id", "selection_type"],
    how="left",
)

In [None]:
df

In [None]:
# Partition the merged selections/orders frame on execution status and tag each
# half. Rows whose status is anything other than "EXECUTION_COMPLETE" (including
# a missing status) land in `unmatched`.
matched = df.loc[df["execution_status"].eq("EXECUTION_COMPLETE")].assign(
    matched_status="matched"
)
unmatched = df.loc[df["execution_status"].ne("EXECUTION_COMPLETE")].assign(
    matched_status="unmatched"
)

In [None]:
# Collapse the matched orders to one row per selection:
#   payoff      = stake * matched price for each order
#   total_stake = summed stake within the group
#   total_odds  = summed payoff within the group
#   ave_odds    = total_odds / total_stake, rounded to 2 dp (stake-weighted price)
# Only the first row of each group is kept after the totals are attached.
group_cols = ["race_id", "market_id", "selection_type", "selection_id"]
matched = (
    matched.assign(
        payoff=lambda orders: orders["size_matched"] * orders["average_price_matched"]
    )
    .assign(
        total_stake=lambda orders: orders.groupby(group_cols)["size_matched"].transform(
            "sum"
        )
    )
    .assign(
        total_odds=lambda orders: orders.groupby(group_cols)["payoff"].transform("sum")
    )
    .assign(
        ave_odds=lambda orders: (orders["total_odds"] / orders["total_stake"]).round(2)
    )
    .drop_duplicates(subset=group_cols)
)
matched

In [None]:
matched = matched[
    [
        "race_id",
        "race_time",
        "race_date",
        "horse_id",
        "horse_name",
        "selection_type",
        "market_type",
        "market_id",
        "selection_id",
        "requested_odds",
        "placed_date",
        "matched_date",
        "matched_status",
        "size_remaining",
        "total_stake",
        "ave_odds",
    ]
].rename(columns={"total_stake": "size_matched", "ave_odds": "average_price_matched"})

In [None]:
unmatched = unmatched[matched.columns]

In [None]:
unmatched

In [None]:
data = pd.concat([matched, unmatched])
data

In [None]:
# Build the S3 keys for today's files. The date folder comes from
# get_uk_time_now() (presumably the current UK time, per its name — confirm)
# formatted as YYYY_MM_DD.
now_timestamp = get_uk_time_now()

folder = f"today/{now_timestamp.strftime('%Y_%m_%d')}"

# NOTE(review): other cells in this notebook read keys under
# "today/<date>/trader_data/..."; these keys have no "trader_data" segment —
# confirm which layout is current.
selections_file_path = f"{folder}/selections.parquet"
fully_matched_bets_file_path = f"{folder}/fully_matched_bets.parquet"
# NOTE(review): "cashed_out_bets_bets" repeats the "_bets" suffix, unlike the
# sibling file names; this may be a typo — verify against the key the writer
# actually uses before changing it.
cashed_out_bets_file_path = f"{folder}/cashed_out_bets_bets.parquet"
invalidated_bets_file_path = f"{folder}/invalidated_bets.parquet"
market_state_file_path = f"{folder}/market_state.parquet"

In [None]:
fully_matched_bets = s3_client.fetch_data(fully_matched_bets_file_path)

In [None]:
fully_matched_bets

In [None]:
selections = s3_client.fetch_data(selections_file_path)
selections

In [None]:
from pathlib import PosixPath

In [None]:
a = PosixPath(
    "/Users/tomwattley/Code/python/racing-api-project/racing-api-project/libraries/api-helpers/src/api_helpers/helpers"
)

In [None]:
# parents[4] is the fifth ancestor of `a` (same as chaining .parent five times);
# from there descend into apps/racing-etl to reach that app's .env file.
path = a.parents[4].joinpath("apps", "racing-etl", ".env")

In [None]:
path

In [None]:
"/Users/tomwattley/Code/python/racing-api-project/racing-api-project/apps/racing-etl/.env"

"/Users/tomwattley/Code/python/racing-api-project/racing-api-project/apps/racing_etl/.env"