In [None]:
# Data cleaning and momentum feature engineering for the Wimbledon match data
import pandas as pd

In [None]:
# Load the raw match data. The path is relative, so the CSV must sit in the
# notebook's working directory.
full_df = pd.read_csv("Wimbledon_featured_matches.csv")

In [None]:
def get_row_momentum(row):
    """Score the momentum swing of a single point for both players.

    Every scoring rule is zero-sum: whatever one player gains, the other
    loses, so ``p1_momentum == -p2_momentum`` for every point.

    Parameters
    ----------
    row : pandas.DataFrame
        One point, as a one-row frame (the shape ``get_game_momentum``
        passes in).  Must provide ``server``, ``p1_points_won``,
        ``p2_points_won`` and, for each player N in {1, 2}, ``pN_ace``,
        ``pN_winner``, ``pN_double_fault``, ``pN_break_pt_won``,
        ``pN_break_pt_missed`` and ``pN_unf_err``.

    Returns
    -------
    pandas.DataFrame
        A copy of ``row`` with six extra columns appended, in this order:
        ``serve_player1_broken``, ``serve_player2_broken``,
        ``p1_point_diff``, ``p2_point_diff``, ``p1_momentum``,
        ``p2_momentum``.  The input frame itself is left unmodified.

    Raises
    ------
    KeyError
        If any of the required columns is missing.
    """
    # Work on a copy so the caller's frame is never modified in place.
    row = row.copy()

    p1_point_diff = row["p1_points_won"] - row["p2_points_won"]

    # NOTE(review): "serve_playerN_broken" is set on every point where the
    # *other* player serves without hitting an ace, and it then costs player N
    # one momentum point below.  That is not a break of serve in the usual
    # sense -- confirm this is the intended definition.
    row["serve_player1_broken"] = (
        1 if (row["server"] == 2).all() and (row["p2_ace"] != 1).all() else 0
    )
    row["serve_player2_broken"] = (
        1 if (row["server"] == 1).all() and (row["p1_ace"] != 1).all() else 0
    )
    row["p1_point_diff"] = p1_point_diff
    row["p2_point_diff"] = -p1_point_diff

    # Column suffixes of events worth +1 / -1 to the player they belong to.
    gain_events = ("ace", "winner", "break_pt_won")
    loss_events = ("double_fault", "break_pt_missed", "unf_err")

    momentum = {"1": 0, "2": 0}
    for player, opponent in (("1", "2"), ("2", "1")):
        swing = sum(1 for event in gain_events if row[f"p{player}_{event}"].any())
        swing -= sum(1 for event in loss_events if row[f"p{player}_{event}"].any())

        # NOTE(review): a winner hit while already leading by more than two
        # points is penalised, cancelling the winner bonus above -- confirm
        # the sign is intended.
        if (row[f"p{player}_point_diff"] > 2).any() and row[f"p{player}_winner"].any():
            swing -= 1
        if row[f"serve_player{player}_broken"].any():
            swing -= 1

        momentum[player] += swing
        momentum[opponent] -= swing

    row["p1_momentum"] = momentum["1"]
    row["p2_momentum"] = momentum["2"]
    return row

In [None]:
# Columns placed first in the momentum table built by get_game_momentum.
# Any other column present in the input rows is kept and appended after these.
columns = [
    "match_id",
    "player1",
    "player2",
    "elapsed_time",
    "set_no",
    "game_no",
    "point_no",
    "p1_sets",
    "p2_sets",
    "p1_games",
    "p2_games",
    "p1_points_won",
    "p2_points_won",
    "server",
    "speed_mph",
    "serve_width",
    "serve_depth",
    "return_depth",
    "serve_player1_broken",
    "serve_player2_broken",
    "p1_point_diff",
    "p2_point_diff",
    "p1_momentum",
    "p2_momentum",
]


def get_game_momentum(df):
    """Add per-point and running momentum columns to every row of ``df``.

    Each row is scored with ``get_row_momentum``; the per-point values are
    then accumulated into ``p1_cumulative_momentum`` and
    ``p2_cumulative_momentum``.  The running totals span *all* rows of
    ``df`` in their existing order -- they are not reset at game or set
    boundaries, so pass in exactly the slice you want accumulated.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to score, already in the order they should be accumulated.

    Returns
    -------
    pandas.DataFrame
        One row per input row, carrying the input's index labels.  The
        columns named in the module-level ``columns`` list come first
        (filled with NaN if absent from the rows), followed by every other
        column.  An empty input yields an empty frame with just ``columns``.
    """
    scored_points = []
    p1_running = 0
    p2_running = 0
    for _, point in df.iterrows():
        # iterrows yields a Series; get_row_momentum works on a one-row frame.
        # The transposed frame is freshly built, so no defensive copy is needed.
        scored = get_row_momentum(pd.DataFrame(point).transpose())
        p1_running += scored["p1_momentum"].item()
        p2_running += scored["p2_momentum"].item()
        scored["p1_cumulative_momentum"] = p1_running
        scored["p2_cumulative_momentum"] = p2_running
        scored_points.append(scored)

    if not scored_points:
        return pd.DataFrame(columns=columns)

    # Concatenate once: growing a frame with concat inside the loop re-copies
    # all accumulated rows on every iteration (quadratic in the row count).
    result = pd.concat(scored_points)
    ordered = list(columns) + [name for name in result.columns if name not in columns]
    return result.reindex(columns=ordered)

In [None]:
# Score a single match as a smoke test before running on the full data set.
test_df = full_df[full_df["match_id"].eq("2023-wimbledon-1301")]
game_df = get_game_momentum(test_df)
game_df

In [None]:
# Leading output columns for the all-matches table.  This rebinds the
# module-level ``columns`` name, which get_game_momentum also reads at call
# time, so running this cell changes the column order that function produces.
columns = [
    "match_id",
    "player1",
    "player2",
    "elapsed_time",
    "set_no",
    "game_no",
    "point_no",
    "p1_sets",
    "p2_sets",
    "p1_games",
    "p2_games",
    "p1_points_won",
    "p2_points_won",
    "server",
    "speed_mph",
    "serve_width",
    "serve_depth",
    "return_depth",
    "serve_player1_broken",
    "serve_player2_broken",
    "p1_point_diff",
    "p2_point_diff",
    "p1_momentum",
    "p2_momentum",
    "p1_cumulative_momentum",
    "p2_cumulative_momentum",
]


def create_df(df):
    """Build the momentum table for every match in ``df``.

    The rows are split by ``match_id`` (in order of first appearance), each
    match is scored independently with ``get_game_momentum`` so cumulative
    momentum restarts at zero for every match, and the per-match tables are
    stacked.

    Parameters
    ----------
    df : pandas.DataFrame
        Data for one or more matches; must contain a ``match_id`` column.

    Returns
    -------
    pandas.DataFrame
        All scored rows, with the columns named in the module-level
        ``columns`` list first and any remaining columns after them.  If no
        rows are scored, an empty frame with just ``columns`` is returned.
    """
    match_frames = []
    for match_id in df["match_id"].unique():
        scored = get_game_momentum(df.loc[df["match_id"] == match_id].copy())
        # Skip empty results so they cannot influence the concatenated dtypes.
        if not scored.empty:
            match_frames.append(scored)

    if not match_frames:
        return pd.DataFrame(columns=columns)

    # Single concat instead of growing the frame inside the loop.
    combined = pd.concat(match_frames)
    ordered = list(columns) + [name for name in combined.columns if name not in columns]
    return combined.reindex(columns=ordered)

In [None]:
# new_full_df = create_df(full_df) # DO NOT RUN

In [None]:
# new_full_df.to_excel("output.xlsx", index=False)  # DO NOT RUN

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Pull the series to plot out of the single-match momentum table.
x = game_df["elapsed_time"]  # shared x-axis for every line
y1 = game_df["p1_cumulative_momentum"]
y2 = game_df["p2_cumulative_momentum"]
y3 = game_df["p1_games"]
y4 = game_df["p2_games"]
# The set counts are extracted as well, but the plotting cell only references
# them in commented-out lines.
y5 = game_df["p1_sets"]
y6 = game_df["p2_sets"]

In [None]:
# Cumulative momentum of each player against elapsed time.
plt.plot(x, y1, label="P1")
plt.plot(x, y2, label="P2")

# Game counts, multiplied by 5 -- presumably so the lines are readable on the
# same axis as the momentum curves (TODO confirm the choice of scale).
plt.plot(x, y3 * 5, linestyle="--", label="P1-game-points")
plt.plot(x, y4 * 5, linestyle="-.", label="P2-game-points")

# Set counts (y5 / y6, scaled by 10, dotted) are intentionally left off.

plt.title("Test Game momentum")
plt.xlabel("Time Elapsed")
plt.ylabel("Momentum")
plt.legend()

plt.show()