In [1]:
import pandas as pd
import re
from typing import Union

# read data (the path is relative to the current working directory)
input_path: str = "../data/processed/wnba_2025_cleaned.csv"
df: pd.DataFrame = pd.read_csv(input_path)

# helper functions: name normalization (used for NAME_KEY) and score calculation
def normalize_name(name: str) -> str:
    """
    Build a matching key from a player name.

    The value is converted to a string, accented letters are folded to
    their base letters (for example "ü" becomes "U"), the text is
    uppercased, and every character other than A-Z is removed.

    Args:
        name: Player name. Non-string values are converted with ``str()``.

    Returns:
        A key made only of the letters A-Z (empty if no letters remain).
    """
    # Imported here so the function is self-contained and needs no new
    # file-level import.
    import unicodedata

    # NFKD splits an accented letter into its base letter plus a combining
    # mark. The A-Z filter below then drops only the mark, instead of
    # deleting the whole letter (which made "Salaün" and "Salaun" produce
    # different keys).
    decomposed = unicodedata.normalize("NFKD", str(name))
    return re.sub(r"[^A-Z]", "", decomposed.upper())


def calc_score(
    PTS: Union[int, float],
    STL: Union[int, float],
    AST: Union[int, float],
    TOV: Union[int, float]
) -> float:
    """
    Return a single weighted performance score for a player.

    Points, steals and assists add to the score with weights 0.61, 0.11
    and 0.07 respectively; turnovers subtract from it with weight 0.21.
    """
    points_part = PTS * 0.61
    steals_part = STL * 0.11
    assists_part = AST * 0.07
    turnovers_part = TOV * 0.21
    # Terms are combined left to right: positive contributions first,
    # then the turnover penalty is subtracted.
    return points_part + steals_part + assists_part - turnovers_part


# make sure stats columns are numeric
# (values that cannot be parsed become NaN and are then replaced with 0)
for col in ["PTS", "STL", "AST", "TOV"]:
    df[col] = pd.to_numeric(df[col], errors="coerce").fillna(0)

# add a SCORE column
# calc_score uses only arithmetic operators, so it can be applied to whole
# columns at once even though its type hints mention scalars. This avoids a
# Python-level call per row, and it also works on an empty frame, where a
# row-wise df.apply(..., axis=1) returns a DataFrame instead of a Series.
df["SCORE"] = calc_score(df["PTS"], df["STL"], df["AST"], df["TOV"])

# add a NAME_KEY column
df["NAME_KEY"] = df["PLAYER"].apply(normalize_name)

# output csv file (index=False keeps the row index out of the file)
output_path: str = "../data/processed/wnba_2025_player_scores.csv"
df.to_csv(output_path, index=False)

print(df.head())


             PLAYER   PTS  STL  AST  TOV   SCORE         NAME_KEY
0       A'ja Wilson  23.4  1.6  3.1  2.2  14.205        AJAWILSON
1  Napheesa Collier  22.9  1.6  3.2  2.1  13.928  NAPHEESACOLLIER
2   Kelsey Mitchell  20.2  0.9  3.4  1.8  12.281   KELSEYMITCHELL
3       Kelsey Plum  19.5  1.2  5.7  3.0  11.796       KELSEYPLUM
4    Paige Bueckers  19.2  1.6  5.4  2.0  11.846    PAIGEBUECKERS
