In [2]:
import re
import unicodedata

import pandas as pd

# load the cleaned stats table that the rest of the script works on
input_path = "../data/processed/wnba_2025_cleaned.csv"
df = pd.read_csv(input_path)

# build the key stored in the NAME_KEY column
def normalize_name(name):
    """Return an uppercase, A-Z-only key for *name*.

    The value is stringified, accented letters are folded to their base
    letter (so an accented and an unaccented spelling of the same name give
    the same key), the text is uppercased, and every character outside A-Z
    (spaces, apostrophes, hyphens, digits, ...) is dropped.

    Args:
        name: Player name. Non-string values are passed through ``str()``
            first, so e.g. ``None`` yields ``"NONE"``.

    Returns:
        The normalized key, e.g. ``"A'ja Wilson"`` -> ``"AJAWILSON"``.
    """
    # NFKD splits an accented letter into base letter + combining mark.
    # The mark is then removed by the A-Z filter below, which keeps the base
    # letter instead of deleting the whole character.
    name = unicodedata.normalize("NFKD", str(name)).upper()
    name = re.sub(r"[^A-Z]", "", name)
    return name

# weighted player score: points, steals and assists add, turnovers subtract
def calc_score(pts, stl, ast, tov):
    """Combine four stats into a single weighted score.

    Weights: points 0.55, steals 0.22, assists 0.05, turnovers -0.18.
    """
    scoring_part = pts * 0.55
    steals_part = stl * 0.22
    assists_part = ast * 0.05
    turnover_penalty = tov * 0.18
    # summed left to right, same order as the single-expression form
    return scoring_part + steals_part + assists_part - turnover_penalty

# make sure stats columns are numeric
# (values that cannot be parsed become NaN and are then counted as 0)
for col in ["PTS", "STL", "AST", "TOV"]:
    df[col] = pd.to_numeric(df[col], errors="coerce").fillna(0)

# add a SCORE column
# calc_score is plain arithmetic, so it is applied to whole columns at once
# instead of once per row; this also yields a proper (empty) column when
# df has no rows
df["SCORE"] = calc_score(df["PTS"], df["STL"], df["AST"], df["TOV"])

# add a NAME_KEY column
df["NAME_KEY"] = df["PLAYER"].apply(normalize_name)

# output csv file
output_path = "../data/processed/wnba_2025_player_scores.csv"
df.to_csv(output_path, index=False)

print(df.head())


             PLAYER   PTS  STL  AST  TOV   SCORE         NAME_KEY
0       A'ja Wilson  23.4  1.6  3.1  2.2  12.981        AJAWILSON
1  Napheesa Collier  22.9  1.6  3.2  2.1  12.729  NAPHEESACOLLIER
2   Kelsey Mitchell  20.2  0.9  3.4  1.8  11.154   KELSEYMITCHELL
3       Kelsey Plum  19.5  1.2  5.7  3.0  10.734       KELSEYPLUM
4    Paige Bueckers  19.2  1.6  5.4  2.0  10.822    PAIGEBUECKERS
