In [3]:
import pandas as pd

players = pd.read_csv("nba_advanced_stats_2001_2025_players.csv")
averages = pd.read_csv("nba_advanced_stats_2001_2025_league_averages.csv")

cols_to_normalize = ["TS%", "3PAr", "FTr", "ORB%", "DRB%", "TRB%",
                     "AST%", "STL%", "BLK%", "TOV%", "USG%"]

players = players.loc[:, ~players.columns.str.contains("^Unnamed|^Rk$")]
averages = averages.loc[:, ~averages.columns.str.contains("^Unnamed|^Rk$")]

players["Season"] = players["Season"].astype(str)
averages["Season"] = averages["Season"].astype(str)

merged = players.merge(averages[["Season"] + cols_to_normalize],
                       on="Season", how="left", suffixes=("", "_LEAGUE"))

for col in cols_to_normalize:
    league_col = f"{col}_LEAGUE"
    merged[col] = merged[col] / merged[league_col]

merged.drop(columns=[f"{col}_LEAGUE" for col in cols_to_normalize], inplace=True)

merged.to_csv("nba_advanced_stats_normalized_2001_2025.csv", index=False)

print("Normalized columns saved to nba_advanced_stats_normalized_2001_2025.csv")

✅ Normalized columns saved to nba_advanced_stats_normalized_2001_2025.csv


In [4]:
df = pd.read_csv("nba_advanced_stats_normalized_2001_2025.csv")

combined_rows = df[df["Team"].str.contains("TM", na=False)].copy()

combined_keys = set(zip(combined_rows["Player"], combined_rows["Season"]))

df_filtered = df[
    df.apply(lambda row: (row["Player"], row["Season"]) not in combined_keys or "TM" in str(row["Team"]), axis=1)
].copy()

df_filtered.to_csv("nba_advanced_stats_normalized_cleaned_multiteam.csv", index=False)

print("Saved file with only combined multi-team rows as nba_advanced_stats_normalized_cleaned_multiteam.csv")


✅ Saved file with only combined multi-team rows as nba_advanced_stats_normalized_cleaned_multiteam.csv


In [1]:
advanced = pd.read_csv("nba_advanced_stats_normalized_cleaned_multiteam.csv")
rookies = pd.read_csv("nba_rookie_names_and_seasons.csv")

def clean_player_name(name):
    return name.strip().replace("*", "")

rookies["Player"] = rookies["Player"].astype(str).apply(clean_player_name)
advanced["Player"] = advanced["Player"].astype(str).str.strip()

rookies["Season"] = rookies["Season"].astype(str).str.strip()
advanced["Season"] = advanced["Season"].astype(str).str.strip()

merged = pd.merge(rookies, advanced, on=["Player", "Season"], how="inner")

rookie_keys = set(zip(rookies["Player"], rookies["Season"]))
matched_keys = set(zip(merged["Player"], merged["Season"]))
unmatched_keys = rookie_keys - matched_keys

print("\n Rookies not found in advanced stats:")
for player, season in sorted(unmatched_keys):
    print(f"- {player} ({season})")

merged.to_csv("rookie_advanced_stats_normalized.csv", index=False)
print("\n Saved merged rookie stats to rookie_advanced_stats_normalized.csv")



 Rookies not found in advanced stats:

 Saved merged rookie stats to rookie_advanced_stats_normalized.csv


In [4]:
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
import re

rookie_stats = pd.read_csv("rookie_advanced_stats_normalized.csv")
combine = pd.read_csv("draftcombine_cleaned.csv")

def normalize_name(name):
    name = name.lower().strip()
    name = re.sub(r'\b(jr|sr|iii|ii|iv)\b\.?', '', name)
    name = re.sub(r'[^\w\s]', '', name) 
    name = re.sub(r'\s+', ' ', name) 
    return name.strip()

rookie_stats["Player_norm"] = rookie_stats["Player"].apply(normalize_name)
combine["PLAYER_norm"] = combine["PLAYER"].apply(normalize_name)

threshold = 75

match_dict = {}
closest_match_dict = {}

for player, norm_name in zip(combine["PLAYER"], combine["PLAYER_norm"]):
    result = process.extractOne(norm_name, rookie_stats["Player_norm"].tolist(), scorer=fuzz.token_sort_ratio)
    
    if result:
        match_norm, score = result
        closest_player = rookie_stats.loc[rookie_stats["Player_norm"] == match_norm, "Player"].values[0]
        closest_match_dict[player] = (closest_player, score)
        
        if score >= threshold:
            match_dict[player] = closest_player
            print(f"✅ Match: {player} → {closest_player} (score: {score})")
        else:
            match_dict[player] = None
            print(f"❌ No strong match: {player} → Closest: {closest_player} (score: {score})")
    else:
        match_dict[player] = None
        closest_match_dict[player] = ("No match found", 0)
        print(f"❌ No match found: {player}")

combine["Player"] = combine["PLAYER"].map(match_dict)

matched_combine = combine[combine["Player"].notna()].copy()

merged = pd.merge(rookie_stats, matched_combine, on="Player", how="left", suffixes=("", "_combine"))

merged.to_csv("rookie_stats_with_combine_fuzzy.csv", index=False)
print("Saved final merged file to rookie_stats_with_combine_fuzzy.csv")

unmatched = combine[combine["Player"].isna()]
if not unmatched.empty:
    print("\n Combine players that didn't match any rookie stats:")
    for name in unmatched["PLAYER"]:
        closest_name, closest_score = closest_match_dict.get(name, ("No match found", 0))
        print(f"- {name} → Closest: {closest_name} (score: {closest_score})")

✅ Match: Michael Ajayi → Ajay Mitchell (score: 85)
❌ No strong match: Melvin Ajinca → Closest: Melvin Frazier (score: 67)
✅ Match: Trey Alexander → Trey Alexander (score: 100)
✅ Match: Reece Beekman → Reece Beekman (score: 100)
✅ Match: Adem Bona → Adem Bona (score: 100)
❌ No strong match: Trevon Brazile → Closest: Tony Bradley (score: 69)
✅ Match: Jalen Bridges → Jalen Bridges (score: 100)
✅ Match: Matas Buzelis → Matas Buzelis (score: 100)
❌ No strong match: Carlton Carrington → Closest: Aaron Harrison (score: 69)
✅ Match: Devin Carter → Devin Carter (score: 100)
✅ Match: Stephon Castle → Stephon Castle (score: 100)
✅ Match: Ulrich Chomche → Ulrich Chomche (score: 100)
✅ Match: Cam Christie → Cam Christie (score: 100)
✅ Match: Donovan Clingan → Donovan Clingan (score: 100)
✅ Match: Isaiah Collier → Isaiah Collier (score: 100)
✅ Match: Tristan Da Silva → Tristan Da Silva (score: 100)
✅ Match: Pacome Dadiet → Pacome Dadiet (score: 100)
✅ Match: N'Faly Dante → N'Faly Dante (score: 100)


✅ Match: JD Davison → JD Davison (score: 100)
✅ Match: Moussa Diabate → Moussa DiabatÃ© (score: 96)
✅ Match: Tari Eason → Tari Eason (score: 100)
✅ Match: Keon Ellis → Keon Ellis (score: 100)
✅ Match: Michael Foster Jr. → Michael Foster Jr. (score: 100)
✅ Match: Collin Gillespie → Collin Gillespie (score: 100)
✅ Match: Ron Harper Jr. → Ron Harper Jr. (score: 100)
✅ Match: Nikola Jovic → Nikola JoviÄ‡ (score: 96)
✅ Match: Johnny Juzang → Johnny Juzang (score: 100)
✅ Match: Trevor Keels → Trevor Keels (score: 100)
✅ Match: Walker Kessler → Walker Kessler (score: 100)
✅ Match: Christian Koloko → Christian Koloko (score: 100)
✅ Match: Jake LaRavia → Jake LaRavia (score: 100)
❌ No strong match: Justin Lewis → Closest: Justin Williams (score: 74)
✅ Match: E.J. Liddell → E.J. Liddell (score: 100)
✅ Match: Bennedict Mathurin → Bennedict Mathurin (score: 100)
❌ No strong match: Matthew Mayer → Closest: Wesley Matthews (score: 64)
✅ Match: Bryce McGowens → Bryce McGowens (score: 100)
❌ No strong

✅ Match: Ignas Brazdeikis → Ignas Brazdeikis (score: 100)
✅ Match: O'Shae Brissett → Oshae Brissett (score: 100)
✅ Match: Moses Brown → Moses Brown (score: 100)
✅ Match: Brandon Clarke → Brandon Clarke (score: 100)
✅ Match: Nicolas Claxton → Nic Claxton (score: 85)
✅ Match: Tyler Cook → Tyler Cook (score: 100)
✅ Match: Jarrett Culver → Jarrett Culver (score: 100)
✅ Match: Terence Davis → Terence Davis (score: 100)
✅ Match: Luguentz Dort → Luguentz Dort (score: 100)
✅ Match: Carsen Edwards → Carsen Edwards (score: 100)
✅ Match: Tacko Fall → Tacko Fall (score: 100)
✅ Match: Bruno Fernando → Bruno Fernando (score: 100)
✅ Match: Daniel Gafford → Daniel Gafford (score: 100)
✅ Match: Kyle Guy → Kyle Guy (score: 100)
✅ Match: Jaylen Hands → Jaylen Adams (score: 83)
✅ Match: Jared Harper → Jared Harper (score: 100)
✅ Match: Jaxson Hayes → Jaxson Hayes (score: 100)
✅ Match: Dewan Hernandez → Dewan Hernandez (score: 100)
✅ Match: Tyler Herro → Tyler Herro (score: 100)
✅ Match: Jalen Hoard → Jayl

✅ Match: Frank Mason III → Frank Mason III (score: 100)
❌ No strong match: Kennedy Meeks → Closest: D.J. Kennedy (score: 61)
✅ Match: Donovan Mitchell → Donovan Mitchell (score: 100)
✅ Match: Monte Morris → Monte Morris (score: 100)
✅ Match: Johnathan Motley → Johnathan Motley (score: 100)
✅ Match: Svi Mykhailiuk → Svi Mykhailiuk (score: 100)
✅ Match: Semi Ojeleye → Semi Ojeleye (score: 100)
✅ Match: Cameron Oliver → Cameron Oliver (score: 100)
✅ Match: Justin Patton → Justin Patton (score: 100)
✅ Match: Alec Peters → Alec Peters (score: 100)
✅ Match: Ivan Rabb → Ivan Rabb (score: 100)
✅ Match: Davon Reed → Davon Reed (score: 100)
✅ Match: Devin Robinson → Devin Robinson (score: 100)
✅ Match: Kobi Simmons → Kobi Simmons (score: 100)
✅ Match: Edmond Sumner → Edmond Sumner (score: 100)
✅ Match: Caleb Swanigan → Caleb Swanigan (score: 100)
✅ Match: Sindarius Thornwell → Sindarius Thornwell (score: 100)
❌ No strong match: Melo Trimble → Closest: Allonzo Trier (score: 56)
✅ Match: Derrick W

✅ Match: Jerami Grant → Jerami Grant (score: 100)
✅ Match: P.J. Hairston → P.J. Hairston (score: 100)
✅ Match: Gary Harris → Gary Harris (score: 100)
✅ Match: Joe Harris → Joe Harris (score: 100)
✅ Match: Rodney Hood → Rodney Hood (score: 100)
✅ Match: Cory Jefferson → Cory Jefferson (score: 100)
✅ Match: Nick Johnson → Nick Johnson (score: 100)
✅ Match: Sean Kilpatrick → Sean Kilpatrick (score: 100)
✅ Match: Alex Kirk → Alex Kirk (score: 100)
✅ Match: Zach LaVine → Zach LaVine (score: 100)
✅ Match: Devyn Marble → Devyn Marble (score: 100)
✅ Match: James McAdoo → James Michael McAdoo (score: 75)
✅ Match: K.J. McDaniels → K.J. McDaniels (score: 100)
✅ Match: Doug McDermott → Doug McDermott (score: 100)
✅ Match: Jordan McRae → Jordan McRae (score: 100)
✅ Match: Shabazz Napier → Shabazz Napier (score: 100)
✅ Match: Johnny O?Bryant III → Johnny O'Bryant (score: 100)
✅ Match: Lamar Patterson → Lamar Patterson (score: 100)
✅ Match: Adreian Payne → Adreian Payne (score: 100)
✅ Match: Elfrid P

✅ Match: Tyler Zeller → Tyler Zeller (score: 100)
✅ Match: Keith Benson → Keith Benson (score: 100)
✅ Match: Marshon Brooks → MarShon Brooks (score: 100)
✅ Match: Alec Burks → Alec Burks (score: 100)
✅ Match: Jimmy Butler → Jimmy Butler (score: 100)
✅ Match: Norris Cole → Norris Cole (score: 100)
❌ No strong match: Jon Diebler → Closest: John Butler (score: 73)
❌ No strong match: Michael Dunigan → Closest: Donovan Mitchell (score: 71)
❌ No strong match: LaceDarius Dunn → Closest: Kris Dunn (score: 67)
✅ Match: Kenneth Faried → Kenneth Faried (score: 100)
✅ Match: James Fredette → Jimmer Fredette (score: 83)
✅ Match: Andrew Goudelock → Andrew Goudelock (score: 100)
✅ Match: Jordan Hamilton → Jordan Hamilton (score: 100)
✅ Match: Justin Harper → Justin Harper (score: 100)
✅ Match: Tobias Harris → Tobias Harris (score: 100)
✅ Match: Tyler Honeycutt → Tyler Honeycutt (score: 100)
✅ Match: Scotty Hopson → Scotty Hopson (score: 100)
✅ Match: Kyrie Irving → Kyrie Irving (score: 100)
✅ Match: 

✅ Match: Jerryd Bayless → Jerryd Bayless (score: 100)
✅ Match: Michael Beasley → Michael Beasley (score: 100)
✅ Match: Ramel Bradley → Michael Bradley (score: 79)
❌ No strong match: Tyrone Brazelton → Closest: Jaylon Tyson (score: 64)
✅ Match: Takais Brown → Jabari Brown (score: 75)
❌ No strong match: Keith Brumbaugh → Closest: Keith Bogans (score: 59)
❌ No strong match: Stanley Burrell → Closest: Cassius Stanley (score: 60)
❌ No strong match: Brian Butch → Closest: Brian Cook (score: 67)
✅ Match: Jamar Butler → Jared Butler (score: 83)
❌ No strong match: Pat Calathes → Closest: Nick Calathes (score: 72)
✅ Match: Joe Crawford → Joe Crawford (score: 100)
❌ No strong match: Chris Daniels → Closest: Chris Duarte (score: 72)
✅ Match: Joey Dorsey → Joey Dorsey (score: 100)
❌ No strong match: Marcus Dove → Closest: Marcus Fizer (score: 70)
❌ No strong match: Josh Duncan → Closest: DeQuan Jones (score: 70)
❌ No strong match: Frank Elegar → Closest: Ian Clark (score: 57)
✅ Match: Patrick Ewing

❌ No strong match: Kyle Visser → Closest: Kyle Weaver (score: 73)
✅ Match: Darryl Watkins → Darryl Watkins (score: 100)
❌ No strong match: Major Wingate → Closest: Nate Darling (score: 64)
❌ No strong match: DaShaun Wood → Closest: Brendan Haywood (score: 59)
✅ Match: Brandan Wright → Brandan Wright (score: 100)
✅ Match: Julian Wright → Julian Wright (score: 100)
❌ No strong match: Avis Wyatt → Closest: Travis Wear (score: 67)
✅ Match: Nick Young → Nick Young (score: 100)
✅ Match: Thaddeus Young → Thaddeus Young (score: 100)
❌ No strong match: Kenny Adeleke → Closest: Kennedy Chandler (score: 69)
✅ Match: Maurice Ager → Maurice Ager (score: 100)
✅ Match: LaMarcus Aldridge → LaMarcus Aldridge (score: 100)
✅ Match: Morris Almond → Morris Almond (score: 100)
✅ Match: Lou Amundson → Lou Amundson (score: 100)
✅ Match: Rashad Anderson → Ryan Anderson (score: 79)
✅ Match: Hilton Armstrong → Hilton Armstrong (score: 100)
✅ Match: James Augustine → James Augustine (score: 100)
✅ Match: Renaldo 

✅ Match: Brandon Rush → Brandon Rush (score: 100)
✅ Match: Luke Schenscher → Luke Schenscher (score: 100)
✅ Match: Wayne Simien → Wayne Simien (score: 100)
❌ No strong match: Tre Simmons → Closest: Tre Jones (score: 70)
✅ Match: David Simon → David Johnson (score: 75)
✅ Match: Chris Taft → Chris Taft (score: 100)
✅ Match: Chris Thomas → Khyri Thomas (score: 83)
✅ Match: Omar Thomas → Matt Thomas (score: 82)
✅ Match: Dijon Thompson → Dijon Thompson (score: 100)
✅ Match: Ronny Turiaf → Ronny Turiaf (score: 100)
✅ Match: Charlie Villanueva → Charlie Villanueva (score: 100)
✅ Match: Hakim Warrick → Hakim Warrick (score: 100)
✅ Match: Martell Webster → Martell Webster (score: 100)
✅ Match: Robert Whaley → Robert Whaley (score: 100)
✅ Match: Deron Williams → Deron Williams (score: 100)
✅ Match: Jawad Williams → Jawad Williams (score: 100)
✅ Match: Marvin Williams → Marvin Williams (score: 100)
✅ Match: Antoine Wright → Antoine Wright (score: 100)
✅ Match: Bracey Wright → Bracey Wright (score

✅ Match: James Lang → James Lang (score: 100)
❌ No strong match: Donald Little → Closest: Donald Sloan (score: 64)
✅ Match: Chris Marcus → Marquese Chriss (score: 81)
❌ No strong match: Chris Massie → Closest: Marquese Chriss (score: 74)
❌ No strong match: Will McDonald → Closest: Will Conroy (score: 67)
✅ Match: Darko Milicic → Darko MiliÄiÄ‡ (score: 92)
❌ No strong match: Jeff Newton → Closest: Jeff Taylor (score: 64)
❌ No strong match: Uche Nsonwu-Amadi → Closest: Cason Wallace (score: 55)
❌ No strong match: Ugonna Okyekwe → Closest: Kyle O'Quinn (score: 56)
❌ No strong match: Marlon Parmer → Closest: Desmon Farmer (score: 69)
❌ No strong match: Stephane Pelle → Closest: StÃ©phane Lasme (score: 74)
✅ Match: Kirk Penney → Kirk Penney (score: 100)
✅ Match: Pavel Podkolzin → Pavel Podkolzin (score: 100)
✅ Match: Josh Powell → Josh Powell (score: 100)
❌ No strong match: Hollis Price → Closest: Ryan Hollins (score: 67)
✅ Match: Luke Ridnour → Luke Ridnour (score: 100)
❌ No strong match:

❌ No strong match: Kimani Ffriend → Closest: Kaniel Dickens (score: 57)
✅ Match: Alton Ford → Alton Ford (score: 100)
✅ Match: Joseph Forte → Joseph Forte (score: 100)
✅ Match: Jerry Green → Jeff Green (score: 76)
❌ No strong match: Kenny Gregory → Closest: Greg Oden (score: 64)
✅ Match: Eddie Griffin → Eddie Griffin (score: 100)
✅ Match: Trenton Hassell → Trenton Hassell (score: 100)
✅ Match: Kirk Haston → Kirk Haston (score: 100)
❌ No strong match: Charles Hathaway → Closest: Charles Cooke (score: 55)
✅ Match: Brendan Haywood → Brendan Haywood (score: 100)
❌ No strong match: Michael Hicks → Closest: Michael Foster Jr. (score: 67)
✅ Match: Steven Hunter → Steven Hunter (score: 100)
❌ No strong match: Andre Hutson → Closest: Andrew Harrison (score: 74)
✅ Match: Nate James → James Jones (score: 76)
✅ Match: Richard Jefferson → Richard Jefferson (score: 100)
✅ Match: Horace Jenkins → Horace Jenkins (score: 100)
✅ Match: Darrell Johns → Carldell Johnson (score: 76)
✅ Match: Joe Johnson → 

In [5]:
midrange_matches = {
    player: (matched_name, score)
    for player, (matched_name, score) in closest_match_dict.items()
    if 75 <= score < 100 and matched_name != "No match found"
}

print("\n Matches with scores between 75 and 99:")
for player, (matched_name, score) in midrange_matches.items():
    print(f"- {player} → {matched_name} (score: {score})")

midrange_df = pd.DataFrame([
    {"Combine_Player": player, "Matched_Rookie_Player": matched_name, "Score": score}
    for player, (matched_name, score) in midrange_matches.items()
])

midrange_df.to_csv("midrange_matches_75_99.csv", index=False)
print("Saved midrange matches to midrange_matches_75_99.csv")



🟡 Matches with scores between 75 and 99:
- Michael Ajayi → Ajay Mitchell (score: 85)
- Robert Dillingham → Rob Dillingham (score: 90)
- David Jones → David Johnson (score: 83)
- Jaxson Robinson → Justin Robinson (score: 80)
- Tidjane Salaun → Tidjane SalaÃ¼n (score: 96)
- Alexandre Sarr → Alex Sarr (score: 78)
- JT Toppin → Jacob Toppin (score: 76)
- Mike Miles Jr. → Mike Miller (score: 86)
- Dillon Mitchell → Davion Mitchell (score: 87)
- Moussa Diabate → Moussa DiabatÃ© (score: 96)
- Nikola Jovic → Nikola JoviÄ‡ (score: 96)
- Trevion Williams → Marvin Williams (score: 84)
- Matt Hurt → Matthew Hurt (score: 86)
- Makur Maker → Maurice Baker (score: 75)
- John Petty Jr. → Johan Petro (score: 76)
- Joshua Hall → Josh Hall (score: 90)
- Theo Maledon → ThÃ©o Maledon (score: 96)
- Karim Mane → Karim ManÃ© (score: 95)
- Jayden Scrubb → Jay Scrubb (score: 87)
- Nicolas Claxton → Nic Claxton (score: 85)
- Jaylen Hands → Jaylen Adams (score: 83)
- Jalen Hoard → Jaylen Hoard (score: 96)
- Came

At this point, some manual work had to be done to only include the incorrec matches in the dataset. Further, a few unmatched players like Carlton Carrington had matches that fuzzymatching did not find (he goes by Bub Carrington)

In [6]:
merged = pd.read_csv("rookie_stats_with_combine_fuzzy.csv")

incorrect_matches = pd.read_csv("midrange_matches_75_99.csv")

cleaned = merged[~merged["PLAYER"].isin(incorrect_matches["Combine_Player"])].copy()

cleaned.to_csv("rookie_stats_with_combine_cleaned.csv", index=False)
print("Saved cleaned file to rookie_stats_with_combine_cleaned.csv (incorrect matches removed)")


✅ Saved cleaned file to rookie_stats_with_combine_cleaned.csv (incorrect matches removed)


In [7]:
df = pd.read_csv("rookie_stats_with_combine_cleaned.csv")

df = df[df["PLAYER"].notna()].copy()

columns_to_drop = ["Team", "Pos", "Awards", "Player_norm", "PLAYER_norm", "PLAYER"]
df.drop(columns=columns_to_drop, inplace=True, errors="ignore")

df.to_csv("rookie_stats_final_clean.csv", index=False)
print("Final cleaned dataset saved to rookie_stats_final_clean.csv")

✅ Final cleaned dataset saved to rookie_stats_final_clean.csv
