# Name Parser

## Goal

- Parse each player's name into a first name and a last name
- Standardize player names across all seasons

## Methodology
- Create a new table of player UUIDs that is shared across all seasons
- This table will have the following columns:
    - player_uuid
    - first_name
    - last_name
    - full_name


In [5]:
import re

# Raw name layouts, one constant per season. Seasons that share a layout are
# assigned together so the three distinct formats are easy to spot.
s_16_17 = s_17_18 = "First_Last"
s_18_19 = s_19_20 = "First_Last_number"
s_20_21 = s_21_22 = s_22_23 = s_23_24 = "First Last"

# Lookup from per-season history table name to the name layout it uses,
# listed in chronological order.
name_formats = dict(
    [
        ("player_gameweek_history_2016_17", s_16_17),
        ("player_gameweek_history_2017_18", s_17_18),
        ("player_gameweek_history_2018_19", s_18_19),
        ("player_gameweek_history_2019_20", s_19_20),
        ("player_gameweek_history_2020_21", s_20_21),
        ("player_gameweek_history_2021_22", s_21_22),
        ("player_gameweek_history_2022_23", s_22_23),
        ("player_gameweek_history_2023_24", s_23_24),
    ]
)


def convert_name(name, season):
    """Return *name* cleaned into a plain, single-spaced form.

    The name is passed through ``remove_underscore``, ``remove_number`` and
    ``remove_special_characters`` in that order, after which whitespace is
    normalized so the result has no leading, trailing or repeated spaces.

    Args:
        name: Raw player name string.
        season: Identifier of the season the name came from. Not used by
            the cleaning steps; kept so existing callers can keep passing it.

    Returns:
        The cleaned name string.
    """
    name = remove_underscore(name)
    name = remove_number(name)
    name = remove_special_characters(name)
    # Removing a "_<number>" suffix leaves "First Last " behind (underscore
    # already turned into a space). split()/join trims both ends and
    # collapses any internal run of whitespace to a single space.
    return " ".join(name.split())


def remove_underscore(name):
    """Return *name* with every underscore turned into a single space."""
    pieces = name.split("_")
    return " ".join(pieces)


def remove_number(name):
    """Return *name* with every run of digits deleted.

    Only the digits themselves are removed; any whitespace that surrounded
    them is left in place.
    """
    digit_run = re.compile(r"\d+")
    return digit_run.sub("", name)


def remove_special_characters(name):
    """Return *name* reduced to ASCII letters, digits and whitespace.

    Accented letters are first decomposed (Unicode NFKD) into a base letter
    plus combining marks, so that filtering keeps the base letter instead of
    deleting the whole character ("Mané" becomes "Mane", not "Man").
    Everything that is still outside ``a-z``, ``A-Z``, ``0-9`` and
    whitespace afterwards (punctuation, hyphens, apostrophes, combining
    marks, letters with no decomposition such as "ø") is removed.

    Args:
        name: Player name string, possibly containing non-ASCII characters.

    Returns:
        The filtered name string. ASCII-only input yields the same result
        as a plain character filter.
    """
    import unicodedata

    # NFKD leaves ASCII text untouched, so this only affects names that
    # would otherwise lose letters to the ASCII-only filter below.
    decomposed = unicodedata.normalize("NFKD", name)
    return re.sub(r"[^a-zA-Z0-9\s]", "", decomposed)

In [6]:
# Quick smoke test: run a handful of sample names through convert_name and
# print the before/after pairs for visual inspection.
test_names = [
    "Mohamed_Salah",  # plain First_Last
    "Kevin_De_Bruyne",  # more than two underscore-separated parts
    "Harry_Kane_1",  # trailing number
    "Erling_Haaland_2",  # trailing number
    "Bruno_Fernandes",
    "Son_Heung_min",
]

print("Testing name parsing functions:")
for raw_name in test_names:
    print(f"Original: {raw_name} -> Cleaned: {convert_name(raw_name, 'test')}")

Testing name parsing functions:
Original: Mohamed_Salah -> Cleaned: Mohamed Salah
Original: Kevin_De_Bruyne -> Cleaned: Kevin De Bruyne
Original: Harry_Kane_1 -> Cleaned: Harry Kane 
Original: Erling_Haaland_2 -> Cleaned: Erling Haaland 
Original: Bruno_Fernandes -> Cleaned: Bruno Fernandes
Original: Son_Heung_min -> Cleaned: Son Heung min


In [7]:
import polars as pl
from sqlalchemy import select

from fantasy_premier_league.database import get_db_session
from fantasy_premier_league.models.player_gameweek_history import PlayerGameweekHistory16_17

# Unfiltered SELECT over the 2016/17 gameweek-history model.
query = select(PlayerGameweekHistory16_17)

# Run the query inside a managed session and load the result with polars.
with get_db_session() as session:
    df = pl.read_database(query=query, connection=session)

# Bare expression so the notebook renders the frame as the cell output.
df

  """


InvalidRequestError: Table 'player_gameweek_history_2016_17' is already defined for this MetaData instance.  Specify 'extend_existing=True' to redefine options and columns on an existing Table object.

x