<h1>Preparing & Setting Necessary Components</h1>

In [1]:
#Imporing External Libraries
from bs4 import BeautifulSoup 
import requests
import pandas as pd
import json
import re
import numpy as np

In [2]:
#Function for splitting words based on Capital Letters
def NameSplitter(text):
    split_name = re.findall('[A-Z][a-z]*|\d+', team_name)
    
    # Join the split parts with spaces
    formatted_name = ' '.join(split_name)
    
    return formatted_name

In [3]:
#Setting Pandas Options for Rows and Columns
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 20)

<h1>Data Scraping</h1>

In [4]:
#Main Link for the data
nba_standings_link = 'https://www.espn.com/nba/standings/_/sort/wins/dir/desc'
team_standings_page = requests.get(nba_standings_link).text
team_standings_page_bs = BeautifulSoup(team_standings_page,'lxml')

#Creating the data frame
team_stats_df = pd.DataFrame()
player_stats_df = pd.DataFrame()
player_bio_df = pd.DataFrame()
team_matches_df = pd.DataFrame()

<h2>Creating the Team_Stats Table</h2>

In [5]:
#Position and Team Names
team_standings_page_table = team_standings_page_bs.find_all('div',class_='standings__table InnerLayout__child--dividers standings__table--nba-play-in-tournament')
team_standings_matrix=[]

#Looping through the div to find all the team standings table
for tags in team_standings_page_table:
    team_standings_data = tags.find_all('div',class_="team-link flex items-center clr-gray-03")
    conference_tags = tags.find('div',class_="Table__Title") #Getting the conference name
    conference = conference_tags.text
    rank = 1
    for team_standing in team_standings_data:
        matrix_sem = [rank]
        team = team_standing.find_all("span")
        for team_data in team:
            if (team_data.text != ""):
                matrix_sem += [team_data.text]
        rank+=1
        matrix_sem += [conference]
        team_standings_matrix += [matrix_sem]



In [6]:
#Team Stats
team_stats_matrix = []
for tags in team_standings_page_table:
    teams_stats_data = tags.find_all('tbody',"Table__TBODY")
    i = 0
    j = 1
 
    for team_stats in teams_stats_data: #Scraping team statistics to a matrix
        if (i % 2) != 0: #4 tbody with the classs "Table TBODY" in the html, taking only the tables needed
            matrix_sem = []
            stats_data = team_stats.find_all("td")
            for stats in stats_data:
                if (j == 14):
                    j=1
                    team_stats_matrix += [matrix_sem]
                    matrix_sem = []
                matrix_sem += [stats.span.text]
                j+=1
            team_stats_matrix+=[matrix_sem]
            j = 1
        i+=1

In [7]:
#Transfering the data to the dataframe
team_stats_df["Standing"] = [row[0] for row in team_standings_matrix]
team_stats_df["City"] = [row[2] for row in team_standings_matrix]
team_stats_df["Name"] = [row[3] for row in team_standings_matrix]
team_stats_df["Conference"] = [row[4] for row in team_standings_matrix]
team_stats_df["Pct"] = [row[2] for row in team_stats_matrix]
team_stats_df["Ppg"] = [row[8] for row in team_stats_matrix]
team_stats_df["OppPpg"] = [row[9] for row in team_stats_matrix]

In [8]:
display(team_stats_df)

Unnamed: 0,Standing,City,Name,Conference,Pct,Ppg,OppPpg
0,1,MIL,Milwaukee Bucks,Eastern Conference,.707,116.9,113.3
1,2,BOS,Boston Celtics,Eastern Conference,.695,117.9,111.4
2,3,PHI,Philadelphia 76ers,Eastern Conference,.659,115.2,110.9
3,4,CLE,Cleveland Cavaliers,Eastern Conference,.622,112.3,106.9
4,5,NY,New York Knicks,Eastern Conference,.573,116.0,113.1
...,...,...,...,...,...,...,...
25,11,DAL,Dallas Mavericks,Western Conference,.463,114.2,114.1
26,12,UTAH,Utah Jazz,Western Conference,.451,117.1,118.0
27,13,POR,Portland Trail Blazers,Western Conference,.402,113.4,117.4
28,14,HOU,Houston Rockets,Western Conference,.268,110.7,118.6


<h2>Creating the Player_Bio Table</h2>

In [9]:
#Taking team links in the regular season
team_links = [] #Array to save the links for each teams that are taken from the main page
for tags in team_standings_page_table:
    team_standings_data = tags.find_all('div',class_="team-link flex items-center clr-gray-03")
    for team_standing in team_standings_data:
        link = team_standing.find('a',class_="AnchorLink")
        if link.get("href"):
            team_stats_link = "http://www.espn.com"
            team_stats_link += link.get("href")
            team_stats_link = team_stats_link.replace("_","stats/_")
            team_stats_link = team_stats_link[:team_stats_link.rfind("/")]
            team_stats_link += "/season/2023/seasontype/2"
            team_links += [team_stats_link]

In [10]:
#Player Bio Data
player_bio_matrix = [] #Array to save each player bio data
player_name_list = [] #Array untuk save nama pemain yang akan digunakan untuk palyer_performance
i = 0
for link in team_links:
    i+= 1
    #Looping through each team
    team_stats= requests.get(link).text
    team_stats_bs = BeautifulSoup(team_stats,'lxml')

    #Getting each player's link from the roster
    stats_table = team_stats_bs.find("div",class_="ResponsiveTable ResponsiveTable--fixed-left mt5 remove_capitalize")
    player_name_table = stats_table.find("tbody" ,class_="Table__TBODY")
    player_names = player_name_table.find_all('a',class_="AnchorLink")
    player_links = []

    #Changing to the player's bio page
    for player in player_names:
        player_bio_matrix_sem = ["-" for i in range(8)] #Keeping the current player bio before inserting into the main matrix
        link = player.get("href")
        link = link.replace("_","bio/_")
        player_links += [link]
        player_name_list += [player.text]


    #Getting the player's desired attributes from the bio
    for link in player_links:
        player_bio_matrix_sem = [None for i in range(8)]
        player_bio_link = requests.get(link).text
        player_bio_link_bs = BeautifulSoup(player_bio_link,'lxml')
        player_name = ""
        player_name_html = player_bio_link_bs.find_all("h1",class_="PlayerHeader__Name")

        for tag in player_name_html:
            player_name += tag.span.text
            last_name = (tag.find("span", class_ ="truncate min-w-0"))
            if last_name:
                player_name += " " + last_name.text

            player_bio_matrix_sem[0] = player_name

            bio_table = player_bio_link_bs.find("section",class_='Card Bio')   
            bio_data = bio_table.find_all("div",class_="flex")

            for data in bio_data:
                if (data.span.text in ["Position ", "HT/WT", "Birthdate","Experience","Birthplace"]):
                    data_value = data.find("span",class_="dib flex-uniform mr3 clr-gray-01").text
                    if ("," in data_value):
                        data_value1, data_value2 = data_value.split(", ")
                        if (data.span.text == "HT/WT"):
                            player_bio_matrix_sem[2] = data_value1
                            player_bio_matrix_sem[3] = data_value2
                        if (data.span.text =="Birthplace"):
                            player_bio_matrix_sem[6] = data_value1
                            player_bio_matrix_sem[7] = data_value2
                    else:
                        if (data.span.text == "Position "):
                            player_bio_matrix_sem[1] = data_value
                        if (data.span.text == "Birthdate"):
                            player_bio_matrix_sem[4] = data_value
                        if (data.span.text == "Experience"):
                            player_bio_matrix_sem[5] = data_value

            player_bio_matrix += [player_bio_matrix_sem]

In [11]:
#Transfering the data to the dataframe
player_bio_df["PlayerName"] = [row[0] for row in player_bio_matrix]
player_bio_df["Position"] = [row[1] for row in player_bio_matrix]
player_bio_df["PlayerHeight"] = [row[2] for row in player_bio_matrix]
player_bio_df["PlayerWeight"] = [row[3] for row in player_bio_matrix]
player_bio_df["Birthdate"] = [row[4] for row in player_bio_matrix]
player_bio_df["Experience"] = [row[5] for row in player_bio_matrix]
player_bio_df["Birth City"] = [row[6] for row in player_bio_matrix]
player_bio_df["Birth State"] = [row[7] for row in player_bio_matrix]

In [12]:
display(player_bio_df)

Unnamed: 0,PlayerName,Position,PlayerHeight,PlayerWeight,Birthdate,Experience,Birth City,Birth State
0,Giannis Antetokounmpo,Power Forward,"6' 11""",243 lbs,12/6/1994 (28),9th Season,Athens,Greece
1,Jrue Holiday,Point Guard,"6' 4""",205 lbs,6/12/1990 (33),13th Season,Chatsworth,CA
2,Brook Lopez,Center,"7' 0""",282 lbs,4/1/1988 (35),14th Season,North Hollywood,CA
3,Khris Middleton,Small Forward,"6' 7""",222 lbs,8/12/1991 (31),10th Season,Charleston,SC
4,Bobby Portis,Forward,"6' 10""",250 lbs,2/10/1995 (28),7th Season,Little Rock,AR
...,...,...,...,...,...,...,...,...
605,Isaiah Roby,Forward,"6' 8""",230 lbs,2/3/1998 (25),3rd Season,Dixon,IL
606,Dominick Barlow,Forward,"6' 10""",220 lbs,5/26/2003 (20),Rookie,,
607,Gorgui Dieng,Center,"6' 10""",265 lbs,1/18/1990 (33),9th Season,Kebemer,Senegal
608,Jordan Hall,Guard,,,1/14/2002,,Wildwood,NJ


<h2>Creating the Player_Stats Table</h2>

In [13]:
player_stats_matrix = []
for link in team_links:
    #Looping through each team
    team_stats= requests.get(link).text
    team_stats_bs = BeautifulSoup(team_stats,'lxml')
    
    team_name_header = team_stats_bs.find("h1",class_="ClubhouseHeader__Name")    
    team_name_tag = team_name_header.find("span")
    team_name_span = team_name_tag.find_all("span",class_="db")
    team_name =""
    for tag in team_name_span:
        team_name += tag.text + " "
    
    stats_table_html = team_stats_bs.find("div",class_="Table__ScrollerWrapper relative overflow-hidden")
    player_stats_data = stats_table_html("tr",class_='Table__TR Table__TR--sm Table__even')
    stats_count = len(player_stats_data)-1
    i = 0
    while(i<stats_count):
        stats = player_stats_data[i]
        player_stats_matrix_sem = [team_name]
        for stat in stats:
            if (stat.span.text != "INF"):
                player_stats_matrix_sem += [stat.span.text]
            else:
                player_stats_matrix_sem += [None]
        player_stats_matrix += [player_stats_matrix_sem]
        i+=1

In [14]:
#Transfering the data to the matrix
player_stats_df["PlayerId"] = [row for row in player_name_list]
player_stats_df["Team"] = [row[0] for row in player_stats_matrix]
player_stats_df["Gp"] = [row[1] for row in player_stats_matrix]
player_stats_df["Gs"] = [row[2] for row in player_stats_matrix]
player_stats_df["Min"] = [row[3] for row in player_stats_matrix]
player_stats_df["Pts"] = [row[4] for row in player_stats_matrix]
player_stats_df["OffReb"] = [row[5] for row in player_stats_matrix]
player_stats_df["DefReb"] = [row[6] for row in player_stats_matrix]
player_stats_df["Ast"] = [row[8] for row in player_stats_matrix]
player_stats_df["Stl"] = [row[9] for row in player_stats_matrix]
player_stats_df["Blk"] = [row[10] for row in player_stats_matrix]
player_stats_df["Turnover"] = [row[11] for row in player_stats_matrix]
player_stats_df["Pf"] = [row[12] for row in player_stats_matrix]

In [15]:
display(player_stats_df)

Unnamed: 0,PlayerId,Team,Gp,Gs,Min,Pts,OffReb,DefReb,Ast,Stl,Blk,Turnover,Pf
0,Giannis Antetokounmpo,Milwaukee Bucks,63,63,32.1,31.1,2.2,9.6,5.7,0.8,0.8,3.9,3.1
1,Jrue Holiday,Milwaukee Bucks,67,65,32.6,19.3,1.2,3.9,7.4,1.2,0.4,2.9,1.7
2,Brook Lopez,Milwaukee Bucks,78,78,30.4,15.9,2.0,4.7,1.3,0.5,2.5,1.4,2.6
3,Khris Middleton,Milwaukee Bucks,33,19,24.3,15.1,0.8,3.4,4.9,0.7,0.2,2.2,2.1
4,Bobby Portis,Milwaukee Bucks,70,22,26.0,14.1,2.2,7.4,1.5,0.4,0.2,1.2,1.6
...,...,...,...,...,...,...,...,...,...,...,...,...,...
605,Isaiah Roby,San Antonio Spurs,42,2,11.3,4.1,0.5,2.1,0.9,0.4,0.2,0.7,1.3
606,Dominick Barlow,San Antonio Spurs,28,0,14.6,3.9,1.6,2.0,0.9,0.4,0.7,0.5,2.0
607,Gorgui Dieng,San Antonio Spurs,31,1,11.5,3.9,0.9,2.6,1.7,0.1,0.5,1.0,1.8
608,Jordan Hall,San Antonio Spurs,9,0,9.2,3.1,0.3,1.0,1.2,0.1,0.0,0.8,0.7


<h2>Creating Matches Table</h2>

In [16]:
matches_matrix = [] 
for link in team_links: #Looping through all the links in team_links
    team_schedule_link = link.replace("stats","schedule") 
    getter = requests.get(team_schedule_link).text
    team_schedule_bs = BeautifulSoup(getter,'lxml') 

    team_name_header = team_schedule_bs.find("h1",class_="ClubhouseHeader__Name") #Finding the header for team_name    
    team_name_tag = team_name_header.find("span") #Finding the span that contains the full name
    team_name_span = team_name_tag.find_all("span",class_="db")
    team_name =""
    for tag in team_name_span: #Looping to get the full name from the team
        team_name += tag.text + " "
        
    team_schedule_table = team_schedule_bs.find("tbody",class_="Table__TBODY") #Finding the table that contains the team full schedule
    schedule_data_list = team_schedule_table.find_all("tr")
    
    for schedule_data_row in schedule_data_list:
        
        matches_matrix_sem = []
        
        schedule_data = schedule_data_row.find_all("td",class_="Table__TD") 
        for i in range(3): #Looping to only get the data we want in team_matches
            data_list = schedule_data[i].find_all('span')
            for data in data_list:
                if (data.text not in [" ","DATE","OPPONENT","RESULT"]): 
                    overtime = False
                    if ("," in data.text):
                        day,date = data.text.split(", ")
                        matches_matrix_sem += [day,date,team_name]
                    else:
                        matches_matrix_sem += [data.text]
        if ("W" in matches_matrix_sem): #Getting only the win data so theres no redundant data
            matches_matrix += [matches_matrix_sem]

In [17]:
#Transfering the data to the dataframe
team_matches_df = pd.DataFrame()
team_matches_df["MatchDay"] = [row[0] for row in matches_matrix] 
team_matches_df["MatchDate"] = [row[1] for row in matches_matrix] 
team_matches_df["Team1"] = [row[2] for row in matches_matrix] 
team_matches_df["Court"] = [row[3] for row in matches_matrix] 
team_matches_df["Team2"] = [row[4] for row in matches_matrix] 
team_matches_df["Winner"] = [row[5] for row in matches_matrix]
team_matches_df["Score"] = [row[6] for row in matches_matrix] 

In [18]:
display(team_matches_df)

Unnamed: 0,MatchDay,MatchDate,Team1,Court,Team2,Winner,Score
0,Thu,Oct 20,Milwaukee Bucks,@,Philadelphia,W,90-88
1,Sat,Oct 22,Milwaukee Bucks,vs,Houston,W,125-105
2,Wed,Oct 26,Milwaukee Bucks,vs,Brooklyn,W,110-99
3,Fri,Oct 28,Milwaukee Bucks,vs,New York,W,119-108
4,Sat,Oct 29,Milwaukee Bucks,vs,Atlanta,W,123-115
...,...,...,...,...,...,...,...
1225,Tue,Mar 14,San Antonio Spurs,vs,Orlando,W,132-114
1226,Sun,Mar 19,San Antonio Spurs,vs,Atlanta,W,126-118
1227,Sun,Apr 2,San Antonio Spurs,@,Sacramento,W,142-134 OT
1228,Thu,Apr 6,San Antonio Spurs,vs,Portland,W,129-127


<h2>Adding Coaches Column to Teams</h2>

In [19]:
coach_matrix = []
for link in team_links:
    team_roster_link = link.replace("stats","roster")
    idx = team_roster_link.find("/season")
    team_roster_link = team_roster_link
    getter = requests.get(team_roster_link).text
    team_roster_bs = BeautifulSoup(getter,'lxml')
    
    table = team_roster_bs.find("div",class_="Wrapper Card__Content")
    team_coach = table.find("div",class_="TableDetails pt4")
    if (team_coach):
        coach_name = team_coach.text
        coach_name = coach_name.split(" ")
        final_coach_name = ""
        for i in range(1,len(coach_name)):
            final_coach_name += coach_name[i] + " "
        coach_matrix += [final_coach_name]
    else:
        coach_matrix += [None]
team_stats_df["Coach"] = [row for row in coach_matrix]
display(team_stats_df)

Unnamed: 0,Standing,City,Name,Conference,Pct,Ppg,OppPpg,Coach
0,1,MIL,Milwaukee Bucks,Eastern Conference,.707,116.9,113.3,Adrian Griffin
1,2,BOS,Boston Celtics,Eastern Conference,.695,117.9,111.4,Joe Mazzulla
2,3,PHI,Philadelphia 76ers,Eastern Conference,.659,115.2,110.9,Nick Nurse
3,4,CLE,Cleveland Cavaliers,Eastern Conference,.622,112.3,106.9,J.B. Bickerstaff
4,5,NY,New York Knicks,Eastern Conference,.573,116.0,113.1,Tom Thibodeau
...,...,...,...,...,...,...,...,...
25,11,DAL,Dallas Mavericks,Western Conference,.463,114.2,114.1,Jason Kidd
26,12,UTAH,Utah Jazz,Western Conference,.451,117.1,118.0,Will Hardy
27,13,POR,Portland Trail Blazers,Western Conference,.402,113.4,117.4,Chauncey Billups
28,14,HOU,Houston Rockets,Western Conference,.268,110.7,118.6,Ime Udoka


<h1>Data Preprocessing</h1>

In [20]:
#Function to change from .089 -> 8.9
def times_100(x):
    return float(x)*100

#Function to remove the plus in diff
def remove_plus(x):
    if "+" in x:
        return x.replace("+","")
    else:
        return x


#Applying times_100 to the PCT column
team_stats_df["Pct"] = team_stats_df["Pct"].apply(times_100)

team_stats_df["Pct"] = team_stats_df["Pct"].round(1)

In [21]:
#Function to convert height from feet and inches into cm
def convert_to_cm(height):
    if height and re.match(r'^\d+\'\s*\d+\"$', height):
        feet, inches = re.findall(r'\d+', height)
        height_cm = (int(feet) * 12 + int(inches)) * 2.54
        return f'{height_cm:.2f}'
    else:
        return height

#Function to switch the date from ("Year-Day-Month to Year-Month-Date")
def switch_date(date):
    if (date):
        date2 = date.split("-")
        return (date2[2] + "-" + date2[0] + "-" + date2[1])
    else:
        return date

#Dropping rows because some players might have played for 1 or more teams in 1 season
player_bio_df = player_bio_df.drop_duplicates(subset="PlayerName")
player_bio_df = player_bio_df.reset_index(drop=True)

#Adding PlayerId column and moving it to the from
player_bio_df["PlayerId"] = [i for i in range(1,len(player_bio_df["PlayerName"])+1)]
player_bio_df = player_bio_df[player_bio_df.columns[-1:].tolist() + player_bio_df.columns[:-1].tolist()]

#Changing the Height from Feet & Inches to cm from the  Column
player_bio_df['PlayerHeight'] = player_bio_df['PlayerHeight'].apply(convert_to_cm)

#Removing the age from the Birthdate Column
player_bio_df['Birthdate'] = player_bio_df['Birthdate'].str.replace(r'\s*\(\d+\)', "",regex=True)

#Replacing the "/" to '-' to satisfy the sql format
player_bio_df["Birthdate"] = player_bio_df["Birthdate"].str.replace("/","-")

#Applying the switch_date function to the birthdate/ column
player_bio_df["Birthdate"] = player_bio_df["Birthdate"].apply(switch_date)

                                                                    
#Removing the lbs from the Weight Column        
player_bio_df['PlayerWeight'] = player_bio_df['PlayerWeight'].str.replace(' lbs', '')                                                               

In [22]:
#Function to add year to the date column
def add_year(date):
    month,day=date.split("-")
    if int(month) >= 10:
        return "2022-" + date
    else:
        return "2023-" + date
    
#Function to change the day to full length
def change_day(day):
    days = ["Monday","Tuesday","Wednesday","Thursday","Friday","Saturday","Sunday"]
    for i in range(len(days)):
        if (day in days[i]):
            return days[i]

#Function to get the team full name
def add_team_name(city):
    for i in range(len(team_stats_df)):
        if (city in team_stats_df["Name"][i]): #Finding the team name in team_stats table
            return team_stats_df["Name"][i]
        
#Function to remove the OT from the score column
def split_score(score):
    if "OT" in score:
        score_parts = score.split(" ")
        score2 = score_parts[0]
        return score2
    else:
        return score

#Function to change the court from symbols to Team 1 / Team 2 according to the symbol
def change_court(court):
    if (court == "@"):
        return "Team 2"
    else:
        return "Team 1"
    
#Changing the date format (example : Oct 21 -> 10-21)
team_matches_df['MatchDate'] = team_matches_df['MatchDate'].apply(lambda x: pd.to_datetime(x, format='%b %d').strftime('%m-%d'))
    
#Applying the function to add the year to the date column
team_matches_df["MatchDate"] = team_matches_df["MatchDate"].apply(add_year)
        
#Applying the change_day function 
team_matches_df["MatchDay"] = team_matches_df["MatchDay"].apply(change_day)

#Creating an Overtime column based if there is OT or not in the score
team_matches_df["Overtime"] = team_matches_df["Score"].apply(lambda x: True if "OT" in x else False)
        
#Applying the function add_team_name to the Team 2 column
team_matches_df["Team2"] = team_matches_df["Team2"].apply(add_team_name)

#Since the data scrapped was only from the winning matches than the winner will always be team 1 (if new data is added manually than team 2 might be the winner) 
team_matches_df["Winner"] = team_matches_df["Team1"]

#Applying the split score function to the score column
team_matches_df["Score"] = team_matches_df["Score"].apply(split_score)

#Splitting the score to score 1 and score 2
team_matches_df[["Score1","Score2"]] = team_matches_df["Score"].str.split("-",expand=True)

#Applying the change_court function to the court column
team_matches_df["Court"] = team_matches_df["Court"].apply(change_court)

#Removing the score column
team_matches_df = team_matches_df.drop('Score', axis=1)

In [23]:
#Changing the player_name to PlayerId to match the primary key
player_id_map = player_bio_df.set_index('PlayerName')['PlayerId'].to_dict()
player_stats_df['PlayerId'] = player_stats_df['PlayerId'].map(player_id_map)

<h2>Clearing Trailing Whitespace</h2>

In [24]:
team_matches_df = team_matches_df.applymap(lambda x: x.strip() if isinstance(x, str) else x)

team_stats_df = team_stats_df.applymap(lambda x: x.strip() if isinstance(x, str) else x)

player_bio_df = player_bio_df.applymap(lambda x: x.strip() if isinstance(x, str) else x)

player_stats_df = player_stats_df.applymap(lambda x: x.strip() if isinstance(x, str) else x)

<h2>Changing Numeric Data</h2>

In [25]:
team_stats_numeric = ["Pct","Ppg","OppPpg"]

player_stats_numeric = ["PlayerId","Gp","Gs","Min","Pts","OffReb","DefReb","Ast","Stl","Blk","Turnover","Pf"]

team_matches_numeric = ["Score1","Score2"]

player_bio_numeric = ["PlayerHeight","PlayerWeight"]

team_stats_df[team_stats_numeric] = team_stats_df[team_stats_numeric].apply(pd.to_numeric)

player_stats_df[player_stats_numeric] = player_stats_df[player_stats_numeric].apply(pd.to_numeric)

team_matches_df[team_matches_numeric] = team_matches_df[team_matches_numeric].apply(pd.to_numeric)

player_bio_df[player_bio_numeric] = player_bio_df[player_bio_numeric].apply(pd.to_numeric)

<h2>Replacing NaN to None</h2>

In [26]:
team_stats_df = team_stats_df.replace(np.nan,None)
player_stats_df = player_stats_df.replace(np.nan,)
team_matches_df = team_matches_df.replace(np.nan,None)
player_bio_df = player_bio_df.replace(np.nan,None)

<h1>Display Final</h1>

In [27]:
display(player_bio_df)

Unnamed: 0,PlayerId,PlayerName,Position,PlayerHeight,PlayerWeight,Birthdate,Experience,Birth City,Birth State
0,1,Giannis Antetokounmpo,Power Forward,210.82,243.0,1994-12-6,9th Season,Athens,Greece
1,2,Jrue Holiday,Point Guard,193.04,205.0,1990-6-12,13th Season,Chatsworth,CA
2,3,Brook Lopez,Center,213.36,282.0,1988-4-1,14th Season,North Hollywood,CA
3,4,Khris Middleton,Small Forward,200.66,222.0,1991-8-12,10th Season,Charleston,SC
4,5,Bobby Portis,Forward,208.28,250.0,1995-2-10,7th Season,Little Rock,AR
...,...,...,...,...,...,...,...,...,...
534,535,Isaiah Roby,Forward,203.2,230.0,1998-2-3,3rd Season,Dixon,IL
535,536,Dominick Barlow,Forward,208.28,220.0,2003-5-26,Rookie,,
536,537,Gorgui Dieng,Center,208.28,265.0,1990-1-18,9th Season,Kebemer,Senegal
537,538,Jordan Hall,Guard,,,2002-1-14,,Wildwood,NJ


In [28]:
display(team_matches_df)

Unnamed: 0,MatchDay,MatchDate,Team1,Court,Team2,Winner,Overtime,Score1,Score2
0,Thursday,2022-10-20,Milwaukee Bucks,Team 2,Philadelphia 76ers,Milwaukee Bucks,False,90,88
1,Saturday,2022-10-22,Milwaukee Bucks,Team 1,Houston Rockets,Milwaukee Bucks,False,125,105
2,Wednesday,2022-10-26,Milwaukee Bucks,Team 1,Brooklyn Nets,Milwaukee Bucks,False,110,99
3,Friday,2022-10-28,Milwaukee Bucks,Team 1,New York Knicks,Milwaukee Bucks,False,119,108
4,Saturday,2022-10-29,Milwaukee Bucks,Team 1,Atlanta Hawks,Milwaukee Bucks,False,123,115
...,...,...,...,...,...,...,...,...,...
1225,Tuesday,2023-03-14,San Antonio Spurs,Team 1,Orlando Magic,San Antonio Spurs,False,132,114
1226,Sunday,2023-03-19,San Antonio Spurs,Team 1,Atlanta Hawks,San Antonio Spurs,False,126,118
1227,Sunday,2023-04-02,San Antonio Spurs,Team 2,Sacramento Kings,San Antonio Spurs,True,142,134
1228,Thursday,2023-04-06,San Antonio Spurs,Team 1,Portland Trail Blazers,San Antonio Spurs,False,129,127


In [29]:
display(player_stats_df)

Unnamed: 0,PlayerId,Team,Gp,Gs,Min,Pts,OffReb,DefReb,Ast,Stl,Blk,Turnover,Pf
0,1,Milwaukee Bucks,63,63,32.1,31.1,2.2,9.6,5.7,0.8,0.8,3.9,3.1
1,2,Milwaukee Bucks,67,65,32.6,19.3,1.2,3.9,7.4,1.2,0.4,2.9,1.7
2,3,Milwaukee Bucks,78,78,30.4,15.9,2.0,4.7,1.3,0.5,2.5,1.4,2.6
3,4,Milwaukee Bucks,33,19,24.3,15.1,0.8,3.4,4.9,0.7,0.2,2.2,2.1
4,5,Milwaukee Bucks,70,22,26.0,14.1,2.2,7.4,1.5,0.4,0.2,1.2,1.6
...,...,...,...,...,...,...,...,...,...,...,...,...,...
605,535,San Antonio Spurs,42,2,11.3,4.1,0.5,2.1,0.9,0.4,0.2,0.7,1.3
606,536,San Antonio Spurs,28,0,14.6,3.9,1.6,2.0,0.9,0.4,0.7,0.5,2.0
607,537,San Antonio Spurs,31,1,11.5,3.9,0.9,2.6,1.7,0.1,0.5,1.0,1.8
608,538,San Antonio Spurs,9,0,9.2,3.1,0.3,1.0,1.2,0.1,0.0,0.8,0.7


In [30]:
display(team_stats_df)

Unnamed: 0,Standing,City,Name,Conference,Pct,Ppg,OppPpg,Coach
0,1,MIL,Milwaukee Bucks,Eastern Conference,70.7,116.9,113.3,Adrian Griffin
1,2,BOS,Boston Celtics,Eastern Conference,69.5,117.9,111.4,Joe Mazzulla
2,3,PHI,Philadelphia 76ers,Eastern Conference,65.9,115.2,110.9,Nick Nurse
3,4,CLE,Cleveland Cavaliers,Eastern Conference,62.2,112.3,106.9,J.B. Bickerstaff
4,5,NY,New York Knicks,Eastern Conference,57.3,116.0,113.1,Tom Thibodeau
...,...,...,...,...,...,...,...,...
25,11,DAL,Dallas Mavericks,Western Conference,46.3,114.2,114.1,Jason Kidd
26,12,UTAH,Utah Jazz,Western Conference,45.1,117.1,118.0,Will Hardy
27,13,POR,Portland Trail Blazers,Western Conference,40.2,113.4,117.4,Chauncey Billups
28,14,HOU,Houston Rockets,Western Conference,26.8,110.7,118.6,Ime Udoka


<h1>Creating JSON File</h1>

In [31]:
file_path = "../data//"
file_path_list = []
#Changing the path for each table.json
file_path_list += [file_path + "Team_Stats.json"]
file_path_list += [file_path + "Player_Bio.json"] 
file_path_list += [file_path + "Player_Stats.json"]
file_path_list += [file_path + "Team_Matches.json"]

In [32]:
#Creating the json files
json_list = []

json_list += [json.dumps(team_stats_df.to_dict(orient='records'), indent=2)]

json_list += [json.dumps(player_bio_df.to_dict(orient='records'), indent=2)]

json_list += [json.dumps(player_stats_df.to_dict(orient='records'), indent=2)]

json_list += [json.dumps(team_matches_df.to_dict(orient='records'), indent=2)]

j = 0
for i in file_path_list:
    with open(i, "w") as json_file:
        json_file.write(json_list[j])
    j+=1