In [49]:
from concurrent.futures import ThreadPoolExecutor
import requests
import pandas as pd
import numpy as np
# from IPython.display import JSON

In [38]:
# def get_club_players(club_id, season_id):
#     club_players_data = []

#     request_url = f"http://localhost:8000/clubs/{club_id}/players?season_id={season_id}"
#     get_players_response = requests.get(request_url)

#     if get_players_response.status_code == 200:
#         get_players_json = get_players_response.json()
#         for player in get_players_json.get('players', []):  # Use .get to handle missing keys gracefully
#             data = {
#                 'season_id': season_id,
#                 'club_id': club_id,
#                 'player_id': player['id'],
#                 'player_name': player['name']
#             }
#             club_players_data.append(data)

#         return pd.DataFrame(club_players_data)

#     else:
#         raise SystemExit(f"Error: {get_players_response.status_code}")

# competition_ids = [
#     'GB1',    # Premier League
# ]

# season_ids = ['2024']

# # Create an empty list to collect DataFrames
# data_frames = []

# for competition_id in competition_ids:
#     for season_id in season_ids:
#         request_url = f"http://localhost:8000/competitions/{competition_id}/clubs?season_id={season_id}"
#         get_clubs_response = requests.get(request_url)

#         if get_clubs_response.status_code == 200:
#             get_clubs_json = get_clubs_response.json()
#             for club in get_clubs_json.get('clubs', []):
#                 club_players_df = get_club_players(club['id'], season_id)
#                 data_frames.append(club_players_df)  # Collect DataFrames in a list

#         else:
#             print(f"Error: {get_clubs_response.status_code}")

# # Concatenate all DataFrames into a single DataFrame
# temp_data_table = pd.concat(data_frames, ignore_index=True)

# # Display the first few rows of the final DataFrame
# temp_data_table.head()

In [54]:
def get_clubs(competition_id, season_id):
    '''
    Look up the clubs of one competition for one season.

    Issues a GET against the local API's /competitions/{competition_id}/clubs
    endpoint, sending season_id as a query parameter.

    Returns:
        A list of {'club_id': ..., 'club_name': ...} dicts, one per entry under
        the 'clubs' key of the JSON body (an empty list when that key is absent).

    Raises:
        SystemExit: when the response status is anything other than 200.
        KeyError: when a club entry lacks 'id' or 'name'.
    '''
    endpoint = f"http://localhost:8000/competitions/{competition_id}/clubs"
    response = requests.get(endpoint, params={'season_id': season_id})

    # Guard clause: bail out with the status code and body text on any non-200 reply.
    if response.status_code != 200:
        raise SystemExit(f"Error fetching clubs: {response.status_code} - {response.text}")

    club_rows = []
    for entry in response.json().get('clubs', []):
        club_rows.append({'club_id': entry['id'], 'club_name': entry['name']})
    return club_rows


def get_players(club_id, season_id):
    '''
    List the players of one club for one season.

    Issues a GET against the local API's /clubs/{club_id}/players endpoint,
    sending season_id as a query parameter.

    Returns:
        A list of {'player_id': ..., 'player_name': ...} dicts, one per entry
        under the 'players' key of the JSON body (an empty list when that key
        is absent).

    Raises:
        SystemExit: when the response status is anything other than 200.
        KeyError: when a player entry lacks 'id' or 'name'.
    '''
    endpoint = f"http://localhost:8000/clubs/{club_id}/players"
    response = requests.get(endpoint, params={'season_id': season_id})

    # Guard clause: bail out with the status code and body text on any non-200 reply.
    if response.status_code != 200:
        raise SystemExit(f"Error fetching players: {response.status_code} - {response.text}")

    def to_row(entry):
        # Keep only the two fields the rest of the notebook uses.
        return {'player_id': entry['id'], 'player_name': entry['name']}

    return list(map(to_row, response.json().get('players', [])))


def fetch_player_info(player_id):
    '''
    Fetch one player's profile from the local API and flatten it into a dict.

    Issues a GET against /players/{player_id}/profile.

    Returns:
        On a 200 response, a dict with the keys 'player_id', 'date_of_birth',
        'age', 'height', 'citizenship', 'secondary_citizenship',
        'main_position', 'other_positions', 'preferred_foot' and 'outfitter'.
        Fields missing from the profile come back as None, except
        'other_positions', which is an empty string.
        On any other status, {'player_id': ..., 'error': 'Error <status>'}.

    A profile without citizenship or position data no longer raises; this
    function is mapped over a thread pool, where a single exception would
    abort the whole batch.
    '''
    url = f"http://localhost:8000/players/{player_id}/profile"
    response = requests.get(url)
    if response.status_code != 200:
        return {'player_id': player_id, 'error': f"Error {response.status_code}"}

    player_json = response.json()

    # `or` fallbacks cover both a missing key and a key that is present but null.
    citizenship = player_json.get('citizenship') or []
    position = player_json.get('position') or {}
    other_positions = position.get('other') or []

    return {
        'player_id': player_id,
        'date_of_birth': player_json.get('dateOfBirth'),
        'age': player_json.get('age'),
        'height': player_json.get('height'),
        'citizenship': citizenship[0] if citizenship else None,
        'secondary_citizenship': citizenship[1] if len(citizenship) > 1 else None,
        'main_position': position.get('main'),
        'other_positions': ", ".join(other_positions),
        'preferred_foot': player_json.get('foot'),
        'outfitter': player_json.get('outfitter')
    }



# def get_player_market_values(player_id)

'''
NOTE: Since we are working only with the top 5 leagues right now, and they all have an
      Aug - May season, the following logic will work out (20/21, 21/22, etc.). For a
      league such as MLS, whose seasons are labelled 2020, 2021, etc., it would not work.
'''
def get_player_jersey_number(player_id, club_id, season_id):
    '''
    Look up the jersey number(s) a player wore for one club in one season.

    Issues a GET against /players/{player_id}/jersey_numbers and keeps the
    entries matching both club_id and the season.

    season_id is the season's starting year (e.g. '2024') and is converted to
    a two-year label ('24/25') before matching. That only fits leagues whose
    season spans two calendar years; single-year seasons will not match.

    Returns:
        On a 200 response, {'player_id': ..., 'jersey_numbers': [...]}; the
        list is empty when nothing matches.
        On any other status, {'player_id': ..., 'error': '...'}.

    Raises:
        ValueError: when season_id cannot be converted to an int.
    '''
    url = f"http://localhost:8000/players/{player_id}/jersey_numbers"
    response = requests.get(url)
    if response.status_code != 200:
        return {'player_id': player_id, 'error': f"Error obtaining jersey number, {response.status_code}"}

    jersey_numbers = response.json().get('jerseyNumbers') or []

    # '2024' -> '24/25'; the modulo keeps the century rollover right ('1999' -> '99/00').
    start_year = int(season_id) % 100
    season_label = f"{start_year:02d}/{(start_year + 1) % 100:02d}"

    # NOTE(review): assumes each entry looks like
    # {'season': '24/25', 'club': '<club id>', 'jerseyNumber': 7} -- TODO confirm
    # against the API's actual response before relying on this.
    matches = [
        entry.get('jerseyNumber')
        for entry in jersey_numbers
        if isinstance(entry, dict)
        and entry.get('season') == season_label
        # Compare as strings so an int id on either side still matches.
        and str(entry.get('club')) == str(club_id)
    ]
    return {'player_id': player_id, 'jersey_numbers': matches}

In [55]:
def build_player_dataset(competition_id, season_id):
    '''
    Assemble one DataFrame row per player for a competition and season.

    For each club returned by get_clubs, the squad is loaded with get_players
    and every player's profile is fetched concurrently via fetch_player_info.
    Each row combines the season/competition/club columns, the player's id and
    name, and whatever keys fetch_player_info returned for that player.
    '''
    rows = []

    for club in get_clubs(competition_id, season_id):
        # Columns shared by every player of this club.
        club_columns = {
            'season_id': season_id,
            'competition_id': competition_id,
            'club_id': club['club_id'],
            'club_name': club['club_name'],
        }
        squad = get_players(club_columns['club_id'], season_id)
        player_ids = [member['player_id'] for member in squad]

        # Profiles are fetched in parallel; map() yields them in squad order,
        # so they can be zipped back onto the squad list below.
        with ThreadPoolExecutor() as pool:
            profiles = list(pool.map(fetch_player_info, player_ids))

        rows.extend(
            {
                **club_columns,
                'player_id': member['player_id'],
                'player_name': member['player_name'],
                **profile,
                # value for 'jersey_number(s)' should go here
            }
            for member, profile in zip(squad, profiles)
        )

    return pd.DataFrame(rows)

In [56]:
competition_id = 'GB1'  # Premier League
season_id = '2024'

# Builds the full table by calling the local API for every club and player,
# so this cell does all of the notebook's network work.
dataset = build_player_dataset(competition_id, season_id)

# A bare final expression is rendered as a rich table by the notebook;
# print() would only give the truncated plain-text repr.
dataset.head()

Getting player info on: 238223Getting player info on: 85941

Getting player info on: 14555
Getting player info on: 258004
Getting player info on: 284730
Getting player info on: 177476
Getting player info on: 186590
Getting player info on: 942497
Getting player info on: 475959
Getting player info on: 576121
Getting player info on: 701057
Getting player info on: 95424
Getting player info on: 357565
Getting player info on: 601883
Getting player info on: 51471
Getting player info on: 53622
Getting player info on: 241641
Getting player info on: 88755
Getting player info on: 583199
Getting player info on: 486049
Getting player info on: 203460
Getting player info on: 406635
Getting player info on: 743591
Getting player info on: 661207
Getting player info on: 418560
Getting player info on: 262749Getting player info on: 111819

Getting player info on: 495666
Getting player info on: 435338
Getting player info on: 420243
Getting player info on: 425918
Getting player info on: 502821
Getting player

In [62]:
# Only a cell's final expression is rendered automatically, so the preview
# needs an explicit display() (provided by the IPython kernel) to be shown.
display(dataset.head(7))

# How many players are signed with each outfitter.
dataset['outfitter'].value_counts()

outfitter
adidas          105
Nike            104
Puma             37
Under Armour      8
New Balance       5
Skechers          4
Uhlsport          3
Umbro             1
Sells             1
AB1               1
Name: count, dtype: int64
