In [11]:
import pandas as pd
import os
import json as jsonlib
from typing import Tuple

# Competitions table: lives in the shared 'utils' folder two levels above the working directory
cdir = os.getcwd()
utils = os.path.abspath(os.path.join(cdir, '..', '..', 'utils'))
comps_csv_path = os.path.join(utils, 'comps.csv')
comps = pd.read_csv(comps_csv_path, sep=';')

# Seasons we want to keep, read from a JSON file in the same folder
des_seasons_path = os.path.join(utils, 'des_seasons.json')
with open(des_seasons_path, encoding='utf-8') as f:
    desired_seasons = jsonlib.load(f)

# JSON reader
def json_to_dict(json_path: str) -> dict:
    """Parse the UTF-8 encoded JSON file at ``json_path`` and return its content.

    Args:
        json_path: Path of the JSON file to read.

    Returns:
        The deserialized JSON document.
    """
    with open(json_path, "r", encoding="utf-8") as handle:
        return jsonlib.load(handle)

In [26]:
# Build the seasons dataframe for one league
def get_seasons_df(league_id: int, available_seasons_json: dict, wanted_seasons=None) -> pd.DataFrame:
    """Return the wanted seasons of a league as a dataframe.

    Each entry of ``available_seasons_json['seasons']`` is kept when its
    ``'year'`` value, with the ``'/'`` characters removed, is contained in
    ``wanted_seasons``.

    Args:
        league_id: Identifier written to the ``league`` column of every row.
        available_seasons_json: Dict holding a ``'seasons'`` list of dicts
            that are read through their ``'year'`` and ``'name'`` keys.
        wanted_seasons: Container of season keys to keep. When ``None``
            (the default) the notebook-level ``desired_seasons`` is used,
            which is the original behaviour.

    Returns:
        A dataframe with the columns ``league``, ``year`` and
        ``season_name``. The columns are present even when no season
        matches, so a saved CSV always carries its header.
    """
    columns = ['league', 'year', 'season_name']

    # `or []` also covers an explicit null under the 'seasons' key
    all_seasons = available_seasons_json.get('seasons') or []
    if not all_seasons:
        return pd.DataFrame(columns=columns)

    # Resolve the fallback only when there is something to filter
    if wanted_seasons is None:
        wanted_seasons = desired_seasons

    rows = []
    for season in all_seasons:
        # A missing, null or non-string 'year' must not break the whole export
        season_key = str(season.get('year') or '').replace('/', '')

        if season_key in wanted_seasons:
            rows.append({'league': league_id,
                         'year': season_key,
                         'season_name': season.get('name', '')})

    return pd.DataFrame(rows, columns=columns)

In [27]:
league_id = 73
# NOTE(review): machine-specific absolute paths; consider deriving them from a configurable base directory
raw_out_path = r'C:\Users\xrosinach\Desktop\TFM-Scouting-FC-Barcelona\data\raw'
clean_out_path = r'C:\Users\xrosinach\Desktop\TFM-Scouting-FC-Barcelona\data\clean'

# Look up the league name; fail with a clear message instead of an
# out-of-bounds IndexError when the id is not present in the competitions table
league_matches = comps.loc[comps['id'] == league_id, 'tournament']
if league_matches.empty:
    raise ValueError(f"league_id {league_id} not found in the competitions table")
league_name = league_matches.iloc[0]

# Slug used as folder name -> create the output (clean) folder of the league
league_slug = league_name.lower().replace(' ', '-')
out_league_path = os.path.join(clean_out_path, 'ss', league_slug)
os.makedirs(out_league_path, exist_ok=True)

# Raw folder of the league (the season folders live inside it)
raw_data_path = os.path.join(raw_out_path, 'ss', league_slug)
av_seasons_path = os.path.join(raw_data_path, 'available_seasons.json')
av_seasons = json_to_dict(json_path=av_seasons_path)

# Build the seasons dataframe and save it
av_seasons_df = get_seasons_df(league_id=league_id, available_seasons_json=av_seasons)
av_sasons_df = av_seasons_df  # original (misspelled) name kept as an alias for any cell that still uses it
av_seasons_df.to_csv(os.path.join(out_league_path, 'available_seasons.csv'), sep=';', index=False)

In [30]:
# Season to process; used below as the sub-folder name inside the league's raw data folder
season = '2526'

In [31]:
# Folder holding the season information
info_path = os.path.join(raw_data_path, season, 'info')


def _read_info_file(file_name: str) -> dict:
    """Load one JSON file from the season's info folder."""
    return json_to_dict(json_path=os.path.join(info_path, file_name))


# Read each JSON file of the info folder
standings_json = _read_info_file('standings.json')
player_json = _read_info_file('player.json')
team_json = _read_info_file('team.json')
venue_json = _read_info_file('venue.json')

In [35]:
player_json

{'players': [{'playerId': 826643,
   'playerName': 'Kylian Mbappé',
   'position': 'F',
   'teamId': 2829,
   'teamName': 'Real Madrid'},
  {'playerId': 1402912,
   'playerName': 'Lamine Yamal',
   'position': 'M',
   'teamId': 2817,
   'teamName': 'Barcelona'},
  {'playerId': 868812,
   'playerName': 'Vinícius Júnior',
   'position': 'F',
   'teamId': 2829,
   'teamName': 'Real Madrid'},
  {'playerId': 991011,
   'playerName': 'Jude Bellingham',
   'position': 'M',
   'teamId': 2829,
   'teamName': 'Real Madrid'},
  {'playerId': 831005,
   'playerName': 'Raphinha',
   'position': 'M',
   'teamId': 2817,
   'teamName': 'Barcelona'},
  {'playerId': 910536,
   'playerName': 'Rodrygo',
   'position': 'F',
   'teamId': 2829,
   'teamName': 'Real Madrid'},
  {'playerId': 992587,
   'playerName': 'Pedri',
   'position': 'M',
   'teamId': 2817,
   'teamName': 'Barcelona'},
  {'playerId': 41789,
   'playerName': 'Robert Lewandowski',
   'position': 'F',
   'teamId': 2817,
   'teamName': 'Barce