In [17]:
import requests
import pandas as pd
import numpy as np
import os
import json as jsonlib
import time

out_path = 'C:\\Users\\xrosinach\\Desktop\\TFM-Scouting-FC-Barcelona\\data\\fm'

# Obtenemos el CSV con competiciones
cdir = os.getcwd()
utils = os.path.join(os.path.abspath(os.path.join(cdir, '..', '..')), 'utils')
comps = pd.read_csv(os.path.join(utils, 'comps.csv'), sep=';')

# Convertir un URL a JSON
def url_to_json(url: str) -> dict:

    out = requests.get(url).json()
    time.sleep(3)                           # Para garantir seguridad

    return out

In [18]:
# Obtenemos un diccionario JSON con las temporadas disponibles en una liga
def league_available_seasons(league_code: int, out_path: str) -> dict:

    # Entorno de carpetas output
    out_league_path = os.path.join(out_path, str(league_code))
    os.makedirs(out_league_path, exist_ok=True)
    json_path = os.path.join(out_league_path, 'AvailableSeasons.json')

    # Si existe el fichero
    if os.path.exists(json_path):
        with open(json_path, "r", encoding="utf-8") as f:
            available_seasons_json = jsonlib.load(f)
        return available_seasons_json

    # Si no existe entramos en el proceso de scraping
    fotmob_url = f'https://www.fotmob.com/api/leagues?id={league_code}'

    # Leer JSON y obtener lista con temporadas disponibles
    available_seasons_json = url_to_json(url=fotmob_url)
    available_seasons = available_seasons_json.get('allAvailableSeasons', [])

    # Diccionario para añadir info
    seasons_dict = {}

    # Para cada temporada, obtenemos el link
    for s in available_seasons:

        # Obtenemos el link a partir de sus años
        if "/" in s:
            start, end = s.split("/")           # Separación por año
            key_candidate = start[-2:] + end[-2:]
        else:
            s = s[:4]                           # Sino, primer año
            key_candidate = f"{int(s) % 100:02d}{(int(s)+1) % 100:02d}"

        season_link = f"{fotmob_url}&season={s}"

        # Guardamos temporada en key y link
        seasons_dict[s] = {"key": key_candidate, "link": season_link}

    # Substituimos dentro del JSON original
    available_seasons_json["allAvailableSeasons"] = seasons_dict

    # Guardado en JSON
    with open(json_path, "w", encoding="utf-8") as f:
        jsonlib.dump(available_seasons_json, f)
    
    return available_seasons_json

# A partir de la clave de una temporada obtenemos el JSON con su información
def season_data(seasons_dict: dict, season_key: str, league_code: int, out_path: str) -> dict:

    # Entorno de carpetas output
    out_league_path = os.path.join(out_path, str(league_code))
    os.makedirs(out_league_path, exist_ok=True)
    json_path = os.path.join(out_league_path, f'Season{season_key}.json')

    # Si existe el fichero
    if os.path.exists(json_path):
        with open(json_path, "r", encoding="utf-8") as f:
            season_json = jsonlib.load(f)
        return season_json
    
    # Si no existe, comprovamos
    if season_key not in seasons_dict.keys():
        return {}
    else:
        season_link = seasons_dict[season_key]

    # Leemos el link y guardado en JSON
    season_json = url_to_json(season_link)
    with open(json_path, "w", encoding="utf-8") as f:
        jsonlib.dump(season_json, f)
    
    return season_json

# Obtención de los datos de un partido usando su ID
def match_data(matches_dict: dict, match_id: str, league_code: int, out_path: str) -> dict:

    # Entorno de carpetas output
    out_league_path = os.path.join(out_path, str(league_code), 'matches')
    os.makedirs(out_league_path, exist_ok=True)
    json_path = os.path.join(out_league_path, f'Match{match_id}.json')

    # Si existe el fichero
    if os.path.exists(json_path):
        with open(json_path, "r", encoding="utf-8") as f:
            match_json = jsonlib.load(f)
        return match_json
    
    # Comprovamos que el ID esta entre los partidos
    if match_id not in matches_dict.keys():
        return {}
    else:
        match_link = matches_dict[match_id]
        print(match_id, json_path)

    # Leemos el link y guardado en JSON
    match_json = url_to_json(url=match_link)
    with open(json_path, "w", encoding="utf-8") as f:
        jsonlib.dump(match_json, f)
    
    return match_json

In [19]:
for i, row in comps.iterrows():

    # Temporadas disponibles de nuestra liga seleccionada
    available_seasons_json = league_available_seasons(league_code=int(row['fm']), out_path=out_path)

    # Diccionario con las temporadas que tengo y su link
    available_seasons = available_seasons_json.get('allAvailableSeasons', {}).values()
    seasons_dict = {v['key']: v['link'] for v in available_seasons}

    # Fichero de temporada a partir de la key
    for season in seasons_dict.keys():
        season_json = season_data(seasons_dict=seasons_dict, season_key=str(season), league_code=int(row['fm']), out_path=out_path)

    # Partidos y diccionario con los IDs
    matches = season_json.get('fixtures', {}).get('allMatches', {})
    dict_matches_urls = {match['id']: f'https://www.fotmob.com/api/matchDetails?matchId={match['id']}'
                        for match in matches if match.get('status', {}).get('finished', False)}
    
    # Fichero de partido a partir del ID de partido
    # for match in dict_matches_urls.keys():
    #     match_json = match_data(matches_dict=dict_matches_urls, match_id=str(match), league_code=int(row['fm']), out_path=out_path)

    print(f'{row['tournament']} ({i+1}/{len(comps)})')

Liga Profesional (1/64)
Bundesliga Austria (2/64)
A-League (3/64)
First Division A (4/64)
Primera División (5/64)
Serie A Brazil (6/64)


KeyboardInterrupt: 

In [20]:
dict_matches_urls

{'814657': 'https://www.fotmob.com/api/matchDetails?matchId=814657',
 '814656': 'https://www.fotmob.com/api/matchDetails?matchId=814656',
 '814658': 'https://www.fotmob.com/api/matchDetails?matchId=814658',
 '814659': 'https://www.fotmob.com/api/matchDetails?matchId=814659',
 '814662': 'https://www.fotmob.com/api/matchDetails?matchId=814662',
 '814661': 'https://www.fotmob.com/api/matchDetails?matchId=814661',
 '814660': 'https://www.fotmob.com/api/matchDetails?matchId=814660',
 '814665': 'https://www.fotmob.com/api/matchDetails?matchId=814665',
 '814666': 'https://www.fotmob.com/api/matchDetails?matchId=814666',
 '814664': 'https://www.fotmob.com/api/matchDetails?matchId=814664',
 '814668': 'https://www.fotmob.com/api/matchDetails?matchId=814668',
 '814667': 'https://www.fotmob.com/api/matchDetails?matchId=814667',
 '814671': 'https://www.fotmob.com/api/matchDetails?matchId=814671',
 '814672': 'https://www.fotmob.com/api/matchDetails?matchId=814672',
 '814670': 'https://www.fotmob.com