In [4]:
import requests
import pandas as pd
import numpy as np
from google.cloud import storage

def fetch_data():
    url = "https://api-football-v1.p.rapidapi.com/v3/players"
    querystring = {"team": "33", "league": "39", "season": "2022"}
    headers = {
        "X-RapidAPI-Key": "b01fb7d4d5msh9d6010d034fce6bp136078jsnfe71045ac5ee",
        "X-RapidAPI-Host": "api-football-v1.p.rapidapi.com"
    }
    response = requests.get(url, headers=headers, params=querystring)
    return response.json()

def process_data(data):
    players_data = []
    for player in data['response']:
        player_info = player['player']
        stats = player['statistics'][0]
        player_data = {
            "Player name": player_info['name'],
            "Nationality": player_info['nationality'],
            "Position": player_info.get('position', 'N/A'),
            "Weight": player_info.get('weight', 'N/A'),
            "Height": player_info.get('height', 'N/A'),
            "Team": stats['team']['name'],
            "League": stats['league']['name'],
            "Appearances": stats['games']['appearences'],
            "Lineups": stats['games']['lineups'],
            "Minutes": stats['games']['minutes'],
            "Goals": stats['goals']['total'],
            "Assists": stats['goals']['assists'],
            "Yellow Cards": stats['cards']['yellow'],
            "Red Cards": stats['cards']['red'],
            "Shots Total": stats['shots']['total'],
            "Shots On": stats['shots']['on'],
            "Passes Total": stats['passes']['total'],
            "Key Passes": stats['passes']['key'],
            "Pass Accuracy": stats['passes']['accuracy'],
            "Tackles Total": stats['tackles']['total'],
            "Blocks": stats['tackles']['blocks'],
            "Interceptions": stats['tackles']['interceptions'],
            "Dribbles Attempts": stats['dribbles']['attempts'],
            "Dribbles Success": stats['dribbles']['success'],
            "Dribbled Past": stats['dribbles']['past'],
            "Fouls Drawn": stats['fouls']['drawn'],
            "Fouls Committed": stats['fouls']['committed'],
            "Duels Total": stats['duels']['total'],
            "Duels Won": stats['duels']['won'],
            "Penalty Scored": stats['penalty']['scored'],
            "Penalty Missed": stats['penalty']['missed'],
            "Penalty Saved": stats['penalty']['saved']
        }
        players_data.append(player_data)
    return pd.DataFrame(players_data)

def clean_and_transform_data(df):

    # Drop unnecessary columns
    df = df.drop(['Position', 'Dribbled Past'], axis=1)

    # Cleaning Weight column
    # Remove 'kg' and convert 'none' to NaN, then convert to numeric
    df['Weight'] = df['Weight'].str.replace(' kg', '').str.replace('kg', '')
    df['Weight'].replace('none', np.nan, inplace=True)
    df['Weight'] = pd.to_numeric(df['Weight'], errors='coerce')

    # Calculate the average weight and replace NaN values with it
    average_weight = df['Weight'].dropna().mean()
    df['Weight'].fillna(average_weight, inplace=True)

    # Cleaning Height column
    # Remove 'cm' and convert 'none' to NaN, then convert to numeric
    df['Height'] = df['Height'].str.replace(' cm', '').str.replace('cm', '')
    df['Height'].replace('none', np.nan, inplace=True)
    df['Height'] = pd.to_numeric(df['Height'], errors='coerce')

    # Calculate the average height and replace NaN values with it
    average_height = df['Height'].dropna().mean()
    df['Height'].fillna(average_height, inplace=True)

    # Replace all other NaN values in the DataFrame with 0
    df.fillna(0, inplace=True)
    
    # Remove the first row
    df = df.iloc[1:]

    return df

def upload_to_gcs(df, bucket_name, destination_blob_name):
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)

    # Save DataFrame to a CSV on memory
    df.to_csv(destination_blob_name, index=False)

    # Upload the CSV to GCS
    blob.upload_from_filename(destination_blob_name)
    print(f"File {destination_blob_name} uploaded to {bucket_name}.")

def main():
    """
    Main function to orchestrate data fetching, processing, and uploading.
    """
    data = fetch_data()
    df = process_data(data)
    df_cleaned = clean_and_transform_data(df)
    upload_to_gcs(df_cleaned, "YOUR_GCS_BUCKET_NAME", "players_data.csv")

if __name__ == "__main__":
    main()


In [5]:
data = fetch_data()
df = process_data(data)
df

Unnamed: 0,Player name,Nationality,Position,Weight,Height,Team,League,Appearances,Lineups,Minutes,...,Dribbles Attempts,Dribbles Success,Dribbled Past,Fouls Drawn,Fouls Committed,Duels Total,Duels Won,Penalty Scored,Penalty Missed,Penalty Saved
0,Lee Grant,England,,83 kg,193 cm,Manchester United,Premier League,,,,...,,,,,,,,,,
1,T. Fredricson,England,,,,Manchester United,Premier League,,,,...,,,,,,,,,,
2,B. Hardley,Netherlands,,,189 cm,Manchester United,Premier League,,,,...,,,,,,,,,,
3,S. Murray,England,,,,Manchester United,Premier League,,,,...,,,,,,,,,,
4,David de Gea,Spain,,76 kg,192 cm,Manchester United,Premier League,38.0,38.0,3420.0,...,,,,5.0,,12.0,10.0,0.0,0.0,1.0
5,P. Jones,England,,71 kg,180 cm,Manchester United,Premier League,0.0,0.0,0.0,...,,,,,,,,,,
6,C. Savage,Wales,,74 kg,182 cm,Manchester United,Premier League,0.0,0.0,0.0,...,,,,,,,,,,
7,H. Mejbri,Tunisia,,74 kg,177 cm,Manchester United,Premier League,0.0,0.0,0.0,...,,,,,,,,,,
8,S. McTominay,Scotland,,88 kg,193 cm,Manchester United,Premier League,24.0,10.0,1149.0,...,22.0,13.0,,16.0,28.0,183.0,95.0,0.0,0.0,
9,Casemiro,Brazil,,84 kg,185 cm,Manchester United,Premier League,28.0,24.0,2126.0,...,17.0,9.0,,24.0,48.0,320.0,179.0,0.0,0.0,


In [6]:
df_cleaned = clean_and_transform_data(df)
df_cleaned

Unnamed: 0,Player name,Nationality,Weight,Height,Team,League,Appearances,Lineups,Minutes,Goals,...,Interceptions,Dribbles Attempts,Dribbles Success,Fouls Drawn,Fouls Committed,Duels Total,Duels Won,Penalty Scored,Penalty Missed,Penalty Saved
0,Lee Grant,England,83.0,193.0,Manchester United,Premier League,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,T. Fredricson,England,75.125,181.941176,Manchester United,Premier League,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,B. Hardley,Netherlands,75.125,189.0,Manchester United,Premier League,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,S. Murray,England,75.125,181.941176,Manchester United,Premier League,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,David de Gea,Spain,76.0,192.0,Manchester United,Premier League,38.0,38.0,3420.0,0.0,...,1.0,0.0,0.0,5.0,0.0,12.0,10.0,0.0,0.0,1.0
5,P. Jones,England,71.0,180.0,Manchester United,Premier League,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,C. Savage,Wales,74.0,182.0,Manchester United,Premier League,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,H. Mejbri,Tunisia,74.0,177.0,Manchester United,Premier League,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,S. McTominay,Scotland,88.0,193.0,Manchester United,Premier League,24.0,10.0,1149.0,1.0,...,14.0,22.0,13.0,16.0,28.0,183.0,95.0,0.0,0.0,0.0
9,Casemiro,Brazil,84.0,185.0,Manchester United,Premier League,28.0,24.0,2126.0,4.0,...,34.0,17.0,9.0,24.0,48.0,320.0,179.0,0.0,0.0,0.0
