In [20]:
import pandas as pd

In [46]:
import pandas as pd
import soccerdata as sd
from pathlib import Path

class FBREFAPI:
    """Collect FBref player season statistics through ``soccerdata`` and save them as CSV.

    The constructor arguments are stored as attributes; ``save_dir`` is
    converted to a ``pathlib.Path`` and is the directory every CSV is written to.
    """

    def __init__(self, years, list_stats, league, save_dir):
        """Store the scraping configuration.

        Args:
            years: Iterable of seasons; each one is passed to
                ``sd.FBref(seasons=[year])``.
            list_stats: Iterable of ``stat_type`` values handed to
                ``read_player_season_stats``.
            league: League identifier passed to ``sd.FBref(leagues=[league])``.
            save_dir: Directory (str or Path) that receives the CSV files.
        """
        self.years = years
        self.list_stats = list_stats
        self.league = league
        self.save_dir = Path(save_dir)  # Convert to Path object

    @staticmethod
    def _flatten_columns(columns):
        """Join tuple column labels with '_'; leave already-flat labels untouched."""
        return [
            col if not isinstance(col, tuple)
            else ('_'.join(col).strip() if col[1] else col[0])
            for col in columns
        ]

    def individual_stats(self, year):
        """Collect outfield-player statistics for one season.

        Every stat type in ``self.list_stats`` is read, its column labels are
        flattened, goalkeepers (``pos == "GK"``) are dropped, and the columns
        not seen so far are left-merged onto the first table by ``player``.

        Args:
            year: Season passed to ``sd.FBref``.

        Returns:
            DataFrame with one row per (player, season). An empty DataFrame is
            returned when ``self.list_stats`` is empty.
        """
        fbref = sd.FBref(leagues=[self.league], seasons=[year])
        total_others = None

        for stat in self.list_stats:
            pl = fbref.read_player_season_stats(stat_type=stat)
            pl.columns = self._flatten_columns(pl.columns.values)
            pl = pl.reset_index()
            others = pl[pl['pos'] != "GK"]

            if total_others is None:
                total_others = others
            else:
                new_columns = [col for col in others.columns
                               if col not in total_others.columns and col != 'player']
                # Keep one right-hand row per player: a player listed more than
                # once would otherwise multiply the row count on every merge.
                # The final drop_duplicates keeps the first row anyway, so the
                # returned frame is unchanged.
                others = others[['player'] + new_columns].drop_duplicates(subset='player')
                total_others = total_others.merge(others, how='left', on=['player'])
                total_others = total_others[total_others['team'].notna()]

            print("Data Collected:", stat)

        if total_others is None:
            # No stat types requested: there are no 'player'/'season' columns
            # to de-duplicate on, so return an empty frame instead of raising.
            print("All Individual Data Collected for", year)
            return pd.DataFrame()

        total_others = total_others.drop_duplicates(subset=['player', 'season']).reset_index(drop=True)
        print("All Individual Data Collected for", year)
        return total_others

    def save_all(self):
        """Write one ``fbref_{year}.csv`` per season into ``self.save_dir``."""
        self.save_dir.mkdir(parents=True, exist_ok=True)

        for year in self.years:
            df = self.individual_stats(year)
            # Write below the configured directory rather than a machine-specific path.
            df.to_csv(self.save_dir / f"fbref_{year}.csv", index=False)
            print(f"✓ {year} data ({len(df)} rows) saved")

    def combine_total(self):
        """Combine all yearly data into one total file.

        Rows are appended to ``self.save_dir / "fbref_second.csv"``. If that
        file already holds data, new rows are appended below its existing
        header; otherwise the file is created with a header. Every frame is
        aligned to the header's columns before it is written, because
        header-less appends are matched by position only.
        """
        self.save_dir.mkdir(parents=True, exist_ok=True)
        output_path = self.save_dir / "fbref_second.csv"
        # A missing or zero-byte file has no header yet.
        first_year = not output_path.exists() or output_path.stat().st_size == 0
        header_columns = None if first_year else list(pd.read_csv(output_path, nrows=0).columns)

        for year in self.years:
            df = self.individual_stats(year)
            if header_columns is None:
                header_columns = list(df.columns)
            elif list(df.columns) != header_columns:
                # Missing columns become NaN; columns absent from the header are dropped.
                df = df.reindex(columns=header_columns)
            df.to_csv(output_path, mode='w' if first_year else 'a',
                      header=first_year, index=False)
            first_year = False
            print(f"✓ {year} data ({len(df)} rows) appended")



In [22]:
import warnings
# Install a global "ignore" filter: no category or module is given, so every
# warning raised after this cell runs is suppressed.
# NOTE(review): this also hides deprecation warnings from pandas and other
# libraries — consider narrowing it with category=/module= if that matters.
warnings.filterwarnings('ignore')

In [None]:
import pandas as pd
# Read ./data/final/final.csv into `df`. The path is relative, so it resolves
# against the kernel's current working directory.
df = pd.read_csv("./data/final/final.csv")


In [4]:
# Show `df` as the cell output (rendered as a truncated head/tail preview).
# NOTE(review): df.head() would keep the stored notebook output smaller.
df

Unnamed: 0,player,league,team,nation,general_position,best_position,age,born,season,overall_rating,...,Performance_PKcon,Performance_OG,Performance_Recov,Aerial_Duels_Won,Aerial_Duels_Lost,height(cm),weight(kg),value(€),wage(€),release_clause(€)
0,a. abrashi,GER-Bundesliga,Freiburg,ALB,MF,CDM,27,1990.0,2018,71.0,...,0.0,0.0,55.0,17.0,17.0,172,71,2000000.0,15000.0,3600000.0
1,a. abrashi,GER-Bundesliga,Freiburg,ALB,MF,CDM,28,1990.0,2019,71.0,...,0.0,0.0,61.0,5.0,4.0,172,71,1900000.0,15000.0,3500000.0
2,a. abrashi,GER-Bundesliga,Freiburg,ALB,MF,CDM,29,1990.0,2020,71.0,...,0.0,0.0,51.0,9.0,9.0,170,74,2000000.0,15000.0,3600000.0
3,a. adli,GER-Bundesliga,Leverkusen,MAR,FW,LM,21,2000.0,2022,74.0,...,0.0,0.0,62.0,12.0,29.0,174,60,9500000.0,32000.0,18100000.0
4,a. adli,GER-Bundesliga,Leverkusen,MAR,FW,ST,22,2000.0,2023,76.0,...,1.0,0.0,61.0,29.0,41.0,174,73,17000000.0,42000.0,32300000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9854,ł. piszczek,GER-Bundesliga,Dortmund,POL,DF,CB,33,1985.0,2019,81.0,...,0.0,0.0,110.0,61.0,31.0,184,79,7000000.0,60000.0,11900000.0
9855,ł. piszczek,GER-Bundesliga,Dortmund,POL,DF,RB,34,1985.0,2020,80.0,...,1.0,0.0,149.0,30.0,24.0,184,79,5000000.0,71000.0,8500000.0
9856,š. vrsaljko,ESP-La Liga,Atlético Madrid,CRO,DF,RB,25,1992.0,2018,80.0,...,0.0,0.0,68.0,44.0,21.0,181,76,15500000.0,50000.0,32900000.0
9857,š. vrsaljko,ITA-Serie A,Inter,CRO,DF,RB,26,1992.0,2019,81.0,...,0.0,0.0,42.0,12.0,5.0,181,76,15000000.0,70000.0,0.0
