In [1]:
import pandas as pd
import os

In [2]:
# Folder that holds all the dataset CSV files
directory = './dataset_nba'

# Names of the CSV files found in that folder
csv_files = [name for name in os.listdir(directory) if name.endswith('.csv')]

# Loaded tables, keyed by file name without the '.csv' extension
dataframes = {}

# Load every CSV file and register it under its file stem
for csv_name in csv_files:
    # File stem, e.g. '2018-rookies' for '2018-rookies.csv'
    stem, _ext = os.path.splitext(csv_name)

    # Read the table from its full path
    frame = pd.read_csv(os.path.join(directory, csv_name))

    # Tag every row with the first four characters of the file stem (kept as a string)
    frame['Season'] = stem[:4]

    dataframes[stem] = frame

# Each table is now reachable through its file stem;
# for example, the table loaded from '2018-rookies.csv':
dataframes['2018-rookies']


Unnamed: 0,Rk,Player,Debut,Age,Yrs,G,MPTOT,FG,FGA,3P,...,PTSTOT,FG%,3P%,FT%,MP,PTS,TRB,AST,ID,Season
0,1,Bam Adebayo,Oct 18 '17 MIA @ ORL,20,1,69,1368,174,340,0,...,477,0.512,0.000,0.721,19.8,6.9,5.5,1.5,adebaba01,2018
1,2,Jarrett Allen,Oct 20 '17 BRK vs. ORL,19,1,72,1441,234,397,5,...,587,0.589,0.333,0.776,20.0,8.2,5.4,0.7,allenja01,2018
2,3,Kadeem Allen,Dec 11 '17 BOS @ CHI,25,1,18,107,6,22,0,...,19,0.273,0.000,0.778,5.9,1.1,0.6,0.7,allenka01,2018
3,4,Ike Anigbogu,Oct 20 '17 IND vs. POR,19,1,11,30,4,9,0,...,13,0.444,,0.833,2.7,1.2,0.8,0.0,anigbik01,2018
4,5,OG Anunoby,Oct 19 '17 TOR vs. CHI,20,1,74,1481,163,346,73,...,438,0.471,0.371,0.629,20.0,5.9,2.5,0.7,anunoog01,2018
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
115,116,Matt Williams,Dec 23 '17 MIA vs. NOP,24,1,3,11,2,6,1,...,5,0.333,0.200,,3.7,1.7,0.3,0.0,willima05,2018
116,117,D.J. Wilson,Oct 20 '17 MIL vs. CLE,21,1,22,71,9,16,2,...,21,0.563,0.400,0.500,3.2,1.0,0.5,0.1,wilsodj01,2018
117,118,Jamil Wilson,Nov 20 '17 LAC @ NYK,27,1,15,274,38,81,27,...,105,0.469,0.429,0.500,18.3,7.0,2.1,0.7,wilsoja02,2018
118,119,Guerschon Yabusele,Oct 20 '17 BOS @ PHI,22,1,33,235,26,61,12,...,79,0.426,0.324,0.682,7.1,2.4,1.6,0.5,yabusgu01,2018


In [3]:
# For each year from 2018 through 2023, collect the values of the 'ID' column
# of that year's '<year>-rookies' table into a plain list.
rookies_id = {
    season: dataframes.get(f'{season}-rookies')['ID'].tolist()
    for season in range(2018, 2024)
}

rookies_id

{2018: ['adebaba01',
  'allenja01',
  'allenka01',
  'anigbik01',
  'anunoog01',
  'arcidry01',
  'artisja01',
  'bacondw01',
  'balllo01',
  'belljo01',
  'birchkh01',
  'birdja01',
  'blakean01',
  'bogdabo01',
  'bouchch01',
  'bradlto01',
  'brookdi01',
  'brownst02',
  'bryanth01',
  'carusal01',
  'cavanty01',
  'clavegi01',
  'clevean01',
  'collijo01',
  'colliza01',
  'colliky01',
  'cookech01',
  'costema01',
  'craigto01',
  'dorsety01',
  'dotsoda01',
  'doylemi01',
  'doziepj01',
  'evansja01',
  'fergute01',
  'foxde01',
  'fultzma01',
  'grayjo01',
  'hamilda02',
  'harrish01',
  'hartjo01',
  'hayesni01',
  'hearnre01',
  'henrymy01',
  'hicksis01',
  'hollajo02',
  'huntevi01',
  'ingraan01',
  'isaacjo01',
  'iwundwe01',
  'jacksaa01',
  'jacksjo02',
  'jacksju01',
  'jamesmi02',
  'johnsda04',
  'johnsom01',
  'jonesja04',
  'kennalu01',
  'klebima01',
  'korkmfu01',
  'kornelu01',
  'kuzmaky01',
  'leaftj01',
  'leeda03',
  'lemonwa01',
  'lydonty01',
  'magetjo01',

In [4]:
# Build a single table made of:
#   * for each rookie year 2018-2021, the rows of every later '<year>-players'
#     table (up to and including 2022) whose 'ID' is in that year's rookie list;
#   * the full '2018-rookies' table, placed last.
# NOTE(review): only the 2018 rookies table itself is included; the rookie-season
# rows of the 2019+ cohorts are not part of the result -- confirm this is intended.
follow_up_frames = []
for year_rookie in range(2018, 2023):
    for year_player in range(year_rookie + 1, 2023):
        # Direct indexing: a missing table raises a KeyError naming the key,
        # instead of a TypeError on None further down.
        df_att = dataframes[f'{year_player}-players']
        follow_up_frames.append(df_att[df_att['ID'].isin(rookies_id[year_rookie])])

# Concatenate once instead of re-copying the growing frame on every iteration.
# The previous loop prepended each slice, so the collected slices are reversed
# here to keep the same row order, with the 2018 rookies table at the end.
df_final = pd.concat(
    follow_up_frames[::-1] + [dataframes['2018-rookies']],
    ignore_index=True,
)

df_final

Unnamed: 0,Rk,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,...,PF,PTS,ID,Season,Debut,Yrs,MPTOT,TRBTOT,ASTTOT,PTSTOT
0,1,Precious Achiuwa,C,22,TOR,73,28.0,23.6,3.6,8.3,...,2.1,9.1,achiupr01,2022,,,,,,
1,15,Cole Anthony,PG,21,ORL,65,65.0,31.7,5.5,14.0,...,2.6,16.3,anthoco01,2022,,,,,,
2,20,Deni Avdija,SF,21,WAS,82,8.0,24.2,3.0,7.1,...,2.3,8.4,avdijde01,2022,,,,,,
3,23,Udoka Azubuike,C,22,UTA,17,6.0,11.5,2.2,2.9,...,1.5,4.7,azubuud01,2022,,,,,,
4,25,LaMelo Ball,PG,20,CHO,75,75.0,32.3,7.2,16.7,...,3.2,20.1,ballla01,2022,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
999,116,Matt Williams,,24,,3,,3.7,2.0,6.0,...,1.0,1.7,willima05,2018,Dec 23 '17 MIA vs. NOP,1.0,11.0,1.0,0.0,5.0
1000,117,D.J. Wilson,,21,,22,,3.2,9.0,16.0,...,7.0,1.0,wilsodj01,2018,Oct 20 '17 MIL vs. CLE,1.0,71.0,10.0,3.0,21.0
1001,118,Jamil Wilson,,27,,15,,18.3,38.0,81.0,...,34.0,7.0,wilsoja02,2018,Nov 20 '17 LAC @ NYK,1.0,274.0,32.0,10.0,105.0
1002,119,Guerschon Yabusele,,22,,33,,7.1,26.0,61.0,...,23.0,2.4,yabusgu01,2018,Oct 20 '17 BOS @ PHI,1.0,235.0,52.0,16.0,79.0


In [5]:
# Look up the '2022-players' table (None if no such table was loaded) and display it
dff = dataframes.get('2022-players')
dff

Unnamed: 0,Rk,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,...,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,ID,Season
0,1,Precious Achiuwa,C,22,TOR,73,28,23.6,3.6,8.3,...,4.5,6.5,1.1,0.5,0.6,1.2,2.1,9.1,achiupr01,2022
1,2,Steven Adams,C,28,MEM,76,75,26.3,2.8,5.1,...,5.4,10.0,3.4,0.9,0.8,1.5,2.0,6.9,adamsst01,2022
2,3,Bam Adebayo,C,24,MIA,56,56,32.6,7.3,13.0,...,7.6,10.1,3.4,1.4,0.8,2.6,3.1,19.1,adebaba01,2022
3,4,Santi Aldama,PF,21,MEM,32,0,11.3,1.7,4.1,...,1.7,2.7,0.7,0.2,0.3,0.5,1.1,4.1,aldamsa01,2022
4,5,LaMarcus Aldridge,C,36,BRK,47,12,22.3,5.4,9.7,...,3.9,5.5,0.9,0.3,1.0,0.9,1.7,12.9,aldrila01,2022
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
807,601,Thaddeus Young,PF,33,TOR,26,0,18.3,2.6,5.5,...,2.9,4.4,1.7,1.2,0.4,0.8,1.7,6.3,youngth01,2022
808,602,Trae Young,PG,23,ATL,76,76,34.9,9.4,20.3,...,3.1,3.7,9.7,0.9,0.1,4.0,1.7,28.4,youngtr01,2022
809,603,Omer Yurtseven,C,23,MIA,56,12,12.6,2.3,4.4,...,3.7,5.3,0.9,0.3,0.4,0.7,1.5,5.3,yurtsom01,2022
810,604,Cody Zeller,C,29,POR,27,0,13.1,1.9,3.3,...,2.8,4.6,0.8,0.3,0.2,0.7,2.1,5.2,zelleco01,2022
