In [1]:
import pandas as pd
import numpy as np
import os
import time
import pickle
from bs4 import BeautifulSoup

## Basic Functions

In [2]:
def read_html(filename):
    """Parse an HTML file from disk into a BeautifulSoup tree.

    Parameters
    ----------
    filename : str or path-like
        Path of the HTML file to read.

    Returns
    -------
    BeautifulSoup
        The document parsed with the built-in ``"html.parser"`` backend.
    """
    # The markup is read while the BeautifulSoup object is constructed, so the
    # handle can be released as soon as the constructor returns. Using a
    # context manager guarantees that, even if parsing raises.
    with open(filename) as html_file:
        return BeautifulSoup(html_file, "html.parser")

In [3]:
def dump_pickle(file, filename):
    """Serialize an object to disk with pickle.

    Parameters
    ----------
    file : object
        The Python object to pickle (despite the name, not a file handle).
    filename : str or path-like
        Destination path, opened in binary write mode (``'wb'``), so an
        existing file at that path is overwritten.
    """
    # The context manager closes the handle even when pickle.dump raises
    # (e.g. for an unpicklable object), which the manual open/close did not.
    with open(filename, 'wb') as outfile:
        pickle.dump(file, outfile)

def load_pickle(filename):
    """Load a pickled object from disk.

    Parameters
    ----------
    filename : str or path-like
        Path of the pickle file, opened in binary read mode (``'rb'``).

    Returns
    -------
    object
        Whatever object was stored in the file.
    """
    # NOTE(security): unpickling can execute arbitrary code. Only call this on
    # files that this project produced itself, never on untrusted input.
    # The context manager closes the handle even when pickle.load raises
    # (e.g. on a truncated or corrupt file).
    with open(filename, 'rb') as infile:
        return pickle.load(infile)

## Content

In [4]:
def get_team_cumulative_df(soup, season):
    """Turn the first ``<table>`` of a parsed page into a per-player DataFrame.

    The text of every ``<td>`` in every ``<tr>`` of the first table is
    collected. The first row supplies the column names, the last row is
    dropped, and the rows in between become the data.

    The first column's header is stored as a constant ``Team`` column and the
    column itself is renamed to ``Player``.
    # presumably the header cell holds the team name and the dropped last row
    # is a totals line -- confirm against the scraped pages

    Parameters
    ----------
    soup : BeautifulSoup
        Parsed page; must contain at least one table.
    season : int
        Value written to the ``Season`` column of every row.

    Returns
    -------
    pandas.DataFrame
        One row per data row of the table, plus ``Season`` and ``Team``.
    """
    first_table = soup.find_all('table')[0]

    rows = []
    for tr in first_table.find_all('tr'):
        rows.append([cell.text for cell in tr.find_all('td')])

    # Row 0 is the header; the final row is excluded from the data.
    header, body = rows[0], rows[1:-1]
    stats = pd.DataFrame(body, columns=header)
    stats['Season'] = season

    team_name = stats.columns[0]
    stats['Team'] = team_name
    return stats.rename(columns={team_name: 'Player'})

In [5]:
def get_season_cumulative_df(season, league):
    """Combine every team's cumulative table for one league season.

    Looks in the directory ``data_{league}_{season}`` (relative to the current
    working directory), parses each file whose name contains ``'box'`` with
    ``read_html`` and ``get_team_cumulative_df``, and stacks the results.

    Parameters
    ----------
    season : int
        Season identifier; used in the directory name and passed through to
        ``get_team_cumulative_df``.
    league : str
        League identifier used in the directory name (e.g. ``'cba'``).

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of the per-file frames, in sorted filename
        order. Each frame keeps its own index, so index labels repeat.

    Raises
    ------
    ValueError
        If the directory contains no file with ``'box'`` in its name.
    """
    print(season)  # progress marker, one line per season processed
    directory = f'data_{league}_{season}'

    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order of the result reproducible across runs and machines.
    box_files = sorted(name for name in os.listdir(directory) if 'box' in name)
    if not box_files:
        # pd.concat([]) would raise a ValueError as well, but without saying
        # which directory was empty.
        raise ValueError(f'no box files found in {directory!r}')

    team_cumulative_dfs = [
        get_team_cumulative_df(read_html(os.path.join(directory, name)), season)
        for name in box_files
    ]
    return pd.concat(team_cumulative_dfs)

### CBA

In [6]:
# One cumulative frame per CBA season, keyed by season (2017 through 2020;
# the range end is exclusive).
cba_dfs = dict(
    (year, get_season_cumulative_df(year, 'cba'))
    for year in range(2017, 2021)
)

2017
2018
2019
2020


In [7]:
# Left-join the stacked per-season CBA frames onto the offense table (every
# offense row is kept) and write the combined table to a new CSV.
(
    pd.read_csv('cba_offense.csv')
    .merge(
        pd.concat([cba_dfs[year] for year in range(2017, 2021)]),
        on=['Player', 'Team', 'Season'],
        how='left',
    )
    .to_csv('cba_offense_cumulative.csv', index=False)
)

### Euroleague

In [8]:
# One cumulative frame per Euroleague season, keyed by season (2016 through
# 2020; the range end is exclusive).
euroleague_dfs = dict(
    (year, get_season_cumulative_df(year, 'euroleague'))
    for year in range(2016, 2021)
)

2016
2017
2018
2019
2020


In [9]:
# Left-join the stacked per-season Euroleague frames onto the offense table
# (every offense row is kept) and write the combined table to a new CSV.
(
    pd.read_csv('euroleague_offense.csv')
    .merge(
        pd.concat([euroleague_dfs[year] for year in range(2016, 2021)]),
        on=['Player', 'Team', 'Season'],
        how='left',
    )
    .to_csv('euroleague_offense_cumulative.csv', index=False)
)

### NBA

In [10]:
# One cumulative frame per NBA season, keyed by season (2016 through 2020;
# the range end is exclusive).
nba_dfs = dict(
    (year, get_season_cumulative_df(year, 'nba'))
    for year in range(2016, 2021)
)

2016
2017
2018
2019
2020


In [11]:
# Left-join the stacked per-season NBA frames onto the offense table (every
# offense row is kept) and write the combined table to a new CSV.
(
    pd.read_csv('nba_offense.csv')
    .merge(
        pd.concat([nba_dfs[year] for year in range(2016, 2021)]),
        on=['Player', 'Team', 'Season'],
        how='left',
    )
    .to_csv('nba_offense_cumulative.csv', index=False)
)

### NBL

In [12]:
# One cumulative frame per NBL season, keyed by season (2016 through 2020;
# the range end is exclusive).
nbl_dfs = dict(
    (year, get_season_cumulative_df(year, 'nbl'))
    for year in range(2016, 2021)
)

2016
2017
2018
2019
2020


In [13]:
# Left-join the stacked per-season NBL frames onto the offense table (every
# offense row is kept) and write the combined table to a new CSV.
(
    pd.read_csv('nbl_offense.csv')
    .merge(
        pd.concat([nbl_dfs[year] for year in range(2016, 2021)]),
        on=['Player', 'Team', 'Season'],
        how='left',
    )
    .to_csv('nbl_offense_cumulative.csv', index=False)
)