In [1]:
import pandas as pd
import numpy as np
import os
import time
import pickle
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

## Login

In [2]:
synergy_url = 'https://www.synergysportstech.com'

In [3]:
def login(driver, cred, synergy_url=synergy_url):
    driver.get(f'{synergy_url}/synergy/')
    # Get username & password
    with open(cred, 'r') as f:
        user_password = f.readlines()[0].replace('\n', '').split(', ')
    # Login
    userfield = driver.find_element_by_name('txtUserName')
    userfield.send_keys(user_password[0])
    passwordfield = driver.find_element_by_name('txtPassword')
    passwordfield.send_keys(user_password[1])
    driver.find_element_by_name('btnLogin').click()

In [4]:
# driver = webdriver.Chrome('/usr/local/bin/chromedriver')
service = Service(executable_path=ChromeDriverManager().install())
driver = webdriver.Chrome(service=service)



Current google-chrome version is 102.0.5005
Get LATEST chromedriver version for 102.0.5005 google-chrome
Driver [/Users/xulian/.wdm/drivers/chromedriver/mac64/102.0.5005.61/chromedriver] found in cache


In [5]:
cred = 'synergy_cred.txt'
login(driver, cred)

  userfield = driver.find_element_by_name('txtUserName')
  passwordfield = driver.find_element_by_name('txtPassword')
  driver.find_element_by_name('btnLogin').click()


## Basic Functions

In [6]:
def read_html(filename):
    soup = BeautifulSoup(open(filename), "html.parser")
    return soup

In [7]:
def dump_pickle(file, filename):
    outfile = open(filename, 'wb')
    pickle.dump(file, outfile)
    outfile.close()

def load_pickle(filename):
    infile = open(filename, 'rb')
    file = pickle.load(infile)
    infile.close()
    return file

## Download Team Stats Sheets

In [8]:
side_dict = {'offense': '1', 'defense': '0'}

In [9]:
def download_team_stats(directory, team_id, league_id, season_id, side,
                        synergy_url=synergy_url, side_dict=side_dict, driver=driver):
    bool_side = side_dict[side]
    tm_stat_url = (
        f'{synergy_url}/Synergy/Sport/Basketball/web/teamsst/Video/QuantifiedTeam2Printable.aspx?'
        f'iSeasonID={season_id}&offensive={bool_side}&pergame=0&'
        f'iGameSubTypeID={league_id}&iTeamID={team_id}'
    )
    driver.get(tm_stat_url)
    with open(f'{directory}/{team_id}_{side}.html', 'w') as f:
        f.write(driver.page_source)

In [10]:
def download_cumulative_stats(directory, team_id, league_id, season_id,
                              synergy_url=synergy_url, driver=driver):
    cumulative_stat_url = (
        f'{synergy_url}/Synergy/Sport/Basketball/web/teamsst/Video/ViewReport.aspx?'
        f'iSeasonID={season_id}&pergame=0&'
        f'iGameSubTypeID={league_id}&Type=CumulativeStats&iTeamID={team_id}'
    )
    driver.get(cumulative_stat_url)
    with open(f'{directory}/{team_id}_box.html', 'w') as f:
        f.write(driver.page_source)

In [11]:
def download_seasons(league_dict,
                     synergy_url=synergy_url, side_dict=side_dict, driver=driver):
    league_id = league_dict['league_id']
    league_name = league_dict['league_name']
    for season_id in league_dict['seasons'].keys():
        print(league_name, season_id)
        directory = f'data_{league_name}_{season_id}'
        if directory not in os.listdir():
            os.mkdir(directory)
        for team_id in league_dict['seasons'][season_id]['team_ids']:
            print(team_id)
            for side in ['offense', 'defense']:
                download_team_stats(directory, team_id, league_id, season_id, side,
                                    synergy_url=synergy_url, side_dict=side_dict, 
                                    driver=driver)
            download_cumulative_stats(directory, team_id, league_id, season_id,
                                      synergy_url=synergy_url, driver=driver)
            time.sleep(3)
        print()
    print('Done.')

### CBA

In [12]:
cba_team_ids = ['550', '507', '10295', '10294', 
                '547', '14815', 
                '517', '10292', '6291', '536',
                '542', '538', '543', '509',
                '548', '10296', '508', '528']
cba_dict = {'league_name': 'cba',
            'league_id': '45',
            'seasons': {
                '2021': {'team_ids': cba_team_ids +
                        ['18355', # Shenzhen
                         '526' # Bayi
                        ]},
                '2020': {'team_ids': cba_team_ids +
                        ['18355' # Shenzhen
                        ]},
                '2019': {'team_ids': cba_team_ids +
                        ['18355', # Shenzhen
                         '526' # Bayi
                        ]},
                '2018': {'team_ids': cba_team_ids +
                        ['537', # Shenzhen
                         '526' # Bayi
                        ]},
                '2017': {'team_ids': cba_team_ids +
                        ['537', # Shenzhen
                         '526' # Bayi
                        ]}
            }
           }

In [13]:
# dump_pickle(cba_dict, 'cba_dict')

In [14]:
# download_seasons(cba_dict)

### NBA

In [15]:
nba_team_ids = ['15', '1', '3', '30', '17', '18', '8', '9', '19', '23',
                '10', '20', '24', '25', '14', '2', '21', '11', '16', '4',
                '29', '5', '6', '26', '27', '28', '12', '22', '13', '7']
nba_dict = {'league_name': 'nba',
            'league_id': '2',
            'seasons': {season: {'team_ids': nba_team_ids}
                        for season in range(2016, 2022)}
           }

In [16]:
# dump_pickle(nba_dict, 'nba_dict')

In [17]:
# download_seasons(nba_dict)

### Euroleague

In [12]:
licensed_clubs = ['459', # Anadolu Efes
                  '407', # Olimpia Milano
                  '428', # CSKA Moscow
                  '475', # Barcelona
                  '466', # Fenerbahçe
                  '454', # Maccabi Tel Aviv
                  '469', # Olympiacos
                  '470', # Panathinaikos
                  '502', # Real Madrid
                  '521', # Baskonia
                  '460' # Zalgiris
                 ]
euroleague_dict = {'league_name': 'euroleague',
                   'league_id': '344',
                   'seasons': {
                       '2021': {'team_ids': licensed_clubs +
                                ['722', # ASVEL
                                 '405', # Crvena Zvezda 
                                 '595', # ALBA Berlin
                                 '3410', # Bayern Munich
                                 '716', # Zenit Saint Petersburg
                                 '717', # UNICS
                                 '8390' # AS Monaco
                                ]},
                       '2020': {'team_ids': licensed_clubs + 
                                ['595', # ALBA Berlin
                                 '405', # Crvena Zvezda
                                 '3410', # Bayern Munich
                                 '711', # Khimki Moscow
                                 '722', # ASVEL
                                 '425', # Valencia Basket
                                 '716' # Zenit Saint Petersburg
                                ]},
                       '2019': {'team_ids': licensed_clubs +
                                ['595', # ALBA Berlin
                                 '405', # Crvena Zvezda
                                 '3410', # Bayern Munich
                                 '711', # Khimki Moscow
                                 '722', # ASVEL
                                 '425', # Valencia Basket
                                 '716' # Zenit Saint Petersburg
                                ]},
                       '2018': {'team_ids': licensed_clubs +
                                ['411', # Budućnost VOLI
                                 '520', # Darüşşafaka Tekfen 
                                 '3410', # Bayern Munich
                                 '424', # Herbalife Gran Canaria
                                 '711'# Khimki Moscow
                                ]},
                       '2017': {'team_ids': licensed_clubs +
                                ['620', # Brose Bamberg
                                 '405', # Crvena Zvezda
                                 '711', # Khimki Moscow
                                 '427', # Unicaja
                                 '425' # Valencia Basket
                                ]},
                       '2016': {'team_ids': licensed_clubs + 
                                ['620', # Brose Bamberg
                                 '405', # Crvena Zvezda
                                 '520', # Darüşşafaka Tekfen 
                                 '724', # Galatasaray Odeabank
                                 '717' # UNICS
                                ]},
                   }
                  }

In [19]:
# dump_pickle(euroleague_dict, 'euroleague_dict')

In [15]:
# download_seasons(euroleague_dict)

### NBL

In [16]:
nbl_team_ids = ['877', '541', '545', '735', '546',
                '540', '533', '544']
nbl_dict = {'league_name': 'nbl',
            'league_id': '49',
            'seasons': {
                '2021': {'team_ids': nbl_team_ids +
                         ['17543', '25755'
                         ]},
                '2020': {'team_ids': nbl_team_ids + 
                         ['17543'
                         ]},
                '2019': {'team_ids': nbl_team_ids + 
                         ['17543'
                         ]},
                '2018': {'team_ids': nbl_team_ids},
                '2017': {'team_ids': nbl_team_ids},
                '2016': {'team_ids': nbl_team_ids}
            }
           }

In [17]:
nbl_dict = {'league_name': 'nbl',
            'league_id': '49',
            'seasons': {
                '2021': {'team_ids': nbl_team_ids +
                         ['17543', '25755'
                         ]}
            }
           }

In [24]:
# dump_pickle(nbl_dict, 'nbl_dict')

In [19]:
# download_seasons(nbl_dict)

In [20]:
driver.close()