In [1]:
import os
import pickle
import time

import numpy as np
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from tqdm import tqdm
from webdriver_manager.chrome import ChromeDriverManager

In [2]:
import warnings
# Suppress every warning for the rest of the kernel session.
# NOTE(review): this is a blanket filter, so library deprecation warnings
# (e.g. from Selenium) are hidden as well -- consider narrowing it.
warnings.filterwarnings('ignore')

## Login

In [3]:
# Base URL of the Synergy site; login() opens `<synergy_url>/synergy/`.
synergy_url = 'https://www.synergysportstech.com'
# Base URL of the basketball app; get_stats_sheet() builds its
# `<app_url>/leaderboards?...` URLs from it.
app_url = 'https://apps.synergysports.com/basketball'

In [4]:
def login(driver, cred, synergy_url=synergy_url):
    """Sign the browser session in to Synergy with credentials read from a file.

    Parameters
    ----------
    driver : selenium WebDriver
        Browser session used to open the login page and fill in the form.
    cred : str
        Path to a text file whose first line is ``<username>, <password>``
        (comma followed by a single space). Only the first line is read.
    synergy_url : str
        Base URL of the site; the page opened is ``<synergy_url>/synergy/``.

    Raises
    ------
    ValueError
        If the first line of ``cred`` does not contain the ``', '`` separator.
    """
    driver.get(f'{synergy_url}/synergy/')
    # Fixed wait after navigation; presumably lets the login form render.
    time.sleep(5)

    # Read the credentials. Split only on the first ', ' so that a password
    # which itself contains ', ' is kept intact.
    with open(cred, 'r') as f:
        first_line = f.readline().rstrip('\n')
    parts = first_line.split(', ', 1)
    if len(parts) != 2:
        raise ValueError(
            f"{cred}: expected first line of the form '<username>, <password>'"
        )
    username, password = parts

    # The find_element_by_* helpers were removed in Selenium 4.3; the
    # find_element(By.<strategy>, value) form works on Selenium 3 and 4.
    driver.find_element(By.ID, 'Username').send_keys(username)
    driver.find_element(By.ID, 'Password').send_keys(password)
    driver.find_element(By.XPATH, "//button[@class='btn btn-primary']").click()

In [5]:
# Start Chrome through a chromedriver binary resolved by webdriver_manager
# (it downloads and caches a matching driver), instead of the previously used
# hard-coded path '/usr/local/bin/chromedriver'.
service = Service(executable_path=ChromeDriverManager().install())
driver = webdriver.Chrome(service=service)



Current google-chrome version is 110.0.5481
Get LATEST chromedriver version for 110.0.5481 google-chrome
Trying to download new driver from https://chromedriver.storage.googleapis.com/110.0.5481.77/chromedriver_mac64.zip
Driver has been saved in cache [/Users/xulian/.wdm/drivers/chromedriver/mac64/110.0.5481.77]


In [8]:
# Credentials file kept outside the notebook directory; login() reads its
# first line as "<username>, <password>".
cred = '../synergy_cred.txt'
login(driver, cred)

## Scrape Setup

In [9]:
# Folder that move_season() scans for downloaded leaderboard files, relative
# to the notebook's working directory (presumably the browser's download
# folder -- adjust for your machine).
downloads_dir = '../../../../Downloads'

### Situations

In [10]:
# Maps a situation's display name to the value sent as the `situation` query
# parameter of the leaderboard URL (see get_stats_sheet). The display names
# are also matched as substrings against downloaded file names in
# find_missing_sheets.
situations_dict = {
    'Pick and Rolls Including Passes': '101',
    'Isolations Including Passes': '100',
    'Post-Ups Including Passes': '102',
    'Catch and Shoot': '116',
    'Isolation': '10',
    'P&R Ball Handler': '11',
    'Post-Up': '15',
    'P&R Roll Man': '31',
    'Spot Up': '50',
    'Off Screen': '67',
    'Hand Off': '78',
    'Cut': '81',
    'Offensive Rebounds': '89',
    'Transition': '92',
    'Miscellaneous': '124',
    'All field goal attempts': '99',
    'At Rim': '127',
    'Long (3 point jump shots)': '109',
    'All Jump Shots off the Dribble': '110'
}

### Functions

In [11]:
def create_dirs(league_dict):
    """Create one output folder per season of a league.

    For every key of ``league_dict['seasonId']`` the directory
    ``../data/<league>_<season>`` (relative to the current working directory)
    is created, where ``<league>`` is ``league_dict['league']``.

    The call is idempotent: directories that already exist are left untouched,
    and a missing ``../data`` parent is created as needed.

    Parameters
    ----------
    league_dict : dict
        Must provide ``'league'`` (str) and ``'seasonId'`` (mapping whose keys
        are the season labels).
    """
    league = league_dict['league']
    for season in league_dict['seasonId']:
        # exist_ok=True replaces the old `dirname not in os.listdir()` check,
        # which compared a relative path against bare names in the cwd and so
        # never detected an existing folder.
        os.makedirs(f'../data/{league}_{season}', exist_ok=True)

In [12]:
def get_stats_sheet(situation, 
                    reportType, season,
                    league_dict, 
                    output_format='xlsx',
                    situations_dict=situations_dict,
                    driver=driver):
    """Open one leaderboard page and click its download control.

    The leaderboard URL is built from the module-level ``app_url`` plus query
    parameters looked up in ``league_dict`` and ``situations_dict``.

    Parameters
    ----------
    situation : str
        Key of ``situations_dict``.
    reportType : str
        Key of ``league_dict['reportType']``.
    season : str
        Key of ``league_dict['seasonId']``.
    league_dict : dict
        Provides ``'leagueId'``, ``'seasonId'``, ``'competitionIds'`` and
        ``'reportType'``.
    output_format : str
        ``'xlsx'`` (default) or ``'csv'``; selects which page element is
        clicked.
    situations_dict : dict
        Maps situation names to the ``situation`` query value.
    driver : selenium WebDriver
        Logged-in browser session. The default is the ``driver`` object that
        existed when this function was defined.

    Raises
    ------
    KeyError
        If ``situation``, ``reportType`` or ``season`` is not a known key.
    """
    sheet_url = (
        f'{app_url}/'
        'leaderboards?'
        f'leagueId={league_dict["leagueId"]}&'
        f'seasonId={league_dict["seasonId"][season]}&'
        f'competitionIds={league_dict["competitionIds"]}&'
        f'reportType={league_dict["reportType"][reportType]}&'
        f'situation={situations_dict[situation]}&'
        'perGame=0&cutoff=0'
    )
    driver.get(sheet_url)
    # Fixed wait; presumably gives the leaderboard time to load.
    time.sleep(10)

    # The CSV control carries an extra CSS class compared to the XLSX one.
    xpath = "//div[@class='ng-select']"
    if output_format == 'csv':
        xpath = "//div[@class='ng-select ml-2']"

    # find_elements_by_xpath was removed in Selenium 4.3; use the generic
    # find_elements(By.XPATH, ...) form, which works on Selenium 3 and 4.
    for b in driver.find_elements(By.XPATH, xpath):
        # Click the element whose visible label is the requested format.
        if b.text == output_format.upper():
            b.click()

    # Fixed wait; presumably lets the browser finish the download.
    time.sleep(10)

In [13]:
def download_season(season, 
                    reportType, 
                    league_dict, 
                    output_format='xlsx',
                    situations_dict=situations_dict,
                    driver=driver):
    """Download the leaderboard sheet of every situation for one season.

    Calls ``get_stats_sheet`` once per key of ``situations_dict`` and shows a
    tqdm progress bar while doing so.

    Parameters
    ----------
    season : str
        Key of ``league_dict['seasonId']``.
    reportType : str
        Key of ``league_dict['reportType']``.
    league_dict : dict
        League configuration passed through to ``get_stats_sheet``.
    output_format : str
        ``'xlsx'`` (default) or ``'csv'``.
    situations_dict : dict
        Situations to download (name -> ``situation`` query value).
    driver : selenium WebDriver
        Browser session used for all downloads.
    """
    print(f'Downloading Season {season}')
    for situation in tqdm(situations_dict.keys()):
        # Forward situations_dict and driver explicitly: previously they were
        # accepted here but dropped, so get_stats_sheet silently fell back to
        # its own defaults.
        get_stats_sheet(situation=situation,
                        reportType=reportType,
                        season=season,
                        league_dict=league_dict,
                        output_format=output_format,
                        situations_dict=situations_dict,
                        driver=driver)
    print('Done.')

In [23]:
def move_season(season, league_dict, 
                downloads_dir=downloads_dir):
    """Move a season's downloaded leaderboard files into its data folder.

    Every entry of ``downloads_dir`` whose name starts with
    ``'Leaderboards - International <season> '`` is renamed into
    ``../data/<league>_<season>`` (``<league>`` is ``league_dict['league']``).
    Afterwards the target folder and its number of entries are printed.

    Parameters
    ----------
    season : str
        Season label used both in the file-name prefix and the folder name.
    league_dict : dict
        Only ``league_dict['league']`` is read.
    downloads_dir : str
        Folder that is scanned for downloaded files.
    """
    target_dir = f'../data/{league_dict["league"]}_{season}'
    prefix = f'Leaderboards - International {season} '

    # Select the matching downloads first, then relocate them one by one.
    matching = [name for name in os.listdir(downloads_dir)
                if name.startswith(prefix)]
    for name in matching:
        os.rename(f'{downloads_dir}/{name}', f'{target_dir}/{name}')

    moved_total = len(os.listdir(target_dir))
    print(f'{target_dir}: {moved_total}')

In [37]:
def find_missing_sheets(season, reportType, league,
                        output_format, league_dict, 
                        situations_dict=situations_dict,
                        driver=driver,
                        downloads_dir=downloads_dir):
    """Re-download the sheets that are missing from a season folder.

    A situation counts as present when its name occurs as a substring of any
    entry in ``../data/<league>_<season>``. Each missing situation is printed
    and downloaded again with ``get_stats_sheet``; finally ``move_season``
    moves the new files into the season folder.

    Parameters
    ----------
    season : str
        Key of ``league_dict['seasonId']``.
    reportType : str
        Key of ``league_dict['reportType']``.
    league : str
        League name used to locate the season folder that is inspected.
        NOTE(review): ``move_season`` derives the folder from
        ``league_dict['league']`` instead -- the two should agree.
    output_format : str
        ``'xlsx'`` or ``'csv'``.
    league_dict : dict
        League configuration passed through to the helpers.
    situations_dict : dict
        Situations expected to be present (name -> query value).
    driver : selenium WebDriver
        Browser session used for the re-downloads.
    downloads_dir : str
        Folder from which ``move_season`` collects the new files.
    """
    # The folder does not change while the loop runs (downloads are only
    # moved at the end), so list it once.
    existing = os.listdir(f'../data/{league}_{season}')

    for situation in situations_dict.keys():
        # NOTE(review): substring matching means a short name such as
        # 'Isolation' is also satisfied by a file named after
        # 'Isolations Including Passes'; a missing short-named sheet can
        # therefore go unnoticed.
        if any(situation in f for f in existing):
            continue
        print(situation)
        # Forward situations_dict and driver, which were previously accepted
        # but ignored.
        get_stats_sheet(situation=situation,
                        reportType=reportType,
                        season=season,
                        league_dict=league_dict, 
                        output_format=output_format,
                        situations_dict=situations_dict,
                        driver=driver)
    # Forward downloads_dir as well so a non-default folder is honoured.
    move_season(season=season, 
                league_dict=league_dict,
                downloads_dir=downloads_dir)

## Scrape EuroCup

In [16]:
# League configuration consumed by create_dirs, get_stats_sheet and
# move_season. The id strings are inserted verbatim into the leaderboard URL
# query (leagueId / seasonId / competitionIds / reportType).
eurocup_dict = {
    # Used for the output folder name ../data/<league>_<season>.
    'league': 'EuroCup',
    'leagueId': '54457dce300969b132fcfb3f',
    # Season label -> seasonId query value.
    'seasonId': {
        '2021-2022': '6089924f5e7b4977beddb1d1',
        '2020-2021': '5f171af2e0962f87ad2aafb2',
        '2019-2020': '5c6cc33b85d07ba976fe09ca',
        '2018-2019': '5b5360189559b26e7ed69ded',
        '2017-2018': '59af08c917120e9c9a979910'
    },
    'competitionIds': '560100ac8dc7a24394b95656',
    # Report name -> reportType query value.
    'reportType': {
        'Player Offensive': '0',
        'Player Defensive': '1',
        'Team Offensive': '2',
        'Team Defensive': '3'
    }
}

In [17]:
# Create ../data/EuroCup_<season> for every season listed in eurocup_dict.
create_dirs(eurocup_dict)

In [18]:
# Settings shared by all season downloads below.
# reportType must be a key of eurocup_dict['reportType'].
reportType = 'Player Offensive'
# 'csv' or 'xlsx'; selects which download control get_stats_sheet clicks.
output_format = 'csv'

### 2021-2022

In [19]:
# League name passed to find_missing_sheets to locate ../data/<league>_<season>;
# keep it equal to eurocup_dict['league'].
league = 'EuroCup'

In [24]:
# Season to process next; must be a key of eurocup_dict['seasonId'].
season = '2021-2022'

In [25]:
# Download every situation sheet for `season`, then move the files from the
# downloads folder into ../data/<league>_<season>. The printed count should
# equal len(situations_dict).
download_season(season=season,
                reportType=reportType,
                league_dict=eurocup_dict,
                output_format=output_format)
move_season(season=season, 
            league_dict=eurocup_dict)

Downloading Season 2021-2022


100%|███████████████████████████████████████████| 19/19 [06:40<00:00, 21.08s/it]

Done.
../data/EuroCup_2021-2022: 19





### 2020-2021

In [27]:
# Season to process next; must be a key of eurocup_dict['seasonId'].
season = '2020-2021'

In [28]:
# Download every situation sheet for `season`, then move the files from the
# downloads folder into ../data/<league>_<season>. The printed count should
# equal len(situations_dict).
download_season(season=season,
                reportType=reportType,
                league_dict=eurocup_dict,
                output_format=output_format)
move_season(season=season, 
            league_dict=eurocup_dict)

Downloading Season 2020-2021


100%|███████████████████████████████████████████| 19/19 [06:41<00:00, 21.13s/it]

Done.
../data/EuroCup_2020-2021: 19





### 2019-2020

In [35]:
# Season to process next; must be a key of eurocup_dict['seasonId'].
# NOTE(review): the execution count (In [35]) is later than the download cell
# below (In [30]) -- this cell appears to have been re-run so that `season`
# points at 2019-2020 again for the find_missing_sheets call further down.
season = '2019-2020'

In [30]:
# Download every situation sheet for `season`, then move the files from the
# downloads folder into ../data/<league>_<season>. The printed count should
# equal len(situations_dict); if it is lower, run find_missing_sheets.
download_season(season=season,
                reportType=reportType,
                league_dict=eurocup_dict,
                output_format=output_format)
move_season(season=season, 
            league_dict=eurocup_dict)

Downloading Season 2019-2020


100%|███████████████████████████████████████████| 19/19 [06:39<00:00, 21.04s/it]

Done.
../data/EuroCup_2019-2020: 18





In [38]:
# Re-download whichever situation sheets are absent from the folder of the
# current `season`, then move them into place. Each missing situation name is
# printed, followed by the folder's new file count.
find_missing_sheets(season=season, 
                    reportType=reportType, 
                    league=league,
                    output_format=output_format, 
                    league_dict=eurocup_dict)

All field goal attempts
../data/EuroCup_2019-2020: 19


### 2018-2019

In [31]:
# Season to process next; must be a key of eurocup_dict['seasonId'].
season = '2018-2019'

In [32]:
# Download every situation sheet for `season`, then move the files from the
# downloads folder into ../data/<league>_<season>. The printed count should
# equal len(situations_dict).
download_season(season=season,
                reportType=reportType,
                league_dict=eurocup_dict,
                output_format=output_format)
move_season(season=season, 
            league_dict=eurocup_dict)

Downloading Season 2018-2019


100%|███████████████████████████████████████████| 19/19 [06:39<00:00, 21.03s/it]

Done.
../data/EuroCup_2018-2019: 19





### 2017-2018

In [33]:
# Season to process next; must be a key of eurocup_dict['seasonId'].
season = '2017-2018'

In [34]:
# Download every situation sheet for `season`, then move the files from the
# downloads folder into ../data/<league>_<season>. The printed count should
# equal len(situations_dict).
download_season(season=season,
                reportType=reportType,
                league_dict=eurocup_dict,
                output_format=output_format)
move_season(season=season, 
            league_dict=eurocup_dict)

Downloading Season 2017-2018


100%|███████████████████████████████████████████| 19/19 [06:39<00:00, 21.03s/it]

Done.
../data/EuroCup_2017-2018: 19





In [39]:
# End the whole WebDriver session: quit() closes every window and stops the
# chromedriver process, whereas close() only closes the current window.
driver.quit()