### This notebook scrapes League of Legends (LoL) betting odds from:

https://arcanebet.com/esports/title/59

In [1]:
from selenium import webdriver
from bs4 import BeautifulSoup as soup
import pandas as pd
import numpy as np
import datetime as dt
import time
from parse import *

In [2]:
# Pages to scrape, one URL per entry; each is passed to scrape_site() in turn.
target_urls = [
    'https://arcanebet.com/esports/title/59/tour/9256',
    'https://arcanebet.com/esports/title/59/tour/5009',
    'https://arcanebet.com/esports/title/59/tour/9265',
    'https://arcanebet.com/esports/title/59/tour/2375',
]

In [3]:
def scrape_site(url):
    """Load ``url`` in a Firefox WebDriver and pull out the odds and time elements.

    Parameters
    ----------
    url : str
        Address of the page to open.

    Returns
    -------
    tuple
        ``(odds, times)`` -- the BeautifulSoup ``find_all`` results for the
        ``div`` elements with class ``'px-4 py-1 market-odd col'`` and
        ``'row p-3 no-gutters'`` respectively. Either may be empty.
    """
    # A Safari driver was used here previously; Firefox is the active choice.
    browser = webdriver.Firefox()
    try:
        browser.get(url)
        # Fixed pause before reading the page source -- presumably to let
        # dynamically loaded content finish rendering (TODO confirm).
        time.sleep(5)
        page_soup = soup(browser.page_source, 'lxml')
    finally:
        # quit() ends the whole WebDriver session (close() only closes the
        # current window), and the finally ensures the browser is released
        # even when loading or parsing the page fails.
        browser.quit()
    odds = page_soup.find_all('div', class_ = 'px-4 py-1 market-odd col')
    times = page_soup.find_all('div', class_ = 'row p-3 no-gutters')
    return odds, times
    
def parse_odds(odds):
    """Split scraped odds elements into parallel team and price lists.

    The stripped text of every element is matched against the pattern
    ``'{Team}\\n{space}\\n{Odds}'``: the first part is kept as the team name
    and the last part is converted to ``float``.

    Parameters
    ----------
    odds : iterable
        Elements exposing a ``.text`` attribute (as returned by scrape_site).

    Returns
    -------
    tuple
        ``(team_list, odd_list)`` in the same order as ``odds``.
    """
    parsed_fields = [
        parse('{Team}\n{space}\n{Odds}', cell.text.strip()).named
        for cell in odds
    ]
    team_list = [fields.get('Team') for fields in parsed_fields]
    odd_list = [float(fields.get('Odds')) for fields in parsed_fields]
    return team_list, odd_list

# A listed "DD.MM" date more than this many days in the past is taken to
# belong to next year instead (the site omits the year).
_MAX_DAYS_IN_PAST = 180


def _infer_year(day, month, today):
    """Pick the year for a year-less day/month so the date is not far in the past."""
    for year in (today.year, today.year + 1):
        try:
            candidate = dt.date(year, month, day)
        except ValueError:
            # Not a real calendar date in this year (e.g. 29.02); try the next.
            continue
        if (today - candidate).days <= _MAX_DAYS_IN_PAST:
            return year
    # Nothing plausible found: fall back to the current year.
    return today.year


def parse_times(times):
    """Turn scraped schedule elements into ``'M/D/YYYY <time>'`` strings.

    The stripped text of every element is matched against
    ``'{Day}\\n{space}\\n{Time}'``. ``Day`` is either ``'Today'``,
    ``'Tomorrow'`` or a ``'<day>.<month>'`` pair without a year. For the
    latter the current year is used, unless that would place the date more
    than ``_MAX_DAYS_IN_PAST`` days in the past, in which case the next year
    is used (e.g. a January fixture scraped in December).

    Parameters
    ----------
    times : iterable
        Elements exposing a ``.text`` attribute (as returned by scrape_site).

    Returns
    -------
    list of str
        One ``'month/day/year time'`` string per element, in input order.
    """
    today = dt.date.today()
    time_list = []
    # The loop variable is deliberately not called ``time`` so the imported
    # ``time`` module is not shadowed inside this function.
    for entry in times:
        res = parse('{Day}\n{space}\n{Time}', entry.text.strip())
        date = res.named.get('Day')
        if date == 'Today':
            day, month, year = today.day, today.month, today.year
        elif date == 'Tomorrow':
            tomorrow = today + dt.timedelta(days = 1)
            day, month, year = tomorrow.day, tomorrow.month, tomorrow.year
        else:
            day_month = parse('{Day}.{Month}', date)
            day = int(day_month.named.get('Day'))
            month = int(day_month.named.get('Month'))
            year = _infer_year(day, month, today)
        clock = res.named.get('Time').strip()
        time_list.append('{}/{}/{} {}'.format(month, day, year, clock))
    return time_list

def build_table(team_list, odd_list, time_list):
    """Pair up consecutive teams/odds and combine them with the match times.

    Entries at even positions of ``team_list`` / ``odd_list`` go to the
    'Team 1' / 'Odd 1' columns, entries at odd positions to 'Team 2' /
    'Odd 2'. ``time_list`` supplies the 'Date' column.

    NOTE(review): np.c_ stacks all five columns into a single array, so they
    share one dtype -- with string dates and team names the odds presumably
    come out as strings as well; confirm before doing arithmetic on them.

    Returns
    -------
    pandas.DataFrame
        Columns 'Date', 'Team 1', 'Team 2', 'Odd 1', 'Odd 2'.
    """
    # Index 0 collects the even positions, index 1 the odd positions.
    teams = ([], [])
    prices = ([], [])
    for position, team in enumerate(team_list):
        side = position % 2
        teams[side].append(team)
        prices[side].append(odd_list[position])
    stacked = np.c_[time_list, teams[0], teams[1], prices[0], prices[1]]
    return pd.DataFrame(stacked,
                        columns = ['Date', 'Team 1', 'Team 2', 'Odd 1', 'Odd 2'])

In [4]:
# Scrape every target page and stack the per-page tables into one frame.
tables = []
for target_url in target_urls:
    odds, times = scrape_site(target_url)
    if odds:
        team_list, odd_list = parse_odds(odds)
        time_list = parse_times(times)
        table = build_table(team_list, odd_list, time_list)
        tables.append(table)

if tables:
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0;
    # ignore_index=True yields the fresh 0..n-1 index directly.
    odd_table = pd.concat(tables, ignore_index = True)
else:
    # Nothing scraped: keep the expected columns so the steps below (and any
    # later cell reading odd_table) do not fail with a KeyError.
    odd_table = pd.DataFrame(columns = ['Date', 'Team 1', 'Team 2', 'Odd 1', 'Odd 2'])

odd_table['Date'] = pd.to_datetime(odd_table['Date'])
# Stamp every row with the moment of this scrape (second resolution).
pull_time = pd.to_datetime(dt.datetime.today().replace(microsecond = 0))
odd_table['Pull Time'] = [pull_time] * len(odd_table)

In [5]:
# Merge the freshly scraped odds into the Excel database, skipping rows whose
# date, teams and both odds are already stored.
try:
    Odds_DB = pd.read_excel('LoL_Odds_DB.xlsx')
except FileNotFoundError:
    # Only a missing file means "first run". Any other failure must surface
    # instead of silently overwriting an existing database with this scrape.
    Odds_DB = odd_table
    Odds_DB['Date'] = pd.to_datetime(Odds_DB['Date'])
    Odds_DB = Odds_DB.sort_values(by=['Date'])
    Odds_DB.to_excel('LoL_Odds_DB.xlsx', index = False)
else:
    cur_size = len(Odds_DB)
    for _, new_row in odd_table.iterrows():
        # Odds_DB grows inside the loop, so repeated rows within this scrape
        # are also caught by the check below.
        already_stored = (
            (pd.to_datetime(Odds_DB['Date']) == new_row['Date'])
            & (Odds_DB['Team 1'] == new_row['Team 1'])
            & (Odds_DB['Team 2'] == new_row['Team 2'])
            & (Odds_DB['Odd 1'] == new_row['Odd 1'])
            & (Odds_DB['Odd 2'] == new_row['Odd 2'])
        ).any()
        if not already_stored:
            Odds_DB.loc[Odds_DB.shape[0]] = new_row
    Odds_DB['Date'] = pd.to_datetime(Odds_DB['Date'])
    Odds_DB = Odds_DB.sort_values(by=['Date'])
    # Rewrite the file only when at least one new row was added.
    if len(Odds_DB) > cur_size:
        Odds_DB.to_excel('LoL_Odds_DB.xlsx', index = False)