In [1]:
import sys
import time
import os
import asyncio
import numpy as np
import pandas as pd
import json
import traceback
from typing import List, Dict

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium import webdriver

from IPython.display import clear_output

from parsers import *
from functions import *

In [2]:
!pip install selenium

Collecting selenium
  Downloading https://files.pythonhosted.org/packages/80/d6/4294f0b4bce4de0abf13e17190289f9d0613b0a44e5dd6a7f5ca98459853/selenium-3.141.0-py2.py3-none-any.whl (904kB)
Collecting urllib3
  Downloading https://files.pythonhosted.org/packages/9f/f0/a391d1463ebb1b233795cabfc0ef38d3db4442339de68f847026199e69d7/urllib3-1.25.10-py2.py3-none-any.whl (127kB)
Installing collected packages: urllib3, selenium
Successfully installed selenium-3.141.0 urllib3-1.25.10


In [2]:
def getDriver(url):
    chrome_options = Options()

    chrome_prefs = {}
    chrome_prefs["profile.default_content_settings"] = { "popups": 1 }

    driver = webdriver.Chrome(options=chrome_options, executable_path="./chromedriver.exe")
    driver.get(url)
    driver.maximize_window()
    
    return driver

In [3]:
fonbet = getDriver('https://www.fonbet.ru/live/')
xstavka = getDriver('https://1xstavka.ru/en/live/')

In [4]:
def extractAttr(dct: Dict, attr: str) -> List:
    res = []    
    if attr in dct:
        res.append(dct[attr])
        
    if dct['children']:
        for x in dct['children']:
            res.append(extractAttr(x, attr))
            
    return res


def extractText(dct: Dict) -> List:
    res = []    
    if 'text' in dct:
        res.append(dct['text'].strip())
        
    if dct['children']:
        for x in dct['children']:
            res.append(extractText(x))
    
    return res


def simplifyList(lst):
    if type(lst) == type('s'):
        return lst        
    if lst == []:
        return None
    if len(lst) == 1:
        return simplifyList(lst[0])
    
    res = []
    
    for el in lst:        
        res.append(simplifyList(el))
        
    return res


def toDict(web_elem):
    parser = DataParser()
    parser.feed(web_elem.get_attribute('innerHTML'))
    return parser.data


def getUniqueHeaders(headers_row):
    headers = []
    for el in headers_row:
        if el not in headers:
            headers.append(el)
        else:
            headers.append(el + ('*'))
    return headers


def percFork(coeff_1, coeff_2):
    return 1 - (1 / float(coeff_1) + 1 / float(coeff_2))


def checkPair(first, second):
    res = []

    for x in df_joined.loc[:, [first, second]].values:
        try:
            res.append(percFork(*x))
        except Exception as exc:
            print(exc)
            res.append(None)
            
    return res


async def parseBlock(root, features_cnt):
    text_tmp = extractText(block)
    text = simplifyList(text_tmp)

    headers_row = text[1][3] #unique for every bookmaker

    headers = getUniqueHeaders(headers_row)

    if not columns_added_flg:
        df = pd.DataFrame(columns=['Player 1', 'Player 2'] + headers)
        columns_added_flg = True

    return df

In [5]:
simplifyList(['', 'g', 'd', [[[['b']], 't']]])

['', 'g', 'd', ['b', 't']]

In [6]:
fonbet_root = WebDriverWait(fonbet, 10).until(
        EC.presence_of_element_located((By.XPATH, '//div[@class="table__flex-container"]'))
    ).find_elements_by_xpath('./*')[0]

data = toDict(fonbet_root)

blocks = data['children'][0]['children']

features_cnt = 14
df_fonbet = pd.DataFrame()
columns_added_flg = False

for block in blocks:
    text_tmp = extractText(block)
    text = simplifyList(text_tmp)
    headers_row = text[0][-features_cnt:]
        
    headers = getUniqueHeaders(headers_row)
    
    if not columns_added_flg:
        df_fonbet = pd.DataFrame(columns=['Player 1', 'Player 2'] + headers)
        columns_added_flg = True
        
    rows = text[1:]
    
    for row in rows:
        try:
            players, coeffs = row[1][0][1].split('—'), row[-features_cnt:] #костыль для игроков
            
            if len(players) == 2: #только строки, в которых ставки на основные исходы матча
                player_1_tmp, player_2_tmp = players

                player_1, player_2 = player_1_tmp.strip(), player_2_tmp.strip()

                if len(headers) == len(coeffs):
                    columns = ['Player 1', 'Player 2'] + headers
                    data = [player_1, player_2] + coeffs

                    df_fonbet = df_fonbet.append(dict(zip(columns, data)), ignore_index=True)
            
        except Exception as exc:
            print(exc)

df_fonbet

Unnamed: 0,Player 1,Player 2,1,X,2,1X,12,X2,Hcap.,1*,Hcap.*,2*,Total,O,U,Extras
0,Wuhan Zall,Chongqing Lifan,1.45,3.55,10.0,1.03,1.27,2.65,-1.5,3.55,1.5,1.28,2.5,2.6,1.47,158.0
1,Daejeon Citizen,Seoul E-Land,3.0,2.35,3.1,1.32,1.53,1.35,-1.5,8.0,1.5,1.05,1.5,2.5,1.48,132.0
2,Khaan Khuns Erchim,Anduud City,1.04,14.0,38.0,,,8.5,-3.5,1.8,3.5,1.9,4.5,1.75,1.95,38.0
3,Blue Star,Country Lions,2.5,2.55,3.45,1.25,1.45,1.47,-1.5,5.6,1.5,1.11,3.5,2.0,1.72,96.0
4,Zheltie Tulpany,Krasnie Maki,1.48,5.1,4.7,1.15,1.13,2.47,-1.5,2.03,1.5,1.7,6.5,2.15,1.62,30.0
5,Kostin (NFC) Chelsea,Amoyan (EZ1) Atletico M,,,,,,,,,,,,,,
6,Dynamo K (LaikingDast),CSKA M (DangerDim77),1.28,,,,,3.3,-1.5,2.55,1.5,1.45,2.5,2.7,1.4,19.0
7,Shakhtar D (Hrusch),Lokomotiv M (d1pseN),3.4,1.8,4.3,1.18,1.9,1.27,0.0,1.67,0.0,2.08,0.5,1.5,2.4,54.0
8,AS ROMA (DIMQAA),Porto (Specialist),2.35,3.7,2.6,1.45,1.23,1.53,0.0,1.78,0.0,1.92,2.5,1.65,2.1,
9,BORUSSIA MONCHENGLADBACH (CARLWHIZZER),WOLVERHAMPTON WANDERERS (QUAVO),1.4,4.8,6.2,1.09,1.15,2.75,-1.5,2.05,1.5,1.68,4.5,2.0,1.72,82.0


In [7]:
xstavka_root = xstavka.find_element_by_xpath('//div[@class="game_content_line on_main live-content "]/div/div/div/div')
blocks = list(map(toDict, xstavka_root.find_elements_by_xpath('.//div[@data-name="dashboard-champ-content"]')))

# data = toDict(xstavka_root)
# blocks = data['children'][0]['children']
# display(blocks.keys())
# display(blocks)

features_cnt = 18
df_xstavka = pd.DataFrame()
columns_added_flg = False

for block in blocks:
# display(block)

    text_tmp = extractText(block)
    text = simplifyList(text_tmp)
    # display(text)

    headers_row = text[1][-1]
    # display(headers_row)

    headers = getUniqueHeaders(headers_row)
    # display(headers)

    if not columns_added_flg:
        df_xstavka = pd.DataFrame(columns=['Player 1', 'Player 2'] + headers)
        columns_added_flg = True

    rows = text[2:]

    for row in rows:
        # display(row)
        try:
            players_tmp = row[1][1][1][2]
    #         display(players_tmp)
            player_1_tmp, player_2_tmp = players_tmp[1][1], players_tmp[2][1]

            player_1, player_2 = player_1_tmp.strip(), player_2_tmp.strip()
    #         display(player_1, player_2)

            coeffs = row[1][-1]
    #         display(coeffs)
#             display(len(headers), len(coeffs))

            if len(headers) == len(coeffs):
                columns = ['Player 1', 'Player 2'] + headers
                data = [player_1, player_2] + coeffs

                df_xstavka = df_xstavka.append(dict(zip(columns, data)), ignore_index=True)

        except Exception as exc:
            print(exc)

df_xstavka

cannot reindex from a duplicate axis
cannot reindex from a duplicate axis
cannot reindex from a duplicate axis
cannot reindex from a duplicate axis
cannot reindex from a duplicate axis
cannot reindex from a duplicate axis
cannot reindex from a duplicate axis
cannot reindex from a duplicate axis
cannot reindex from a duplicate axis
cannot reindex from a duplicate axis
cannot reindex from a duplicate axis
cannot reindex from a duplicate axis
cannot reindex from a duplicate axis
cannot reindex from a duplicate axis
cannot reindex from a duplicate axis
cannot reindex from a duplicate axis
cannot reindex from a duplicate axis
cannot reindex from a duplicate axis
cannot reindex from a duplicate axis
cannot reindex from a duplicate axis
cannot reindex from a duplicate axis
cannot reindex from a duplicate axis
cannot reindex from a duplicate axis
cannot reindex from a duplicate axis
cannot reindex from a duplicate axis
cannot reindex from a duplicate axis
cannot reindex from a duplicate axis
c

Unnamed: 0,Player 1,Player 2,1,X,2,1X,12,2X,O,Total,U,1*,Handicap,2*,O*,IT1,U*,O*.1,IT2,U*.1
0,Wuhan Zall,Chongqing Lifan,1.512,3.78,8.5,1.095,1.3,2.655,1.615,2,2.37,2.344,-1+,1.625,1.71,1.5,2.13,1.71,0.5,2.13
1,Erchim,Anduud City,1.04,14,25,-,1.008,9.03,1.845,5,1.96,2.1,-4+,1.73,-,-,-,-,-,-
2,Daejeon Citizen,Seoul E-Land,3.31,2.21,3.17,1.33,1.63,1.31,1.775,1,2.04,2.06,0,1.76,1.9,0.5,1.86,1.9,0.5,1.86
3,Blue Star,Up Country Lions,2.54,2.64,3.36,1.3,1.456,1.488,2.05,3.5,1.77,1.76,0,2.06,1.74,1.5,2.05,1.74,1.5,2.05
4,Beijing+,Shanghai+,-,-,-,-,-,-,1.83,7.5,1.97,-,-,-,2.13,5.5,1.72,2.13,2.5,1.72
5,Italy (3х3),Spain (3х3),1.32,8.52,4.81,1.144,1.035,3.075,1.864,12.5,1.936,1.9,-2+,1.9,1.725,7,2.11,1.725,5,2.11
6,Dmytro Myznikov,Dmitry Karpenko,9.11,-,1.05,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
7,Borussia (Amateur),Schalke (Amateur),1.06,11.6,9.87,-,1.005,6.24,1.845,8.5,1.96,1.725,-2.5+,2.115,2.08,6,1.75,2.08,3,1.75
8,Bears (blue),Falcons (white),1.48,4.28,5.34,1.14,1.18,2.49,1.78,107.5,1.99,2.37,-1.5+,1.56,2.22,54.5,1.63,2.22,53.5,1.63
9,Zenit (Amateur),Krasnodar (Amateur),-,-,-,-,-,-,1.936,11.5,1.864,-,-,-,-,-,-,-,-,-


In [8]:
df_fonbet_short = df_fonbet.iloc[:, range(8)]
df_xstavka_short = df_xstavka.iloc[:, range(8)]

df_joined = df_fonbet_short.merge(df_xstavka_short,  on='Player 1')
df_joined

Unnamed: 0,Player 1,Player 2_x,1_x,X_x,2_x,1X_x,12_x,X2,Player 2_y,1_y,X_y,2_y,1X_y,12_y,2X
0,Templeogue United,Wayside Celtic,2.95,2.75,2.65,1.42,1.4,1.35,Wayside Celtic,2.95,2.82,2.66,1.45,1.405,1.375
1,Home Farm,Malahide Un,3.05,3.95,2.0,1.72,1.2,1.33,Malahide United,3.07,4.1,2.0,1.765,1.216,1.35
2,Finn Harps,St. Patrick's Athletic,2.02,3.05,3.85,1.22,1.33,1.7,St Patrick's Athletic,2.14,3.05,3.81,1.26,1.375,1.7
3,Molde,Brann,1.5,4.1,6.3,1.1,1.22,2.5,Brann,1.55,3.94,5.91,1.115,1.23,2.37
4,Kungsangens IF,Skiljebo,,,1.02,14.0,,,Skiljebo,29.0,22.0,1.01,12.6,-,-


In [15]:
first, second = '1_x', '2X'
df_joined.loc[:, f'Fork: {first} - {second}'] = checkPair(first, second)
df_joined

Unnamed: 0,Player 1,Player 2_x,1_x,X_x,2_x,1X,12,X2,Player 2_y,1_y,X_y,2_y,O,Total,U,Fork: 1_x - 2X
0,Miami Heat,Toronto Raptors,2.7,,1.47,,,,Toronto Raptors,2.904,11,1.44,1.9,208.5,1.9,
1,Miami Heat,Toronto Raptors,2.7,,1.47,,,,Toronto Raptors,-,-,-,-,-,-,


In [8]:
df_fonbet.to_csv('fonbet_names.csv', mode='a', header=False)
df_xstavka.to_csv('xstavka_names.csv', mode='a', header=False)

In [None]:
df.to_csv('my_csv.csv', mode='a', header=False)