In [81]:
import sys
import time
import os
import asyncio
import numpy as np
import pandas as pd
import json
import traceback
from collections import defaultdict
from typing import List, Dict

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium import webdriver

from IPython.display import clear_output

from parsers import *
from functions import *

In [82]:
def getDriver(url):
    """Start a Chrome session, open ``url`` and maximize the window.

    Parameters
    ----------
    url : str
        Address to load right after the browser starts.

    Returns
    -------
    selenium.webdriver.Chrome
        The running driver; the caller is responsible for calling ``quit()``.

    Raises
    ------
    Exception
        Whatever Selenium raises while loading the page; the browser is
        closed before the error is re-raised so no orphan process is left.
    """
    chrome_options = Options()

    # Preferences only take effect when attached through the "prefs"
    # experimental option; previously this dict was built and then dropped.
    chrome_prefs = {"profile.default_content_settings": {"popups": 1}}
    chrome_options.add_experimental_option("prefs", chrome_prefs)

    # NOTE(review): `executable_path` is the Selenium 3 way of locating the
    # driver binary -- confirm the installed Selenium version still accepts it.
    driver = webdriver.Chrome(options=chrome_options, executable_path="./chromedriver")
    try:
        driver.get(url)
        driver.maximize_window()
    except Exception:
        # Do not leak a browser process when the page cannot be opened.
        driver.quit()
        raise

    return driver

In [89]:
# One browser session per bookmaker, each opened on its live-events page.
fonbet = getDriver(url='https://www.fonbet.ru/live/')
xstavka = getDriver(url='https://1xstavka.ru/en/live/')

In [90]:
def extractAttr(dct: Dict, attr: str) -> List:
    """Collect the values stored under ``attr`` throughout a node tree.

    Parameters
    ----------
    dct : Dict
        A node; its sub-nodes, if any, are listed under the ``'children'`` key.
    attr : str
        Key to look up on every node.

    Returns
    -------
    List
        The node's own value (when the key is present) followed by one nested
        list per child, mirroring the shape of the tree.
    """
    res = []
    if attr in dct:
        res.append(dct[attr])

    # `.get` so that leaf nodes lacking a 'children' key (or holding None)
    # are treated as childless instead of raising KeyError.
    for child in dct.get('children') or []:
        res.append(extractAttr(child, attr))

    return res


def extractText(dct: Dict) -> List:
    """Collect the stripped ``'text'`` of every node in a node tree.

    Parameters
    ----------
    dct : Dict
        A node; its sub-nodes, if any, are listed under the ``'children'`` key.

    Returns
    -------
    List
        The node's own stripped text (when the key is present) followed by one
        nested list per child, mirroring the shape of the tree.
    """
    res = []
    if 'text' in dct:
        res.append(dct['text'].strip())

    # `.get` so that leaf nodes lacking a 'children' key (or holding None)
    # are treated as childless instead of raising KeyError.
    for child in dct.get('children') or []:
        res.append(extractText(child))

    return res


def simplifyList(lst):
    """Flatten redundant nesting in a nested list structure.

    Rules, applied recursively:

    * anything that is not a list/tuple (strings, ``None``, numbers, ...) is
      returned unchanged;
    * an empty list becomes ``None``;
    * a one-element sequence is replaced by its (simplified) only element;
    * longer sequences are rebuilt with every element simplified.

    Example: ``['', 'g', 'd', [[[['b']], 't']]]`` -> ``['', 'g', 'd', ['b', 't']]``.
    """
    # isinstance instead of an exact type comparison; scalars such as None or
    # numbers used to fall through to len() and raise TypeError.
    if not isinstance(lst, (list, tuple)):
        return lst
    if lst == []:
        return None
    if len(lst) == 1:
        return simplifyList(lst[0])

    return [simplifyList(el) for el in lst]


def toDict(web_elem):
    """Feed an element's ``innerHTML`` to a fresh ``DataParser`` and return its ``data``.

    ``web_elem`` must offer ``get_attribute``; ``DataParser`` is not defined in
    this notebook (presumably provided by one of the star-imports -- verify).
    """
    html_parser = DataParser()
    inner_html = web_elem.get_attribute('innerHTML')
    html_parser.feed(inner_html)
    return html_parser.data


def getUniqueHeaders(headers_row):
    """Make repeated header names distinct by appending asterisks.

    The first occurrence of a name is kept as is; the n-th repeat gets ``n``
    trailing ``'*'`` characters (``'1'``, ``'1*'``, ``'1**'``, ...).
    Order of the input is preserved.
    """
    seen = {}
    unique = []

    for name in headers_row:
        repeats = seen.get(name, 0)
        unique.append(name + '*' * repeats)
        seen[name] = repeats + 1
    return unique


def percFork(coeff_1, coeff_2):
    """Return ``1 - (1/coeff_1 + 1/coeff_2)`` after converting both odds to float.

    A positive result means the two inverse odds sum to less than one.
    """
    inverse_sum = 1 / float(coeff_1) + 1 / float(coeff_2)
    return 1 - inverse_sum


def checkPair(first, second, df=None):
    """Compute ``percFork`` for every row of two odds columns.

    Parameters
    ----------
    first, second : str
        Names of the two columns holding the odds to combine.
    df : pandas.DataFrame, optional
        Frame to read the columns from. When omitted, the notebook-level
        ``df_joined`` is used, which keeps existing two-argument calls working.

    Returns
    -------
    list
        One value per row: the ``percFork`` result, or ``None`` when the pair
        could not be evaluated (non-numeric or missing odds, zero odds).
    """
    frame = df_joined if df is None else df
    res = []

    for pair in frame.loc[:, [first, second]].values:
        try:
            res.append(percFork(*pair))
        # Only the failures bad odds can cause; anything else is a real bug
        # and should surface instead of silently becoming a None.
        except (TypeError, ValueError, ZeroDivisionError) as exc:
            print(exc)
            res.append(None)

    return res


async def parseBlock(root, features_cnt):
    """Build an empty odds table whose columns come from a parsed block.

    Parameters
    ----------
    root : dict
        Node tree of one block (the structure accepted by ``extractText``).
    features_cnt : int
        Currently unused; kept so existing call sites stay valid.

    Returns
    -------
    pandas.DataFrame
        Empty frame with columns ``'Player 1'``, ``'Player 2'`` followed by
        the de-duplicated header names of the block.
    """
    # Use the `root` argument: the previous body read an undefined/global
    # `block` and a never-initialised local flag, so it could not succeed.
    text = simplifyList(extractText(root))

    headers_row = text[1][3]  # position of the header row differs per bookmaker

    headers = getUniqueHeaders(headers_row)

    return pd.DataFrame(columns=['Player 1', 'Player 2'] + headers)

In [91]:
# Sanity check of simplifyList: the single-element wrappers around 'b' and
# around the inner list collapse, giving ['', 'g', 'd', ['b', 't']].
simplifyList(['', 'g', 'd', [[[['b']], 't']]])

['', 'g', 'd', ['b', 't']]

In [92]:
# Wait (up to 10 s) for the live table container, then take its first child.
# `find_elements(By.XPATH, ...)` replaces `find_elements_by_xpath`, which no
# longer exists in Selenium 4.
fonbet_container = WebDriverWait(fonbet, 10).until(
    EC.presence_of_element_located((By.XPATH, '//div[@class="table__flex-container"]'))
)
fonbet_root = fonbet_container.find_elements(By.XPATH, './*')[0]

data = toDict(fonbet_root)

blocks = data['children'][0]['children']

features_cnt = 14          # number of trailing cells in a row that hold odds
fonbet_records = []        # one dict per parsed match row
fonbet_columns = []        # column order, seeded from the first parsable block
columns_added_flg = False

for block in blocks:
    try:
        text = simplifyList(extractText(block))
        headers = getUniqueHeaders(text[0][-features_cnt:])
        columns = ['Player 1', 'Player 2'] + headers

        if not columns_added_flg:
            fonbet_columns = list(columns)
            columns_added_flg = True

        for row in text[1:]:
            try:
                # Workaround: the "A — B" match title sits at this fixed position.
                players = row[1][0][1].split('—')
                coeffs = row[-features_cnt:]

                # Keep only rows for the main match outcomes (exactly two sides)
                # whose odds line up with the header row.
                if len(players) == 2 and len(headers) == len(coeffs):
                    player_1, player_2 = (name.strip() for name in players)
                    values = [player_1, player_2] + coeffs
                    fonbet_records.append(dict(zip(columns, values)))

            except Exception as exc:
                # Best effort: report the malformed row and keep going.
                print(exc)
    except Exception as exc:
        # Best effort: report the malformed block and keep going.
        print(exc)

# Build the frame once instead of appending row by row (DataFrame.append is
# quadratic and was removed in pandas 2.0). Columns introduced by later blocks
# are added after the initial ones, in order of first appearance.
fonbet_columns = list(dict.fromkeys(
    fonbet_columns + [column for record in fonbet_records for column in record]
))
df_fonbet = pd.DataFrame(fonbet_records, columns=fonbet_columns)

df_fonbet

unhashable type: 'list'
unhashable type: 'list'
unhashable type: 'list'
unhashable type: 'list'
unhashable type: 'list'
unhashable type: 'list'
unhashable type: 'list'
unhashable type: 'list'
unhashable type: 'list'
unhashable type: 'list'
unhashable type: 'list'
unhashable type: 'list'
unhashable type: 'list'
unhashable type: 'list'
unhashable type: 'list'


Unnamed: 0,Player 1,Player 2,1,X,2,1X,12,X2,Hcap.,1*,Hcap.*,2*,Total,O,U,Extras
0,Akhmat,CSKA Moscow,12.00,4.90,1.30,3.55,1.18,1.03,+1.5,1.82,-1.5,1.98,3.5,2.15,1.70,+219
1,Rubin Kazan,Tambov FC,1.55,3.75,7.30,1.09,1.27,2.45,-1.5,2.70,+1.5,1.45,1.5,1.53,2.50,+252
2,Beerschot-Wilrijk,Standard Liege,12.00,4.60,1.30,3.35,1.18,1.02,+1.5,1.43,-1.5,2.70,2.5,2.65,1.45,+138
3,Zbrojovka Brno,Bohemians Praha 1905,2.85,2.50,3.20,1.33,1.50,1.40,-1.5,7.40,+1.5,1.07,1.5,1.95,1.80,+171
4,Pardubice,Teplice,1.55,3.50,7.30,1.08,1.28,2.35,-1.5,3.70,+1.5,1.25,2.5,2.50,1.50,+126
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
173,forZe,Dignitas,1.67,,2.07,,,,,,,,,,,
174,Alliance,FlyToMoon,,,,,,,,,,,,,,
175,Natus Vincere,MnM,,,,,,,,,,,,,,
176,Карта 3. Natus Vincere,MnM,3.45,,1.25,,,,,,,,,,,+2


In [93]:
# Locate the live-content root and parse every championship block under it.
# `find_element(s)(By.XPATH, ...)` replaces the `find_element(s)_by_xpath`
# helpers, which no longer exist in Selenium 4.
xstavka_root = xstavka.find_element(
    By.XPATH, '//div[@class="game_content_line on_main live-content "]/div/div/div/div'
)
blocks = list(map(
    toDict,
    xstavka_root.find_elements(By.XPATH, './/div[@data-name="dashboard-champ-content"]'),
))

features_cnt = 18
xstavka_records = []       # one dict per parsed match row
xstavka_columns = []       # column order, seeded from the first parsable block
columns_added_flg = False

for block in blocks:
    try:
        text = simplifyList(extractText(block))
        headers = getUniqueHeaders(text[1][-1])
        columns = ['Player 1', 'Player 2'] + headers

        if not columns_added_flg:
            xstavka_columns = list(columns)
            columns_added_flg = True

        for row in text[2:]:
            try:
                # Fixed positions of the two participant names inside a row.
                players_tmp = row[1][1][1][2]
                player_1 = players_tmp[1][1].strip()
                player_2 = players_tmp[2][1].strip()

                coeffs = row[1][-1]

                # Keep only rows whose odds line up with the header row.
                if len(headers) == len(coeffs):
                    values = [player_1, player_2] + coeffs
                    xstavka_records.append(dict(zip(columns, values)))

            except Exception as exc:
                # Best effort: report the malformed row and keep going.
                print(exc)
    except Exception as exc:
        # Same policy as the fonbet cell: a malformed block is reported and
        # skipped instead of aborting the whole scrape.
        print(exc)

# Build the frame once instead of appending row by row (DataFrame.append is
# quadratic and was removed in pandas 2.0). Columns introduced by later blocks
# are added after the initial ones, in order of first appearance.
xstavka_columns = list(dict.fromkeys(
    xstavka_columns + [column for record in xstavka_records for column in record]
))
df_xstavka = pd.DataFrame(xstavka_records, columns=xstavka_columns)

df_xstavka

Unnamed: 0,Player 1,Player 2,1,X,2,1X,12,2X,O,Total,...,IT1,U*,O**,IT2,U**,1**,2**,set,-,X*
0,Akhmat,CSKA Moscow,12,4.98,1.315,3.59,1.2,1.055,2.09,3.5,...,1,1.69,2.07,2.5,1.76,,,,,
1,Rubin,Tambov,1.525,3.84,7.8,1.104,1.29,2.616,1.91,2,...,1.5,1.83,-,-,-,,,,,
2,Celtic,Motherwell,1.06,9.5,51,-,1.08,8.32,2.25,2.5,...,2,1.93,3.42,0.5,1.32,,,,,
3,FCO Beerschot,Standard Liege,13.5,5,1.3,3.71,1.2,1.045,1.59,2,...,0.5,1.84,1.73,1.5,2.1,,,,,
4,Sao Paolo,Corinthians Paulista,2.696,2.1,5.1,1.19,1.78,1.5,2.216,3,...,1.5,1.88,2.84,1.5,1.43,,,,,
5,Veles Moscow,Torpedo Moscow,25,25,-,-,-,-,1.736,4,...,0.5,1.39,1.7,3.5,2.1,,,,,
6,Dynamo Bryansk,Irtysh Omsk,1.21,5.56,15.2,-,1.13,4.1,2.184,2,...,1.5,1.73,2.69,0.5,1.45,,,,,
7,Nizhniy Novgorod,Tekstilshchik Ivanovo,2.39,2.01,6.53,1.096,1.755,1.544,2.34,3,...,1.5,1.89,3.64,1.5,1.27,,,,,
8,Bidvest Wits,Kaizer Chiefs,6.51,1.39,5.14,1.15,2.88,1.096,2.256,0.5,...,0.5,1.21,3.66,0.5,1.27,,,,,
9,Zdenek Kolar(Q),Michael Vrbensky(Q),2.68,-,1.47,,,,1.88,28.5,...,13.5,1.78,1.83,13.5,1.87,-,-,-,,


In [8]:
# Restrict both tables to their first eight columns by position.
df_fonbet_short = df_fonbet.iloc[:, list(range(8))]
df_xstavka_short = df_xstavka.iloc[:, list(range(8))]

# Pair rows from the two bookmakers that share the same 'Player 1' value
# (pandas' default join type); clashing column names get _x / _y suffixes.
df_joined = pd.merge(df_fonbet_short, df_xstavka_short, on='Player 1')
df_joined

Unnamed: 0,Player 1,Player 2_x,1_x,X_x,2_x,1X,12,X2,Player 2_y,1_y,X_y,2_y,O,Total,U


In [9]:
# Choose two odds columns, run checkPair on them and store the per-row result
# in a new 'Fork: <first> - <second>' column of df_joined.
# NOTE(review): '2X' is not among the df_joined columns shown in the recorded
# output of the join cell (it lists 'X2'), which is what triggers the pandas
# "missing label" warning below -- confirm the intended column name.
first, second = '1_x', '2X'
df_joined.loc[:, f'Fork: {first} - {second}'] = checkPair(first, second)
df_joined

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike
  return self._getitem_tuple(key)


Unnamed: 0,Player 1,Player 2_x,1_x,X_x,2_x,1X,12,X2,Player 2_y,1_y,X_y,2_y,O,Total,U,Fork: 1_x - 2X


In [94]:
# Dump both scraped tables to CSV next to the notebook; the column-name row is
# deliberately left out (header=False), the row index is still written.
df_fonbet.to_csv(path_or_buf='fonbet_names47.csv', header=False)
df_xstavka.to_csv(path_or_buf='xstavka_names47.csv', header=False)