In [1]:
import requests
import datetime
import json
import re
from tqdm import trange, tqdm

In [2]:
base_url = 'http://bet.hkjc.com'

In [3]:
def parse_challenge(page):
    """
    Parse a challenge given by mmi and mavat's web servers, forcing us to solve
    some math stuff and send the result as a header to actually get the page.
    This logic is pretty much copied from https://github.com/R3dy/jigsaw-rails/blob/master/lib/breakbot.rb
    """
    top = page.split('<script>')[1].split('\n')
    challenge = top[1].split(';')[0].split('=')[1]
    challenge_id = top[2].split(';')[0].split('=')[1]
    return {'challenge': challenge, 'challenge_id': challenge_id, 'challenge_result': get_challenge_answer(challenge)}


def get_challenge_answer(challenge):
    """
    Solve the math part of the challenge and get the result
    """
    arr = list(challenge)
    last_digit = int(arr[-1])
    arr.sort()
    min_digit = int(arr[0])
    subvar1 = (2 * int(arr[2])) + int(arr[1])
    subvar2 = str(2 * int(arr[2])) + arr[1]
    power = ((int(arr[0]) * 1) + 2) ** int(arr[1])
    x = (int(challenge) * 3 + subvar1)
    y = cos(pi * subvar1)
    answer = x * y
    answer -= power
    answer += (min_digit - last_digit)
    answer = str(int(floor(answer))) + subvar2
    return answer

In [18]:
class Team():
    
    id = ''
    team_name=''
    
    def __init__(self, id, team_name):
        
        self.id = id
        self.team_name = team_name
        
    def __str__(self):
        
        return f'{self.team_name} [{self.id}]'
    
class League():
    
    id = ''
    short_name = ''
    name = ''
    
    def __init__(self, id, short_name, name):
        
        self.id = id
        self.short_name = short_name
        self.name = name
    
class Match():
    
    date = None
    time = None
    id = None
    num = None
    short_id = None
    
    home_team = None
    away_team = None
    league = None
    events = None
    odds = {}
    
    def __init__(self, m):
        
        self.league = League(
                m['league']['leagueID'], 
                m['league']['leagueShortName'], 
                m['league']['leagueNameEN']
            ),
        self.home_team = Team(
                m['homeTeam']['teamID'],
                m['homeTeam']['teamNameEN']
            ),
        self.away_team = Team(
                m['awayTeam']['teamID'],
                m['awayTeam']['teamNameEN']
            )

        self.events = m['liveEvent']['liveevent']
        self.id = m['matchID']
        self.num = m['matchNum']
        self.date = m['matchDate']
        self.time = m['matchTime']
        self.short_id = m['matchIDinofficial']
        
        for o in [
            'HAD', 'TQL', 'FHA', 'HHA', 
            'HDC', 'HIL', 'FHL', 'CHL', 
            'SPC', 'CRS', 'FCS', 'FTS', 
            'TTG', 'OOE', 'FGS', 'HFT'
        ]:

            if o in m:
                self.odds[o] = m[f'{o.lower()}odds']
            else:
                self.odds[o] = None

In [19]:
matches = []

session = requests.Session()
r = session.get(base_url)
yum = r.cookies

odds_types = [
    'HAD', 'TQL', 'FHA', 'HHA', 
    'HDC', 'HIL', 'FHL', 'CHL', 
    'SPC', 'CRS', 'FCS', 'FTS', 
    'TTG', 'OOE', 'FGS', 'HFT'
]
    
for odds_name, odds_type_code in odds_type['football'].items():
    
    odds_url = f'{base_url}/football/getJSON.aspx?jsontype={odds_type_code}.aspx'
    result = session.post(
        odds_url,
        headers=dict(referer=base_url),
        cookies=yum
    )
    
    response = json.loads(result.text)
    
    for j in tqdm(response):

        if j['definedPools'] == []:
            
            odds_url = \
                f'{base_url}/football/getJSON.aspx?jsontype={odds_type_code}.aspx'\
                f'&matchid={j["matchID"]}'
            result = session.post(
                odds_url,
                headers=dict(referer=base_url),
                cookies=yum
            )
            match = Match(next(
                item for item in json.loads(result.text) if item["matchID"] == j["matchID"]
            ))
            
        else:
            
            match = Match(j)

100%|██████████| 98/98 [00:51<00:00,  1.91it/s]


In [36]:
for odds_name, odds_type_code in odds_type['football'].items():
    
    print(f'{base_url}/football/getJSON.aspx?jsontype={odds_type_code}')

http://bet.hkjc.com/football/getJSON.aspx?jsontype=odds_had
http://bet.hkjc.com/football/getJSON.aspx?jsontype=odds_tql
http://bet.hkjc.com/football/getJSON.aspx?jsontype=odds_fha
http://bet.hkjc.com/football/getJSON.aspx?jsontype=odds_hha
http://bet.hkjc.com/football/getJSON.aspx?jsontype=odds_hdc
http://bet.hkjc.com/football/getJSON.aspx?jsontype=odds_hil
http://bet.hkjc.com/football/getJSON.aspx?jsontype=odds_fhl
http://bet.hkjc.com/football/getJSON.aspx?jsontype=odds_chl
http://bet.hkjc.com/football/getJSON.aspx?jsontype=odds_spc
http://bet.hkjc.com/football/getJSON.aspx?jsontype=odds_crs
http://bet.hkjc.com/football/getJSON.aspx?jsontype=odds_fcs
http://bet.hkjc.com/football/getJSON.aspx?jsontype=odds_fts
http://bet.hkjc.com/football/getJSON.aspx?jsontype=odds_ttg
http://bet.hkjc.com/football/getJSON.aspx?jsontype=odds_ooe
http://bet.hkjc.com/football/getJSON.aspx?jsontype=odds_fgs
http://bet.hkjc.com/football/getJSON.aspx?jsontype=odds_HFT


In [5]:
from datetime import datetime, timedelta

In [16]:
datetime.strftime(datetime(2018, 2, 1) + timedelta(days=-31), '%Y%m%d')

'20180101'

In [18]:
datetime.strftime(datetime.today(), '%Y%m%d')

'20190303'

In [20]:
datetime.strftime(datetime.today() + timedelta(days=-30), '%Y%m%d')

'20190201'

In [20]:
from tqdm import trange
from tqdm import tqdm_notebook as tqdm

In [21]:
enddate =  datetime.today()

session = requests.Session()
r = session.get(base_url)
yum = r.cookies

leagues = {}
matches = {}

for i in range(48):
    
    print('Start:', datetime.strftime(enddate + timedelta(days=-30), "%Y%m%d"))
    print('End:', datetime.strftime(enddate, "%Y%m%d"))

    search_url = 'https://bet.hkjc.com/football/getJSON.aspx?jsontype=search_result.aspx&startdate='\
        f'{datetime.strftime(enddate + timedelta(days=-30), "%Y%m%d")}&'\
        f'enddate={datetime.strftime(enddate, "%Y%m%d")}&teamid=default'
    
    result = session.post(
        search_url,
        headers=dict(referer=base_url),
        cookies=yum
    )
    
    response = json.loads(result.text)[0]
    page_count = int(int(response['matchescount'])/20)
    
    for m in response['matches']:
        
        if m['league']['leagueShortName'] not in leagues:
            
            leagues[m['league']['leagueShortName']] = [m['league']['leagueID'], m['league']['leagueNameEN']]
            matches[m['league']['leagueShortName']] = m
            
    for j in tqdm(range(page_count), desc='pages'):
#     sleep(0.01)
#     tqdm.write(f"Done task {i}")
    
#     for j in trange(page_count, desc='pages'):
        
        search_url = 'https://bet.hkjc.com/football/getJSON.aspx?jsontype=search_result.aspx&startdate='\
            f'{datetime.strftime(enddate + timedelta(days=-30), "%Y%m%d")}&'\
            f'enddate={datetime.strftime(enddate, "%Y%m%d")}&teamid=default&pageno={j + 2}'

        result = session.post(
            search_url,
            headers=dict(referer=base_url),
            cookies=yum
        )

        response = json.loads(result.text)[0]

        for m in response['matches']:

            if m['league']['leagueShortName'] not in leagues:

                leagues[m['league']['leagueShortName']] = [m['league']['leagueID'], m['league']['leagueNameEN']]
                matches[m['league']['leagueShortName']] = m
                
    
    enddate += timedelta(days=-31)

Start: 20190201
End: 20190303


HBox(children=(IntProgress(value=0, description='pages', max=51), HTML(value='')))







Exception ignored in: <bound method tqdm.__del__ of pages:  76%|███████▋  | 39/51 [05:16<01:37,  8.11s/it]>
Traceback (most recent call last):
  File "c:\users\admin\appdata\local\programs\python\python36\lib\site-packages\tqdm\_tqdm.py", line 882, in __del__
    self.close()
  File "c:\users\admin\appdata\local\programs\python\python36\lib\site-packages\tqdm\_tqdm.py", line 1087, in close
    self._decr_instances(self)
  File "c:\users\admin\appdata\local\programs\python\python36\lib\site-packages\tqdm\_tqdm.py", line 452, in _decr_instances
    cls.monitor.exit()
  File "c:\users\admin\appdata\local\programs\python\python36\lib\site-packages\tqdm\_monitor.py", line 50, in exit
    self.join()
  File "c:\users\admin\appdata\local\programs\python\python36\lib\threading.py", line 1053, in join
    raise RuntimeError("cannot join current thread")
RuntimeError: cannot join current thread



Start: 20190101
End: 20190131


HBox(children=(IntProgress(value=0, description='pages', max=36), HTML(value='')))


Start: 20181201
End: 20181231


HBox(children=(IntProgress(value=0, description='pages', max=45), HTML(value='')))

IndexError: list index out of range

In [25]:
{x: [leagues[x][0], f"QtWidgets.QCheckBox('{leagues[x][1]}')'"] for x in leagues}

{'ACL': ['90', "QtWidgets.QCheckBox('Asian Champions League')'"],
 'AD1': ['89', "QtWidgets.QCheckBox('Australian Division 1')'"],
 'AGC': ['117', "QtWidgets.QCheckBox('Argentine Cup')'"],
 'APL': ['94', "QtWidgets.QCheckBox('Argentine Division 1')'"],
 'ASC': ['47', "QtWidgets.QCheckBox('Asian Cup')'"],
 'BD1': ['56', "QtWidgets.QCheckBox('Brazilian Division 1')'"],
 'BFC': ['38', "QtWidgets.QCheckBox('Belgian Cup')'"],
 'BFL': ['33', "QtWidgets.QCheckBox('Belgian Division 1')'"],
 'BPC': ['118', "QtWidgets.QCheckBox('Brazilian Paulista League')'"],
 'CD1': ['123', "QtWidgets.QCheckBox('Chilean Division 1')'"],
 'CLB': ['72', "QtWidgets.QCheckBox('Club Matches')'"],
 'CNC': ['100', "QtWidgets.QCheckBox('Central and North American Cup')'"],
 'CWP': ['67', "QtWidgets.QCheckBox('Club World Cup')'"],
 'DAC': ['39', "QtWidgets.QCheckBox('Dutch Cup')'"],
 'DF2': ['104', "QtWidgets.QCheckBox('Dutch Division 2')'"],
 'DFL': ['16', "QtWidgets.QCheckBox('Dutch Division 1')'"],
 'ED1': ['29', "Q

In [24]:
matches['ELT']

{'Cur': '1',
 'accumulatedscore': [{'away': '0',
   'home': '0',
   'periodstatus': 'ResultFinal',
   'periodvalue': 'FirstHalf'},
  {'away': '3',
   'home': '0',
   'periodstatus': 'ResultFinal',
   'periodvalue': 'SecondHalf'}],
 'awayTeam': {'teamID': '16',
  'teamNameCH': '樸茨茅夫',
  'teamNameEN': 'Portsmouth'},
 'cornerresult': '',
 'coupon': {'couponID': '4',
  'couponNameCH': '周二賽事',
  'couponNameEN': 'Tuesday Matches',
  'couponShortName': 'TUE'},
 'crsodds': {'ALLUP': 'true',
  'Cur': '1',
  'ID': 'a8c85e32-0dda-4483-8e1a-156415e3c829',
  'INPLAY': 'false',
  'POOLSTATUS': 'Payout',
  'S0000': '100@9.50',
  'S0001': '100@8.25',
  'S0002': '100@13.00',
  'S0003': '100@28.00',
  'S0004': '100@60.00',
  'S0005': '100@200.0',
  'S0100': '100@8.00',
  'S0101': '100@6.20',
  'S0102': '100@8.50',
  'S0103': '100@20.00',
  'S0104': '100@50.00',
  'S0105': '100@150.0',
  'S0200': '100@12.50',
  'S0201': '100@8.25',
  'S0202': '100@11.50',
  'S0203': '100@23.00',
  'S0204': '100@60.00',
 