In [1]:
from datetime import datetime

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import requests
import bs4
from bs4 import BeautifulSoup

In [17]:
### Configs / Constants ###
# Draw-list pages enumerate the available draws; the *_RESULT_URL values are
# prefixes to which a single draw id (the `sppl` query value) is appended.
FD_DRAW_LIST_URL = 'http://www.singaporepools.com.sg/DataFileArchive/Lottery/Output/fourd_result_draw_list_en.html'
FD_RESULT_URL = 'http://www.singaporepools.com.sg/en/product/Pages/4d_results.aspx?sppl='
TOTO_DRAW_LIST_URL = 'http://www.singaporepools.com.sg/DataFileArchive/Lottery/Output/toto_result_draw_list_en.html'
TOTO_RESULT_URL = 'http://www.singaporepools.com.sg/en/product/sr/Pages/toto_results.aspx?sppl='

# Parser backend handed to BeautifulSoup.
PARSER_NAME = 'html.parser'

# Tag / attribute on the draw-list pages from which the draw id is extracted.
SPPL_ATTR = 'querystring'
SPPL_TAG = 'option'

# strptime format of the draw date text (e.g. '01 Jan 2020') and the CSS class
# of the element that contains it.
DT_FORMAT = '%d %b %Y'
DRAW_DATE_CLASS = 'drawDate'

# CSS classes of the 4D result elements. They are also reused verbatim as the
# 'Prize Type' labels when the results are tabulated.
FD_FIRST_PRIZE_CLASS = 'tdFirstPrize'
FD_SECOND_PRIZE_CLASS = 'tdSecondPrize'
FD_THIRD_PRIZE_CLASS = 'tdThirdPrize'
FD_STARTER_PRIZE_CLASS = 'tbodyStarterPrizes'
FD_CONSOLATION_PRIZE_CLASS = 'tbodyConsolationPrizes'

# Descendant selectors matching every <td> inside the starter / consolation bodies.
FD_STARTER_PRIZE_CSS_SEL = '.' + FD_STARTER_PRIZE_CLASS + ' td'
FD_CONSOLATION_PRIZE_CSS_SEL = '.' + FD_CONSOLATION_PRIZE_CLASS + ' td'
# Backward-compatible alias for the original (misspelled) name, which existing
# cells still reference. Prefer FD_CONSOLATION_PRIZE_CSS_SEL in new code.
FD_CONSOLAION_PRIZE_CSS_SEL = FD_CONSOLATION_PRIZE_CSS_SEL

# How many entries from the head of the 4D draw list to fetch.
FD_LAST_N_DRAWS = 10

# CSS class of the winning-outlets container on a Toto result page.
TOTO_OUTLETS = 'divWinningOutlets'

In [4]:
### Get 4D Draw List ###
# Seconds to wait on each HTTP request. Without a timeout, `requests` may block
# indefinitely on an unresponsive server.
FD_REQUEST_TIMEOUT_SECS = 30

fd_draw_list_page = requests.get(FD_DRAW_LIST_URL, timeout=FD_REQUEST_TIMEOUT_SECS)
fd_draw_list_page.raise_for_status()  # fail loudly instead of parsing an error page
fd_draw_list_soup = BeautifulSoup(fd_draw_list_page.content, PARSER_NAME)
# The draw id is whatever follows the last '=' in each option's attribute value.
# Options that lack the attribute are skipped instead of raising AttributeError.
fd_sppl_ids = [
    draw.get(SPPL_ATTR).rpartition('=')[2]
    for draw in fd_draw_list_soup.find_all(SPPL_TAG)
    if draw.get(SPPL_ATTR)
]

### Iterate through 4D Draw List to Consolidate 4D Results ###
# One record per prize number: [draw date, prize number, prize type].
fd_result_list = []
# Only the first FD_LAST_N_DRAWS entries of the draw list are fetched.
for fd_sppl_id in fd_sppl_ids[:FD_LAST_N_DRAWS]:
    fd_result_page = requests.get(FD_RESULT_URL + fd_sppl_id, timeout=FD_REQUEST_TIMEOUT_SECS)
    fd_result_page.raise_for_status()
    fd_result_soup = BeautifulSoup(fd_result_page.content, PARSER_NAME)

    # The date element's text is split on the last ', ' and only the trailing
    # part (matching DT_FORMAT) is parsed.
    fd_result_dt = datetime.strptime(
        fd_result_soup.find_all(class_=DRAW_DATE_CLASS)[0].get_text().rpartition(', ')[2],
        DT_FORMAT,
    )
    fd_result_first_prize = fd_result_soup.find_all(class_=FD_FIRST_PRIZE_CLASS)[0].get_text()
    fd_result_second_prize = fd_result_soup.find_all(class_=FD_SECOND_PRIZE_CLASS)[0].get_text()
    fd_result_third_prize = fd_result_soup.find_all(class_=FD_THIRD_PRIZE_CLASS)[0].get_text()
    fd_result_starter_prize_list = [
        fd_prize_cell.get_text() for fd_prize_cell in fd_result_soup.select(FD_STARTER_PRIZE_CSS_SEL)
    ]
    fd_result_consolation_prize_list = [
        fd_prize_cell.get_text() for fd_prize_cell in fd_result_soup.select(FD_CONSOLAION_PRIZE_CSS_SEL)
    ]

    # The CSS class names double as the 'Prize Type' labels.
    fd_result_list.append([fd_result_dt, fd_result_first_prize, FD_FIRST_PRIZE_CLASS])
    fd_result_list.append([fd_result_dt, fd_result_second_prize, FD_SECOND_PRIZE_CLASS])
    fd_result_list.append([fd_result_dt, fd_result_third_prize, FD_THIRD_PRIZE_CLASS])
    fd_result_list.extend(
        [fd_result_dt, fd_prize_num, FD_STARTER_PRIZE_CLASS]
        for fd_prize_num in fd_result_starter_prize_list
    )
    fd_result_list.extend(
        [fd_result_dt, fd_prize_num, FD_CONSOLATION_PRIZE_CLASS]
        for fd_prize_num in fd_result_consolation_prize_list
    )

### Present Findings in pd DataFrame ###
# Build the frame straight from the records. Routing them through np.array()
# first coerces the mixed datetime/str rows into one object array, which
# discards the datetime dtype of 'Date'.
fd_result_df = pd.DataFrame(fd_result_list, columns=['Date', 'Prize Number', 'Prize Type'])
fd_result_df = fd_result_df.set_index('Date')
# Occurrences of each prize number across the fetched draws, most frequent first.
fd_result_df.groupby(['Prize Number']).count().sort_values('Prize Type', ascending=False).head()

Unnamed: 0_level_0,Prize Type
Prize Number,Unnamed: 1_level_1
4641,2
3902,2
2000,2
6783,2
871,2


In [33]:
### Get Toto Draw List ###
# A timeout keeps the cell from hanging on an unresponsive server (30 seconds
# here), and raise_for_status() stops an HTTP error page from being parsed as
# if it were the draw list.
toto_draw_list_page = requests.get(TOTO_DRAW_LIST_URL, timeout=30)
toto_draw_list_page.raise_for_status()
toto_draw_list_soup = BeautifulSoup(toto_draw_list_page.content, PARSER_NAME)
# The draw id is whatever follows the last '=' in each option's attribute value.
# Options that lack the attribute are skipped instead of raising AttributeError.
toto_sppl_ids = [
    draw.get(SPPL_ATTR).rpartition('=')[2]
    for draw in toto_draw_list_soup.find_all(SPPL_TAG)
    if draw.get(SPPL_ATTR)
]


In [34]:
# Number of entries from the head of the Toto draw list to inspect
# (presumably the most recent draws; confirm the list ordering).
TOTO_LAST_N_DRAWS = 5
# Seconds to wait on each HTTP request before giving up.
TOTO_REQUEST_TIMEOUT_SECS = 30
# Every <ul> nested inside the winning-outlets container.
TOTO_OUTLETS_CSS_SEL = '.' + TOTO_OUTLETS + ' ul'

# Slicing (rather than indexing 0..4) avoids an IndexError when the draw list
# holds fewer than TOTO_LAST_N_DRAWS entries.
for toto_sppl_id in toto_sppl_ids[:TOTO_LAST_N_DRAWS]:
    toto_result_page = requests.get(TOTO_RESULT_URL + toto_sppl_id, timeout=TOTO_REQUEST_TIMEOUT_SECS)
    toto_result_page.raise_for_status()  # do not parse an HTTP error page as a result
    toto_result_soup = BeautifulSoup(toto_result_page.content, PARSER_NAME)
    # Parsed but not otherwise used in this cell. Kept because the name is
    # notebook-global and strptime raises if the page has no valid draw date.
    toto_result_dt = datetime.strptime(
        toto_result_soup.find_all(class_=DRAW_DATE_CLASS)[0].get_text().rpartition(', ')[2],
        DT_FORMAT,
    )
    winning_outlets = toto_result_soup.select(TOTO_OUTLETS_CSS_SEL)
    # Draws with no matching <ul> produce no output.
    if winning_outlets:
        print(winning_outlets)

[<ul><li>
                                                    Nalayanee Trading Enterprise - 127 Upp Paya Lebar Rd ( 1 QuickPick System 7 Entry )
                                            </li>
<li>
                                                    Singapore Pools Clementi N7 Branch - Blk 722 Clementi West St 2 #01-164 ( 1 QuickPick Ordinary Entry )
                                            </li>
</ul>, <ul><li>
                                                    Livewire (Resorts World Sentosa) - 26 Sentosa Gateway #B1-208 ( 1 QuickPick System 7 Entry )
                                            </li>
<li>
                                                    Singapore Pools Holland Drive Branch - Blk 46 Holland Dr #01-371/373 ( 1 QuickPick System 7 Entry )
                                            </li>
<li>
                                                    Cheers Hougang Central - Blk 810 Hougang Central #01-214 ( 1 QuickPick Ordinary Entry )
                               