## Scraping Data from Hong Kong Jockey Club Website

In [33]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime

# Function to format the date properly
def format_date(date_str):
    date_obj = datetime.strptime(date_str, "%Y/%m/%d")
    return date_obj.strftime("%d/%m/%Y")

# Function to scrape a single race
def scrape_race(date, race_number):
    global headers_set, headers
    formatted_date = format_date(date)
    url = f"https://racing.hkjc.com/racing/information/English/Racing/LocalResults.aspx?RaceDate={date}&Racecourse=HV&RaceNo={race_number}"
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    race_table = soup.find('table', class_='f_tac table_bd draggable')

    if not race_table:
        print(f"Table not found for race {race_number} on {formatted_date}")
        return

    if not headers_set: 
        headers = ['Date', 'Race Number'] + [header.get_text(strip=True) for header in race_table.find('tr').find_all('td')]
        headers_set = True

    rows = race_table.find('tbody').find_all('tr')
    
    for row in rows:
        cols = row.find_all('td')
        col_data = [formatted_date, race_number] + [col.get_text(strip=True) for col in cols]
        all_races_data.append(col_data)

# Dates and the number of races on each date
race_dates = [
    ("2024/04/24", 9),
    ("2024/04/17", 9),
    ("2024/04/10", 9),
    ("2024/03/27", 9),
    ("2024/03/20", 9),
    ("2024/03/13", 9),
    ("2024/03/06", 9),
    ("2024/02/28", 9),
    ("2024/02/21", 9),
    ("2024/02/15", 9),
    ("2024/02/07", 9),
    ("2024/01/31", 9),
    ("2024/01/17", 8),
    ("2024/01/10", 8),
    ("2024/01/04", 8),
    ("2023/12/29", 8),
    ("2023/12/20", 8),
    ("2023/12/13", 9),
    ("2023/12/06", 9),
    ("2023/11/29", 8)
]

# Loop through each date and race number
all_races_data = []
headers = []
headers_set = False

for date, num_races in race_dates:
    for race_number in range(1, num_races + 1):
        scrape_race(date, race_number)

# Check if any data was scraped before attempting to create DataFrame
if all_races_data and headers_set:
    df = pd.DataFrame(all_races_data, columns=headers)
    df.to_csv('racing_results.csv', index=False)
    print(df.head())  # Print the first few rows to verify
else:
    print("No data was scraped, or headers were not found.")


Table not found for race 8 on 13/03/2024
Table not found for race 2 on 28/02/2024
Table not found for race 5 on 28/02/2024
Table not found for race 5 on 21/02/2024
Table not found for race 6 on 21/02/2024
Table not found for race 3 on 17/01/2024
Table not found for race 4 on 17/01/2024
Table not found for race 5 on 17/01/2024
Table not found for race 6 on 10/01/2024
Table not found for race 4 on 29/12/2023
Table not found for race 4 on 20/12/2023
Table not found for race 6 on 20/12/2023
Table not found for race 9 on 06/12/2023
         Date  Race Number Pla. Horse No.                   Horse      Jockey  \
0  24/04/2024            1    1         1      BEAUTY WAVES(H294)    Z Purton   
1  24/04/2024            1    2         7  YOUTHFUL SPIRITS(J082)  L Ferraris   
2  24/04/2024            1    3         5     SUPER FORTUNE(G065)    M F Poon   
3  24/04/2024            1    4         4      HARMONY FIRE(E175)  E C W Wong   
4  24/04/2024            1    5         2     HEROIC MASTER(G3

In [34]:
num_rows = df.shape[0]
print(f"The number of rows in the DataFrame is: {num_rows}")


The number of rows in the DataFrame is: 1855


In [35]:
df.head()

Unnamed: 0,Date,Race Number,Pla.,Horse No.,Horse,Jockey,Trainer,Act. Wt.,Declar. Horse Wt.,Dr.,LBW,RunningPosition,Finish Time,Win Odds
0,24/04/2024,1,1,1,BEAUTY WAVES(H294),Z Purton,P C Ng,130,1163,7,-,331,0:56.62,2.0
1,24/04/2024,1,2,7,YOUTHFUL SPIRITS(J082),L Ferraris,M Newnham,120,1060,3,HD,212,0:56.64,9.2
2,24/04/2024,1,3,5,SUPER FORTUNE(G065),M F Poon,A S Cruz,122,1206,5,1-3/4,123,0:56.91,27.0
3,24/04/2024,1,4,4,HARMONY FIRE(E175),E C W Wong,D J Whyte,115,1118,6,2-1/4,454,0:56.96,3.4
4,24/04/2024,1,5,2,HEROIC MASTER(G394),H Bowman,F C Lor,129,1062,4,2-3/4,565,0:57.04,7.2


# Handling Issues with Website Scraping with Manual Entry

### Table not found for race 8 on 13/03/2024

In [44]:
import pandas as pd

# Define the data
data = {
    "Date": ["13/03/2024"] * 12,
    "Race Number": [8] * 12,
    "Pla.": list(range(1, 13)),
    "Horse No.": [2, 8, 9, 11, 1, 12, 10, 6, 4, 7, 3, 5],
    "Horse": ["MOMENTS IN TIME (H474)", "MAJESTIC VICTORY (E441)", "INTREPID WINNER (G144)", "KIMBERLEY (H329)",
              "BERLIN TANGO (E120)", "GALLANT HERO (E480)", "ESCAPE ROUTE (E273)", "LOOKING GREAT (E053)",
              "TURIN MASCOT (H165)", "M UNICORN (G466)", "PRIME MINISTER (D288)", "TRUE GENIUS (J073)"],
    "Jockey": ["E C W Wong", "Y L Chung", "K De Melo", "A Hamelin", "Z Purton", "A Badel", "H Bentley", "M Chadwick",
               "K Teetan", "L Ferraris", "M F Poon", "B Avdulla"],
    "Trainer": ["C S Shum", "A S Cruz", "C W Chang", "J Richards", "A S Cruz", "D J Hall", "J Size", "F C Lor",
                "P C Ng", "C Fownes", "P F Yiu", "A S Cruz"],
    "Act. Wt.": [125, 118, 122, 117, 135, 115, 119, 127, 130, 123, 132, 129],
    "Declar. Horse Wt.": [1065, 1038, 1053, 1093, 1065, 1102, 1076, 1132, 1126, 1196, 1163, 1066],
    "Dr.": [4, 6, 7, 1, 3, 9, 8, 2, 11, 5, 12, 10],
    "LBW": ["-", "2-1/4", "3-1/4", "3-1/2", "3-1/2", "3-3/4", "4", "4-1/4", "5", "5-1/2", "7-1/4", "7-1/2"],
    "RunningPosition": ["2 2 2 2 1", "3 3 4 3 2", "11 10 10 10 3", "4 4 3 4 4", "8 8 7 7 5", "7 7 8 8 6",
                        "9 9 9 9 7", "5 6 5 5 8", "10 11 11 11 9", "6 5 6 6 10", "12 12 12 12 11", "1 1 1 1 12"],
    "Finish Time": ["1:50.20", "1:50.57", "1:50.73", "1:50.76", "1:50.77", "1:50.82", "1:50.83", "1:50.88",
                    "1:51.00", "1:51.09", "1:51.37", "1:51.40"],
    "Win Odds": [7.3, 15, 15, 11, 8.6, 21, 5.7, 48, 6.6, 3.3, 35, 46]
}

# Create DataFrame
df = pd.DataFrame(data)

# Save to CSV
df.to_csv('race8_13032024.csv', index=False)

print("CSV file has been created successfully.")
print(df.head())  # Display the first few rows to ensure it looks correct


CSV file has been created successfully.
         Date  Race Number  Pla.  Horse No.                    Horse  \
0  13/03/2024            8     1          2   MOMENTS IN TIME (H474)   
1  13/03/2024            8     2          8  MAJESTIC VICTORY (E441)   
2  13/03/2024            8     3          9   INTREPID WINNER (G144)   
3  13/03/2024            8     4         11         KIMBERLEY (H329)   
4  13/03/2024            8     5          1      BERLIN TANGO (E120)   

       Jockey     Trainer  Act. Wt.  Declar. Horse Wt.  Dr.    LBW  \
0  E C W Wong    C S Shum       125               1065    4      -   
1   Y L Chung    A S Cruz       118               1038    6  2-1/4   
2   K De Melo   C W Chang       122               1053    7  3-1/4   
3   A Hamelin  J Richards       117               1093    1  3-1/2   
4    Z Purton    A S Cruz       135               1065    3  3-1/2   

  RunningPosition Finish Time  Win Odds  
0       2 2 2 2 1     1:50.20       7.3  
1       3 3 4 3 2     

### Table not found for race 2 on 28/02/2024

In [46]:
import pandas as pd

# Define the data
data = {
    "Date": ["28/02/2024"] * 12,
    "Race Number": [2] * 12,
    "Pla.": list(range(1, 13)),
    "Horse No.": [6, 2, 9, 7, 11, 5, 4, 12, 10, 3, 1, 8],
    "Horse": ["TACTICAL COMMAND (H197)", "MY INTELLIGENT (G031)", "MILLENNIUM FALCON (D281)", "DOUBLE SHOW (G250)",
              "FORTUNE MASTER (D371)", "HERO ICON (G422)", "MODEST GENTLEMAN (G352)", "NOIR RIDER (E138)",
              "EASY TWO (G423)", "FIRE BALL (D087)", "CHIU CHOW BROTHER (E188)", "PRECISION GOAL (H016)"],
    "Jockey": ["L Ferraris", "H Bowman", "H Bentley", "K Teetan", "A Hamelin", "M Chadwick",
               "A Atzeni", "M L Yeung", "K C Leung", "B Thompson", "A Badel", "C L Chau"],
    "Trainer": ["M Newnham", "W Y So", "T P Yung", "D J Whyte", "J Richards", "W K Mo",
                "K W Lui", "C W Chang", "K H Ting", "C H Yip", "K L Man", "D A Hayes"],
    "Act. Wt.": [132, 135, 124, 129, 121, 133, 134, 115, 123, 134, 135, 126],
    "Declar. Horse Wt.": [1059, 1107, 1169, 1226, 1090, 1212, 1053, 1183, 1061, 1049, 1075, 1089],
    "Dr.": [1, 10, 4, 9, 6, 8, 3, 2, 12, 7, 5, 11],
    "LBW": ["-", "N", "1-1/4", "3", "3-1/2", "3-1/2", "4", "4-3/4", "5", "5-1/2", "8-1/2", "11"],
    "RunningPosition": ["6 6 1", "2 2 2", "3 3 3", "11 11 4", "8 10 5", "10 9 6", "5 4 7", "9 8 8",
                        "12 12 9", "7 7 10", "1 1 11", "4 5 12"],
    "Finish Time": ["1:11.40", "1:11.45", "1:11.61", "1:11.87", "1:11.94", "1:11.95", "1:12.05", "1:12.17",
                    "1:12.21", "1:12.28", "1:12.74", "1:13.17"],
    "Win Odds": [5.9, 9.4, 6, 10, 16, 26, 7.1, 11, 45, 9.8, 8.6, 9.5]
}

# Create DataFrame
df = pd.DataFrame(data)

# Save to CSV
df.to_csv('race2_28022024.csv', index=False)

print("CSV file has been created successfully.")
print(df.head())  # Display the first few rows to ensure it looks correct


CSV file has been created successfully.
         Date  Race Number  Pla.  Horse No.                     Horse  \
0  28/02/2024            2     1          6   TACTICAL COMMAND (H197)   
1  28/02/2024            2     2          2     MY INTELLIGENT (G031)   
2  28/02/2024            2     3          9  MILLENNIUM FALCON (D281)   
3  28/02/2024            2     4          7        DOUBLE SHOW (G250)   
4  28/02/2024            2     5         11     FORTUNE MASTER (D371)   

       Jockey     Trainer  Act. Wt.  Declar. Horse Wt.  Dr.    LBW  \
0  L Ferraris   M Newnham       132               1059    1      -   
1    H Bowman      W Y So       135               1107   10      N   
2   H Bentley    T P Yung       124               1169    4  1-1/4   
3    K Teetan   D J Whyte       129               1226    9      3   
4   A Hamelin  J Richards       121               1090    6  3-1/2   

  RunningPosition Finish Time  Win Odds  
0           6 6 1     1:11.40       5.9  
1           2 2 

### Table not found for race 5 on 28/02/2024


In [47]:
import pandas as pd

# Define the data
data = {
    "Date": ["28/02/2024"] * 9,
    "Race Number": [5] * 9,
    "Pla.": list(range(1, 10)),
    "Horse No.": [5, 2, 6, 1, 7, 9, 3, 8, 4],
    "Horse": [
        "RAGING BLIZZARD (H368)", "POWER KOEPP (E301)", "WINGS OF WAR (H162)", "SUPERB CAPITALIST (H092)",
        "SING DRAGON (J141)", "STAR LINE (J110)", "HEALTHY HEALTHY (H203)", "CHAMPION METHOD (H366)",
        "ALL IS GOOD (G269)"
    ],
    "Jockey": [
        "Z Purton", "H Bowman", "H Bentley", "Y L Chung", "L Ferraris", "K De Melo",
        "A Badel", "K C Leung", "A Atzeni"
    ],
    "Trainer": [
        "J Size", "K L Man", "D J Whyte", "A S Cruz", "M Newnham", "D J Hall",
        "P C Ng", "C S Shum", "K W Lui"
    ],
    "Act. Wt.": [126, 129, 124, 130, 121, 118, 127, 119, 126],
    "Declar. Horse Wt.": [1094, 1107, 1158, 1177, 1185, 1192, 1155, 1128, 1194],
    "Dr.": [3, 1, 9, 4, 8, 2, 5, 7, 6],
    "LBW": ["-", "2-1/2", "3-1/4", "3-1/4", "3-3/4", "5", "5-1/4", "9", "12-3/4"],
    "RunningPosition": ["4 3 1", "5 5 2", "9 9 3", "2 2 4", "6 6 5", "7 7 6", "3 4 7", "1 1 8", "8 8 9"],
    "Finish Time": ["1:10.36", "1:10.76", "1:10.86", "1:10.88", "1:10.95", "1:11.18", "1:11.20", "1:11.80", "1:12.40"],
    "Win Odds": [1.7, 9.6, 11, 6.9, 16, 52, 6.6, 17, 54]
}

# Create DataFrame
df = pd.DataFrame(data)

# Save to CSV
df.to_csv('race5_28022024.csv', index=False)

print("CSV file has been created successfully.")
print(df.head())  # Display the first few rows to ensure it looks correct


CSV file has been created successfully.
         Date  Race Number  Pla.  Horse No.                     Horse  \
0  28/02/2024            5     1          5    RAGING BLIZZARD (H368)   
1  28/02/2024            5     2          2        POWER KOEPP (E301)   
2  28/02/2024            5     3          6       WINGS OF WAR (H162)   
3  28/02/2024            5     4          1  SUPERB CAPITALIST (H092)   
4  28/02/2024            5     5          7        SING DRAGON (J141)   

       Jockey    Trainer  Act. Wt.  Declar. Horse Wt.  Dr.    LBW  \
0    Z Purton     J Size       126               1094    3      -   
1    H Bowman    K L Man       129               1107    1  2-1/2   
2   H Bentley  D J Whyte       124               1158    9  3-1/4   
3   Y L Chung   A S Cruz       130               1177    4  3-1/4   
4  L Ferraris  M Newnham       121               1185    8  3-3/4   

  RunningPosition Finish Time  Win Odds  
0           4 3 1     1:10.36       1.7  
1           5 5 2     

### Table not found for race 5 on 21/02/2024


In [49]:
import pandas as pd

# Define the data
data = {
    "Date": ["21/02/2024"] * 11,
    "Race Number": [5] * 11,
    "Pla.": ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11'],
    "Horse No.": [4, 7, 10, 9, 5, 6, 11, 1, 3, 12, 8],
    "Horse": [
        "SERANGOON (H296)", "FOREVER GLORIOUS (G229)", "KYRUS UNICORN (D267)", 
        "CASA COSMO (G436)", "TAKE ACTION (J098)", "MR ALADDIN (H386)", 
        "SUNNY BABY (E166)", "PRECISE EXPRESS (D198)", "OWNERS' PRAISE (H047)", 
        "TURTLE AGAIN (H083)", "ROBOT FIGHTER (H309)"
    ],
    "Jockey": [
        "H Bentley", "C L Chau", "A Atzeni", "A Hamelin", "B Avdulla", "K Teetan",
        "M L Yeung", "B Thompson", "K De Melo", "L Hewitson", "K C Leung"
    ],
    "Trainer": [
        "A S Cruz", "K W Lui", "D J Hall", "J Richards", "J Size", "P C Ng",
        "D A Hayes", "P F Yiu", "W Y So", "T P Yung", "K L Man"
    ],
    "Act. Wt.": [130, 125, 123, 124, 129, 128, 123, 135, 132, 120, 125],
    "Declar. Horse Wt.": [1100, 1088, 1103, 1047, 1154, 1086, 1109, 1036, 1256, 1192, 1109],
    "Dr.": [1, 11, 5, 4, 7, 10, 3, 6, 8, 12, 2],
    "LBW": ["-", "SH", "N", "1", "1-3/4", "2-1/4", "3", "4", "5-1/4", "5-1/4", "6"],
    "RunningPosition": [
        "5 4 5 5 1", "1 1 1 1 2", "6 6 6 6 3", "3 2 3 3 4", "4 5 4 4 5",
        "2 3 2 2 6", "8 8 8 7 7", "11 11 11 11 8", "9 10 10 9 9", "10 9 9 10 10", "7 7 7 8 11"
    ],
    "Finish Time": [
        "1:49.82", "1:49.84", "1:49.87", "1:49.99", "1:50.11",
        "1:50.20", "1:50.28", "1:50.44", "1:50.65", "1:50.66", "1:50.77"
    ],
    "Win Odds": [4, 8.6, 7.2, 29, 7.4, 6.3, 13, 21, 15, 5.9, 35]
}

# Create DataFrame
df = pd.DataFrame(data)

# Save to CSV
df.to_csv('race5_21022024.csv', index=False)

print("CSV file has been created successfully.")
print(df.head())  # Display the first few rows to ensure it looks correct


CSV file has been created successfully.
         Date  Race Number Pla.  Horse No.                    Horse  \
0  21/02/2024            5    1          4         SERANGOON (H296)   
1  21/02/2024            5    2          7  FOREVER GLORIOUS (G229)   
2  21/02/2024            5    3         10     KYRUS UNICORN (D267)   
3  21/02/2024            5    4          9        CASA COSMO (G436)   
4  21/02/2024            5    5          5       TAKE ACTION (J098)   

      Jockey     Trainer  Act. Wt.  Declar. Horse Wt.  Dr.    LBW  \
0  H Bentley    A S Cruz       130               1100    1      -   
1   C L Chau     K W Lui       125               1088   11     SH   
2   A Atzeni    D J Hall       123               1103    5      N   
3  A Hamelin  J Richards       124               1047    4      1   
4  B Avdulla      J Size       129               1154    7  1-3/4   

  RunningPosition Finish Time  Win Odds  
0       5 4 5 5 1     1:49.82       4.0  
1       1 1 1 1 2     1:49.84     

### Table not found for race 6 on 21/02/2024


In [50]:
import pandas as pd

# Define the data
data = {
    "Date": ["21/02/2024"] * 12,
    "Race Number": [6] * 12,
    "Pla.": list(range(1, 13)),
    "Horse No.": [5, 7, 1, 11, 12, 8, 2, 6, 3, 10, 4, 9],
    "Horse": [
        "GALVANIC (H147)", "SATIRICAL GLORY (H073)", "GALLANT VALOUR (H323)", "DUBLIN STAR (C436)",
        "AMAZING BOY (D513)", "YOUNG VICTORY (D399)", "WIN WIN FIGHTER (E347)", "MASTER HERO (D501)",
        "PERFETTO (G198)", "NOBLE WIN (H195)", "TOP TOP TEA (H048)", "V LOVE YOU (J048)"
    ],
    "Jockey": [
        "K Teetan", "B Avdulla", "A Atzeni", "M Chadwick", "M L Yeung", "B Thompson",
        "L Hewitson", "Y L Chung", "A Hamelin", "M F Poon", "E C W Wong", "K C Leung"
    ],
    "Trainer": [
        "C Fownes", "D A Hayes", "J Richards", "K L Man", "C W Chang", "W Y So",
        "K W Lui", "A S Cruz", "D J Hall", "J Size", "C H Yip", "C S Shum"
    ],
    "Act. Wt.": [132, 132, 135, 124, 120, 132, 135, 127, 134, 123, 124, 127],
    "Declar. Horse Wt.": [1074, 1201, 1098, 1271, 1088, 1065, 1088, 1126, 1080, 1178, 1140, 1093],
    "Dr.": [1, 6, 12, 9, 10, 5, 7, 3, 2, 4, 11, 8],
    "LBW": ["-", "1-3/4", "2-3/4", "3", "3-1/2", "4", "4-1/4", "4-1/4", "4-1/2", "6", "8", "11-1/4"],
    "RunningPosition": ["4 3 3 1", "1 1 1 2", "10 11 10 3", "3 4 4 4", "12 12 12 5", "11 9 9 6",
                        "9 8 7 7", "5 7 8 8", "7 6 5 9", "8 10 11 10", "2 2 2 11", "6 5 6 12"],
    "Finish Time": ["1:40.05", "1:40.33", "1:40.48", "1:40.55", "1:40.63", "1:40.68",
                    "1:40.73", "1:40.74", "1:40.78", "1:41.00", "1:41.33", "1:41.84"],
    "Win Odds": [4.6, 4.3, 19, 73, 25, 66, 8.3, 6.4, 10, 5.9, 12, 39]
}

# Create DataFrame
df = pd.DataFrame(data)

# Save to CSV
df.to_csv('race6_21022024.csv', index=False)

print("CSV file has been created successfully.")
print(df.head())  # Display the first few rows to ensure it looks correct


CSV file has been created successfully.
         Date  Race Number  Pla.  Horse No.                   Horse  \
0  21/02/2024            6     1          5         GALVANIC (H147)   
1  21/02/2024            6     2          7  SATIRICAL GLORY (H073)   
2  21/02/2024            6     3          1   GALLANT VALOUR (H323)   
3  21/02/2024            6     4         11      DUBLIN STAR (C436)   
4  21/02/2024            6     5         12      AMAZING BOY (D513)   

       Jockey     Trainer  Act. Wt.  Declar. Horse Wt.  Dr.    LBW  \
0    K Teetan    C Fownes       132               1074    1      -   
1   B Avdulla   D A Hayes       132               1201    6  1-3/4   
2    A Atzeni  J Richards       135               1098   12  2-3/4   
3  M Chadwick     K L Man       124               1271    9      3   
4   M L Yeung   C W Chang       120               1088   10  3-1/2   

  RunningPosition Finish Time  Win Odds  
0         4 3 3 1     1:40.05       4.6  
1         1 1 1 2     1:40.3

### Table not found for race 3 on 17/01/2024


In [51]:
import pandas as pd

# Define the data
data = {
    "Date": ["17/01/2024"] * 12,
    "Race Number": [3] * 12,
    "Pla.": list(range(1, 13)),
    "Horse No.": [11, 5, 12, 6, 7, 9, 3, 1, 2, 8, 4, 10],
    "Horse": [
        "CRYSTAL POWERFUL (H068)", "FLYING PHANTOM (J086)", "SUPER ELITE (H247)", "CHEONG FAT (H058)",
        "MULTIDARLING (H148)", "DAN ATTACK (H317)", "TATTENHAM (H121)", "NORDIC COMBINED (G015)",
        "YOUTH POWER (H358)", "SHINKANSEN (H277)", "BRAVE WITNESS (H425)", "MIRACLES (G083)"
    ],
    "Jockey": [
        "A Atzeni", "B Avdulla", "M L Yeung", "Z Purton", "H T Mo", "A Hamelin",
        "L Ferraris", "H Bentley", "K H Chan", "K Teetan", "L Hewitson", "K C Leung"
    ],
    "Trainer": [
        "W K Mo", "J Size", "P F Yiu", "C W Chang", "T P Yung", "J Richards",
        "P C Ng", "W Y So", "D A Hayes", "D J Whyte", "C S Shum", "K L Man"
    ],
    "Act. Wt.": [118, 127, 117, 126, 122, 120, 128, 117, 130, 123, 127, 119],
    "Declar. Horse Wt.": [1128, 1042, 1087, 1155, 1245, 1165, 1116, 1073, 1035, 1142, 1075, 1133],
    "Dr.": [6, 9, 10, 4, 5, 11, 8, 1, 12, 2, 3, 7],
    "LBW": ["-", "1", "2-1/4", "2-1/4", "2-3/4", "3", "3", "3-1/2", "3-3/4", "3-3/4", "4", "9"],
    "RunningPosition": [
        "5 4 1", "9 8 2", "2 2 3", "10 10 4", "3 3 5", "11 11 6", "7 6 7", "8 9 8",
        "12 12 9", "6 7 10", "4 5 11", "1 1 12"
    ],
    "Finish Time": [
        "1:10.84", "1:10.99", "1:11.18", "1:11.18", "1:11.30", "1:11.30",
        "1:11.31", "1:11.39", "1:11.42", "1:11.45", "1:11.47", "1:12.28"
    ],
    "Win Odds": [6.3, 13, 59, 5.8, 8.5, 41, 17, 8.3, 116, 3, 15, 13]
}

# Create DataFrame
df = pd.DataFrame(data)

# Save to CSV
df.to_csv('race3_17012024.csv', index=False)

print("CSV file has been created successfully.")
print(df.head())  # Display the first few rows to ensure it looks correct


CSV file has been created successfully.
         Date  Race Number  Pla.  Horse No.                    Horse  \
0  17/01/2024            3     1         11  CRYSTAL POWERFUL (H068)   
1  17/01/2024            3     2          5    FLYING PHANTOM (J086)   
2  17/01/2024            3     3         12       SUPER ELITE (H247)   
3  17/01/2024            3     4          6        CHEONG FAT (H058)   
4  17/01/2024            3     5          7      MULTIDARLING (H148)   

      Jockey    Trainer  Act. Wt.  Declar. Horse Wt.  Dr.    LBW  \
0   A Atzeni     W K Mo       118               1128    6      -   
1  B Avdulla     J Size       127               1042    9      1   
2  M L Yeung    P F Yiu       117               1087   10  2-1/4   
3   Z Purton  C W Chang       126               1155    4  2-1/4   
4     H T Mo   T P Yung       122               1245    5  2-3/4   

  RunningPosition Finish Time  Win Odds  
0           5 4 1     1:10.84       6.3  
1           9 8 2     1:10.99     

### Table not found for race 4 on 17/01/2024


In [52]:
import pandas as pd

# Define the data
data = {
    "Date": ["17/01/2024"] * 10,
    "Race Number": [4] * 10,
    "Pla.": list(range(1, 11)),
    "Horse No.": [9, 2, 5, 6, 3, 10, 7, 1, 4, 8],
    "Horse": [
        "HAPPY UNITED (H208)", "HEROIC MASTER (G394)", "ATOMIC ENERGY (H150)", "NICCONI COUNTY (G208)",
        "CALIFORNIA DEEPLY (G451)", "FORTUNE WARRIOR (H229)", "CHAIN OF GOLD (J138)", "HARMONY N BLESSED (E061)",
        "BEAUTY GLORY (E459)", "ENFOLDED (H336)"
    ],
    "Jockey": [
        "Z Purton", "A Atzeni", "Y L Chung", "C Y Ho", "L Ferraris", "K Teetan",
        "L Hewitson", "B Avdulla", "H Bentley", "K C Leung"
    ],
    "Trainer": [
        "J Richards", "F C Lor", "A S Cruz", "C Fownes", "A S Cruz", "P C Ng",
        "W K Mo", "D A Hayes", "A S Cruz", "J Richards"
    ],
    "Act. Wt.": [120, 129, 116, 121, 128, 116, 119, 135, 125, 119],
    "Declar. Horse Wt.": [1122, 1081, 1037, 1151, 1095, 1115, 1016, 1216, 1172, 1080],
    "Dr.": [4, 6, 5, 1, 8, 9, 3, 2, 7, 10],
    "LBW": ["-", "1-1/4", "1-1/4", "1-1/2", "2", "2", "2-3/4", "3-1/4", "3-1/2", "7-1/2"],
    "RunningPosition": [
        "2 2 1", "5 6 2", "3 4 3", "6 5 4", "9 8 5", "1 1 6", "8 7 7", "4 3 8", "7 9 9", "10 10 10"
    ],
    "Finish Time": [
        "0:57.43", "0:57.63", "0:57.65", "0:57.66", "0:57.74", "0:57.76", "0:57.89", "0:57.95", "0:57.98", "0:58.64"
    ],
    "Win Odds": [2.8, 6.8, 7.6, 6.2, 13, 7.7, 33, 7.2, 27, 95]
}

# Create DataFrame
df = pd.DataFrame(data)

# Save to CSV
df.to_csv('race4_17012024.csv', index=False)

print("CSV file has been created successfully.")
print(df.head())  # Display the first few rows to ensure it looks correct


CSV file has been created successfully.
         Date  Race Number  Pla.  Horse No.                     Horse  \
0  17/01/2024            4     1          9       HAPPY UNITED (H208)   
1  17/01/2024            4     2          2      HEROIC MASTER (G394)   
2  17/01/2024            4     3          5      ATOMIC ENERGY (H150)   
3  17/01/2024            4     4          6     NICCONI COUNTY (G208)   
4  17/01/2024            4     5          3  CALIFORNIA DEEPLY (G451)   

       Jockey     Trainer  Act. Wt.  Declar. Horse Wt.  Dr.    LBW  \
0    Z Purton  J Richards       120               1122    4      -   
1    A Atzeni     F C Lor       129               1081    6  1-1/4   
2   Y L Chung    A S Cruz       116               1037    5  1-1/4   
3      C Y Ho    C Fownes       121               1151    1  1-1/2   
4  L Ferraris    A S Cruz       128               1095    8      2   

  RunningPosition Finish Time  Win Odds  
0           2 2 1     0:57.43       2.8  
1           5 6 

### Table not found for race 5 on 17/01/2024


In [53]:
import pandas as pd

# Define the data
data = {
    "Date": ["17/01/2024"] * 12,
    "Race Number": [5] * 12,
    "Pla.": list(range(1, 13)),
    "Horse No.": [7, 2, 9, 4, 5, 12, 6, 11, 1, 8, 3, 10],
    "Horse": [
        "WIN WIN FIGHTER (E347)", "DRAGON STAR (H128)", "ATOMIC BEAUTY (H066)", "SATIRICAL GLORY (H073)",
        "STURDY RUBY (H207)", "SUPERB MOVE (G304)", "V LOVE YOU (J048)", "RIGHT HONOURABLE (A164)",
        "INVINCIBLE MISSILE (C037)", "AROMATICA (H437)", "GOLDEN LINK (D222)", "DRAGON PRIDE (D164)"
    ],
    "Jockey": [
        "C Y Ho", "K Teetan", "K H Chan", "Z Purton", "M L Yeung", "Y L Chung",
        "H Bentley", "M Chadwick", "L Hewitson", "K C Leung", "M F Poon", "A Hamelin"
    ],
    "Trainer": [
        "K W Lui", "P C Ng", "A S Cruz", "D A Hayes", "K L Man", "C H Yip",
        "C S Shum", "M Newnham", "C Fownes", "W Y So", "P F Yiu", "T P Yung"
    ],
    "Act. Wt.": [127, 131, 119, 130, 128, 114, 127, 120, 135, 126, 129, 122],
    "Declar. Horse Wt.": [1088, 1106, 1209, 1210, 1093, 1134, 1086, 1035, 1130, 1195, 1073, 1117],
    "Dr.": [5, 9, 3, 8, 4, 11, 7, 6, 2, 10, 12, 1],
    "LBW": ["-", "N", "3/4", "1", "2-1/4", "2-3/4", "3-1/4", "5-3/4", "6-1/4", "8-1/4", "8-1/4", "9-1/4"],
    "RunningPosition": [
        "8 8 8 1", "2 2 2 2", "3 3 3 3", "1 1 1 4", "7 6 7 5", "5 5 5 6",
        "4 4 4 7", "10 10 10 8", "9 9 11 9", "12 11 9 10", "11 12 12 11", "6 7 6 12"
    ],
    "Finish Time": [
        "1:40.59", "1:40.63", "1:40.72", "1:40.76", "1:40.94", "1:41.01",
        "1:41.09", "1:41.52", "1:41.60", "1:41.90", "1:41.91", "1:42.06"
    ],
    "Win Odds": [4.2, 9.5, 5.8, 4.9, 6.4, 11, 144, 24, 10, 23, 36, 21]
}

# Create DataFrame
df = pd.DataFrame(data)

# Save to CSV
df.to_csv('race5_17012024.csv', index=False)

print("CSV file has been created successfully.")
print(df.head())  # Display the first few rows to ensure it looks correct


CSV file has been created successfully.
         Date  Race Number  Pla.  Horse No.                   Horse  \
0  17/01/2024            5     1          7  WIN WIN FIGHTER (E347)   
1  17/01/2024            5     2          2      DRAGON STAR (H128)   
2  17/01/2024            5     3          9    ATOMIC BEAUTY (H066)   
3  17/01/2024            5     4          4  SATIRICAL GLORY (H073)   
4  17/01/2024            5     5          5      STURDY RUBY (H207)   

      Jockey    Trainer  Act. Wt.  Declar. Horse Wt.  Dr.    LBW  \
0     C Y Ho    K W Lui       127               1088    5      -   
1   K Teetan     P C Ng       131               1106    9      N   
2   K H Chan   A S Cruz       119               1209    3    3/4   
3   Z Purton  D A Hayes       130               1210    8      1   
4  M L Yeung    K L Man       128               1093    4  2-1/4   

  RunningPosition Finish Time  Win Odds  
0         8 8 8 1     1:40.59       4.2  
1         2 2 2 2     1:40.63       9.5 

### Table not found for race 6 on 10/01/2024


In [54]:
import pandas as pd

# Define the data
data = {
    "Date": ["10/01/2024"] * 12,
    "Race Number": [6] * 12,
    "Pla.": list(range(1, 13)),
    "Horse No.": [9, 11, 7, 8, 5, 6, 4, 1, 12, 2, 3, 10],
    "Horse": [
        "HAPPY TOGETHER (G446)", "LA CITY BLANCHE (G371)", "HELENE FEELING (H215)", "NIMBLE NIMBUS (E249)",
        "TOURBILLON DIAMOND (E054)", "CHAMPION DRAGON (G070)", "TELECOM FIGHTERS (D482)", "ENCOUNTERED (G236)",
        "BERLIN TANGO (E120)", "MONEY CATCHER (G171)", "SWORD POINT (G448)", "RISING FROM ASHES (G209)"
    ],
    "Jockey": [
        "A Badel", "K Teetan", "C Y Ho", "A Atzeni", "H Bentley", "Y L Chung",
        "L Hewitson", "K C Leung", "M F Poon", "M Chadwick", "H Bowman", "K De Melo"
    ],
    "Trainer": [
        "F C Lor", "A S Cruz", "C S Shum", "P F Yiu", "M Newnham", "A S Cruz",
        "Y S Tsui", "K L Man", "A S Cruz", "F C Lor", "F C Lor", "K W Lui"
    ],
    "Act. Wt.": [115, 115, 117, 115, 118, 117, 124, 135, 115, 134, 126, 117],
    "Declar. Horse Wt.": [1138, 996, 1220, 1153, 1236, 1125, 1129, 1164, 1073, 1156, 1124, 1024],
    "Dr.": [3, 1, 9, 11, 4, 2, 8, 12, 10, 5, 6, 7],
    "LBW": ["-", "3/4", "1-1/2", "2-1/4", "2-1/2", "3-3/4", "4-1/4", "4-1/4", "4-1/2", "4-3/4", "6", "7"],
    "RunningPosition": [
        "6 5 5 6 1", "9 7 7 8 2", "3 3 3 4 3", "2 2 2 2 4", "10 9 9 9 5", "8 10 10 11 6",
        "1 1 1 1 7", "7 8 8 7 8", "12 12 12 12 9", "5 4 4 3 10", "4 6 6 5 11", "11 11 11 10 12"
    ],
    "Finish Time": [
        "1:48.29", "1:48.41", "1:48.55", "1:48.67", "1:48.70", "1:48.88",
        "1:48.98", "1:48.99", "1:49.03", "1:49.06", "1:49.24", "1:49.39"
    ],
    "Win Odds": [4.1, 10, 2.4, 16, 25, 20, 80, 11, 66, 27, 7.8, 35]
}

# Create DataFrame
df = pd.DataFrame(data)

# Save to CSV
df.to_csv('race6_10012024.csv', index=False)

print("CSV file has been created successfully.")
print(df.head())  # Display the first few rows to ensure it looks correct


CSV file has been created successfully.
         Date  Race Number  Pla.  Horse No.                      Horse  \
0  10/01/2024            6     1          9      HAPPY TOGETHER (G446)   
1  10/01/2024            6     2         11     LA CITY BLANCHE (G371)   
2  10/01/2024            6     3          7      HELENE FEELING (H215)   
3  10/01/2024            6     4          8       NIMBLE NIMBUS (E249)   
4  10/01/2024            6     5          5  TOURBILLON DIAMOND (E054)   

      Jockey    Trainer  Act. Wt.  Declar. Horse Wt.  Dr.    LBW  \
0    A Badel    F C Lor       115               1138    3      -   
1   K Teetan   A S Cruz       115                996    1    3/4   
2     C Y Ho   C S Shum       117               1220    9  1-1/2   
3   A Atzeni    P F Yiu       115               1153   11  2-1/4   
4  H Bentley  M Newnham       118               1236    4  2-1/2   

  RunningPosition Finish Time  Win Odds  
0       6 5 5 6 1     1:48.29       4.1  
1       9 7 7 8 2     

### Table not found for race 4 on 29/12/2023


In [55]:
import pandas as pd

# Define the data
data = {
    "Date": ["29/12/2023"] * 12,
    "Race Number": [4] * 12,
    "Pla.": list(range(1, 13)),
    "Horse No.": [12, 3, 5, 2, 4, 10, 1, 7, 9, 6, 11, 8],
    "Horse": [
        "ATOMIC BEAUTY (H066)", "SATIRICAL GLORY (H073)", "FOREVER GLORIOUS (G229)", "CONCERTO (E176)",
        "AROMATICA (H437)", "DAILY TROPHY (H283)", "GOLDEN LINK (D222)", "PODIUM (H173)",
        "YEE CHEONG WARRIOR (E025)", "HAPPY ANGEL (G133)", "RIGHT HONOURABLE (A164)", "HO HO STAR (H291)"
    ],
    "Jockey": [
        "K H Chan", "J McDonald", "L Hewitson", "L Ferraris", "K C Leung", "M L Yeung",
        "H Bowman", "A Badel", "M F Poon", "K De Melo", "M Chadwick", "A Atzeni"
    ],
    "Trainer": [
        "A S Cruz", "D A Hayes", "K W Lui", "K H Ting", "W Y So", "C Fownes",
        "P F Yiu", "C W Chang", "D J Hall", "P C Ng", "M Newnham", "C H Yip"
    ],
    "Act. Wt.": [117, 131, 128, 132, 129, 122, 134, 128, 122, 128, 122, 124],
    "Declar. Horse Wt.": [1222, 1206, 1134, 1036, 1196, 1170, 1066, 1053, 1097, 1116, 1037, 1123],
    "Dr.": [10, 11, 7, 6, 5, 3, 8, 4, 1, 2, 9, 12],
    "LBW": ["-", "1", "1-1/2", "2-1/4", "3", "3-1/2", "3-3/4", "3-3/4", "4-1/2", "5-3/4", "6-1/4", "30-3/4"],
    "RunningPosition": [
        "8 8 6 1", "3 3 2 2", "1 1 1 3", "5 4 4 4", "11 11 9 5", "4 5 3 6",
        "6 6 8 7", "9 10 12 8", "7 7 7 9", "10 9 10 10", "12 12 11 11", "2 2 5 12"
    ],
    "Finish Time": [
        "1:40.71", "1:40.87", "1:40.96", "1:41.09", "1:41.20", "1:41.25",
        "1:41.30", "1:41.31", "1:41.45", "1:41.63", "1:41.73", "1:45.63"
    ],
    "Win Odds": [48, 5.7, 9.5, 16, 27, 9.8, 11, 12, 8.5, 3.2, 11, 35]
}

# Create DataFrame
df = pd.DataFrame(data)

# Save to CSV
df.to_csv('race4_29122023.csv', index=False)

print("CSV file has been created successfully.")
print(df.head())  # Display the first few rows to ensure it looks correct


CSV file has been created successfully.
         Date  Race Number  Pla.  Horse No.                    Horse  \
0  29/12/2023            4     1         12     ATOMIC BEAUTY (H066)   
1  29/12/2023            4     2          3   SATIRICAL GLORY (H073)   
2  29/12/2023            4     3          5  FOREVER GLORIOUS (G229)   
3  29/12/2023            4     4          2          CONCERTO (E176)   
4  29/12/2023            4     5          4         AROMATICA (H437)   

       Jockey    Trainer  Act. Wt.  Declar. Horse Wt.  Dr.    LBW  \
0    K H Chan   A S Cruz       117               1222   10      -   
1  J McDonald  D A Hayes       131               1206   11      1   
2  L Hewitson    K W Lui       128               1134    7  1-1/2   
3  L Ferraris   K H Ting       132               1036    6  2-1/4   
4   K C Leung     W Y So       129               1196    5      3   

  RunningPosition Finish Time  Win Odds  
0         8 8 6 1     1:40.71      48.0  
1         3 3 2 2     1:40.8

### Table not found for race 4 on 20/12/2023


In [56]:
import pandas as pd

# Define the data
data = {
    "Date": ["20/12/2023"] * 12,
    "Race Number": [4] * 12,
    "Pla.": list(range(1, 13)),
    "Horse No.": [11, 8, 2, 5, 10, 6, 3, 7, 4, 9, 1, 12],
    "Horse": [
        "LIFE IS GOOD (H118)", "LUCKY QUALITY (D050)", "SAVVY DELIGHT (E386)", "KOWLOON EAST STAR (E158)",
        "KING INVINCIBLE (H199)", "PACKING PROSPECT (H424)", "GREAT ACHIEVER (J049)", "VALIANT ELEGANCE (D285)",
        "GUSTY FIGHTER (J029)", "DIAMOND SOARS (G201)", "SERGEANT PEPPER (E432)", "TIMESTORM (G109)"
    ],
    "Jockey": [
        "Y L Chung", "K H Chan", "Z Purton", "M Chadwick", "K C Leung", "J McDonald",
        "C L Chau", "A Hamelin", "A Atzeni", "C Y Ho", "L Hewitson", "A Badel"
    ],
    "Trainer": [
        "P C Ng", "C Fownes", "J Size", "K L Man", "F C Lor", "C S Shum",
        "T P Yung", "D J Whyte", "J Richards", "P F Yiu", "Y S Tsui", "D J Hall"
    ],
    "Act. Wt.": [117, 122, 131, 129, 125, 129, 127, 129, 129, 126, 134, 120],
    "Declar. Horse Wt.": [1113, 1110, 1223, 1239, 1023, 1106, 1043, 1133, 1091, 1201, 1143, 1069],
    "Dr.": [8, 9, 12, 6, 3, 5, 10, 11, 7, 4, 2, 1],
    "LBW": ["-", "1-1/2", "1-3/4", "2", "2-1/4", "2-3/4", "2-3/4", "3-1/2", "4-3/4", "4-3/4", "5", "6"],
    "RunningPosition": [
        "7 7 1", "10 11 2", "9 8 3", "2 1 4", "5 4 5", "12 10 6",
        "11 12 7", "6 6 8", "4 5 9", "1 2 10", "3 3 11", "8 9 12"
    ],
    "Finish Time": [
        "0:57.46", "0:57.70", "0:57.73", "0:57.77", "0:57.82", "0:57.89",
        "0:57.90", "0:58.03", "0:58.23", "0:58.23", "0:58.25", "0:58.41"
    ],
    "Win Odds": [18, 26, 10, 8.5, 9.9, 6.2, 97, 44, 18, 4.7, 6.4, 5.3]
}

# Create DataFrame
df = pd.DataFrame(data)

# Save to CSV
df.to_csv('race4_20122023.csv', index=False)

print("CSV file has been created successfully.")
print(df.head())  # Display the first few rows to ensure it looks correct


CSV file has been created successfully.
         Date  Race Number  Pla.  Horse No.                     Horse  \
0  20/12/2023            4     1         11       LIFE IS GOOD (H118)   
1  20/12/2023            4     2          8      LUCKY QUALITY (D050)   
2  20/12/2023            4     3          2      SAVVY DELIGHT (E386)   
3  20/12/2023            4     4          5  KOWLOON EAST STAR (E158)   
4  20/12/2023            4     5         10    KING INVINCIBLE (H199)   

       Jockey   Trainer  Act. Wt.  Declar. Horse Wt.  Dr.    LBW  \
0   Y L Chung    P C Ng       117               1113    8      -   
1    K H Chan  C Fownes       122               1110    9  1-1/2   
2    Z Purton    J Size       131               1223   12  1-3/4   
3  M Chadwick   K L Man       129               1239    6      2   
4   K C Leung   F C Lor       125               1023    3  2-1/4   

  RunningPosition Finish Time  Win Odds  
0           7 7 1     0:57.46      18.0  
1         10 11 2     0:57.7

### Table not found for race 6 on 20/12/2023


In [57]:
import pandas as pd

# Define the data
data = {
    "Date": ["20/12/2023"] * 12,
    "Race Number": [6] * 12,
    "Pla.": list(range(1, 13)),
    "Horse No.": [10, 4, 2, 8, 5, 7, 3, 9, 11, 1, 12, 6],
    "Horse": [
        "TO INFINITY (G277)", "RED HARE KING (H188)", "SIMPLY MAVERICK (H346)", "MR ALADDIN (H386)",
        "MASTER TORNADO (G410)", "NOBLE WIN (H195)", "ROYAL PRIDE (E351)", "SPARKY STAR (C416)",
        "TACTICAL COMMAND (H197)", "SPANGLE FORTUNE (G427)", "CHOICERUS (H029)", "SIR HURRICANE (H449)"
    ],
    "Jockey": [
        "Z Purton", "L Hewitson", "A Atzeni", "C L Chau", "K Teetan", "A Badel",
        "L Ferraris", "H T Mo", "M L Yeung", "K C Leung", "A Hamelin", "K H Chan"
    ],
    "Trainer": [
        "J Richards", "A S Cruz", "P F Yiu", "P C Ng", "W Y So", "J Size",
        "C Fownes", "T P Yung", "M Newnham", "K L Man", "D J Whyte", "C H Yip"
    ],
    "Act. Wt.": [120, 132, 134, 124, 130, 127, 133, 119, 119, 119, 118, 124],
    "Declar. Horse Wt.": [1029, 1087, 1213, 1089, 1189, 1179, 1080, 1211, 1054, 1083, 1079, 1154],
    "Dr.": [2, 9, 10, 1, 4, 12, 6, 8, 7, 11, 3, 5],
    "LBW": ["-", "N", "1/2", "3/4", "2-1/4", "3", "4-1/2", "8-3/4", "8-3/4", "9-1/4", "10-3/4", "13-1/2"],
    "RunningPosition": [
        "8 8 6 1", "2 2 1 2", "7 9 9 3", "4 5 5 4", "3 4 4 5", "12 11 10 6",
        "11 10 11 7", "9 12 12 8", "1 1 2 9", "10 3 3 10", "5 6 7 11", "6 7 8 12"
    ],
    "Finish Time": [
        "1:41.59", "1:41.65", "1:41.66", "1:41.72", "1:41.95", "1:42.06",
        "1:42.29", "1:42.98", "1:42.99", "1:43.05", "1:43.31", "1:43.75"
    ],
    "Win Odds": [7.3, 5.8, 2, 11, 14, 12, 12, 89, 33, 94, 23, 135]
}

# Create DataFrame
df = pd.DataFrame(data)

# Save to CSV
df.to_csv('race6_20122023.csv', index=False)

print("CSV file has been created successfully.")
print(df.head())  # Display the first few rows to ensure it looks correct


CSV file has been created successfully.
         Date  Race Number  Pla.  Horse No.                   Horse  \
0  20/12/2023            6     1         10      TO INFINITY (G277)   
1  20/12/2023            6     2          4    RED HARE KING (H188)   
2  20/12/2023            6     3          2  SIMPLY MAVERICK (H346)   
3  20/12/2023            6     4          8       MR ALADDIN (H386)   
4  20/12/2023            6     5          5   MASTER TORNADO (G410)   

       Jockey     Trainer  Act. Wt.  Declar. Horse Wt.  Dr.    LBW  \
0    Z Purton  J Richards       120               1029    2      -   
1  L Hewitson    A S Cruz       132               1087    9      N   
2    A Atzeni     P F Yiu       134               1213   10    1/2   
3    C L Chau      P C Ng       124               1089    1    3/4   
4    K Teetan      W Y So       130               1189    4  2-1/4   

  RunningPosition Finish Time  Win Odds  
0         8 8 6 1     1:41.59       7.3  
1         2 2 1 2     1:41.6

### Table not found for race 9 on 06/12/2023

In [58]:
import pandas as pd

# Define the data
data = {
    "Date": ["06/12/2023"] * 12,
    "Race Number": [9] * 12,
    "Pla.": list(range(1, 13)),
    "Horse No.": [6, 9, 7, 11, 1, 10, 4, 8, 12, 3, 2, 5],
    "Horse": [
        "HELENE FEELING (H215)", "BUTTERFIELD (C462)", "THE BEST PEACH (H139)", "NEARLY FINE (E430)",
        "TOURBILLON DIAMOND (E054)", "HAMERON (H265)", "SPIRITED EXPRESS (G356)", "COLUMBUS COUNTY (C465)",
        "LOOKING GREAT (E053)", "BOURBONAIRE (E365)", "ALL FOR ST PAUL'S (C517)", "RISING FROM ASHES (G209)"
    ],
    "Jockey": [
        "J McDonald", "T Marquand", "Z Purton", "M L Yeung", "B Avdulla", "L Ferraris",
        "H Doyle", "C Y Ho", "K Teetan", "A Hamelin", "M Barzalona", "L Hewitson"
    ],
    "Trainer": [
        "C S Shum", "C S Shum", "A S Cruz", "C Fownes", "M Newnham", "K W Lui",
        "P F Yiu", "C Fownes", "F C Lor", "J Size", "F C Lor", "K W Lui"
    ],
    "Act. Wt.": [124, 121, 123, 118, 135, 121, 128, 122, 117, 129, 129, 125],
    "Declar. Horse Wt.": [1217, 1175, 1016, 1113, 1234, 1044, 1179, 1072, 1126, 1047, 1116, 1000],
    "Dr.": [6, 12, 7, 5, 1, 10, 2, 9, 11, 4, 3, 8],
    "LBW": ["-", "1/2", "1-3/4", "2", "3-1/4", "4-1/2", "4-1/2", "4-3/4", "5-1/4", "6-1/2", "6-3/4", "9-1/4"],
    "RunningPosition": [
        "2 4 4 3 1", "7 8 9 8 2", "11 11 10 10 3", "6 7 7 7 4", "9 5 5 5 5",
        "3 2 2 2 6", "10 10 11 12 7", "5 6 6 6 8", "8 9 8 9 9", "4 3 3 4 10", "1 1 1 1 11", "12 12 12 11 12"
    ],
    "Finish Time": [
        "1:49.53", "1:49.62", "1:49.81", "1:49.86", "1:50.05", "1:50.24",
        "1:50.26", "1:50.29", "1:50.37", "1:50.59", "1:50.62", "1:51.00"
    ],
    "Win Odds": [2.4, 27, 6.7, 20, 17, 9.6, 10, 23, 57, 20, 24, 6.3]
}

# Create DataFrame
df = pd.DataFrame(data)

# Save to CSV
df.to_csv('race9_06122023.csv', index=False)

print("CSV file has been created successfully.")
print(df.head())  # Display the first few rows to ensure it looks correct


CSV file has been created successfully.
         Date  Race Number  Pla.  Horse No.                      Horse  \
0  06/12/2023            9     1          6      HELENE FEELING (H215)   
1  06/12/2023            9     2          9         BUTTERFIELD (C462)   
2  06/12/2023            9     3          7      THE BEST PEACH (H139)   
3  06/12/2023            9     4         11         NEARLY FINE (E430)   
4  06/12/2023            9     5          1  TOURBILLON DIAMOND (E054)   

       Jockey    Trainer  Act. Wt.  Declar. Horse Wt.  Dr.    LBW  \
0  J McDonald   C S Shum       124               1217    6      -   
1  T Marquand   C S Shum       121               1175   12    1/2   
2    Z Purton   A S Cruz       123               1016    7  1-3/4   
3   M L Yeung   C Fownes       118               1113    5      2   
4   B Avdulla  M Newnham       135               1234    1  3-1/4   

  RunningPosition Finish Time  Win Odds  
0       2 4 4 3 1     1:49.53       2.4  
1       7 8 9 8 

## Merging the Scraped Datasets

In [59]:
import pandas as pd

# Load the main dataset
main_df = pd.read_csv('racing_results.csv')

# Load the individual race datasets
races_files = [
    'race8_13032024.csv', 'race2_28022024.csv', 'race5_28022024.csv',
    'race5_21022024.csv', 'race6_21022024.csv', 'race3_17012024.csv',
    'race4_17012024.csv', 'race5_17012024.csv', 'race6_10012024.csv',
    'race4_29122023.csv', 'race4_20122023.csv', 'race6_20122023.csv',
    'race9_06122023.csv'
]

# Read and store all race DataFrames in a list
races_dfs = [pd.read_csv(file) for file in races_files]

# Append each race DataFrame to the main DataFrame
for race_df in races_dfs:
    main_df = pd.concat([main_df, race_df], ignore_index=True)

# Sort by 'Date' and 'Race Number' if these columns exist
if 'Date' in main_df.columns and 'Race Number' in main_df.columns:
    main_df['Date'] = pd.to_datetime(main_df['Date'], dayfirst=True)  # Ensure the date format is uniform
    main_df.sort_values(by=['Date', 'Race Number'], inplace=True)

# Reset the index after sorting
main_df.reset_index(drop=True, inplace=True)

# Save the merged dataset to a new CSV file
main_df.to_csv('merged_racing_results.csv', index=False)

print("Merged dataset created successfully and saved as 'merged_racing_results.csv'.")


Merged dataset created successfully and saved as 'merged_racing_results.csv'.


In [62]:
import pandas as pd

# Load the merged dataset
df = pd.read_csv('merged_racing_results.csv')

# Get the number of rows
num_rows = df.shape[0]

print(f"The dataset contains {num_rows} rows.")

The dataset contains 2005 rows.


In [64]:
df

Unnamed: 0,Date,Race Number,Pla.,Horse No.,Horse,Jockey,Trainer,Act. Wt.,Declar. Horse Wt.,Dr.,LBW,RunningPosition,Finish Time,Win Odds
0,2023-11-29,1,1,8.0,HAPPY HORSE(H061),Y L Chung,K L Man,117,1100,3,-,111,0:57.60,5.3
1,2023-11-29,1,2,6.0,GOODMANSHIP(E208),L Ferraris,D J Whyte,123,1165,4,3/4,632,0:57.71,9.6
2,2023-11-29,1,3,9.0,FIGHTING STAR(E255),K Teetan,C H Yip,120,1085,5,1-3/4,993,0:57.87,3.1
3,2023-11-29,1,4,3.0,STRIVE FOR GLORY(D186),Z Purton,W Y So,129,1113,7,2-1/4,224,0:57.96,5.3
4,2023-11-29,1,5,7.0,HAPPILY FRIENDS(C533),J McDonald,W Y So,122,1107,6,2-1/4,565,0:57.96,8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2000,2024-04-24,9,8,6.0,DEVILDOM(H445),L Hewitson,T P Yung,123,1069,3,5-1/2,66688,1:51.13,9.5
2001,2024-04-24,9,9,8.0,AFFORDABLE(J242),B Thompson,D A Hayes,119,1241,8,7,88859,1:51.38,66
2002,2024-04-24,9,10,4.0,EIGHTEEN PALMS(H033),A Badel,C S Shum,125,1214,5,7-1/2,333310,1:51.43,8
2003,2024-04-24,9,11,11.0,TRADITION(J111),M F Poon,P C Ng,115,1022,4,8-3/4,1110101211,1:51.64,85
