In [5]:
import os
from pathlib import Path

from dotenv import load_dotenv
from selenium.webdriver.common.by import By

# Location of the api-helpers .env file. Set API_HELPERS_ENV_FILE to point at a
# different checkout; the fallback is the original location expressed relative to
# the current user's home directory instead of a hardcoded /Users/<name> path.
DOTENV_PATH = Path(
    os.environ.get(
        "API_HELPERS_ENV_FILE",
        Path.home()
        / "App/racing-api-project/racing-api-project/libraries/api-helpers/src/api_helpers/.env",
    )
)

load_dotenv(dotenv_path=DOTENV_PATH)

# Kept after load_dotenv(), matching the original cell order.
# NOTE(review): presumably Config reads the environment populated above - confirm.
from api_helpers.config import Config

config = Config()

In [2]:
from racing_etl.raw.webdriver.web_driver import WebDriver

# Value passed as WebDriver's headless_mode flag, named so it is easy to spot and change.
HEADLESS_MODE = False

driver = WebDriver(config, headless_mode=HEADLESS_MODE)

2025-07-29T11:32:51Z | INFO - Logging configuration initialized with level: INFO


In [3]:
# This cell rebinds `driver` from the WebDriver wrapper to whatever create_session()
# returns (presumably the live browser session - the later cells call .get() and
# .find_element() on it). Re-running the cell would then call create_session() on
# that returned object, so only create a session while `driver` still exposes the method.
if hasattr(driver, "create_session"):
    driver = driver.create_session()

2025-07-29T11:32:53Z | INFO - Webdriver session created


In [4]:
# Page that the rest of the notebook scrapes.
NON_RUNNERS_URL = "https://www.racingpost.com/non-runners/"

driver.get(NON_RUNNERS_URL)

In [6]:
# Imported here so this cell works on its own when run straight after navigation.
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# The consent button may not be in the DOM (or clickable) the instant the page has
# loaded, so wait for it instead of looking it up immediately.
button = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.ID, "truste-consent-required"))
)
button.click()

In [8]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

In [11]:
# Locator for the "nr-block" elements on the page.
nr_block_locator = (By.CLASS_NAME, "nr-block")

# Block for up to 10 seconds until at least one matching element is present.
wait = WebDriverWait(driver, 10)
nr_blocks = wait.until(EC.presence_of_all_elements_located(nr_block_locator))

In [12]:
def _parse_nr_block(block):
    """Read course, conditions, races and non-runners out of one ``nr-block`` element.

    Args:
        block: Selenium element for a single ``nr-block`` container.

    Returns:
        dict: Always has ``course_name``, ``course_href``, ``races`` (list of
        ``{"time", "href"}``) and ``non_runners`` (list of
        ``{"name", "href", "race_number"}``). ``going``, ``weather`` and
        ``stalls`` are present only when the block has that many info
        descriptions.

    Raises:
        Whatever ``block.find_element`` raises when the course link is missing;
        the caller treats that as "skip this block".
    """
    course_link = block.find_element(By.CSS_SELECTOR, "a.nr-block__courseLink")
    block_data = {
        "course_name": course_link.text.strip(),
        "course_href": course_link.get_attribute("href"),
    }

    # Info descriptions are read positionally - assumed order: Going, Weather,
    # Stalls. zip() stops at the shorter side, so missing entries add no key.
    info_descriptions = block.find_elements(By.CLASS_NAME, "nr-block__infoDescription")
    for key, description in zip(("going", "weather", "stalls"), info_descriptions):
        block_data[key] = description.text.strip()

    # Race-time links. Horse-profile links are excluded, and a missing href
    # (get_attribute can return None) is skipped instead of raising on `in`.
    block_data["races"] = []
    for race_link in block.find_elements(
        By.CSS_SELECTOR, "a.ui-link_table[href*='/racecards/']"
    ):
        href = race_link.get_attribute("href")
        if href and "/racecards/" in href and "/profile/horse/" not in href:
            block_data["races"].append({"time": race_link.text.strip(), "href": href})

    # Non-runner horse links, each with the race number from a following span.
    block_data["non_runners"] = []
    for horse_link in block.find_elements(By.CSS_SELECTOR, "a[href*='/profile/horse/']"):
        # find_elements returns an empty list when nothing matches, so a missing
        # race-number span needs no exception handling (the original used a bare
        # `except:` here, which also swallowed KeyboardInterrupt).
        race_number_spans = horse_link.find_elements(
            By.XPATH,
            "./following-sibling::span[@class='nr-block__raceNumber']",
        )
        race_number = race_number_spans[0].text.strip() if race_number_spans else ""

        block_data["non_runners"].append(
            {
                "name": horse_link.text.strip(),
                "href": horse_link.get_attribute("href"),
                "race_number": race_number,
            }
        )

    return block_data


def extract_racing_data(driver, timeout=10):
    """Scrape every ``nr-block`` on the currently loaded page into plain dicts.

    Args:
        driver: Selenium driver/session already navigated to the page.
        timeout: Seconds to wait for at least one ``nr-block`` element to be
            present. Defaults to 10, the value that was previously hard-coded.

    Returns:
        list[dict]: One dict per successfully parsed block, in page order (see
        ``_parse_nr_block`` for the keys). A block whose parsing raises is
        reported with ``print`` and left out of the result.

    Raises:
        Whatever ``WebDriverWait.until`` raises if no ``nr-block`` element
        appears within ``timeout`` seconds; this is not caught here.

    NOTE(review): output captured from an earlier run shows link text such as
    "2:10 right" and "Persian Phoenix right" - presumably icon text inside the
    anchor. Values are returned as scraped; confirm before trimming.
    """
    wait = WebDriverWait(driver, timeout)
    nr_blocks = wait.until(
        EC.presence_of_all_elements_located((By.CLASS_NAME, "nr-block"))
    )

    racing_data = []
    for block in nr_blocks:
        try:
            racing_data.append(_parse_nr_block(block))
        except Exception as e:
            # Best-effort scrape: report the bad block and carry on with the rest.
            print(f"Error processing block: {e}")

    return racing_data


# Usage example: scrape the page that is open in the existing webdriver session
data = extract_racing_data(driver)

# Dump each scraped block as a small text report
for position, block in enumerate(data):
    # Look the two lists up once; blocks without them are reported as empty
    races = block.get("races", [])
    non_runners = block.get("non_runners", [])

    print(f"\n--- Block {position + 1} ---")
    print(f"Course: {block.get('course_name', 'N/A')}")
    print(f"Course Link: {block.get('course_href', 'N/A')}")
    print(f"Going: {block.get('going', 'N/A')}")
    print(f"Weather: {block.get('weather', 'N/A')}")
    print(f"Stalls: {block.get('stalls', 'N/A')}")

    print(f"\nRaces ({len(races)}):")
    for race in races:
        print(f"  Time: {race['time']} | Link: {race['href']}")

    print(f"\nNon-runners ({len(non_runners)}):")
    for horse in non_runners:
        print(f"  {horse['name']} {horse['race_number']} | Link: {horse['href']}")


--- Block 1 ---
Course: Beverley
Course Link: https://www.racingpost.com/racecards/2025-07-29#beverley
Going: GOOD TO FIRM, Good in places (GoingStick: 6.8) (Watered)
Weather: Mostly cloudy
Stalls: Inside

Races (4):
  Time: 2:10 right | Link: https://www.racingpost.com/racecards/6/beverley/2025-07-29/898915
  Time: 4:00 right | Link: https://www.racingpost.com/racecards/6/beverley/2025-07-29/898914
  Time: 5:10 right | Link: https://www.racingpost.com/racecards/6/beverley/2025-07-29/898916
  Time: 5:47 right | Link: https://www.racingpost.com/racecards/6/beverley/2025-07-29/898911

Non-runners (6):
  Persian Phoenix right (6) | Link: https://www.racingpost.com/profile/horse/5073364/persian-phoenix/
  Highwaygrey right (9) | Link: https://www.racingpost.com/profile/horse/2168858/highwaygrey/
  Apodictique right (5) | Link: https://www.racingpost.com/profile/horse/4983972/apodictique/
  Haberdash right (2) | Link: https://www.racingpost.com/profile/horse/7395267/haberdash/
  Military G