# Scrape up to 3 races at a time (see MAX_THREADS), only for PRO divisions.

In [None]:
import time
import random
import pandas as pd
import os
from concurrent.futures import ThreadPoolExecutor

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException

# ==========================
# CONFIG
# ==========================

# Result landing page for each season that should be scraped.
BASE_URLS = {
    "Season_7": "https://results.hyrox.com/season-7/",
    "Season_8": "https://results.hyrox.com/season-8/",
}

# Built with os.path.join so the folder nests correctly on every OS
# (a literal backslash is only a path separator on Windows).
SAVE_ROOT = os.path.join("Datasets", "Hyrox")
MAX_THREADS = 3  # adjust depending on RAM / CPU

def human_pause(a=3, b=7):
    """Sleep for a random duration drawn from ``random.uniform(a, b)`` seconds."""
    delay_seconds = random.uniform(a, b)
    time.sleep(delay_seconds)

# ==========================
# SELENIUM DRIVER
# ==========================

def create_driver():
    """Create a Chrome WebDriver with the headless flag and a fixed user-agent."""
    user_agent = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    )
    chrome_args = (
        "--start-maximized",
        "--headless=new",  # run headless for multiple threads
        f"user-agent={user_agent}",
    )

    chrome_options = webdriver.ChromeOptions()
    for chrome_arg in chrome_args:
        chrome_options.add_argument(chrome_arg)
    return webdriver.Chrome(options=chrome_options)

# ==========================
# SCRAPE FUNCTION
# ==========================

def scrape_pages(driver, race_name, division, gender_label, race_results):
    """Scrape all result pages for a given race/division/gender.

    The driver is expected to already show the submitted result list. Each
    parsed row is appended to ``race_results`` as
    ``[race, division, gender, rank, age rank, name(s), nation, age group,
    total time]``. For divisions whose name contains "DOUBLES" the member
    names are joined with " & " and the nation column is left empty.

    Args:
        driver: Selenium WebDriver showing a result list page.
        race_name: Race label copied into every row.
        division: Division label copied into every row.
        gender_label: Gender label copied into every row.
        race_results: List extended in place with one list per result row.

    Returns:
        False if a page times out, shows the "no results" message, or has no
        data rows (rows appended from earlier pages stay in ``race_results``).
        Otherwise True if at least one row was appended on any page.
    """
    is_doubles = "DOUBLES" in division.upper()
    page_number = 1
    # Tracked across ALL pages so the result is not decided by the last page alone.
    scraped_any = False

    while True:
        try:
            # Wait until either the empty-result notice or more than one list item is present.
            WebDriverWait(driver, 25).until(lambda d:
                "There are currently no results available" in d.page_source
                or len(d.find_elements(By.CSS_SELECTOR, "li.list-group-item.row")) > 1
            )
        except TimeoutException:
            return False

        # Check no-results message
        try:
            no_result_elem = driver.find_element(By.XPATH,
                "//*[contains(text(),'There are currently no results available')]"
            )
            if no_result_elem.is_displayed():
                return False
        except NoSuchElementException:
            pass

        rows = driver.find_elements(By.CSS_SELECTOR, "li.list-group-item.row")
        # Drop the header item; it shares the row selector but carries no result.
        rows = [r for r in rows if "list-group-header" not in r.get_attribute("class")]

        if not rows:
            return False

        for row in rows:
            try:
                rank = row.find_element(By.CSS_SELECTOR, ".place-primary").text
                age_rank = row.find_element(By.CSS_SELECTOR, ".place-secondary").text
                total_time = row.find_element(By.CSS_SELECTOR, ".type-time").text.replace("Total", "").strip()
                age_group = row.find_element(By.CSS_SELECTOR, ".type-age_class").text.replace("Age Group", "").strip()

                if is_doubles:
                    members = row.find_elements(By.CSS_SELECTOR, ".type-relay_member a")
                    name = " & ".join(m.text for m in members)
                    nation = ""
                else:
                    name = row.find_element(By.CSS_SELECTOR, "h4.type-fullname").text
                    nation = row.find_element(By.CSS_SELECTOR, ".nation__abbr").text

                race_results.append([
                    race_name, division, gender_label,
                    rank, age_rank, name, nation,
                    age_group, total_time
                ])
                scraped_any = True
            except Exception:
                # Best effort: skip rows that cannot be parsed, but let
                # KeyboardInterrupt / SystemExit propagate (a bare except would not).
                continue

        print(f"      Page {page_number} scraped")
        page_number += 1

        # Go to next page; a missing ">" link ends the pagination loop.
        try:
            next_button = driver.find_element(By.XPATH, "//a[text()='>']")
            driver.execute_script("arguments[0].click();", next_button)
            human_pause(3, 6)
        except NoSuchElementException:
            break

    return scraped_any

# ==========================
# RACE SCRAPER (THREAD TARGET)
# ==========================

def scrape_race(season, base_url, race_name, race_value):
    """Scrape a single race (all HYROX PRO and HYROX PRO DOUBLES divisions).

    Opens its own driver, selects the race on ``base_url`` and writes one CSV
    per division to ``SAVE_ROOT/<season>/<race>_<division>.csv``. Divisions
    whose CSV already exists are skipped; a division is only saved when at
    least one gender returned data and the first gender was not empty.

    Args:
        season: Season key, used as the output sub-folder name.
        base_url: Result landing page of that season.
        race_name: Visible race label (used for file names and log lines).
        race_value: ``value`` attribute of the race option to select.

    Returns:
        The status string ``"Finished <race_name>"``.

    Raises:
        Any Selenium or I/O error raised while scraping; the driver is still
        quit in that case.
    """
    driver = create_driver()
    try:
        season_folder = os.path.join(SAVE_ROOT, season)
        os.makedirs(season_folder, exist_ok=True)

        safe_name = race_name.replace(" ", "_").replace("/", "-")
        print(f"\nProcessing race: {race_name}")

        driver.get(base_url)
        human_pause(4, 6)
        Select(driver.find_element(By.ID, "default-lists-event_main_group")).select_by_value(race_value)
        human_pause(2, 4)
        select_division = Select(driver.find_element(By.ID, "default-lists-event"))
        available = [o.text for o in select_division.options]

        def open_division(div):
            """Reload the landing page and select this race and the given division."""
            driver.get(base_url)
            human_pause(3, 6)
            Select(driver.find_element(By.ID, "default-lists-event_main_group")).select_by_value(race_value)
            human_pause(2, 4)
            Select(driver.find_element(By.ID, "default-lists-event")).select_by_visible_text(div)
            human_pause(2, 4)

        def build_priority(base_name):
            """Order matching divisions: exact name, then "<name> - Overall", then other "<name> - ..." entries."""
            priority = []
            if base_name in available:
                priority.append(base_name)
            overall = f"{base_name} - Overall"
            if overall in available:
                priority.append(overall)
            other = [d for d in available if d.startswith(f"{base_name} -") and d not in priority]
            priority.extend(other)
            return priority

        def process_group(base_name):
            """Scrape and save every not-yet-saved division belonging to ``base_name``."""
            priority_list = build_priority(base_name)
            if not priority_list:
                return

            for div in priority_list:
                div_safe = div.replace(" ", "_").replace("/", "-")
                file_path = os.path.join(season_folder, f"{safe_name}_{div_safe}.csv")
                if os.path.exists(file_path):
                    print(f"   {div} already scraped")
                    continue

                print(f"   Trying {div}")
                is_doubles = "DOUBLES" in div.upper()
                genders = [("M","Male"),("W","Women")] if not is_doubles else [("M","Male"),("W","Female"),("X","Mixed")]

                division_results = []
                division_has_data = False

                # Load division page to discover which gender options it offers.
                open_division(div)

                try:
                    gender_dropdown = Select(driver.find_element(By.ID, "default-lists-sex"))
                    available_gender_values = [o.get_attribute("value") for o in gender_dropdown.options]
                except NoSuchElementException:
                    available_gender_values = []

                valid_genders = [(code, label) for code, label in genders if code in available_gender_values]
                if not valid_genders:
                    print(f"   {div} has no gender categories")
                    continue

                # Scrape all genders
                for idx, (gender_code, gender_label) in enumerate(valid_genders):
                    print(f"      Gender: {gender_label}")
                    open_division(div)
                    Select(driver.find_element(By.ID, "default-lists-sex")).select_by_value(gender_code)
                    Select(driver.find_element(By.ID, "default-num_results")).select_by_value("100")
                    human_pause(2, 4)
                    driver.find_element(By.ID, "default-submit").click()
                    human_pause(5, 10)

                    has_data = scrape_pages(driver, race_name, div, gender_label, division_results)
                    if idx == 0 and not has_data:
                        # An empty first gender is treated as "division has no data": stop and save nothing.
                        print(f"   {div} had NO DATA")
                        division_has_data = False
                        break
                    if has_data:
                        division_has_data = True

                # Save CSV
                if division_has_data:
                    df = pd.DataFrame(
                        division_results,
                        columns=["Race","Division","Gender",
                                 "Rank Overall","Rank Age Group",
                                 "Name","Nation","Age Group","Total Time"]
                    )
                    df.to_csv(file_path, index=False)
                    print(f"   Saved {len(df)} rows")

        process_group("HYROX PRO")
        process_group("HYROX PRO DOUBLES")

        return f"Finished {race_name}"
    finally:
        # Always release the browser, including when a Selenium call above raised.
        driver.quit()

# ==========================
# MAIN LOOP
# ==========================

# Collect one (season, base_url, race_name, race_value) task per race option.
all_tasks = []
for season, base_url in BASE_URLS.items():
    driver_main = create_driver()
    try:
        driver_main.get(base_url)
        human_pause(4, 6)
        try:
            WebDriverWait(driver_main, 25).until(
                EC.presence_of_element_located((By.ID, "default-lists-event_main_group"))
            )
        except TimeoutException:
            print(f"[{season}] Timed out waiting for the race list; skipping season")
            continue
        race_dropdown = Select(driver_main.find_element(By.ID, "default-lists-event_main_group"))
        # Read the option list once instead of re-querying it for every index.
        races = [(opt.text, opt.get_attribute("value")) for opt in race_dropdown.options]
    finally:
        # Runs on the timeout `continue` path too, so the listing browser never leaks.
        driver_main.quit()
    for race_name, race_value in races:
        all_tasks.append((season, base_url, race_name, race_value))

# Threaded execution
with ThreadPoolExecutor(max_workers=MAX_THREADS) as executor:
    futures = [executor.submit(scrape_race, *task) for task in all_tasks]
    for task, f in zip(all_tasks, futures):
        try:
            print(f.result())
        except Exception as exc:
            # task[2] is the race name; report the failure and keep printing the other results.
            print(f"FAILED {task[2]}: {exc!r}")

print("\nALL DONE.")


Processing race: 2025 Shanghai
Processing race: 2025 Valencia


Processing race: 2025 Atlanta
   HYROX PRO - Overall already scraped
   Trying HYROX PRO - Saturday
   HYROX PRO already scraped
   HYROX PRO DOUBLES already scraped
   HYROX PRO - Overall already scraped
   Trying HYROX PRO - Saturday
Finished 2025 Shanghai

Processing race: 2025 Maastricht
      Gender: Male
      Gender: Male
   HYROX PRO already scraped
   HYROX PRO DOUBLES already scraped

Processing race: 2025 Mumbai
   HYROX PRO already scraped
   HYROX PRO DOUBLES already scraped

Processing race: 2025 World Championships
   Trying HYROX PRO - Overall
      Page 1 scraped
      Page 1 scraped
      Page 2 scraped
      Gender: Women
      Gender: Male
      Page 2 scraped
      Gender: Women
      Page 1 scraped
   Saved 139 rows
   Trying HYROX PRO - Sunday
   HYROX PRO - Overall had NO DATA
   HYROX PRO - Friday already scraped
   HYROX PRO - Saturday already scraped
   Trying HYROX PRO DOUBLES - Overall
      G

# Scrape 20 at a time, only for PRO divisions.

In [None]:
import time
import random
import pandas as pd
import os
from concurrent.futures import ThreadPoolExecutor

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException

# ==========================
# CONFIG
# ==========================

# Result landing page for each season that should be scraped.
BASE_URLS = {
    "Season_6": "https://results.hyrox.com/season-6/",
    "Season_7": "https://results.hyrox.com/season-7/",
    "Season_8": "https://results.hyrox.com/season-8/",
}

# Built with os.path.join so the folder nests correctly on every OS
# (a literal backslash is only a path separator on Windows).
SAVE_ROOT = os.path.join("Datasets", "Hyrox")
MAX_THREADS = 20  # number of concurrent races

def human_pause(a=3, b=7):
    """Sleep for a random duration drawn from ``random.uniform(a, b)`` seconds."""
    wait_for = random.uniform(a, b)
    time.sleep(wait_for)

# ==========================
# SELENIUM DRIVER
# ==========================

def create_driver():
    """Create a Chrome WebDriver with the headless flag and a fixed user-agent."""
    user_agent = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    )
    chrome_args = (
        "--start-maximized",
        "--headless=new",  # headless for parallel threads
        f"user-agent={user_agent}",
    )

    chrome_options = webdriver.ChromeOptions()
    for chrome_arg in chrome_args:
        chrome_options.add_argument(chrome_arg)
    return webdriver.Chrome(options=chrome_options)

# ==========================
# SCRAPE FUNCTION
# ==========================

def scrape_pages(driver, race_name, division, gender_label, race_results):
    """Scrape all result pages for a given race/division/gender.

    The driver is expected to already show the submitted result list. Each
    parsed row is appended to ``race_results`` as
    ``[race, division, gender, rank, age rank, name(s), nation, age group,
    total time]``. For divisions whose name contains "DOUBLES" the member
    names are joined with " & " and the nation column is left empty.

    Args:
        driver: Selenium WebDriver showing a result list page.
        race_name: Race label copied into every row and used in log lines.
        division: Division label copied into every row.
        gender_label: Gender label copied into every row.
        race_results: List extended in place with one list per result row.

    Returns:
        False if a page times out, shows the "no results" message, or has no
        data rows (rows appended from earlier pages stay in ``race_results``).
        Otherwise True if at least one row was appended on any page.
    """
    is_doubles = "DOUBLES" in division.upper()
    page_number = 1
    # Tracked across ALL pages so the result is not decided by the last page alone.
    scraped_any = False

    while True:
        try:
            # Wait until either the empty-result notice or more than one list item is present.
            WebDriverWait(driver, 25).until(lambda d:
                "There are currently no results available" in d.page_source
                or len(d.find_elements(By.CSS_SELECTOR, "li.list-group-item.row")) > 1
            )
        except TimeoutException:
            print(f"[{race_name}] Timeout waiting for results in {division} - {gender_label}")
            return False

        # Check no-results message
        try:
            no_result_elem = driver.find_element(By.XPATH,
                "//*[contains(text(),'There are currently no results available')]"
            )
            if no_result_elem.is_displayed():
                print(f"[{race_name}] No results for {division} - {gender_label}")
                return False
        except NoSuchElementException:
            pass

        rows = driver.find_elements(By.CSS_SELECTOR, "li.list-group-item.row")
        # Drop the header item; it shares the row selector but carries no result.
        rows = [r for r in rows if "list-group-header" not in r.get_attribute("class")]

        if not rows:
            print(f"[{race_name}] No rows found for {division} - {gender_label}")
            return False

        for row in rows:
            try:
                rank = row.find_element(By.CSS_SELECTOR, ".place-primary").text
                age_rank = row.find_element(By.CSS_SELECTOR, ".place-secondary").text
                total_time = row.find_element(By.CSS_SELECTOR, ".type-time").text.replace("Total", "").strip()
                age_group = row.find_element(By.CSS_SELECTOR, ".type-age_class").text.replace("Age Group", "").strip()

                if is_doubles:
                    members = row.find_elements(By.CSS_SELECTOR, ".type-relay_member a")
                    name = " & ".join(m.text for m in members)
                    nation = ""
                else:
                    name = row.find_element(By.CSS_SELECTOR, "h4.type-fullname").text
                    nation = row.find_element(By.CSS_SELECTOR, ".nation__abbr").text

                race_results.append([
                    race_name, division, gender_label,
                    rank, age_rank, name, nation,
                    age_group, total_time
                ])
                scraped_any = True
            except Exception:
                # Best effort: skip rows that cannot be parsed, but let
                # KeyboardInterrupt / SystemExit propagate (a bare except would not).
                continue

        print(f"[{race_name}] Page {page_number} scraped for {division} - {gender_label}")
        page_number += 1

        # Go to next page; a missing ">" link ends the pagination loop.
        try:
            next_button = driver.find_element(By.XPATH, "//a[text()='>']")
            driver.execute_script("arguments[0].click();", next_button)
            human_pause(3, 6)
        except NoSuchElementException:
            break

    return scraped_any

# ==========================
# RACE SCRAPER (THREAD TARGET)
# ==========================

def scrape_race(season, base_url, race_name, race_value):
    """Scrape a single race (all HYROX PRO and HYROX PRO DOUBLES divisions).

    Opens its own driver, selects the race on ``base_url`` and writes one CSV
    per division to ``SAVE_ROOT/<season>/<race>_<division>.csv``. Divisions
    whose CSV already exists are skipped; a division is only saved when at
    least one gender returned data and the first gender was not empty.

    Args:
        season: Season key, used as the output sub-folder name.
        base_url: Result landing page of that season.
        race_name: Visible race label (used for file names and log lines).
        race_value: ``value`` attribute of the race option to select.

    Returns:
        The status string ``"[<race_name>] Finished scraping"``.

    Raises:
        Any Selenium or I/O error raised while scraping; the driver is still
        quit in that case.
    """
    driver = create_driver()
    try:
        season_folder = os.path.join(SAVE_ROOT, season)
        os.makedirs(season_folder, exist_ok=True)

        safe_name = race_name.replace(" ", "_").replace("/", "-")
        print(f"\n[{race_name}] Starting race scraping")

        driver.get(base_url)
        human_pause(4, 6)
        Select(driver.find_element(By.ID, "default-lists-event_main_group")).select_by_value(race_value)
        human_pause(2, 4)
        select_division = Select(driver.find_element(By.ID, "default-lists-event"))
        available = [o.text for o in select_division.options]

        def open_division(div):
            """Reload the landing page and select this race and the given division."""
            driver.get(base_url)
            human_pause(3, 6)
            Select(driver.find_element(By.ID, "default-lists-event_main_group")).select_by_value(race_value)
            human_pause(2, 4)
            Select(driver.find_element(By.ID, "default-lists-event")).select_by_visible_text(div)
            human_pause(2, 4)

        def build_priority(base_name):
            """Order matching divisions: exact name, then "<name> - Overall", then other "<name> - ..." entries."""
            priority = []
            if base_name in available:
                priority.append(base_name)
            overall = f"{base_name} - Overall"
            if overall in available:
                priority.append(overall)
            other = [d for d in available if d.startswith(f"{base_name} -") and d not in priority]
            priority.extend(other)
            return priority

        def process_group(base_name):
            """Scrape and save every not-yet-saved division belonging to ``base_name``."""
            priority_list = build_priority(base_name)
            if not priority_list:
                print(f"[{race_name}] No {base_name} divisions available")
                return

            for div in priority_list:
                div_safe = div.replace(" ", "_").replace("/", "-")
                file_path = os.path.join(season_folder, f"{safe_name}_{div_safe}.csv")
                if os.path.exists(file_path):
                    print(f"[{race_name}] {div} already scraped")
                    continue

                print(f"[{race_name}] Trying {div}")
                is_doubles = "DOUBLES" in div.upper()
                genders = [("M","Male"),("W","Women")] if not is_doubles else [("M","Male"),("W","Female"),("X","Mixed")]

                division_results = []
                division_has_data = False

                # Load division page to discover which gender options it offers.
                open_division(div)

                try:
                    gender_dropdown = Select(driver.find_element(By.ID, "default-lists-sex"))
                    available_gender_values = [o.get_attribute("value") for o in gender_dropdown.options]
                except NoSuchElementException:
                    available_gender_values = []

                valid_genders = [(code, label) for code, label in genders if code in available_gender_values]
                if not valid_genders:
                    print(f"[{race_name}] {div} has no gender categories")
                    continue

                for idx, (gender_code, gender_label) in enumerate(valid_genders):
                    print(f"[{race_name}] Scraping gender: {gender_label} in {div}")
                    open_division(div)
                    Select(driver.find_element(By.ID, "default-lists-sex")).select_by_value(gender_code)
                    Select(driver.find_element(By.ID, "default-num_results")).select_by_value("100")
                    human_pause(2, 4)
                    driver.find_element(By.ID, "default-submit").click()
                    human_pause(5, 10)

                    has_data = scrape_pages(driver, race_name, div, gender_label, division_results)
                    if idx == 0 and not has_data:
                        # An empty first gender is treated as "division has no data": stop and save nothing.
                        print(f"[{race_name}] {div} had NO DATA")
                        division_has_data = False
                        break
                    if has_data:
                        division_has_data = True

                if division_has_data:
                    df = pd.DataFrame(
                        division_results,
                        columns=["Race","Division","Gender",
                                 "Rank Overall","Rank Age Group",
                                 "Name","Nation","Age Group","Total Time"]
                    )
                    df.to_csv(file_path, index=False)
                    print(f"[{race_name}] Saved {len(df)} rows for {div}")

        process_group("HYROX PRO")
        process_group("HYROX PRO DOUBLES")

        return f"[{race_name}] Finished scraping"
    finally:
        # Always release the browser, including when a Selenium call above raised.
        driver.quit()

# ==========================
# MAIN LOOP
# ==========================

# Collect one (season, base_url, race_name, race_value) task per race option.
all_tasks = []
for season, base_url in BASE_URLS.items():
    driver_main = create_driver()
    try:
        driver_main.get(base_url)
        human_pause(4, 6)
        try:
            WebDriverWait(driver_main, 25).until(
                EC.presence_of_element_located((By.ID, "default-lists-event_main_group"))
            )
        except TimeoutException:
            print(f"[{season}] Timed out waiting for the race list; skipping season")
            continue
        race_dropdown = Select(driver_main.find_element(By.ID, "default-lists-event_main_group"))
        # Read the option list once instead of re-querying it for every index.
        races = [(opt.text, opt.get_attribute("value")) for opt in race_dropdown.options]
    finally:
        # Runs on the timeout `continue` path too, so the listing browser never leaks.
        driver_main.quit()
    for race_name, race_value in races:
        all_tasks.append((season, base_url, race_name, race_value))

# Threaded execution
with ThreadPoolExecutor(max_workers=MAX_THREADS) as executor:
    futures = [executor.submit(scrape_race, *task) for task in all_tasks]
    for task, f in zip(all_tasks, futures):
        try:
            print(f.result())
        except Exception as exc:
            # task[2] is the race name; report the failure and keep printing the other results.
            print(f"[{task[2]}] FAILED: {exc!r}")

print("\nALL DONE.")


[2024 Incheon] Starting race scraping

[2024 Taipei] Starting race scraping

[2024 Madrid] Starting race scraping

[2024 Gainful Anaheim] Starting race scraping

[2024 Mexico City] Starting race scraping

[2024 Rimini] Starting race scraping

[2024 New York] Starting race scraping

[2024 Doha All Women's Race] Starting race scraping

[2024 Manchester] Starting race scraping

[2024 World Championships Nice] Starting race scraping

[2024 Doha] Starting race scraping

[2024 Malaga] Starting race scraping

[2024 Bordeaux] Starting race scraping

[2024 Sports Direct HYROX London] Starting race scraping

[2024 Berlin] Starting race scraping

[2024 Köln] Starting race scraping

[2024 Gdansk] Starting race scraping
[2024 Houston] Starting race scraping


[2024 Rotterdam] Starting race scraping

[2024 Copenhagen] Starting race scraping
[2024 New York] Trying HYROX PRO[2024 Doha All Women's Race] Trying HYROX PRO

[2024 Gainful Anaheim] Trying HYROX PRO
[2024 Bordeaux] Trying HYROX PRO
[2024 Ma

# Scrape 100 at a time, for all divisions available for all seasons available, even the empty ones.

In [None]:
import time
import random
import pandas as pd
import os
from concurrent.futures import ThreadPoolExecutor

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException

# ==========================
# CONFIG
# ==========================

# Result landing pages for seasons 1 through 8, keyed "Season_<n>".
BASE_URLS = {f"Season_{i}": f"https://results.hyrox.com/season-{i}/" for i in range(1, 9)}
# Built with os.path.join so the folder nests correctly on every OS
# (a literal backslash is only a path separator on Windows).
SAVE_ROOT = os.path.join("Datasets", "Hyrox")
MAX_THREADS = 100  # adjust for your system

def human_pause(a=2, b=5):
    """Sleep for a random duration drawn from ``random.uniform(a, b)`` seconds."""
    pause_length = random.uniform(a, b)
    time.sleep(pause_length)

# ==========================
# SELENIUM DRIVER
# ==========================

def create_driver():
    """Create a Chrome WebDriver with the headless flag and a fixed user-agent."""
    user_agent = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    )
    chrome_args = (
        "--start-maximized",
        "--headless=new",
        f"user-agent={user_agent}",
    )

    chrome_options = webdriver.ChromeOptions()
    for chrome_arg in chrome_args:
        chrome_options.add_argument(chrome_arg)
    return webdriver.Chrome(options=chrome_options)

# ==========================
# SCRAPE PAGES FUNCTION
# ==========================

def scrape_pages(driver, race_name, division, gender_label, race_results):
    """Scrape all result pages for a given race/division/gender.

    The driver is expected to already show the submitted result list. Each
    parsed row is appended to ``race_results`` as
    ``[race, division, gender, rank, age rank, name(s), nation, age group,
    total time]``. For divisions whose name contains "DOUBLES" the member
    names are joined with " & " and the nation column is left empty.

    Args:
        driver: Selenium WebDriver showing a result list page.
        race_name: Race label copied into every row and used in log lines.
        division: Division label copied into every row.
        gender_label: Gender label copied into every row.
        race_results: List extended in place with one list per result row.

    Returns:
        False if a page times out, shows the "no results" message, or has no
        data rows (rows appended from earlier pages stay in ``race_results``).
        Otherwise True if at least one row was appended on any page.
    """
    is_doubles = "DOUBLES" in division.upper()
    page_number = 1
    # Tracked across ALL pages so the result is not decided by the last page alone.
    scraped_any = False

    while True:
        try:
            # Wait until either the empty-result notice or more than one list item is present.
            WebDriverWait(driver, 25).until(lambda d:
                "There are currently no results available" in d.page_source
                or len(d.find_elements(By.CSS_SELECTOR, "li.list-group-item.row")) > 1
            )
        except TimeoutException:
            print(f"[{race_name}] Timeout waiting for {division} - {gender_label}")
            return False

        # No results?
        try:
            no_result_elem = driver.find_element(By.XPATH,
                "//*[contains(text(),'There are currently no results available')]"
            )
            if no_result_elem.is_displayed():
                print(f"[{race_name}] No results for {division} - {gender_label}")
                return False
        except NoSuchElementException:
            pass

        rows = driver.find_elements(By.CSS_SELECTOR, "li.list-group-item.row")
        # Drop the header item; it shares the row selector but carries no result.
        rows = [r for r in rows if "list-group-header" not in r.get_attribute("class")]

        if not rows:
            print(f"[{race_name}] No rows found for {division} - {gender_label}")
            return False

        for row in rows:
            try:
                rank = row.find_element(By.CSS_SELECTOR, ".place-primary").text
                age_rank = row.find_element(By.CSS_SELECTOR, ".place-secondary").text
                total_time = row.find_element(By.CSS_SELECTOR, ".type-time").text.replace("Total", "").strip()
                age_group = row.find_element(By.CSS_SELECTOR, ".type-age_class").text.replace("Age Group", "").strip()

                if is_doubles:
                    members = row.find_elements(By.CSS_SELECTOR, ".type-relay_member a")
                    name = " & ".join(m.text for m in members)
                    nation = ""
                else:
                    name = row.find_element(By.CSS_SELECTOR, "h4.type-fullname").text
                    nation = row.find_element(By.CSS_SELECTOR, ".nation__abbr").text

                race_results.append([
                    race_name, division, gender_label,
                    rank, age_rank, name, nation,
                    age_group, total_time
                ])
                scraped_any = True
            except Exception:
                # Best effort: skip rows that cannot be parsed, but let
                # KeyboardInterrupt / SystemExit propagate (a bare except would not).
                continue

        print(f"[{race_name}] Page {page_number} scraped for {division} - {gender_label}")
        page_number += 1

        # Go to next page; a missing ">" link ends the pagination loop.
        try:
            next_button = driver.find_element(By.XPATH, "//a[text()='>']")
            driver.execute_script("arguments[0].click();", next_button)
            human_pause(2, 5)
        except NoSuchElementException:
            break

    return scraped_any

# ==========================
# SCRAPE RACE FUNCTION
# ==========================

def scrape_race(season, base_url, race_name, race_value):
    """Scrape every division of a single race and write one CSV per division.

    Opens its own driver, selects the race on ``base_url`` and, unless a CSV
    already exists for every listed division, scrapes each division for every
    gender option it offers. A CSV is written for each division at
    ``SAVE_ROOT/<season>/<race>_<division>.csv`` even when no rows were found.

    Args:
        season: Season key, used as the output sub-folder name.
        base_url: Result landing page of that season.
        race_name: Visible race label (used for file names and log lines).
        race_value: ``value`` attribute of the race option to select.

    Returns:
        ``"[<race_name>] Skipped (all divisions exist)"`` when every division
        CSV is already present, otherwise ``"[<race_name>] Finished scraping"``.

    Raises:
        Any Selenium or I/O error raised while scraping; the driver is still
        quit in that case.
    """
    driver = create_driver()
    try:
        season_folder = os.path.join(SAVE_ROOT, season)
        os.makedirs(season_folder, exist_ok=True)

        safe_name = race_name.replace(" ", "_").replace("/", "-")
        print(f"\n[{race_name}] Starting scraping")

        driver.get(base_url)
        human_pause(2, 5)
        Select(driver.find_element(By.ID, "default-lists-event_main_group")).select_by_value(race_value)
        human_pause(1, 3)

        # Get all divisions
        select_division = Select(driver.find_element(By.ID, "default-lists-event"))
        divisions = [o.text for o in select_division.options]

        def csv_path(div):
            """Return the CSV path used for the given division of this race."""
            div_safe = div.replace(" ", "_").replace("/", "-")
            return os.path.join(season_folder, f"{safe_name}_{div_safe}.csv")

        def open_division(div):
            """Reload the landing page and select this race and the given division."""
            driver.get(base_url)
            human_pause(2, 4)
            Select(driver.find_element(By.ID, "default-lists-event_main_group")).select_by_value(race_value)
            human_pause(1, 2)
            Select(driver.find_element(By.ID, "default-lists-event")).select_by_visible_text(div)
            human_pause(1, 2)

        # Check if all division CSVs exist; if yes, skip the race entirely
        if all(os.path.exists(csv_path(div)) for div in divisions):
            print(f"[{race_name}] All division CSVs exist. Skipping race.")
            return f"[{race_name}] Skipped (all divisions exist)"

        for div in divisions:
            file_path = csv_path(div)
            if os.path.exists(file_path):
                # NOTE(review): the division is scraped again and the existing
                # file is overwritten below -- confirm this is intended.
                print(f"[{race_name}] {div} already exists, will check for data")

            print(f"[{race_name}] Scraping division: {div}")

            open_division(div)

            try:
                gender_dropdown = Select(driver.find_element(By.ID, "default-lists-sex"))
                genders = [(o.get_attribute("value"), o.text) for o in gender_dropdown.options]
            except NoSuchElementException:
                # No gender dropdown on this page: scrape once without a gender filter.
                genders = [("", "All")]

            division_results = []
            division_has_data = False

            for gender_code, gender_label in genders:
                print(f"[{race_name}] Scraping gender: {gender_label} in {div}")

                open_division(div)
                if gender_code:
                    Select(driver.find_element(By.ID, "default-lists-sex")).select_by_value(gender_code)
                Select(driver.find_element(By.ID, "default-num_results")).select_by_value("100")
                human_pause(1, 2)
                driver.find_element(By.ID, "default-submit").click()
                human_pause(2, 4)

                has_data = scrape_pages(driver, race_name, div, gender_label, division_results)
                if has_data:
                    division_has_data = True

            # Save CSV even if empty (no data)
            df = pd.DataFrame(
                division_results,
                columns=["Race","Division","Gender",
                         "Rank Overall","Rank Age Group",
                         "Name","Nation","Age Group","Total Time"]
            )
            df.to_csv(file_path, index=False)
            if division_has_data:
                print(f"[{race_name}] Saved {len(df)} rows for {div}")
            else:
                print(f"[{race_name}] Division {div} had no data. Empty CSV saved.")

        return f"[{race_name}] Finished scraping"
    finally:
        # Always release the browser, including on the early skip and when a Selenium call raised.
        driver.quit()

# ==========================
# MAIN EXECUTION
# ==========================

# Collect one (season, base_url, race_name, race_value) task per race option.
all_tasks = []

for season, base_url in BASE_URLS.items():
    driver_main = create_driver()
    try:
        driver_main.get(base_url)
        human_pause(2, 4)
        try:
            WebDriverWait(driver_main, 20).until(
                EC.presence_of_element_located((By.ID, "default-lists-event_main_group"))
            )
        except TimeoutException:
            print(f"[{season}] Timed out waiting for the race list; skipping season")
            continue

        race_dropdown = Select(driver_main.find_element(By.ID, "default-lists-event_main_group"))
        # Read the option list once instead of re-querying it for every index.
        races = [(opt.text, opt.get_attribute("value")) for opt in race_dropdown.options]
    finally:
        # Single cleanup point: covers the timeout path and any unexpected error.
        driver_main.quit()

    for race_name, race_value in races:
        all_tasks.append((season, base_url, race_name, race_value))

with ThreadPoolExecutor(max_workers=MAX_THREADS) as executor:
    futures = [executor.submit(scrape_race, *task) for task in all_tasks]
    for task, f in zip(all_tasks, futures):
        try:
            print(f.result())
        except Exception as exc:
            # task[2] is the race name; report the failure and keep printing the other results.
            print(f"[{task[2]}] FAILED: {exc!r}")

print("\nALL DONE.")