In [None]:
# ==============================================================================
# STEP 1: KAGGLE AUTH & PYTHON DEPENDENCIES
# ==============================================================================
print("--- Installing Python Dependencies ---")
!pip install -q selenium pandas kaggle

import os
import pandas as pd
import logging
from datetime import datetime
from kaggle_secrets import UserSecretsClient
from importlib import reload

# Force logging to be active so we see all messages
reload(logging)
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

print("\n--- Setting up Kaggle API Authentication ---")
api = None
try:
    user_secrets = UserSecretsClient()
    secret_value = user_secrets.get_secret("KAGGLE_JSON")
    kaggle_dir = os.path.expanduser('~/.kaggle')
    os.makedirs(kaggle_dir, exist_ok=True)
    kaggle_json_path = os.path.join(kaggle_dir, 'kaggle.json')
    with open(kaggle_json_path, 'w') as f: f.write(secret_value)
    os.chmod(kaggle_json_path, 600)
    
    from kaggle.api.kaggle_api_extended import KaggleApi
    api = KaggleApi()
    api.authenticate()
    print("Kagle API Authentication Successful.")
except Exception as e:
    logging.critical(f"FATAL: A critical error occurred during Kaggle setup. Error: {e}")
    raise

# ==============================================================================
# STEP 2: SYSTEM INSTALLATIONS (CHROME)
# ==============================================================================
print("\n--- Installing Google Chrome & ChromeDriver ---")
!sudo apt-get update > /dev/null
!sudo apt-get install -y wget gnupg > /dev/null
!wget -q -O - https://dl.google.com/linux/linux_signing_key.pub | sudo apt-key add -
!sudo sh -c 'echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" > /etc/apt/sources.list.d/google-chrome.list'
!sudo apt-get update > /dev/null
!sudo apt-get install -y google-chrome-stable > /dev/null
!apt-get install -y chromium-chromedriver > /dev/null
!cp /usr/lib/chromium-browser/chromedriver /usr/bin &>/dev/null
print("--- Chrome & ChromeDriver Setup Complete ---")


# ==============================================================================
# STEP 3: YOUR PROVEN SCRAPER - UNCHANGED AND UNMODIFIED
# ==============================================================================
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException

# YOUR WORKING SCRAPER FUNCTIONS - VERBATIM
def get_main_wnba_lines(driver):
    url = "https://www.pinnacle.com/en/basketball/matchups/"
    logging.info(f"Navigating to matchups page: {url}")
    driver.get(url)
    try:
        cookie_button = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, "onetrust-accept-btn-handler")))
        logging.info("Found and clicked the Accept button for cookies."); cookie_button.click(); time.sleep(2)
    except TimeoutException:
        logging.warning("Cookie banner not found or already handled.")
    all_games_summary = []
    try:
        logging.info("Searching for WNBA header row...")
        wnba_header_row_xpath = "//a[contains(@href, '/wnba/matchups/')]/ancestor::div[contains(@class, 'row-')]"
        wnba_header_row = WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.XPATH, wnba_header_row_xpath)))
        all_following_rows = wnba_header_row.find_elements(By.XPATH, "./following-sibling::div")
        logging.info(f"Scanning {len(all_following_rows)} rows after WNBA header...")
        for row in all_following_rows:
            row_class = row.get_attribute('class')
            if 'row-k9ktBvvTsJ' in row_class:
                try:
                    game = {}
                    link_tag = row.find_element(By.CSS_SELECTOR, "a[href*='/basketball/wnba/']")
                    teams = link_tag.find_elements(By.CSS_SELECTOR, "span.ellipsis.gameInfoLabel-EDDYv5xEfd")
                    game['team1'], game['team2'] = teams[0].text, teams[1].text
                    game['game_link'] = link_tag.get_attribute('href')
                    odds_groups = row.find_elements(By.CSS_SELECTOR, "div.buttons-j19Jlcwsi9")
                    def get_text(elements, index): return elements[index].text if index < len(elements) else 'N/A'
                    h_spans = odds_groups[0].find_elements(By.CSS_SELECTOR, "button span")
                    ml_spans = odds_groups[1].find_elements(By.CSS_SELECTOR, "span.price-r5BU0ynJha")
                    t_spans = odds_groups[2].find_elements(By.CSS_SELECTOR, "button span")
                    game.update({'team1_moneyline': get_text(ml_spans, 0), 'team2_moneyline': get_text(ml_spans, 1),'team1_spread': get_text(h_spans, 0), 'team1_spread_odds': get_text(h_spans, 1),'team2_spread': get_text(h_spans, 2), 'team2_spread_odds': get_text(h_spans, 3),'over_total': get_text(t_spans, 0), 'over_total_odds': get_text(t_spans, 1),'under_total': get_text(t_spans, 2), 'under_total_odds': get_text(t_spans, 3)})
                    all_games_summary.append(game)
                except (NoSuchElementException, IndexError): continue
            elif 'row-CTcjEjV6yK' in row_class:
                logging.info("Reached the next league's header. Stopping WNBA scan."); break
        return all_games_summary
    except TimeoutException:
        logging.error("Could not find the WNBA section on the main matchups page."); return []

def scrape_detailed_game_odds(driver, game_url):
    logging.info(f"Scraping detailed odds from: {game_url}"); driver.get(game_url)
    all_markets_data = []
    try:
        WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.marketGroups-HjCkfKkLNt"))); time.sleep(2)
        market_groups = driver.find_elements(By.CSS_SELECTOR, "div.marketGroup-wMlWprW2iC")
        for group in market_groups:
            market_title = group.find_element(By.CSS_SELECTOR, "span.titleText-BgvECQYfHf").text
            if not group.find_elements(By.CSS_SELECTOR, "ul[data-test-id]"):
                for btn in group.find_elements(By.CSS_SELECTOR, "button"):
                    parts = btn.text.split('\n')
                    if len(parts) == 2: all_markets_data.append({'Market': market_title, 'Selection': parts[0], 'Odds': parts[1]})
                continue
            headers = [h.text for h in group.find_elements(By.CSS_SELECTOR, "ul[data-test-id] > li")]
            button_rows = group.find_elements(By.CSS_SELECTOR, ".buttonRow-zWMLOGu5YB")
            for row in button_rows:
                buttons = row.find_elements(By.TAG_NAME, 'button')
                if len(buttons) == len(headers):
                    for i, btn in enumerate(buttons):
                        parts = btn.text.split('\n')
                        if len(parts) == 2:
                            selection_name = f"{headers[i]} {parts[0]}"
                            all_markets_data.append({'Market': market_title, 'Selection': selection_name, 'Odds': parts[1]})
    except TimeoutException:
        logging.error(f"Could not load market data for URL: {game_url}")
    return pd.DataFrame(all_markets_data)

# ==============================================================================
# STEP 4: MAIN DATA PIPELINE EXECUTION
# ==============================================================================
print("\n--- Starting Data Pipeline Execution ---")
if __name__ == "__main__" and api:
    DATASET_SLUG = "zachht/wnba-odds-history"
    WORKING_DIR = "/kaggle/working"
    MAIN_CSV_PATH = os.path.join(WORKING_DIR, "wnba_main_lines_history.csv")
    DETAILED_CSV_PATH = os.path.join(WORKING_DIR, "wnba_detailed_odds_history.csv")

    try:
        logging.info(f"Downloading existing dataset: {DATASET_SLUG}...")
        api.dataset_download_files(DATASET_SLUG, path=WORKING_DIR, unzip=True)
        old_main_df = pd.read_csv(MAIN_CSV_PATH)
        old_detailed_df = pd.read_csv(DETAILED_CSV_PATH)
        logging.info("Successfully loaded existing data.")
    except Exception as e:
        if "404" in str(e): logging.warning(f"Dataset '{DATASET_SLUG}' not found (404). Creating new files.")
        else: logging.warning(f"Could not read local files (Error: {e}). Starting with fresh history.")
        old_main_df, old_detailed_df = pd.DataFrame(), pd.DataFrame()
    
    driver = None
    try:
        # YOUR WORKING OPTIONS ARE NOW USED HERE, VERBATIM
        options = webdriver.ChromeOptions()
        options.add_argument("--headless")
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")
        options.add_argument("--window-size=1920,1080")
        options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36")
        
        driver = webdriver.Chrome(options=options)
        new_main_lines_data = get_main_wnba_lines(driver)
        
        if new_main_lines_data:
            logging.info(f"SUCCESS: Scraper found {len(new_main_lines_data)} game(s).")
            scrape_timestamp = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')
            new_main_df = pd.DataFrame(new_main_lines_data)
            new_main_df['timestamp'] = scrape_timestamp
            combined_main_df = pd.concat([old_main_df, new_main_df], ignore_index=True)
            all_detailed_dfs = []
            for game in new_main_lines_data:
                detailed_df = scrape_detailed_game_odds(driver, game['game_link'])
                if not detailed_df.empty:
                    detailed_df['matchup'] = f"{game['team1']} vs {game['team2']}"
                    all_detailed_dfs.append(detailed_df)
            if all_detailed_dfs:
                new_detailed_df = pd.concat(all_detailed_dfs, ignore_index=True)
                new_detailed_df['timestamp'] = scrape_timestamp
                combined_detailed_df = pd.concat([old_detailed_df, new_detailed_df], ignore_index=True)
                logging.info(f"Saving combined data to {WORKING_DIR}...")
                combined_main_df.to_csv(MAIN_CSV_PATH, index=False)
                combined_detailed_df.to_csv(DETAILED_CSV_PATH, index=False)

                # --- START: ADD THESE LINES TO FIX THE ERROR ---
                import json
                metadata_path = os.path.join(WORKING_DIR, 'dataset-metadata.json')
                metadata = {
                  "title": "WNBA Odds History",  # Should match your dataset's title
                  "id": DATASET_SLUG,
                  "licenses": [{"name": "CC0-1.0"}]
                }
                with open(metadata_path, 'w') as f:
                    json.dump(metadata, f)
                # --- END: ADDED LINES ---
                
                version_note = f"Automated odds update from {scrape_timestamp} UTC, adding {len(new_main_lines_data)} games."
                logging.info("Pushing new dataset version to Kaggle...")
                # This line will now succeed
                api.dataset_create_version(folder=WORKING_DIR, version_notes=version_note, quiet=False, dir_mode='zip')
        else:
            logging.warning("Scraping finished, but no new WNBA games were found on the site.")
    except Exception as e:
        logging.error(f"An error occurred during the main scraping/uploading process: {e}", exc_info=True)
    finally:
        if driver: driver.quit(); logging.info("Selenium driver closed.")
else:
    logging.error("Kaggle API object was not created. Halting execution.")

print("\n--- Data Pipeline Execution Finished ---")

In [2]:
# ==============================================================================
# STEP 1: KAGGLE AUTH & PYTHON DEPENDENCIES
# ==============================================================================
print("--- Installing Python Dependencies ---")
!pip install -q selenium pandas kaggle

import os
import pandas as pd
import logging
import json
import re
from datetime import datetime
from kaggle_secrets import UserSecretsClient
from importlib import reload

# Force logging to be active so we see all messages
reload(logging)
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

print("\n--- Setting up Kaggle API Authentication ---")
api = None
try:
    user_secrets = UserSecretsClient()
    secret_value = user_secrets.get_secret("KAGGLE_JSON")
    kaggle_dir = os.path.expanduser('~/.kaggle')
    os.makedirs(kaggle_dir, exist_ok=True)
    kaggle_json_path = os.path.join(kaggle_dir, 'kaggle.json')
    with open(kaggle_json_path, 'w') as f: f.write(secret_value)
    os.chmod(kaggle_json_path, 600)
    
    from kaggle.api.kaggle_api_extended import KaggleApi
    api = KaggleApi()
    api.authenticate()
    print("Kaggle API Authentication Successful.")
except Exception as e:
    logging.critical(f"FATAL: A critical error occurred during Kaggle setup. Error: {e}")
    raise

# ==============================================================================
# STEP 2: SYSTEM INSTALLATIONS (CHROME)
# ==============================================================================
print("\n--- Installing Google Chrome & ChromeDriver ---")
!sudo apt-get update > /dev/null
!sudo apt-get install -y wget gnupg > /dev/null
!wget -q -O - https://dl.google.com/linux/linux_signing_key.pub | sudo apt-key add -
!sudo sh -c 'echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" > /etc/apt/sources.list.d/google-chrome.list'
!sudo apt-get update > /dev/null
!sudo apt-get install -y google-chrome-stable > /dev/null
!apt-get install -y chromium-chromedriver > /dev/null
!cp /usr/lib/chromium-browser/chromedriver /usr/bin &>/dev/null
print("--- Chrome & ChromeDriver Setup Complete ---")


# ==============================================================================
# STEP 3: SCRAPER FUNCTIONS USING YOUR PROVEN SELECTORS
# ==============================================================================
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException

def get_all_leagues_and_games(driver):
    """
    Scrapes the main basketball page, discovering leagues and games using your proven selectors.
    """
    url = "https://www.pinnacle.com/en/basketball/matchups/"
    logging.info(f"Navigating to matchups page: {url}")
    driver.get(url)
    try:
        WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, "onetrust-accept-btn-handler"))).click()
        logging.info("Clicked the Accept button for cookies."); time.sleep(2)
    except TimeoutException:
        logging.warning("Cookie banner not found or already handled.")

    leagues_data = {}
    current_league_name = None

    try:
        # Wait until the content is loaded, identified by the first league header
        WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "div.row-CTcjEjV6yK"))
        )
        
        # Get all rows that are either league headers or game rows
        all_rows = driver.find_elements(By.CSS_SELECTOR, ".contentBlock.square > div[class*='row-']")
        logging.info(f"Found {len(all_rows)} total rows to process on the matchups page.")

        for row in all_rows:
            row_class = row.get_attribute('class')
            
            # YOUR SELECTOR FOR A LEAGUE HEADER
            if 'row-CTcjEjV6yK' in row_class:
                try:
                    league_name = row.find_element(By.CSS_SELECTOR, "a span").text.strip()
                    if league_name:
                        current_league_name = league_name
                        leagues_data[current_league_name] = []
                        logging.info(f"Discovered new league section: {current_league_name}")
                except NoSuchElementException:
                    continue 

            # YOUR SELECTOR FOR A GAME ROW
            elif 'row-k9ktBvvTsJ' in row_class and current_league_name:
                # THIS IS YOUR EXACT, UNMODIFIED SCRAPING LOGIC FOR A GAME ROW
                try:
                    game = {}
                    link_tag = row.find_element(By.CSS_SELECTOR, "a[href*='/basketball/']")
                    teams = link_tag.find_elements(By.CSS_SELECTOR, "span.ellipsis.gameInfoLabel-EDDYv5xEfd")
                    game['team1'], game['team2'] = teams[0].text, teams[1].text
                    game['game_link'] = link_tag.get_attribute('href')
                    
                    odds_groups = row.find_elements(By.CSS_SELECTOR, "div.buttons-j19Jlcwsi9")
                    def get_text(elements, index): return elements[index].text if index < len(elements) else 'N/A'
                    
                    h_spans = odds_groups[0].find_elements(By.CSS_SELECTOR, "button span")
                    ml_spans = odds_groups[1].find_elements(By.CSS_SELECTOR, "span.price-r5BU0ynJha")
                    t_spans = odds_groups[2].find_elements(By.CSS_SELECTOR, "button span")
                    
                    game.update({'team1_moneyline': get_text(ml_spans, 0), 'team2_moneyline': get_text(ml_spans, 1),'team1_spread': get_text(h_spans, 0), 'team1_spread_odds': get_text(h_spans, 1),'team2_spread': get_text(h_spans, 2), 'team2_spread_odds': get_text(h_spans, 3),'over_total': get_text(t_spans, 0), 'over_total_odds': get_text(t_spans, 1),'under_total': get_text(t_spans, 2), 'under_total_odds': get_text(t_spans, 3)})
                    
                    leagues_data[current_league_name].append(game)
                except (NoSuchElementException, IndexError):
                    continue
    except TimeoutException:
        logging.error("Could not find any content rows on the matchups page.")
    
    return leagues_data

# YOUR DETAILED SCRAPER IS PERFECT AND UNCHANGED
def scrape_detailed_game_odds(driver, game_url):
    logging.info(f"Scraping detailed odds from: {game_url}"); driver.get(game_url)
    all_markets_data = []
    try:
        WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.marketGroups-HjCkfKkLNt"))); time.sleep(2)
        market_groups = driver.find_elements(By.CSS_SELECTOR, "div.marketGroup-wMlWprW2iC")
        for group in market_groups:
            market_title = group.find_element(By.CSS_SELECTOR, "span.titleText-BgvECQYfHf").text
            if not group.find_elements(By.CSS_SELECTOR, "ul[data-test-id]"):
                for btn in group.find_elements(By.CSS_SELECTOR, "button"):
                    parts = btn.text.split('\n')
                    if len(parts) == 2: all_markets_data.append({'Market': market_title, 'Selection': parts[0], 'Odds': parts[1]})
                continue
            headers = [h.text for h in group.find_elements(By.CSS_SELECTOR, "ul[data-test-id] > li")]
            button_rows = group.find_elements(By.CSS_SELECTOR, ".buttonRow-zWMLOGu5YB")
            for row in button_rows:
                buttons = row.find_elements(By.TAG_NAME, 'button')
                if len(buttons) == len(headers):
                    for i, btn in enumerate(buttons):
                        parts = btn.text.split('\n')
                        if len(parts) == 2:
                            selection_name = f"{headers[i]} {parts[0]}"
                            all_markets_data.append({'Market': market_title, 'Selection': selection_name, 'Odds': parts[1]})
    except TimeoutException:
        logging.error(f"Could not load market data for URL: {game_url}")
    return pd.DataFrame(all_markets_data)

def to_slug(name):
    """Converts a league name like 'WNBA Summer League' to 'wnba_summer_league' for filenames."""
    return re.sub(r'[^a-z0-9]+', '_', name.lower()).strip('_')

# ==============================================================================
# STEP 4: MAIN DATA PIPELINE EXECUTION
# ==============================================================================
print("\n--- Starting Data Pipeline Execution ---")
if __name__ == "__main__" and api:
    DATASET_SLUG = "zachht/wnba-odds-history" 
    WORKING_DIR = "/kaggle/working"
    
    driver = None
    leagues_updated = []
    try:
        options = webdriver.ChromeOptions()
        options.add_argument("--headless"); options.add_argument("--no-sandbox"); options.add_argument("--disable-dev-shm-usage")
        options.add_argument("--window-size=1920,1080")
        options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36")
        driver = webdriver.Chrome(options=options)
        
        all_leagues_games = get_all_leagues_and_games(driver)

        if not all_leagues_games:
            logging.warning("Scraping finished, but no leagues were found on the site.")
        else:
            for league_name, new_main_lines_data in all_leagues_games.items():
                if not new_main_lines_data:
                    logging.info(f"No games found for league: {league_name}. Skipping.")
                    continue

                logging.info(f"\n--- Processing League: {league_name} ({len(new_main_lines_data)} games found) ---")
                leagues_updated.append(league_name)
                league_slug = to_slug(league_name)

                MAIN_CSV_PATH = os.path.join(WORKING_DIR, f"{league_slug}_main_lines.csv")
                DETAILED_CSV_PATH = os.path.join(WORKING_DIR, f"{league_slug}_detailed_odds.csv")

                try:
                    logging.info(f"Downloading existing files for {league_name}...")
                    api.dataset_download_file(DATASET_SLUG, file_name=os.path.basename(MAIN_CSV_PATH), path=WORKING_DIR)
                    api.dataset_download_file(DATASET_SLUG, file_name=os.path.basename(DETAILED_CSV_PATH), path=WORKING_DIR)
                    old_main_df = pd.read_csv(MAIN_CSV_PATH)
                    old_detailed_df = pd.read_csv(DETAILED_CSV_PATH)
                    logging.info("Successfully loaded existing data.")
                except Exception:
                    logging.warning(f"Could not load existing data for {league_name}. Starting with fresh history files.")
                    old_main_df, old_detailed_df = pd.DataFrame(), pd.DataFrame()

                scrape_timestamp = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')
                new_main_df = pd.DataFrame(new_main_lines_data)
                new_main_df['timestamp'] = scrape_timestamp
                combined_main_df = pd.concat([old_main_df, new_main_df], ignore_index=True)
                
                all_detailed_dfs = []
                for game in new_main_lines_data:
                    detailed_df = scrape_detailed_game_odds(driver, game['game_link'])
                    if not detailed_df.empty:
                        detailed_df['matchup'] = f"{game['team1']} vs {game['team2']}"
                        all_detailed_dfs.append(detailed_df)
                
                if all_detailed_dfs:
                    new_detailed_df = pd.concat(all_detailed_dfs, ignore_index=True)
                    new_detailed_df['timestamp'] = scrape_timestamp
                    combined_detailed_df = pd.concat([old_detailed_df, new_detailed_df], ignore_index=True)
                    
                    logging.info(f"Saving combined data to local CSVs for {league_name}...")
                    combined_main_df.to_csv(MAIN_CSV_PATH, index=False)
                    combined_detailed_df.to_csv(DETAILED_CSV_PATH, index=False)
            
            if leagues_updated:
                logging.info("\n--- Finalizing and Uploading to Kaggle ---")
                metadata_path = os.path.join(WORKING_DIR, 'dataset-metadata.json')
                metadata = {"title": "Pinnacle Basketball Odds History", "id": DATASET_SLUG, "licenses": [{"name": "CC0-1.0"}]}
                with open(metadata_path, 'w') as f: json.dump(metadata, f)
                
                version_note = f"Automated odds update. Leagues updated: {', '.join(leagues_updated)}."
                logging.info(f"Pushing new dataset version. {version_note}")
                api.dataset_create_version(folder=WORKING_DIR, version_notes=version_note, quiet=False, dir_mode='zip')
            else:
                logging.warning("No games were found for any leagues. No new version will be pushed.")

    except Exception as e:
        logging.error(f"An error occurred during the main pipeline: {e}", exc_info=True)
    finally:
        if driver: driver.quit(); logging.info("Selenium driver closed.")

print("\n--- Data Pipeline Execution Finished ---")

--- Installing Python Dependencies ---

--- Setting up Kaggle API Authentication ---
Kaggle API Authentication Successful.

--- Installing Google Chrome & ChromeDriver ---
W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)
OK
W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)
--- Chrome & ChromeDriver Setup Complete ---

--- Starting Data Pipeline Execution ---


2025-08-24 20:39:03,596 - INFO - Navigating to matchups page: https://www.pinnacle.com/en/basketball/matchups/
2025-08-24 20:39:14,578 - INFO - Found 30 total rows to process on the matchups page.
2025-08-24 20:39:14,621 - INFO - Discovered new league section: WNBA
2025-08-24 20:39:14,892 - INFO - Discovered new league section: BRAZIL - CAMPEONATO PAULISTA
2025-08-24 20:39:15,625 - INFO - Discovered new league section: BRAZIL - CAMP CARIOCA U19
2025-08-24 20:39:15,873 - INFO - Discovered new league section: FIBA - AMERICUP
2025-08-24 20:39:16,143 - INFO - Discovered new league section: MEXICO - LIGA NACIONAL DE BALONCESTO PROFESIONAL
2025-08-24 20:39:16,615 - INFO - Discovered new league section: BRAZIL - PAULISTA FPB U20
2025-08-24 20:39:16,900 - INFO - Discovered new league section: FIBA - AMERICUP
2025-08-24 20:39:17,594 - INFO - Discovered new league section: WORLD - CLUB FRIENDLIES
2025-08-24 20:39:17,846 - INFO - Discovered new league section: FIBA - AMERICUP
2025-08-24 20:39:18,

Dataset URL: https://www.kaggle.com/datasets/zachht/wnba-odds-history


2025-08-24 20:39:25,445 - INFO - Saving combined data to local CSVs for WNBA...
2025-08-24 20:39:25,453 - INFO - 
--- Processing League: BRAZIL - CAMPEONATO PAULISTA (3 games found) ---
2025-08-24 20:39:25,453 - INFO - Downloading existing files for BRAZIL - CAMPEONATO PAULISTA...
2025-08-24 20:39:25,540 - INFO - Scraping detailed odds from: https://www.pinnacle.com/en/basketball/brazil-campeonato-paulista/sao-jose-vs-paulistano/1613776437/


Dataset URL: https://www.kaggle.com/datasets/zachht/wnba-odds-history


2025-08-24 20:39:45,923 - ERROR - Could not load market data for URL: https://www.pinnacle.com/en/basketball/brazil-campeonato-paulista/sao-jose-vs-paulistano/1613776437/
2025-08-24 20:39:45,925 - INFO - Scraping detailed odds from: https://www.pinnacle.com/en/basketball/brazil-campeonato-paulista/franca-basquetbol-clube-vs-corinthians-paulista/1613776438/
2025-08-24 20:40:06,211 - ERROR - Could not load market data for URL: https://www.pinnacle.com/en/basketball/brazil-campeonato-paulista/franca-basquetbol-clube-vs-corinthians-paulista/1613776438/
2025-08-24 20:40:06,213 - INFO - Scraping detailed odds from: https://www.pinnacle.com/en/basketball/brazil-campeonato-paulista/bauru-basket-sp-vs-liga-sorocabana/1613973422/
2025-08-24 20:40:26,575 - ERROR - Could not load market data for URL: https://www.pinnacle.com/en/basketball/brazil-campeonato-paulista/bauru-basket-sp-vs-liga-sorocabana/1613973422/
2025-08-24 20:40:26,576 - INFO - 
--- Processing League: BRAZIL - CAMP CARIOCA U19 (1 g

Dataset URL: https://www.kaggle.com/datasets/zachht/wnba-odds-history


2025-08-24 20:40:47,174 - ERROR - Could not load market data for URL: https://www.pinnacle.com/en/basketball/brazil-camp-carioca-u19/escolinha-de-esportes-passo-zero-vs-jacarepagua/1613967630/
2025-08-24 20:40:47,176 - INFO - 
--- Processing League: FIBA - AMERICUP (1 games found) ---
2025-08-24 20:40:47,176 - INFO - Downloading existing files for FIBA - AMERICUP...
2025-08-24 20:40:47,279 - INFO - Scraping detailed odds from: https://www.pinnacle.com/en/basketball/fiba-americup/canada-vs-puerto-rico/1613591100/


Dataset URL: https://www.kaggle.com/datasets/zachht/wnba-odds-history


2025-08-24 20:41:07,813 - ERROR - Could not load market data for URL: https://www.pinnacle.com/en/basketball/fiba-americup/canada-vs-puerto-rico/1613591100/
2025-08-24 20:41:07,814 - INFO - 
--- Processing League: MEXICO - LIGA NACIONAL DE BALONCESTO PROFESIONAL (2 games found) ---
2025-08-24 20:41:07,815 - INFO - Downloading existing files for MEXICO - LIGA NACIONAL DE BALONCESTO PROFESIONAL...
2025-08-24 20:41:07,896 - INFO - Scraping detailed odds from: https://www.pinnacle.com/en/basketball/mexico-liga-nacional-de-baloncesto-profesional/fuerza-regia-de-monterrey-vs-mineros-zacatecas/1613796679/


Dataset URL: https://www.kaggle.com/datasets/zachht/wnba-odds-history


2025-08-24 20:41:13,162 - INFO - Scraping detailed odds from: https://www.pinnacle.com/en/basketball/mexico-liga-nacional-de-baloncesto-profesional/diablos-rojos-vs-abejas-de-leon/1613797005/
2025-08-24 20:41:18,273 - INFO - Saving combined data to local CSVs for MEXICO - LIGA NACIONAL DE BALONCESTO PROFESIONAL...
2025-08-24 20:41:18,276 - INFO - 
--- Processing League: BRAZIL - PAULISTA FPB U20 (1 games found) ---
2025-08-24 20:41:18,277 - INFO - Downloading existing files for BRAZIL - PAULISTA FPB U20...
2025-08-24 20:41:18,378 - INFO - Scraping detailed odds from: https://www.pinnacle.com/en/basketball/brazil-paulista-fpb-u20/sao-jose-basketball-vs-paulistano/1613870162/


Dataset URL: https://www.kaggle.com/datasets/zachht/wnba-odds-history


2025-08-24 20:41:38,815 - ERROR - Could not load market data for URL: https://www.pinnacle.com/en/basketball/brazil-paulista-fpb-u20/sao-jose-basketball-vs-paulistano/1613870162/
2025-08-24 20:41:38,817 - INFO - 
--- Processing League: WORLD - CLUB FRIENDLIES (1 games found) ---
2025-08-24 20:41:38,818 - INFO - Downloading existing files for WORLD - CLUB FRIENDLIES...
2025-08-24 20:41:38,911 - INFO - Scraping detailed odds from: https://www.pinnacle.com/en/basketball/world-club-friendlies/changwon-lg-sakers-vs-up-fighting-maroons/1614058987/


Dataset URL: https://www.kaggle.com/datasets/zachht/wnba-odds-history


2025-08-24 20:41:43,904 - INFO - Saving combined data to local CSVs for WORLD - CLUB FRIENDLIES...
2025-08-24 20:41:43,907 - INFO - 
--- Processing League: FIBA - EUROBASKET (6 games found) ---
2025-08-24 20:41:43,908 - INFO - Downloading existing files for FIBA - EUROBASKET...
2025-08-24 20:41:43,997 - INFO - Scraping detailed odds from: https://www.pinnacle.com/en/basketball/fiba-eurobasket/great-britain-vs-lithuania/1613559800/


Dataset URL: https://www.kaggle.com/datasets/zachht/wnba-odds-history


2025-08-24 20:41:48,777 - INFO - Scraping detailed odds from: https://www.pinnacle.com/en/basketball/fiba-eurobasket/czech-republic-vs-portugal/1613551688/
2025-08-24 20:41:53,370 - INFO - Scraping detailed odds from: https://www.pinnacle.com/en/basketball/fiba-eurobasket/montenegro-vs-germany/1613559801/
2025-08-24 20:41:57,867 - INFO - Scraping detailed odds from: https://www.pinnacle.com/en/basketball/fiba-eurobasket/latvia-vs-turkiye/1613551689/
2025-08-24 20:42:16,636 - INFO - Scraping detailed odds from: https://www.pinnacle.com/en/basketball/fiba-eurobasket/sweden-vs-finland/1613551690/
2025-08-24 20:42:21,544 - INFO - Scraping detailed odds from: https://www.pinnacle.com/en/basketball/fiba-eurobasket/serbia-vs-estonia/1613561299/
2025-08-24 20:42:26,164 - INFO - Saving combined data to local CSVs for FIBA - EUROBASKET...
2025-08-24 20:42:26,167 - INFO - 
--- Finalizing and Uploading to Kaggle ---
2025-08-24 20:42:26,168 - INFO - Pushing new dataset version. Automated odds updat

Starting upload for file mexico_liga_nacional_de_baloncesto_profesional_detailed_odds.csv


100%|██████████| 8.96k/8.96k [00:00<00:00, 37.8kB/s]


Upload successful: mexico_liga_nacional_de_baloncesto_profesional_detailed_odds.csv (9KB)
Starting upload for file .virtual_documents.zip


100%|██████████| 22.0/22.0 [00:00<00:00, 117B/s]


Upload successful: .virtual_documents.zip (22B)
Starting upload for file fiba_eurobasket_main_lines.csv


100%|██████████| 1.26k/1.26k [00:00<00:00, 7.04kB/s]


Upload successful: fiba_eurobasket_main_lines.csv (1KB)
Starting upload for file world_club_friendlies_detailed_odds.csv


100%|██████████| 4.63k/4.63k [00:00<00:00, 26.6kB/s]


Upload successful: world_club_friendlies_detailed_odds.csv (5KB)
Starting upload for file world_club_friendlies_main_lines.csv


100%|██████████| 414/414 [00:00<00:00, 2.30kB/s]


Upload successful: world_club_friendlies_main_lines.csv (414B)
Starting upload for file fiba_eurobasket_detailed_odds.csv


100%|██████████| 2.75k/2.75k [00:00<00:00, 14.5kB/s]


Upload successful: fiba_eurobasket_detailed_odds.csv (3KB)
Starting upload for file wnba_detailed_odds.csv


100%|██████████| 995/995 [00:00<00:00, 5.56kB/s]


Upload successful: wnba_detailed_odds.csv (995B)
Starting upload for file wnba_main_lines.csv


100%|██████████| 379/379 [00:00<00:00, 2.10kB/s]


Upload successful: wnba_main_lines.csv (379B)
Starting upload for file mexico_liga_nacional_de_baloncesto_profesional_main_lines.csv


100%|██████████| 684/684 [00:00<00:00, 3.67kB/s]


Upload successful: mexico_liga_nacional_de_baloncesto_profesional_main_lines.csv (684B)


2025-08-24 20:42:30,832 - INFO - Selenium driver closed.



--- Data Pipeline Execution Finished ---


SyntaxError: invalid syntax (2994937114.py, line 1)