In [25]:
import time
from io import StringIO

import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

We need to fetch the third table from the page.

Let's write a function that scrapes this table, so we can reuse it for both election years:

In [26]:
def scrape_election_data(url, csv_filename, table_index=2, timeout=20):
    """Scrape one HTML table from a web page with Selenium and save it as CSV.

    The page is opened in a Chrome browser, the function waits until enough
    <table> elements exist in the DOM, and the rendered page source is parsed
    with pandas.

    Parameters
    ----------
    url : str
        Address of the page to load.
    csv_filename : str
        Path of the CSV file to write (written without the row index).
    table_index : int, default 2
        Zero-based, non-negative position of the table to extract among the
        tables found by ``pd.read_html``. The default selects the third table.
    timeout : float, default 20
        Maximum number of seconds to wait for the tables to appear.

    Returns
    -------
    pandas.DataFrame or None
        The extracted table, or ``None`` when fewer than ``table_index + 1``
        tables were parsed or when any error occurred. Errors are reported
        with ``print`` rather than raised.
    """
    # Set up the Selenium WebDriver
    driver = webdriver.Chrome()

    # Everything that happens after the browser has started lives inside
    # try/finally, so the browser is closed even if navigation itself fails.
    try:
        # Navigate to the page
        driver.get(url)

        # Wait until the DOM contains enough <table> tags to reach table_index
        required_tables = table_index + 1
        WebDriverWait(driver, timeout).until(
            lambda d: len(d.find_elements(By.TAG_NAME, 'table')) >= required_tables
        )

        # Get the page source after the wait condition has been satisfied
        html = driver.page_source

        # Wrap the markup in StringIO: passing literal HTML to read_html is
        # deprecated and emits a FutureWarning on recent pandas versions.
        dfs = pd.read_html(StringIO(html))

        # Print the number of tables found
        print(f"Number of tables found: {len(dfs)}")

        # pandas may parse fewer tables than the DOM contains, so re-check
        # that the requested index actually exists before using it.
        if len(dfs) <= table_index:
            print("Not enough tables found to extract data.")
            return None

        election_data = dfs[table_index]

        # Save the DataFrame to a CSV file
        election_data.to_csv(csv_filename, index=False)
        print(f"Data saved to {csv_filename}")
        return election_data

    except Exception as e:
        # Best-effort scraping: report the problem and signal failure with None
        print(f"An error occurred: {e}")
        return None

    finally:
        # Close the WebDriver
        driver.quit()
    

In [27]:

# Example usage: scrape the 2024 Lok Sabha all-states table from indiavotes.com
# and store it under ../data/raw/.
url_2024 = "https://www.indiavotes.com/lok-sabha/2024/all-states/18/0"
csv_filename_2024 = '../data/raw/turnout_data_2024.csv'

# Opens a Chrome browser, writes the CSV and returns the table as a DataFrame
# (None if scraping failed, in which case the cell displays nothing).
df_2024 = scrape_election_data(url_2024, csv_filename_2024)
df_2024



  dfs = pd.read_html(html)


Number of tables found: 3
Data saved to ../data/raw/turnout_data_2024.csv


Unnamed: 0_level_0,#,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,Unnamed: 10_level_0,Unnamed: 11_level_0
Unnamed: 0_level_1,#,PC Name,No,Type,State,Winning Candidate,Party,Electors,Votes,Turnout,Margin,Margin %
0,1,Adilabad,1,ST,Telangana,Godam Nagesh,Bharatiya Janta Party,1650175,1235597,74.9 %,90652.0,7.3%
1,2,Agra,18,SC,Uttar Pradesh [2000 Onwards],Prof S P Singh Baghel,Bharatiya Janta Party,2072685,1123779,54.2 %,271294.0,24.1%
2,3,Ahmadnagar,37,GEN,Maharashtra,Nilesh Dnyandev Lanke,Nationalist Congress Party – Sharadchandra Pawar,1981866,1325477,66.9 %,28929.0,2.2%
3,4,Ahmedabad East,7,GEN,Gujarat,Hasmukhbhai Patel (H.S.PATEL),Bharatiya Janta Party,2038162,1128339,55.4 %,461755.0,40.9%
4,5,Ahmedabad West,8,SC,Gujarat,Dineshbhai Makwana (ADVOCATE),Bharatiya Janta Party,1726987,966646,56.0 %,286437.0,29.6%
...,...,...,...,...,...,...,...,...,...,...,...,...
538,539,Wardha,8,GEN,Maharashtra,Amar Sharadrao Kale,Nationalist Congress Party – Sharadchandra Pawar,1682771,1095012,65.1 %,81648.0,7.5%
539,540,Wayanad,4,GEN,Kerala,Rahul Gandhi,Indian National Congress,1462423,1084653,74.2 %,364422.0,33.6%
540,541,West Delhi,6,GEN,Delhi [1977 Onwards],Kamaljeet Sehrawat,Bharatiya Janta Party,2587977,1524494,58.9 %,199013.0,13.1%
541,542,Yavatmal-Washi,14,GEN,Maharashtra,Sanjay Uttamrao Deshmukh,Shiv Sena (Uddhav Balasaheb Thackrey),1940916,1225530,63.1 %,94473.0,7.7%


In [28]:
# Reuse the same function for the 2019 Lok Sabha page; only the URL and the
# output path differ from the 2024 call.
url_2019 = "https://www.indiavotes.com/lok-sabha/2019/all-states/17/0"
csv_filename_2019 = '../data/raw/turnout_data_2019.csv'

# Returns the scraped table as a DataFrame (None if scraping failed).
df_2019 = scrape_election_data(url_2019, csv_filename_2019)
df_2019

  dfs = pd.read_html(html)


Number of tables found: 3
Data saved to ../data/raw/turnout_data_2019.csv


Unnamed: 0_level_0,#,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,Unnamed: 10_level_0,Unnamed: 11_level_0
Unnamed: 0_level_1,#,PC Name,No,Type,State,Winning Candidate,Party,Electors,Votes,Turnout,Margin,Margin %
0,1,Adilabad,1,ST,Telangana,Soyam Bapu Rao,Bharatiya Janta Party,1382837,1063730,77.9 %,58560,5.5%
1,2,Agra,18,SC,Uttar Pradesh [2000 Onwards],Satyapal Singh Baghel,Bharatiya Janta Party,1866262,1145323,61.7 %,211546,18.5%
2,3,Ahmadnagar,37,GEN,Maharashtra,Dr. Sujay Radhakrishna Vikhepatil,Bharatiya Janta Party,1793677,1203797,67.3 %,281474,23.4%
3,4,Ahmedabad East,7,GEN,Gujarat,Patel Hasmukhbhai Somabhai,Bharatiya Janta Party,1713598,1116367,65.7 %,434330,38.9%
4,5,Ahmedabad West,8,SC,Gujarat,Dr. Kirit P. Solanki,Bharatiya Janta Party,1580673,997024,64.0 %,321546,32.3%
...,...,...,...,...,...,...,...,...,...,...,...,...
536,537,Wardha,8,GEN,Maharashtra,Ramdas Chandrabhanji Tadas,Bharatiya Janta Party,1679788,1072570,64.2 %,187191,17.5%
537,538,Wayanad,4,GEN,Kerala,Rahul Gandhi,Indian National Congress,1306141,1092197,83.8 %,431770,39.5%
538,539,West Delhi,6,GEN,Delhi [1977 Onwards],Sant Prasad Sinha,Bharatiya Janta Party,2039410,1441601,71.1 %,578486,40.1%
539,540,Yavatmal-Washi,14,GEN,Maharashtra,Bhavana Pundlikrao Gawali,Shiv Sena,1812059,1174220,65.0 %,117939,10.0%
