In [143]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException, ElementNotInteractableException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
import time
import pandas as pd

URL = "https://www.redbus.in/online-booking/ksrtc-kerala/?utm_source=rtchometile"

def initialize_driver():
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
    driver.maximize_window()
    return driver

def load_page(driver, url):
    driver.get(url)
    time.sleep(5)  

# Scrape bus routes
def scrape_bus_routes(driver):
    try:
        route_elements = driver.find_elements(By.CLASS_NAME, 'route')
        bus_routes_link = [route.get_attribute('href') for route in route_elements]
        bus_routes_name = [route.text.strip() for route in route_elements]
        return bus_routes_link, bus_routes_name
    except Exception as e:
        print(f"Error scraping bus routes: {e}")
        return [], []

# Handle pagination
def handle_pagination(driver, wait):
    try:
        active_page_element = driver.find_element(By.XPATH, "//div[@class='DC_117_pageTabs DC_117_pageActive']")
        active_page_number = active_page_element.text
        next_page_number = str(int(active_page_number) + 1)
        
        next_page_button_xpath = f"//div[@class='DC_117_paginationTable']//div[text()='{next_page_number}']"
        next_page_button = wait.until(EC.presence_of_element_located((By.XPATH, next_page_button_xpath)))
        driver.execute_script("arguments[0].scrollIntoView(true);", next_page_button)
        time.sleep(1)
        
        try:
            next_page_button.click()
        except ElementNotInteractableException:
            driver.execute_script("arguments[0].click();", next_page_button)        
        print(f"Navigating to page {next_page_number}")
        time.sleep(10) 
        return True  
    
    except (NoSuchElementException, TimeoutException):
        print("No more pages to paginate or pagination element not found.")
        return False  

# scraping function
def main():
    driver = initialize_driver()
    wait = WebDriverWait(driver, 10)
    load_page(driver, URL)

    all_routes_name = []
    all_routes_link = []

    while True:
        # Scrape routes on the current page
        bus_routes_link, bus_routes_name = scrape_bus_routes(driver)
        all_routes_name.extend(bus_routes_name)
        all_routes_link.extend(bus_routes_link)
        
        # Handle pagination
        if not handle_pagination(driver, wait):
            break  

    # Save route data to CSV
    df_routes = pd.DataFrame({"Route_name": all_routes_name, "Route_link": all_routes_link})
    df_routes.to_csv("ksrtc_bus_routes.csv", index=False)
    print("Data saved to bus_routes.csv")
    
    # scrape information for each route
    bus_details = []

    for i, row in df_routes.iterrows():
        link = row["Route_link"]
        routes = row["Route_name"]
        
        driver.get(link)
        time.sleep(2)
        
        # Click on all the route elements
        elements = driver.find_elements(By.XPATH, f"//a[contains(@href, '{link}')]")
        for element in elements:
            element.click()
            time.sleep(2)

        # click 'view buses' button if available
        try:
            clicks = driver.find_element(By.XPATH, "//div[@class='button']")
            clicks.click()
        except:
            continue
        time.sleep(2)

        # Scroll until page content stops updating        
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)        

        # Extract bus details
        bus_name = driver.find_elements(By.XPATH, "//div[@class='travels lh-24 f-bold d-color']")
        bus_type = driver.find_elements(By.XPATH, "//div[@class='bus-type f-12 m-top-16 l-color evBus']")
        departing_time = driver.find_elements(By.XPATH, "//*[@class='dp-time f-19 d-color f-bold']")
        reaching_time = driver.find_elements(By.XPATH, "//*[@class='bp-time f-19 d-color disp-Inline']")
        total_duration = driver.find_elements(By.XPATH, "//*[@class='dur l-color lh-24']")
        star_rating = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price = driver.find_elements(By.XPATH, '//div[@class="fare d-block"]//span')
        seat_availability = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")        
        
        # Collect bus details
        for i in range(len(bus_name)):
            bus_detail = {
                "Route_Name": routes,
                "Route_Link": link,
                "Bus_Name": bus_name[i].text if i < len(bus_name) else 'N/A',
                "Bus_Type": bus_type[i].text if i < len(bus_type) else 'N/A',
                "Departing_Time": departing_time[i].text if i < len(departing_time) else 'N/A',
                "Duration": total_duration[i].text if i < len(total_duration) else 'N/A',
                "Reaching_Time": reaching_time[i].text if i < len(reaching_time) else 'N/A',
                "Star_Rating": star_rating[i].text if i < len(star_rating) else '0',
                "Price": price[i].text if i < len(price) else 'N/A',
                "Seat_Availability": seat_availability[i].text if i < len(seat_availability) else '0'
            }
            bus_details.append(bus_detail)
        print(f"Successfully completed data extraction for route: {routes}")

    # Save detailed bus data to CSV
    df_buses = pd.DataFrame(bus_details)
    df_buses.to_csv("ksrtc_bus_details.csv", index=False)
    print("Bus details saved to bus_details.csv")

    driver.quit()

if __name__ == "__main__":
    main()


Navigating to page 2
No more pages to paginate or pagination element not found.
Data saved to bus_routes.csv
Successfully completed data extraction for route: Bangalore to Kozhikode
Successfully completed data extraction for route: Kozhikode to Ernakulam
Successfully completed data extraction for route: Kozhikode to Bangalore
Successfully completed data extraction for route: Ernakulam to Kozhikode
Successfully completed data extraction for route: Kozhikode to Thrissur
Successfully completed data extraction for route: Kozhikode to Thiruvananthapuram
Successfully completed data extraction for route: Thrissur to Kozhikode
Successfully completed data extraction for route: Bangalore to Kalpetta (kerala)
Successfully completed data extraction for route: Kalpetta (kerala) to Bangalore
Successfully completed data extraction for route: Kottayam to Kozhikode
Successfully completed data extraction for route: Thiruvananthapuram to Kozhikode
Successfully completed data extraction for route: Kannur 

In [137]:
# path=r"D:/files/project/ksrtc1.csv"

# df_buses.to_csv(path,index=False)

In [139]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException, ElementNotInteractableException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
import time
import pandas as pd

URL ="https://www.redbus.in/online-booking/apsrtc/?utm_source=rtchometile "

# Initialize WebDriver
def initialize_driver():
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
    driver.maximize_window()
    return driver

# Load the page
def load_page(driver, url):
    driver.get(url)
    time.sleep(5)  # Wait for the page to load

# Scrape bus routes
def scrape_bus_routes(driver):
    try:
        route_elements = driver.find_elements(By.CLASS_NAME, 'route')
        bus_routes_link = [route.get_attribute('href') for route in route_elements]
        bus_routes_name = [route.text.strip() for route in route_elements]
        return bus_routes_link, bus_routes_name
    except Exception as e:
        print(f"Error scraping bus routes: {e}")
        return [], []

# Handle pagination
def handle_pagination(driver, wait):
    try:
        active_page_element = driver.find_element(By.XPATH, "//div[@class='DC_117_pageTabs DC_117_pageActive']")
        active_page_number = active_page_element.text
        next_page_number = str(int(active_page_number) + 1)
        
        next_page_button_xpath = f"//div[@class='DC_117_paginationTable']//div[text()='{next_page_number}']"
        next_page_button = wait.until(EC.presence_of_element_located((By.XPATH, next_page_button_xpath)))

        driver.execute_script("arguments[0].scrollIntoView(true);", next_page_button)
        time.sleep(1)
        
        try:
            next_page_button.click()
        except ElementNotInteractableException:
            driver.execute_script("arguments[0].click();", next_page_button)
        
        print(f"Navigating to page {next_page_number}")
        time.sleep(10) 
        return True  
    
    except (NoSuchElementException, TimeoutException):
        print("No more pages to paginate or pagination element not found.")
        return False  

# Main scraping function
def main():
    driver = initialize_driver()
    wait = WebDriverWait(driver, 10)
    load_page(driver, URL)

    all_routes_name = []
    all_routes_link = []

    while True:
        # Scrape routes on the current page
        bus_routes_link, bus_routes_name = scrape_bus_routes(driver)
        all_routes_name.extend(bus_routes_name)
        all_routes_link.extend(bus_routes_link)
        
        # Handle pagination
        if not handle_pagination(driver, wait):
            break  

    # Save route data to CSV
    df_routes = pd.DataFrame({"Route_name": all_routes_name, "Route_link": all_routes_link})
    df_routes.to_csv("APsrtc_bus_routes.csv", index=False)
    print("Data saved to bus_routes.csv")
    
    # Now scrape detailed information for each route
    bus_details = []

    for i, row in df_routes.iterrows():
        link = row["Route_link"]
        routes = row["Route_name"]

        # Open the route link
        driver.get(link)
        time.sleep(2)
        
        # Click on all the route elements
        elements = driver.find_elements(By.XPATH, f"//a[contains(@href, '{link}')]")
        for element in elements:
            element.click()
            time.sleep(2)

        # Handle the 'book now' button click if available
        try:
            clicks = driver.find_element(By.XPATH, "//div[@class='button']")
            clicks.click()
        except:
            continue
        time.sleep(2)

        # Scroll until page content stops updating
        
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)
        

        # Extract bus details
        bus_name = driver.find_elements(By.XPATH, "//div[@class='travels lh-24 f-bold d-color']")
        bus_type = driver.find_elements(By.XPATH, "//div[@class='bus-type f-12 m-top-16 l-color evBus']")
        departing_time = driver.find_elements(By.XPATH, "//*[@class='dp-time f-19 d-color f-bold']")
        reaching_time = driver.find_elements(By.XPATH, "//*[@class='bp-time f-19 d-color disp-Inline']")
        total_duration = driver.find_elements(By.XPATH, "//*[@class='dur l-color lh-24']")
        star_rating = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price = driver.find_elements(By.XPATH, '//div[@class="fare d-block"]//span')
        seat_availability = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")
        
        
        # Collect bus details
        for i in range(len(bus_name)):
            bus_detail = {
                "Route_Name": routes,
                "Route_Link": link,
                "Bus_Name": bus_name[i].text if i < len(bus_name) else 'N/A',
                "Bus_Type": bus_type[i].text if i < len(bus_type) else 'N/A',
                "Departing_Time": departing_time[i].text if i < len(departing_time) else 'N/A',
                "Duration": total_duration[i].text if i < len(total_duration) else 'N/A',
                "Reaching_Time": reaching_time[i].text if i < len(reaching_time) else 'N/A',
                "Star_Rating": star_rating[i].text if i < len(star_rating) else '0',
                "Price": price[i].text if i < len(price) else 'N/A',
                "Seat_Availability": seat_availability[i].text if i < len(seat_availability) else '0'
            }
            bus_details.append(bus_detail)

        print(f"Successfully completed data extraction for route: {routes}")

    # Save detailed bus data to CSV
    df_buses = pd.DataFrame(bus_details)
    df_buses.to_csv("APsrtc_bus_details1.csv", index=False)
    print("Bus details saved to bus_details.csv")

    driver.quit()

if __name__ == "__main__":
    main()


Navigating to page 2
Navigating to page 3
Navigating to page 4
Navigating to page 5
No more pages to paginate or pagination element not found.
Data saved to bus_routes.csv
Successfully completed data extraction for route: Kakinada to Visakhapatnam
Successfully completed data extraction for route: Chittoor (Andhra Pradesh) to Bangalore
Successfully completed data extraction for route: Tirupati to Bangalore
Successfully completed data extraction for route: Hyderabad to Ongole
Successfully completed data extraction for route: Ongole to Hyderabad
Successfully completed data extraction for route: Visakhapatnam to Kakinada
Successfully completed data extraction for route: Kadiri to Bangalore
Successfully completed data extraction for route: Vinukonda to Hyderabad
Successfully completed data extraction for route: Bangalore to Kadiri
Successfully completed data extraction for route: Hyderabad to Narasaraopet
Successfully completed data extraction for route: Madanapalli to Bangalore
Successfull

In [145]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException, ElementNotInteractableException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
import time
import pandas as pd

URL = "https://www.redbus.in/online-booking/tsrtc/?utm_source=rtchometile"

def initialize_driver():
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
    driver.maximize_window()
    return driver

def load_page(driver, url):
    driver.get(url)
    time.sleep(5)  

# Scrape bus routes
def scrape_bus_routes(driver):
    try:
        route_elements = driver.find_elements(By.CLASS_NAME, 'route')
        bus_routes_link = [route.get_attribute('href') for route in route_elements]
        bus_routes_name = [route.text.strip() for route in route_elements]
        return bus_routes_link, bus_routes_name
    except Exception as e:
        print(f"Error scraping bus routes: {e}")
        return [], []

# Handle pagination
def handle_pagination(driver, wait):
    try:
        active_page_element = driver.find_element(By.XPATH, "//div[@class='DC_117_pageTabs DC_117_pageActive']")
        active_page_number = active_page_element.text
        next_page_number = str(int(active_page_number) + 1)
        
        next_page_button_xpath = f"//div[@class='DC_117_paginationTable']//div[text()='{next_page_number}']"
        next_page_button = wait.until(EC.presence_of_element_located((By.XPATH, next_page_button_xpath)))
        driver.execute_script("arguments[0].scrollIntoView(true);", next_page_button)
        time.sleep(1)
        
        try:
            next_page_button.click()
        except ElementNotInteractableException:
            driver.execute_script("arguments[0].click();", next_page_button)        
        print(f"Navigating to page {next_page_number}")
        time.sleep(10) 
        return True  
    
    except (NoSuchElementException, TimeoutException):
        print("No more pages to paginate or pagination element not found.")
        return False  

# scraping function
def main():
    driver = initialize_driver()
    wait = WebDriverWait(driver, 10)
    load_page(driver, URL)

    all_routes_name = []
    all_routes_link = []

    while True:
        # Scrape routes on the current page
        bus_routes_link, bus_routes_name = scrape_bus_routes(driver)
        all_routes_name.extend(bus_routes_name)
        all_routes_link.extend(bus_routes_link)
        
        # Handle pagination
        if not handle_pagination(driver, wait):
            break  

    # Save route data to CSV
    df_routes = pd.DataFrame({"Route_name": all_routes_name, "Route_link": all_routes_link})
    df_routes.to_csv("Tsrtc_bus_routes.csv", index=False)
    print("Data saved to bus_routes.csv")
    
    # scrape information for each route
    bus_details = []

    for i, row in df_routes.iterrows():
        link = row["Route_link"]
        routes = row["Route_name"]
        
        driver.get(link)
        time.sleep(2)
        
        # Click on all the route elements
        elements = driver.find_elements(By.XPATH, f"//a[contains(@href, '{link}')]")
        for element in elements:
            element.click()
            time.sleep(2)

        # click 'view buses' button if available
        try:
            clicks = driver.find_element(By.XPATH, "//div[@class='button']")
            clicks.click()
        except:
            continue
        time.sleep(2)

        # Scroll until page content stops updating        
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)        

        # Extract bus details
        bus_name = driver.find_elements(By.XPATH, "//div[@class='travels lh-24 f-bold d-color']")
        bus_type = driver.find_elements(By.XPATH, "//div[@class='bus-type f-12 m-top-16 l-color evBus']")
        departing_time = driver.find_elements(By.XPATH, "//*[@class='dp-time f-19 d-color f-bold']")
        reaching_time = driver.find_elements(By.XPATH, "//*[@class='bp-time f-19 d-color disp-Inline']")
        total_duration = driver.find_elements(By.XPATH, "//*[@class='dur l-color lh-24']")
        star_rating = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price = driver.find_elements(By.XPATH, '//div[@class="fare d-block"]//span')
        seat_availability = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")        
        
        # Collect bus details
        for i in range(len(bus_name)):
            bus_detail = {
                "Route_Name": routes,
                "Route_Link": link,
                "Bus_Name": bus_name[i].text if i < len(bus_name) else 'N/A',
                "Bus_Type": bus_type[i].text if i < len(bus_type) else 'N/A',
                "Departing_Time": departing_time[i].text if i < len(departing_time) else 'N/A',
                "Duration": total_duration[i].text if i < len(total_duration) else 'N/A',
                "Reaching_Time": reaching_time[i].text if i < len(reaching_time) else 'N/A',
                "Star_Rating": star_rating[i].text if i < len(star_rating) else '0',
                "Price": price[i].text if i < len(price) else 'N/A',
                "Seat_Availability": seat_availability[i].text if i < len(seat_availability) else '0'
            }
            bus_details.append(bus_detail)
        print(f"Successfully completed data extraction for route: {routes}")

    # Save detailed bus data to CSV
    df_buses = pd.DataFrame(bus_details)
    df_buses.to_csv("Tsrtc_bus_details.csv", index=False)
    print("Bus details saved to bus_details.csv")

    driver.quit()

if __name__ == "__main__":
    main()


Navigating to page 2
Navigating to page 3
No more pages to paginate or pagination element not found.
Data saved to bus_routes.csv
Successfully completed data extraction for route: Khammam to Hyderabad
Successfully completed data extraction for route: Hyderabad to Khammam
Successfully completed data extraction for route: Karimnagar to Hyderabad
Successfully completed data extraction for route: Hyderabad to Adilabad
Successfully completed data extraction for route: Kothagudem to Hyderabad
Successfully completed data extraction for route: Hyderabad to Mancherial
Successfully completed data extraction for route: Guntur (Andhra Pradesh) to Hyderabad
Successfully completed data extraction for route: Godavarikhani to Hyderabad
Successfully completed data extraction for route: Kodad to Hyderabad
Successfully completed data extraction for route: Hyderabad to Ongole
Successfully completed data extraction for route: Jagityal to Hyderabad
Successfully completed data extraction for route: Hyderabad

In [147]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException, ElementNotInteractableException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
import time
import pandas as pd

URL = "https://www.redbus.in/online-booking/rsrtc/?utm_source=rtchometile"

def initialize_driver():
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
    driver.maximize_window()
    return driver

def load_page(driver, url):
    driver.get(url)
    time.sleep(5)  

# Scrape bus routes
def scrape_bus_routes(driver):
    try:
        route_elements = driver.find_elements(By.CLASS_NAME, 'route')
        bus_routes_link = [route.get_attribute('href') for route in route_elements]
        bus_routes_name = [route.text.strip() for route in route_elements]
        return bus_routes_link, bus_routes_name
    except Exception as e:
        print(f"Error scraping bus routes: {e}")
        return [], []

# Handle pagination
def handle_pagination(driver, wait):
    try:
        active_page_element = driver.find_element(By.XPATH, "//div[@class='DC_117_pageTabs DC_117_pageActive']")
        active_page_number = active_page_element.text
        next_page_number = str(int(active_page_number) + 1)
        
        next_page_button_xpath = f"//div[@class='DC_117_paginationTable']//div[text()='{next_page_number}']"
        next_page_button = wait.until(EC.presence_of_element_located((By.XPATH, next_page_button_xpath)))
        driver.execute_script("arguments[0].scrollIntoView(true);", next_page_button)
        time.sleep(1)
        
        try:
            next_page_button.click()
        except ElementNotInteractableException:
            driver.execute_script("arguments[0].click();", next_page_button)        
        print(f"Navigating to page {next_page_number}")
        time.sleep(10) 
        return True  
    
    except (NoSuchElementException, TimeoutException):
        print("No more pages to paginate or pagination element not found.")
        return False  

# scraping function
def main():
    driver = initialize_driver()
    wait = WebDriverWait(driver, 10)
    load_page(driver, URL)

    all_routes_name = []
    all_routes_link = []

    while True:
        # Scrape routes on the current page
        bus_routes_link, bus_routes_name = scrape_bus_routes(driver)
        all_routes_name.extend(bus_routes_name)
        all_routes_link.extend(bus_routes_link)
        
        # Handle pagination
        if not handle_pagination(driver, wait):
            break  

    # Save route data to CSV
    df_routes = pd.DataFrame({"Route_name": all_routes_name, "Route_link": all_routes_link})
    df_routes.to_csv("Rsrtc_bus_routes.csv", index=False)
    print("Data saved to bus_routes.csv")
    
    # scrape information for each route
    bus_details = []

    for i, row in df_routes.iterrows():
        link = row["Route_link"]
        routes = row["Route_name"]
        
        driver.get(link)
        time.sleep(2)
        
        # Click on all the route elements
        elements = driver.find_elements(By.XPATH, f"//a[contains(@href, '{link}')]")
        for element in elements:
            element.click()
            time.sleep(2)

        # click 'view buses' button if available
        try:
            clicks = driver.find_element(By.XPATH, "//div[@class='button']")
            clicks.click()
        except:
            continue
        time.sleep(2)

        # Scroll until page content stops updating        
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)        

        # Extract bus details
        bus_name = driver.find_elements(By.XPATH, "//div[@class='travels lh-24 f-bold d-color']")
        bus_type = driver.find_elements(By.XPATH, "//div[@class='bus-type f-12 m-top-16 l-color evBus']")
        departing_time = driver.find_elements(By.XPATH, "//*[@class='dp-time f-19 d-color f-bold']")
        reaching_time = driver.find_elements(By.XPATH, "//*[@class='bp-time f-19 d-color disp-Inline']")
        total_duration = driver.find_elements(By.XPATH, "//*[@class='dur l-color lh-24']")
        star_rating = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price = driver.find_elements(By.XPATH, '//div[@class="fare d-block"]//span')
        seat_availability = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")        
        
        # Collect bus details
        for i in range(len(bus_name)):
            bus_detail = {
                "Route_Name": routes,
                "Route_Link": link,
                "Bus_Name": bus_name[i].text if i < len(bus_name) else 'N/A',
                "Bus_Type": bus_type[i].text if i < len(bus_type) else 'N/A',
                "Departing_Time": departing_time[i].text if i < len(departing_time) else 'N/A',
                "Duration": total_duration[i].text if i < len(total_duration) else 'N/A',
                "Reaching_Time": reaching_time[i].text if i < len(reaching_time) else 'N/A',
                "Star_Rating": star_rating[i].text if i < len(star_rating) else '0',
                "Price": price[i].text if i < len(price) else 'N/A',
                "Seat_Availability": seat_availability[i].text if i < len(seat_availability) else '0'
            }
            bus_details.append(bus_detail)
        print(f"Successfully completed data extraction for route: {routes}")

    # Save detailed bus data to CSV
    df_buses = pd.DataFrame(bus_details)
    df_buses.to_csv("Rsrtc_bus_details.csv", index=False)
    print("Bus details saved to bus_details.csv")

    driver.quit()

if __name__ == "__main__":
    main()

Navigating to page 2
No more pages to paginate or pagination element not found.
Data saved to bus_routes.csv
Successfully completed data extraction for route: Jodhpur to Ajmer
Successfully completed data extraction for route: Beawar (Rajasthan) to Jaipur (Rajasthan)
Successfully completed data extraction for route: Udaipur to Jodhpur
Successfully completed data extraction for route: Jaipur (Rajasthan) to Jodhpur
Successfully completed data extraction for route: Sikar to Jaipur (Rajasthan)
Successfully completed data extraction for route: Aligarh (uttar pradesh) to Jaipur (Rajasthan)
Successfully completed data extraction for route: Kota(Rajasthan) to Jaipur (Rajasthan)
Successfully completed data extraction for route: Jaipur (Rajasthan) to Aligarh (uttar pradesh)
Successfully completed data extraction for route: Kishangarh to Jaipur (Rajasthan)
Successfully completed data extraction for route: Sikar to Bikaner
Successfully completed data extraction for route: Jodhpur to Beawar (Rajasth

In [149]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException, ElementNotInteractableException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
import time
import pandas as pd

URL = "https://www.redbus.in/online-booking/south-bengal-state-transport-corporation-sbstc/?utm_source=rtchometile"

def initialize_driver():
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
    driver.maximize_window()
    return driver

def load_page(driver, url):
    driver.get(url)
    time.sleep(5)  

# Scrape bus routes
def scrape_bus_routes(driver):
    try:
        route_elements = driver.find_elements(By.CLASS_NAME, 'route')
        bus_routes_link = [route.get_attribute('href') for route in route_elements]
        bus_routes_name = [route.text.strip() for route in route_elements]
        return bus_routes_link, bus_routes_name
    except Exception as e:
        print(f"Error scraping bus routes: {e}")
        return [], []

# Handle pagination
def handle_pagination(driver, wait):
    try:
        active_page_element = driver.find_element(By.XPATH, "//div[@class='DC_117_pageTabs DC_117_pageActive']")
        active_page_number = active_page_element.text
        next_page_number = str(int(active_page_number) + 1)
        
        next_page_button_xpath = f"//div[@class='DC_117_paginationTable']//div[text()='{next_page_number}']"
        next_page_button = wait.until(EC.presence_of_element_located((By.XPATH, next_page_button_xpath)))
        driver.execute_script("arguments[0].scrollIntoView(true);", next_page_button)
        time.sleep(1)
        
        try:
            next_page_button.click()
        except ElementNotInteractableException:
            driver.execute_script("arguments[0].click();", next_page_button)        
        print(f"Navigating to page {next_page_number}")
        time.sleep(10) 
        return True  
    
    except (NoSuchElementException, TimeoutException):
        print("No more pages to paginate or pagination element not found.")
        return False  

# scraping function
def main():
    driver = initialize_driver()
    wait = WebDriverWait(driver, 10)
    load_page(driver, URL)

    all_routes_name = []
    all_routes_link = []

    while True:
        # Scrape routes on the current page
        bus_routes_link, bus_routes_name = scrape_bus_routes(driver)
        all_routes_name.extend(bus_routes_name)
        all_routes_link.extend(bus_routes_link)
        
        # Handle pagination
        if not handle_pagination(driver, wait):
            break  

    # Save route data to CSV
    df_routes = pd.DataFrame({"Route_name": all_routes_name, "Route_link": all_routes_link})
    df_routes.to_csv("SBsrtc_bus_routes.csv", index=False)
    print("Data saved to bus_routes.csv")
    
    # scrape information for each route
    bus_details = []

    for i, row in df_routes.iterrows():
        link = row["Route_link"]
        routes = row["Route_name"]
        
        driver.get(link)
        time.sleep(2)
        
        # Click on all the route elements
        elements = driver.find_elements(By.XPATH, f"//a[contains(@href, '{link}')]")
        for element in elements:
            element.click()
            time.sleep(2)

        # click 'view buses' button if available
        try:
            clicks = driver.find_element(By.XPATH, "//div[@class='button']")
            clicks.click()
        except:
            continue
        time.sleep(2)

        # Scroll until page content stops updating        
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)        

        # Extract bus details
        bus_name = driver.find_elements(By.XPATH, "//div[@class='travels lh-24 f-bold d-color']")
        bus_type = driver.find_elements(By.XPATH, "//div[@class='bus-type f-12 m-top-16 l-color evBus']")
        departing_time = driver.find_elements(By.XPATH, "//*[@class='dp-time f-19 d-color f-bold']")
        reaching_time = driver.find_elements(By.XPATH, "//*[@class='bp-time f-19 d-color disp-Inline']")
        total_duration = driver.find_elements(By.XPATH, "//*[@class='dur l-color lh-24']")
        star_rating = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price = driver.find_elements(By.XPATH, '//div[@class="fare d-block"]//span')
        seat_availability = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")        
        
        # Collect bus details
        for i in range(len(bus_name)):
            bus_detail = {
                "Route_Name": routes,
                "Route_Link": link,
                "Bus_Name": bus_name[i].text if i < len(bus_name) else 'N/A',
                "Bus_Type": bus_type[i].text if i < len(bus_type) else 'N/A',
                "Departing_Time": departing_time[i].text if i < len(departing_time) else 'N/A',
                "Duration": total_duration[i].text if i < len(total_duration) else 'N/A',
                "Reaching_Time": reaching_time[i].text if i < len(reaching_time) else 'N/A',
                "Star_Rating": star_rating[i].text if i < len(star_rating) else '0',
                "Price": price[i].text if i < len(price) else 'N/A',
                "Seat_Availability": seat_availability[i].text if i < len(seat_availability) else '0'
            }
            bus_details.append(bus_detail)
        print(f"Successfully completed data extraction for route: {routes}")

    # Save detailed bus data to CSV
    df_buses = pd.DataFrame(bus_details)
    df_buses.to_csv("SBsrtc_bus_details.csv", index=False)
    print("Bus details saved to bus_details.csv")

    driver.quit()

if __name__ == "__main__":
    main()

Navigating to page 2
Navigating to page 3
Navigating to page 4
Navigating to page 5
No more pages to paginate or pagination element not found.
Data saved to bus_routes.csv
Successfully completed data extraction for route: Durgapur (West Bengal) to Kolkata
Successfully completed data extraction for route: Kolkata to Burdwan
Successfully completed data extraction for route: Kolkata to Durgapur (West Bengal)
Successfully completed data extraction for route: Haldia to Kolkata
Successfully completed data extraction for route: Kolkata to Haldia
Successfully completed data extraction for route: Midnapore to Kolkata
Successfully completed data extraction for route: Kolkata to Arambagh (West Bengal)
Successfully completed data extraction for route: Kolkata to Digha
Successfully completed data extraction for route: Digha to Kolkata
Successfully completed data extraction for route: Kolkata to Bankura
Successfully completed data extraction for route: Kolkata to Asansol (West Bengal)
Successfully c

In [151]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException, ElementNotInteractableException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
import time
import pandas as pd

URL = "https://www.redbus.in/online-booking/hrtc/?utm_source=rtchometile"

def initialize_driver():
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
    driver.maximize_window()
    return driver

def load_page(driver, url):
    driver.get(url)
    time.sleep(5)  

# Scrape bus routes
def scrape_bus_routes(driver):
    try:
        route_elements = driver.find_elements(By.CLASS_NAME, 'route')
        bus_routes_link = [route.get_attribute('href') for route in route_elements]
        bus_routes_name = [route.text.strip() for route in route_elements]
        return bus_routes_link, bus_routes_name
    except Exception as e:
        print(f"Error scraping bus routes: {e}")
        return [], []

# Handle pagination
def handle_pagination(driver, wait):
    try:
        active_page_element = driver.find_element(By.XPATH, "//div[@class='DC_117_pageTabs DC_117_pageActive']")
        active_page_number = active_page_element.text
        next_page_number = str(int(active_page_number) + 1)
        
        next_page_button_xpath = f"//div[@class='DC_117_paginationTable']//div[text()='{next_page_number}']"
        next_page_button = wait.until(EC.presence_of_element_located((By.XPATH, next_page_button_xpath)))
        driver.execute_script("arguments[0].scrollIntoView(true);", next_page_button)
        time.sleep(1)
        
        try:
            next_page_button.click()
        except ElementNotInteractableException:
            driver.execute_script("arguments[0].click();", next_page_button)        
        print(f"Navigating to page {next_page_number}")
        time.sleep(10) 
        return True  
    
    except (NoSuchElementException, TimeoutException):
        print("No more pages to paginate or pagination element not found.")
        return False  

# scraping function
def main():
    driver = initialize_driver()
    wait = WebDriverWait(driver, 10)
    load_page(driver, URL)

    all_routes_name = []
    all_routes_link = []

    while True:
        # Scrape routes on the current page
        bus_routes_link, bus_routes_name = scrape_bus_routes(driver)
        all_routes_name.extend(bus_routes_name)
        all_routes_link.extend(bus_routes_link)
        
        # Handle pagination
        if not handle_pagination(driver, wait):
            break  

    # Save route data to CSV
    df_routes = pd.DataFrame({"Route_name": all_routes_name, "Route_link": all_routes_link})
    df_routes.to_csv("Hrtc_bus_routes.csv", index=False)
    print("Data saved to bus_routes.csv")
    
    # scrape information for each route
    bus_details = []

    for i, row in df_routes.iterrows():
        link = row["Route_link"]
        routes = row["Route_name"]
        
        driver.get(link)
        time.sleep(2)
        
        # Click on all the route elements
        elements = driver.find_elements(By.XPATH, f"//a[contains(@href, '{link}')]")
        for element in elements:
            element.click()
            time.sleep(2)

        # click 'view buses' button if available
        try:
            clicks = driver.find_element(By.XPATH, "//div[@class='button']")
            clicks.click()
        except:
            continue
        time.sleep(2)

        # Scroll until page content stops updating        
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)        

        # Extract bus details
        bus_name = driver.find_elements(By.XPATH, "//div[@class='travels lh-24 f-bold d-color']")
        bus_type = driver.find_elements(By.XPATH, "//div[@class='bus-type f-12 m-top-16 l-color evBus']")
        departing_time = driver.find_elements(By.XPATH, "//*[@class='dp-time f-19 d-color f-bold']")
        reaching_time = driver.find_elements(By.XPATH, "//*[@class='bp-time f-19 d-color disp-Inline']")
        total_duration = driver.find_elements(By.XPATH, "//*[@class='dur l-color lh-24']")
        star_rating = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price = driver.find_elements(By.XPATH, '//div[@class="fare d-block"]//span')
        seat_availability = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")        
        
        # Collect bus details
        for i in range(len(bus_name)):
            bus_detail = {
                "Route_Name": routes,
                "Route_Link": link,
                "Bus_Name": bus_name[i].text if i < len(bus_name) else 'N/A',
                "Bus_Type": bus_type[i].text if i < len(bus_type) else 'N/A',
                "Departing_Time": departing_time[i].text if i < len(departing_time) else 'N/A',
                "Duration": total_duration[i].text if i < len(total_duration) else 'N/A',
                "Reaching_Time": reaching_time[i].text if i < len(reaching_time) else 'N/A',
                "Star_Rating": star_rating[i].text if i < len(star_rating) else '0',
                "Price": price[i].text if i < len(price) else 'N/A',
                "Seat_Availability": seat_availability[i].text if i < len(seat_availability) else '0'
            }
            bus_details.append(bus_detail)
        print(f"Successfully completed data extraction for route: {routes}")

    # Save detailed bus data to CSV
    df_buses = pd.DataFrame(bus_details)
    df_buses.to_csv("Hrtc_bus_details.csv", index=False)
    print("Bus details saved to bus_details.csv")

    driver.quit()

if __name__ == "__main__":
    main()

Navigating to page 2
Navigating to page 3
Navigating to page 4
No more pages to paginate or pagination element not found.
Data saved to bus_routes.csv
Successfully completed data extraction for route: Delhi to Shimla
Successfully completed data extraction for route: Hamirpur (Himachal Pradesh) to Chandigarh
Successfully completed data extraction for route: Chandigarh to Hamirpur (Himachal Pradesh)
Successfully completed data extraction for route: Shimla to Delhi
Successfully completed data extraction for route: Hamirpur (Himachal Pradesh) to Delhi
Successfully completed data extraction for route: Delhi to Hamirpur (Himachal Pradesh)
Successfully completed data extraction for route: Kangra to Chandigarh
Successfully completed data extraction for route: Delhi to Chandigarh
Successfully completed data extraction for route: Palampur to Chandigarh
Successfully completed data extraction for route: Dharamshala (Himachal Pradesh) to Chandigarh
Successfully completed data extraction for route: 

In [153]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException, ElementNotInteractableException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
import time
import pandas as pd

URL = "https://www.redbus.in/online-booking/uttar-pradesh-state-road-transport-corporation-upsrtc/?utm_source=rtchometile"

def initialize_driver():
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
    driver.maximize_window()
    return driver

def load_page(driver, url):
    driver.get(url)
    time.sleep(5)  

# Scrape bus routes
def scrape_bus_routes(driver):
    try:
        route_elements = driver.find_elements(By.CLASS_NAME, 'route')
        bus_routes_link = [route.get_attribute('href') for route in route_elements]
        bus_routes_name = [route.text.strip() for route in route_elements]
        return bus_routes_link, bus_routes_name
    except Exception as e:
        print(f"Error scraping bus routes: {e}")
        return [], []

# Handle pagination
def handle_pagination(driver, wait):
    try:
        active_page_element = driver.find_element(By.XPATH, "//div[@class='DC_117_pageTabs DC_117_pageActive']")
        active_page_number = active_page_element.text
        next_page_number = str(int(active_page_number) + 1)
        
        next_page_button_xpath = f"//div[@class='DC_117_paginationTable']//div[text()='{next_page_number}']"
        next_page_button = wait.until(EC.presence_of_element_located((By.XPATH, next_page_button_xpath)))
        driver.execute_script("arguments[0].scrollIntoView(true);", next_page_button)
        time.sleep(1)
        
        try:
            next_page_button.click()
        except ElementNotInteractableException:
            driver.execute_script("arguments[0].click();", next_page_button)        
        print(f"Navigating to page {next_page_number}")
        time.sleep(10) 
        return True  
    
    except (NoSuchElementException, TimeoutException):
        print("No more pages to paginate or pagination element not found.")
        return False  

# scraping function
def main():
    driver = initialize_driver()
    wait = WebDriverWait(driver, 10)
    load_page(driver, URL)

    all_routes_name = []
    all_routes_link = []

    while True:
        # Scrape routes on the current page
        bus_routes_link, bus_routes_name = scrape_bus_routes(driver)
        all_routes_name.extend(bus_routes_name)
        all_routes_link.extend(bus_routes_link)
        
        # Handle pagination
        if not handle_pagination(driver, wait):
            break  

    # Save route data to CSV
    df_routes = pd.DataFrame({"Route_name": all_routes_name, "Route_link": all_routes_link})
    df_routes.to_csv("UPsrtc_bus_routes.csv", index=False)
    print("Data saved to bus_routes.csv")
    
    # scrape information for each route
    bus_details = []

    for i, row in df_routes.iterrows():
        link = row["Route_link"]
        routes = row["Route_name"]
        
        driver.get(link)
        time.sleep(2)
        
        # Click on all the route elements
        elements = driver.find_elements(By.XPATH, f"//a[contains(@href, '{link}')]")
        for element in elements:
            element.click()
            time.sleep(2)

        # click 'view buses' button if available
        try:
            clicks = driver.find_element(By.XPATH, "//div[@class='button']")
            clicks.click()
        except:
            continue
        time.sleep(2)

        # Scroll until page content stops updating        
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)        

        # Extract bus details
        bus_name = driver.find_elements(By.XPATH, "//div[@class='travels lh-24 f-bold d-color']")
        bus_type = driver.find_elements(By.XPATH, "//div[@class='bus-type f-12 m-top-16 l-color evBus']")
        departing_time = driver.find_elements(By.XPATH, "//*[@class='dp-time f-19 d-color f-bold']")
        reaching_time = driver.find_elements(By.XPATH, "//*[@class='bp-time f-19 d-color disp-Inline']")
        total_duration = driver.find_elements(By.XPATH, "//*[@class='dur l-color lh-24']")
        star_rating = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price = driver.find_elements(By.XPATH, '//div[@class="fare d-block"]//span')
        seat_availability = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")        
        
        # Collect bus details
        for i in range(len(bus_name)):
            bus_detail = {
                "Route_Name": routes,
                "Route_Link": link,
                "Bus_Name": bus_name[i].text if i < len(bus_name) else 'N/A',
                "Bus_Type": bus_type[i].text if i < len(bus_type) else 'N/A',
                "Departing_Time": departing_time[i].text if i < len(departing_time) else 'N/A',
                "Duration": total_duration[i].text if i < len(total_duration) else 'N/A',
                "Reaching_Time": reaching_time[i].text if i < len(reaching_time) else 'N/A',
                "Star_Rating": star_rating[i].text if i < len(star_rating) else '0',
                "Price": price[i].text if i < len(price) else 'N/A',
                "Seat_Availability": seat_availability[i].text if i < len(seat_availability) else '0'
            }
            bus_details.append(bus_detail)
        print(f"Successfully completed data extraction for route: {routes}")

    # Save detailed bus data to CSV
    df_buses = pd.DataFrame(bus_details)
    df_buses.to_csv("UPsrtc_bus_details.csv", index=False)
    print("Bus details saved to bus_details.csv")

    driver.quit()

if __name__ == "__main__":
    main()

Navigating to page 2
Navigating to page 3
Navigating to page 4
Navigating to page 5
No more pages to paginate or pagination element not found.
Data saved to bus_routes.csv
Successfully completed data extraction for route: Delhi to Bareilly (Uttar Pradesh)
Successfully completed data extraction for route: Delhi to Lucknow
Successfully completed data extraction for route: Bareilly (Uttar Pradesh) to Delhi
Successfully completed data extraction for route: Lucknow to Gorakhpur (uttar pradesh)
Successfully completed data extraction for route: Delhi to Aligarh (uttar pradesh)
Successfully completed data extraction for route: Lucknow to Delhi
Successfully completed data extraction for route: Delhi to Farrukhabad (Uttar Pradesh)
Successfully completed data extraction for route: Delhi to Sitapur (Uttar Pradesh)
Successfully completed data extraction for route: Delhi to Gorakhpur (uttar pradesh)
Successfully completed data extraction for route: Farrukhabad (Uttar Pradesh) to Delhi
Successfully c

In [163]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException, ElementNotInteractableException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
import time
import pandas as pd

URL = "https://www.redbus.in/online-booking/bihar-state-road-transport-corporation-bsrtc/?utm_source=rtchometile"

def initialize_driver():
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
    driver.maximize_window()
    return driver

def load_page(driver, url):
    driver.get(url)
    time.sleep(5)  

# Scrape bus routes
def scrape_bus_routes(driver):
    try:
        route_elements = driver.find_elements(By.CLASS_NAME, 'route')
        bus_routes_link = [route.get_attribute('href') for route in route_elements]
        bus_routes_name = [route.text.strip() for route in route_elements]
        return bus_routes_link, bus_routes_name
    except Exception as e:
        print(f"Error scraping bus routes: {e}")
        return [], []

# Handle pagination
def handle_pagination(driver, wait):
    try:
        active_page_element = driver.find_element(By.XPATH, "//div[@class='DC_117_pageTabs DC_117_pageActive']")
        active_page_number = active_page_element.text
        next_page_number = str(int(active_page_number) + 1)
        
        next_page_button_xpath = f"//div[@class='DC_117_paginationTable']//div[text()='{next_page_number}']"
        next_page_button = wait.until(EC.presence_of_element_located((By.XPATH, next_page_button_xpath)))
        driver.execute_script("arguments[0].scrollIntoView(true);", next_page_button)
        time.sleep(1)
        
        try:
            next_page_button.click()
        except ElementNotInteractableException:
            driver.execute_script("arguments[0].click();", next_page_button)        
        print(f"Navigating to page {next_page_number}")
        time.sleep(10) 
        return True  
    
    except (NoSuchElementException, TimeoutException):
        print("No more pages to paginate or pagination element not found.")
        return False  

# scraping function
def main():
    driver = initialize_driver()
    wait = WebDriverWait(driver, 10)
    load_page(driver, URL)

    all_routes_name = []
    all_routes_link = []

    while True:
        # Scrape routes on the current page
        bus_routes_link, bus_routes_name = scrape_bus_routes(driver)
        all_routes_name.extend(bus_routes_name)
        all_routes_link.extend(bus_routes_link)
        
        # Handle pagination
        if not handle_pagination(driver, wait):
            break  

    # Save route data to CSV
    df_routes = pd.DataFrame({"Route_name": all_routes_name, "Route_link": all_routes_link})
    df_routes.to_csv("Bsrtc_bus_routes.csv", index=False)
    print("Data saved to bus_routes.csv")
    
    # scrape information for each route
    bus_details = []

    for i, row in df_routes.iterrows():
        link = row["Route_link"]
        routes = row["Route_name"]
        
        driver.get(link)
        time.sleep(2)
        
        # Click on all the route elements
        elements = driver.find_elements(By.XPATH, f"//a[contains(@href, '{link}')]")
        for element in elements:
            element.click()
            time.sleep(2)

        # click 'view buses' button if available
        try:
            clicks = driver.find_element(By.XPATH, "//div[@class='button']")
            clicks.click()
        except:
            continue
        time.sleep(2)

        # Scroll until page content stops updating        
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)        

        # Extract bus details
        bus_name = driver.find_elements(By.XPATH, "//div[@class='travels lh-24 f-bold d-color']")
        bus_type = driver.find_elements(By.XPATH, "//div[@class='bus-type f-12 m-top-16 l-color evBus']")
        departing_time = driver.find_elements(By.XPATH, "//*[@class='dp-time f-19 d-color f-bold']")
        reaching_time = driver.find_elements(By.XPATH, "//*[@class='bp-time f-19 d-color disp-Inline']")
        total_duration = driver.find_elements(By.XPATH, "//*[@class='dur l-color lh-24']")
        star_rating = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price = driver.find_elements(By.XPATH, '//div[@class="fare d-block"]//span')
        seat_availability = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")        
        
        # Collect bus details
        for i in range(len(bus_name)):
            bus_detail = {
                "Route_Name": routes,
                "Route_Link": link,
                "Bus_Name": bus_name[i].text if i < len(bus_name) else 'N/A',
                "Bus_Type": bus_type[i].text if i < len(bus_type) else 'N/A',
                "Departing_Time": departing_time[i].text if i < len(departing_time) else 'N/A',
                "Duration": total_duration[i].text if i < len(total_duration) else 'N/A',
                "Reaching_Time": reaching_time[i].text if i < len(reaching_time) else 'N/A',
                "Star_Rating": star_rating[i].text if i < len(star_rating) else '0',
                "Price": price[i].text if i < len(price) else 'N/A',
                "Seat_Availability": seat_availability[i].text if i < len(seat_availability) else '0'
            }
            bus_details.append(bus_detail)
        print(f"Successfully completed data extraction for route: {routes}")

    # Save detailed bus data to CSV
    df_buses = pd.DataFrame(bus_details)
    df_buses.to_csv("Bsrtc_bus_details.csv", index=False)
    print("Bus details saved to bus_details.csv")

    driver.quit()

if __name__ == "__main__":
    main()

Navigating to page 2
Navigating to page 3
Navigating to page 4
No more pages to paginate or pagination element not found.
Data saved to bus_routes.csv
Successfully completed data extraction for route: Patna (Bihar) to Motihari
Successfully completed data extraction for route: Bettiah to Patna (Bihar)
Successfully completed data extraction for route: Delhi to Motihari
Successfully completed data extraction for route: Motihari to Agra
Successfully completed data extraction for route: Agra to Motihari
Bus details saved to bus_details.csv


In [165]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException, ElementNotInteractableException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
import time
import pandas as pd

URL = "https://www.redbus.in/online-booking/astc/?utm_source=rtchometile"

def initialize_driver():
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
    driver.maximize_window()
    return driver

def load_page(driver, url):
    driver.get(url)
    time.sleep(5)  

# Scrape bus routes
def scrape_bus_routes(driver):
    try:
        route_elements = driver.find_elements(By.CLASS_NAME, 'route')
        bus_routes_link = [route.get_attribute('href') for route in route_elements]
        bus_routes_name = [route.text.strip() for route in route_elements]
        return bus_routes_link, bus_routes_name
    except Exception as e:
        print(f"Error scraping bus routes: {e}")
        return [], []

# Handle pagination
def handle_pagination(driver, wait):
    try:
        active_page_element = driver.find_element(By.XPATH, "//div[@class='DC_117_pageTabs DC_117_pageActive']")
        active_page_number = active_page_element.text
        next_page_number = str(int(active_page_number) + 1)
        
        next_page_button_xpath = f"//div[@class='DC_117_paginationTable']//div[text()='{next_page_number}']"
        next_page_button = wait.until(EC.presence_of_element_located((By.XPATH, next_page_button_xpath)))
        driver.execute_script("arguments[0].scrollIntoView(true);", next_page_button)
        time.sleep(1)
        
        try:
            next_page_button.click()
        except ElementNotInteractableException:
            driver.execute_script("arguments[0].click();", next_page_button)        
        print(f"Navigating to page {next_page_number}")
        time.sleep(10) 
        return True  
    
    except (NoSuchElementException, TimeoutException):
        print("No more pages to paginate or pagination element not found.")
        return False  

# scraping function
def main():
    driver = initialize_driver()
    wait = WebDriverWait(driver, 10)
    load_page(driver, URL)

    all_routes_name = []
    all_routes_link = []

    while True:
        # Scrape routes on the current page
        bus_routes_link, bus_routes_name = scrape_bus_routes(driver)
        all_routes_name.extend(bus_routes_name)
        all_routes_link.extend(bus_routes_link)
        
        # Handle pagination
        if not handle_pagination(driver, wait):
            break  

    # Save route data to CSV
    df_routes = pd.DataFrame({"Route_name": all_routes_name, "Route_link": all_routes_link})
    df_routes.to_csv("Astc_bus_routes.csv", index=False)
    print("Data saved to bus_routes.csv")
    
    # scrape information for each route
    bus_details = []

    for i, row in df_routes.iterrows():
        link = row["Route_link"]
        routes = row["Route_name"]
        
        driver.get(link)
        time.sleep(2)
        
        # Click on all the route elements
        elements = driver.find_elements(By.XPATH, f"//a[contains(@href, '{link}')]")
        for element in elements:
            element.click()
            time.sleep(2)

        # click 'view buses' button if available
        try:
            clicks = driver.find_element(By.XPATH, "//div[@class='button']")
            clicks.click()
        except:
            continue
        time.sleep(2)

        # Scroll until page content stops updating        
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)        

        # Extract bus details
        bus_name = driver.find_elements(By.XPATH, "//div[@class='travels lh-24 f-bold d-color']")
        bus_type = driver.find_elements(By.XPATH, "//div[@class='bus-type f-12 m-top-16 l-color evBus']")
        departing_time = driver.find_elements(By.XPATH, "//*[@class='dp-time f-19 d-color f-bold']")
        reaching_time = driver.find_elements(By.XPATH, "//*[@class='bp-time f-19 d-color disp-Inline']")
        total_duration = driver.find_elements(By.XPATH, "//*[@class='dur l-color lh-24']")
        star_rating = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price = driver.find_elements(By.XPATH, '//div[@class="fare d-block"]//span')
        seat_availability = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")        
        
        # Collect bus details
        for i in range(len(bus_name)):
            bus_detail = {
                "Route_Name": routes,
                "Route_Link": link,
                "Bus_Name": bus_name[i].text if i < len(bus_name) else 'N/A',
                "Bus_Type": bus_type[i].text if i < len(bus_type) else 'N/A',
                "Departing_Time": departing_time[i].text if i < len(departing_time) else 'N/A',
                "Duration": total_duration[i].text if i < len(total_duration) else 'N/A',
                "Reaching_Time": reaching_time[i].text if i < len(reaching_time) else 'N/A',
                "Star_Rating": star_rating[i].text if i < len(star_rating) else '0',
                "Price": price[i].text if i < len(price) else 'N/A',
                "Seat_Availability": seat_availability[i].text if i < len(seat_availability) else '0'
            }
            bus_details.append(bus_detail)
        print(f"Successfully completed data extraction for route: {routes}")

    # Save detailed bus data to CSV
    df_buses = pd.DataFrame(bus_details)
    df_buses.to_csv("Astc_bus_details.csv", index=False)
    print("Bus details saved to bus_details.csv")

    driver.quit()

if __name__ == "__main__":
    main()

Navigating to page 2
Navigating to page 3
Navigating to page 4
Navigating to page 5
No more pages to paginate or pagination element not found.
Data saved to bus_routes.csv
Successfully completed data extraction for route: Tezpur to Guwahati
Successfully completed data extraction for route: Guwahati to Tezpur
Successfully completed data extraction for route: Nagaon (Assam) to Guwahati
Successfully completed data extraction for route: Guwahati to Nagaon (Assam)
Successfully completed data extraction for route: Jorhat to Tinsukia
Successfully completed data extraction for route: Tinsukia to Jorhat
Successfully completed data extraction for route: Guwahati to Kaliabor
Bus details saved to bus_details.csv


In [167]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException, ElementNotInteractableException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
import time
import pandas as pd

URL = "https://www.redbus.in/online-booking/wbtc-ctc/?utm_source=rtchometile"

def initialize_driver():
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
    driver.maximize_window()
    return driver

def load_page(driver, url):
    driver.get(url)
    time.sleep(5)  

# Scrape bus routes
def scrape_bus_routes(driver):
    try:
        route_elements = driver.find_elements(By.CLASS_NAME, 'route')
        bus_routes_link = [route.get_attribute('href') for route in route_elements]
        bus_routes_name = [route.text.strip() for route in route_elements]
        return bus_routes_link, bus_routes_name
    except Exception as e:
        print(f"Error scraping bus routes: {e}")
        return [], []

# Handle pagination
def handle_pagination(driver, wait):
    try:
        active_page_element = driver.find_element(By.XPATH, "//div[@class='DC_117_pageTabs DC_117_pageActive']")
        active_page_number = active_page_element.text
        next_page_number = str(int(active_page_number) + 1)
        
        next_page_button_xpath = f"//div[@class='DC_117_paginationTable']//div[text()='{next_page_number}']"
        next_page_button = wait.until(EC.presence_of_element_located((By.XPATH, next_page_button_xpath)))
        driver.execute_script("arguments[0].scrollIntoView(true);", next_page_button)
        time.sleep(1)
        
        try:
            next_page_button.click()
        except ElementNotInteractableException:
            driver.execute_script("arguments[0].click();", next_page_button)        
        print(f"Navigating to page {next_page_number}")
        time.sleep(10) 
        return True  
    
    except (NoSuchElementException, TimeoutException):
        print("No more pages to paginate or pagination element not found.")
        return False  

# scraping function
def main():
    driver = initialize_driver()
    wait = WebDriverWait(driver, 10)
    load_page(driver, URL)

    all_routes_name = []
    all_routes_link = []

    while True:
        # Scrape routes on the current page
        bus_routes_link, bus_routes_name = scrape_bus_routes(driver)
        all_routes_name.extend(bus_routes_name)
        all_routes_link.extend(bus_routes_link)
        
        # Handle pagination
        if not handle_pagination(driver, wait):
            break  

    # Save route data to CSV
    df_routes = pd.DataFrame({"Route_name": all_routes_name, "Route_link": all_routes_link})
    df_routes.to_csv("WBtc_bus_routes.csv", index=False)
    print("Data saved to bus_routes.csv")
    
    # scrape information for each route
    bus_details = []

    for i, row in df_routes.iterrows():
        link = row["Route_link"]
        routes = row["Route_name"]
        
        driver.get(link)
        time.sleep(2)
        
        # Click on all the route elements
        elements = driver.find_elements(By.XPATH, f"//a[contains(@href, '{link}')]")
        for element in elements:
            element.click()
            time.sleep(2)

        # click 'view buses' button if available
        try:
            clicks = driver.find_element(By.XPATH, "//div[@class='button']")
            clicks.click()
        except:
            continue
        time.sleep(2)

        # Scroll until page content stops updating        
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)        

        # Extract bus details
        bus_name = driver.find_elements(By.XPATH, "//div[@class='travels lh-24 f-bold d-color']")
        bus_type = driver.find_elements(By.XPATH, "//div[@class='bus-type f-12 m-top-16 l-color evBus']")
        departing_time = driver.find_elements(By.XPATH, "//*[@class='dp-time f-19 d-color f-bold']")
        reaching_time = driver.find_elements(By.XPATH, "//*[@class='bp-time f-19 d-color disp-Inline']")
        total_duration = driver.find_elements(By.XPATH, "//*[@class='dur l-color lh-24']")
        star_rating = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price = driver.find_elements(By.XPATH, '//div[@class="fare d-block"]//span')
        seat_availability = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")        
        
        # Collect bus details
        for i in range(len(bus_name)):
            bus_detail = {
                "Route_Name": routes,
                "Route_Link": link,
                "Bus_Name": bus_name[i].text if i < len(bus_name) else 'N/A',
                "Bus_Type": bus_type[i].text if i < len(bus_type) else 'N/A',
                "Departing_Time": departing_time[i].text if i < len(departing_time) else 'N/A',
                "Duration": total_duration[i].text if i < len(total_duration) else 'N/A',
                "Reaching_Time": reaching_time[i].text if i < len(reaching_time) else 'N/A',
                "Star_Rating": star_rating[i].text if i < len(star_rating) else '0',
                "Price": price[i].text if i < len(price) else 'N/A',
                "Seat_Availability": seat_availability[i].text if i < len(seat_availability) else '0'
            }
            bus_details.append(bus_detail)
        print(f"Successfully completed data extraction for route: {routes}")

    # Save detailed bus data to CSV
    df_buses = pd.DataFrame(bus_details)
    df_buses.to_csv("WBtc_bus_details.csv", index=False)
    print("Bus details saved to bus_details.csv")

    driver.quit()

if __name__ == "__main__":
    main()

Navigating to page 2
Navigating to page 3
Navigating to page 4
No more pages to paginate or pagination element not found.
Data saved to bus_routes.csv
Successfully completed data extraction for route: Digha to Barasat (West Bengal)
Successfully completed data extraction for route: Durgapur (West Bengal) to Kolkata
Successfully completed data extraction for route: Digha to Kolkata
Successfully completed data extraction for route: Barasat (West Bengal) to Digha
Successfully completed data extraction for route: Kolkata to Durgapur (West Bengal)
Successfully completed data extraction for route: Kolkata to Digha
Successfully completed data extraction for route: Barasat (West Bengal) to Contai (Kanthi)
Successfully completed data extraction for route: Barasat (West Bengal) to Nandakumar (west bengal)
Successfully completed data extraction for route: Barasat (West Bengal) to Kolaghat
Successfully completed data extraction for route: Kolkata to Suri
Successfully completed data extraction for r

In [169]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException, ElementNotInteractableException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
import time
import pandas as pd

URL = "https://redbus.in/online-booking/pepsu/?utm_source=rtchometile"

def initialize_driver():
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
    driver.maximize_window()
    return driver

def load_page(driver, url):
    driver.get(url)
    time.sleep(5)  

# Scrape bus routes
def scrape_bus_routes(driver):
    try:
        route_elements = driver.find_elements(By.CLASS_NAME, 'route')
        bus_routes_link = [route.get_attribute('href') for route in route_elements]
        bus_routes_name = [route.text.strip() for route in route_elements]
        return bus_routes_link, bus_routes_name
    except Exception as e:
        print(f"Error scraping bus routes: {e}")
        return [], []

# Handle pagination
def handle_pagination(driver, wait):
    try:
        active_page_element = driver.find_element(By.XPATH, "//div[@class='DC_117_pageTabs DC_117_pageActive']")
        active_page_number = active_page_element.text
        next_page_number = str(int(active_page_number) + 1)
        
        next_page_button_xpath = f"//div[@class='DC_117_paginationTable']//div[text()='{next_page_number}']"
        next_page_button = wait.until(EC.presence_of_element_located((By.XPATH, next_page_button_xpath)))
        driver.execute_script("arguments[0].scrollIntoView(true);", next_page_button)
        time.sleep(1)
        
        try:
            next_page_button.click()
        except ElementNotInteractableException:
            driver.execute_script("arguments[0].click();", next_page_button)        
        print(f"Navigating to page {next_page_number}")
        time.sleep(10) 
        return True  
    
    except (NoSuchElementException, TimeoutException):
        print("No more pages to paginate or pagination element not found.")
        return False  

# scraping function
def main():
    driver = initialize_driver()
    wait = WebDriverWait(driver, 10)
    load_page(driver, URL)

    all_routes_name = []
    all_routes_link = []

    while True:
        # Scrape routes on the current page
        bus_routes_link, bus_routes_name = scrape_bus_routes(driver)
        all_routes_name.extend(bus_routes_name)
        all_routes_link.extend(bus_routes_link)
        
        # Handle pagination
        if not handle_pagination(driver, wait):
            break  

    # Save route data to CSV
    df_routes = pd.DataFrame({"Route_name": all_routes_name, "Route_link": all_routes_link})
    df_routes.to_csv("Psrtc_bus_routes.csv", index=False)
    print("Data saved to bus_routes.csv")
    
    # scrape information for each route
    bus_details = []

    for i, row in df_routes.iterrows():
        link = row["Route_link"]
        routes = row["Route_name"]
        
        driver.get(link)
        time.sleep(2)
        
        # Click on all the route elements
        elements = driver.find_elements(By.XPATH, f"//a[contains(@href, '{link}')]")
        for element in elements:
            element.click()
            time.sleep(2)

        # click 'view buses' button if available
        try:
            clicks = driver.find_element(By.XPATH, "//div[@class='button']")
            clicks.click()
        except:
            continue
        time.sleep(2)

        # Scroll until page content stops updating        
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)        

        # Extract bus details
        bus_name = driver.find_elements(By.XPATH, "//div[@class='travels lh-24 f-bold d-color']")
        bus_type = driver.find_elements(By.XPATH, "//div[@class='bus-type f-12 m-top-16 l-color evBus']")
        departing_time = driver.find_elements(By.XPATH, "//*[@class='dp-time f-19 d-color f-bold']")
        reaching_time = driver.find_elements(By.XPATH, "//*[@class='bp-time f-19 d-color disp-Inline']")
        total_duration = driver.find_elements(By.XPATH, "//*[@class='dur l-color lh-24']")
        star_rating = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price = driver.find_elements(By.XPATH, '//div[@class="fare d-block"]//span')
        seat_availability = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")        
        
        # Collect bus details
        for i in range(len(bus_name)):
            bus_detail = {
                "Route_Name": routes,
                "Route_Link": link,
                "Bus_Name": bus_name[i].text if i < len(bus_name) else 'N/A',
                "Bus_Type": bus_type[i].text if i < len(bus_type) else 'N/A',
                "Departing_Time": departing_time[i].text if i < len(departing_time) else 'N/A',
                "Duration": total_duration[i].text if i < len(total_duration) else 'N/A',
                "Reaching_Time": reaching_time[i].text if i < len(reaching_time) else 'N/A',
                "Star_Rating": star_rating[i].text if i < len(star_rating) else '0',
                "Price": price[i].text if i < len(price) else 'N/A',
                "Seat_Availability": seat_availability[i].text if i < len(seat_availability) else '0'
            }
            bus_details.append(bus_detail)
        print(f"Successfully completed data extraction for route: {routes}")

    # Save detailed bus data to CSV
    df_buses = pd.DataFrame(bus_details)
    df_buses.to_csv("Psrtc_bus_details.csv", index=False)
    print("Bus details saved to bus_details.csv")

    driver.quit()

if __name__ == "__main__":
    main()

Navigating to page 2
Navigating to page 3
No more pages to paginate or pagination element not found.
Data saved to bus_routes.csv
Successfully completed data extraction for route: Patiala to Delhi
Successfully completed data extraction for route: Ludhiana to Delhi
Successfully completed data extraction for route: Delhi to Ludhiana
Successfully completed data extraction for route: Ludhiana to Delhi Airport
Successfully completed data extraction for route: Delhi Airport to Patiala
Successfully completed data extraction for route: Chandigarh to Patiala
Successfully completed data extraction for route: Jalandhar to Delhi
Successfully completed data extraction for route: Delhi Airport to Ludhiana
Successfully completed data extraction for route: Jalandhar to Delhi Airport
Successfully completed data extraction for route: Phagwara to Delhi
Successfully completed data extraction for route: Delhi Airport to Jalandhar
Successfully completed data extraction for route: Delhi to Amritsar
Successfu

In [188]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException, ElementNotInteractableException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
import time
import pandas as pd

URL = "https://www.redbus.in/online-booking/chandigarh-transport-undertaking-ctu"

def initialize_driver():
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
    driver.maximize_window()
    return driver

def load_page(driver, url):
    driver.get(url)
    time.sleep(5)  

# Scrape bus routes
def scrape_bus_routes(driver):
    try:
        route_elements = driver.find_elements(By.CLASS_NAME, 'route')
        bus_routes_link = [route.get_attribute('href') for route in route_elements]
        bus_routes_name = [route.text.strip() for route in route_elements]
        return bus_routes_link, bus_routes_name
    except Exception as e:
        print(f"Error scraping bus routes: {e}")
        return [], []

# Handle pagination
def handle_pagination(driver, wait):
    try:
        active_page_element = driver.find_element(By.XPATH, "//div[@class='DC_117_pageTabs DC_117_pageActive']")
        active_page_number = active_page_element.text
        next_page_number = str(int(active_page_number) + 1)
        
        next_page_button_xpath = f"//div[@class='DC_117_paginationTable']//div[text()='{next_page_number}']"
        next_page_button = wait.until(EC.presence_of_element_located((By.XPATH, next_page_button_xpath)))
        driver.execute_script("arguments[0].scrollIntoView(true);", next_page_button)
        time.sleep(1)
        
        try:
            next_page_button.click()
        except ElementNotInteractableException:
            driver.execute_script("arguments[0].click();", next_page_button)        
        print(f"Navigating to page {next_page_number}")
        time.sleep(10) 
        return True  
    
    except (NoSuchElementException, TimeoutException):
        print("No more pages to paginate or pagination element not found.")
        return False  

# scraping function
def main():
    driver = initialize_driver()
    wait = WebDriverWait(driver, 10)
    load_page(driver, URL)

    all_routes_name = []
    all_routes_link = []

    while True:
        # Scrape routes on the current page
        bus_routes_link, bus_routes_name = scrape_bus_routes(driver)
        all_routes_name.extend(bus_routes_name)
        all_routes_link.extend(bus_routes_link)
        
        # Handle pagination
        if not handle_pagination(driver, wait):
            break  

    # Save route data to CSV
    df_routes = pd.DataFrame({"Route_name": all_routes_name, "Route_link": all_routes_link})
    df_routes.to_csv("CTU_bus_routes.csv", index=False)
    print("Data saved to bus_routes.csv")
    
    # scrape information for each route
    bus_details = []

    for i, row in df_routes.iterrows():
        link = row["Route_link"]
        routes = row["Route_name"]
        
        driver.get(link)
        time.sleep(2)
        
        # Click on all the route elements
        elements = driver.find_elements(By.XPATH, f"//a[contains(@href, '{link}')]")
        for element in elements:
            element.click()
            time.sleep(2)

        # click 'view buses' button if available
        try:
            clicks = driver.find_element(By.XPATH, "//div[@class='button']")
            clicks.click()
        except:
            continue
        time.sleep(2)

        # Scroll until page content stops updating        
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)        

        # Extract bus details
        bus_name = driver.find_elements(By.XPATH, "//div[@class='travels lh-24 f-bold d-color']")
        bus_type = driver.find_elements(By.XPATH, "//div[@class='bus-type f-12 m-top-16 l-color evBus']")
        departing_time = driver.find_elements(By.XPATH, "//*[@class='dp-time f-19 d-color f-bold']")
        reaching_time = driver.find_elements(By.XPATH, "//*[@class='bp-time f-19 d-color disp-Inline']")
        total_duration = driver.find_elements(By.XPATH, "//*[@class='dur l-color lh-24']")
        star_rating = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price = driver.find_elements(By.XPATH, '//div[@class="fare d-block"]//span')
        seat_availability = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")        
        
        # Collect bus details
        for i in range(len(bus_name)):
            bus_detail = {
                "Route_Name": routes,
                "Route_Link": link,
                "Bus_Name": bus_name[i].text if i < len(bus_name) else 'N/A',
                "Bus_Type": bus_type[i].text if i < len(bus_type) else 'N/A',
                "Departing_Time": departing_time[i].text if i < len(departing_time) else 'N/A',
                "Duration": total_duration[i].text if i < len(total_duration) else 'N/A',
                "Reaching_Time": reaching_time[i].text if i < len(reaching_time) else 'N/A',
                "Star_Rating": star_rating[i].text if i < len(star_rating) else '0',
                "Price": price[i].text if i < len(price) else 'N/A',
                "Seat_Availability": seat_availability[i].text if i < len(seat_availability) else '0'
            }
            bus_details.append(bus_detail)
        print(f"Successfully completed data extraction for route: {routes}")

    # Save detailed bus data to CSV
    df_buses = pd.DataFrame(bus_details)
    df_buses.to_csv("CTU_bus_details.csv", index=False)
    print("Bus details saved to bus_details.csv")

    driver.quit()

if __name__ == "__main__":
    main()

Navigating to page 2
Navigating to page 3
No more pages to paginate or pagination element not found.
Data saved to bus_routes.csv
Successfully completed data extraction for route: Delhi to Chandigarh
Successfully completed data extraction for route: Chandigarh to Delhi
Successfully completed data extraction for route: Yamuna Nagar to Chandigarh
Successfully completed data extraction for route: Ludhiana to Chandigarh
Successfully completed data extraction for route: Chandigarh to Yamuna Nagar
Successfully completed data extraction for route: Chandigarh to Baijnath
Successfully completed data extraction for route: Hamirpur (Himachal Pradesh) to Chandigarh
Successfully completed data extraction for route: Chandigarh to Ludhiana
Successfully completed data extraction for route: Chandigarh to Dehradun
Successfully completed data extraction for route: Chandigarh to Pathankot
Successfully completed data extraction for route: Dehradun to Chandigarh
Successfully completed data extraction for ro