In [3]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from selenium.common.exceptions import TimeoutException, NoSuchElementException, StaleElementReferenceException
import time
import mysql.connector
import streamlit as st
import pandas as pd

# Initialize the Chrome driver
def initialize_driver():
    """Create a Chrome WebDriver session and return it with the window maximized.

    The browser is started with notification prompts blocked. No headless,
    incognito or user-agent options are applied.

    Returns:
        The ``webdriver.Chrome`` instance that was started.
    """
    options = Options()

    # Content-setting value 2 blocks browser notification prompts.
    options.add_experimental_option(
        "prefs",
        {"profile.default_content_setting_values.notifications": 2},
    )

    # ChromeDriverManager resolves a ChromeDriver binary for the installed
    # Chrome and returns its path, which the Service wraps.
    browser = webdriver.Chrome(
        service=Service(ChromeDriverManager().install()),
        options=options,
    )

    browser.maximize_window()
    return browser

# Launch RedBus URL
def launch_redbus(driver):
    """Open the RedBus home page and block until the document has loaded.

    Args:
        driver: WebDriver session used to load the page.

    Navigation is given a 30 second page-load timeout; afterwards the
    function waits up to 10 seconds for ``document.readyState`` to become
    ``"complete"``.
    """

    def _document_ready(browser):
        return browser.execute_script("return document.readyState") == "complete"

    driver.set_page_load_timeout(30)
    driver.get("https://www.redbus.in/")
    WebDriverWait(driver, 10).until(_document_ready)


def store_xpath():
    """Return a fresh dictionary mapping element keys to their XPath strings.

    The keys (including their existing spellings such as ``route_clcik`` and
    ``orgin_place``) are looked up by the other functions in this file, so
    they must not be renamed here.
    """
    # Every per-bus field lives under the same result-row container; only the
    # trailing column index differs.
    bus_row = "//ul[@class='bus-items']/div/li/div/div/div[1]"
    route_header = "//div[@class='h2-tag-seo']"

    return {
        'apsrtc_click': "//*[@id='Carousel']/div[2]/div[1]/div[2]",
        'krstc_click': "//*[@id='Carousel']/div[3]/div[1]/div[2]",
        'route_clcik': "//div[@id='root']/div/div[4]/div[2]/div[1]/a",
        'viewbus_click': "//*[@id='result-section']/div[1]/div/div[2]/div/div[4]/div[2]",
        'orgin_place': route_header + "/span[1]",
        'destination_place': route_header + "/span[2]",
        'busname_text': bus_row + "/div[1]/div[1]",
        'bustime_text': bus_row + "/div[2]/div[1]",
        'duration_text': bus_row + "/div[3]/div[1]",
        'offboard_text': bus_row + "/div[4]/div[1]",
        'rating_text': bus_row + "/div[5]/div[1]",
        'price_text': bus_row + "/div[6]/div[1]/div[@class='fare d-block']/span",
        'seatavail_text': bus_row + "/div[7]/div[1]",
        'footer_logo': "//img[@class='rb_footer_logo']",
        'error1_viewbus': "//span[text()='Please try again']",
        'error2_viewbus': "//div[contains(text(),'unable to fetch results')]",
    }


def click_element_by_xpath(driver, element_key):
    """Click the page element registered under ``element_key``.

    Args:
        driver: WebDriver session showing the page.
        element_key: Key into the dictionary returned by ``store_xpath()``.

    The function is best-effort: an unknown key or any failure while
    locating/clicking is reported with ``print`` and never raised.
    """
    try:
        xpath = store_xpath().get(element_key)
        if not xpath:
            print(f"No XPath found for the key: {element_key}")
            return

        # Presence in the DOM is enough; the click below is issued via JS.
        locator = (By.XPATH, xpath)
        target = WebDriverWait(driver, 15).until(
            EC.presence_of_element_located(locator)
        )

        # Bring the element into view, then back off 100px so that nothing
        # pinned to the top of the viewport covers it.
        driver.execute_script("arguments[0].scrollIntoView(true);", target)
        driver.execute_script("window.scrollBy(0, -100);")

        # JavaScript click instead of a native WebElement click.
        driver.execute_script("arguments[0].click();", target)

        print(f"Successfully clicked the element with XPath: {xpath}")
    except Exception as e:
        print(f"Error occurred while clicking the element: {e}")

def alert_handle(driver):
    """Wait for a JavaScript alert and accept it.

    Args:
        driver: WebDriver session on which the alert is expected.

    Waits up to 10 seconds for an alert. Every outcome (accepted, timed out,
    alert vanished, other error) is reported with ``print``; nothing is
    raised to the caller.
    """
    # Imported here because the file-level selenium import does not include
    # this name; without it the ``except NoAlertPresentException`` clause
    # itself fails with NameError as soon as a non-timeout error occurs.
    from selenium.common.exceptions import NoAlertPresentException

    try:
        # Wait for the alert to be present (increase wait time if necessary)
        WebDriverWait(driver, 10).until(EC.alert_is_present(), 'Timed out waiting for alert to appear.')

        # Switch to the alert
        alert = driver.switch_to.alert

        # Accept the alert; use alert.dismiss() instead to cancel it.
        alert.accept()

        print("Alert accepted/dismissed successfully")
    except TimeoutException:
        print("Alert did not appear within the wait time.")
    except NoAlertPresentException:
        print("No alert present when attempting to switch to it.")
    except Exception as e:
        # Catch any other exceptions and print them out for debugging purposes
        print(f"An error occurred: {e}")

def scroll_to_end(driver, timeout=10, max_scrolls=100):
    """Scroll to the bottom of the page repeatedly until it stops growing.

    After each scroll the function waits up to ``timeout`` seconds for
    ``document.body.scrollHeight`` to increase. If it does not increase in
    that time the page is considered fully loaded.

    Args:
        driver: WebDriver session showing the page to scroll.
        timeout: Seconds to wait for the page to grow after each scroll.
        max_scrolls: Upper bound on the number of scrolls performed.

    Returns:
        True when the page height stopped growing (bottom reached), False
        when ``max_scrolls`` scrolls were done and the page was still growing.

    Note:
        A page that needs longer than ``timeout`` to load more content is
        indistinguishable from the end of the page; raise ``timeout`` for
        slow connections.
    """

    def _page_height(browser):
        return browser.execute_script("return document.body.scrollHeight")

    last_height = _page_height(driver)

    for attempt in range(1, max_scrolls + 1):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

        try:
            # The wait only returns once the height has grown, so "no more
            # content" can only ever show up as a timeout. Handling it here
            # (rather than comparing heights afterwards) is what makes the
            # bottom-of-page outcome reachable.
            WebDriverWait(driver, timeout).until(
                lambda d, previous=last_height: _page_height(d) > previous
            )
        except TimeoutException:
            print("Reached the bottom of the page. No more new content to load.")
            return True

        last_height = _page_height(driver)
        print(f"Scroll attempt {attempt}: New content loaded.")

    print(f"Max scrolls ({max_scrolls}) reached. Stopping early.")
    return False



def retrieve_bus_data(driver, element_key, timeout=10):
    """Collect the text of every element matching a registered XPath.

    Args:
        driver: WebDriver session showing the results page.
        element_key: Key into the dictionary returned by ``store_xpath()``.
        timeout: Seconds to wait for at least one matching element.

    Returns:
        A list with the ``.text`` of each matching element, in document
        order. Elements that go stale while being read are skipped. An
        empty list is returned on timeout or on any other error.
    """
    try:
        xpath = store_xpath().get(element_key)

        # Make sure at least one match exists before scrolling.
        WebDriverWait(driver, timeout).until(
            EC.presence_of_all_elements_located((By.XPATH, xpath))
        )

        # Scroll through the whole page first so that lazily loaded rows
        # are in the DOM when the elements are queried.
        scroll_to_end(driver)

        collected = []
        matches = driver.find_elements(By.XPATH, xpath)
        for position, node in enumerate(matches, start=1):
            try:
                value = node.text
            except StaleElementReferenceException:
                print(f"Element {position} became stale. Skipping this element.")
            else:
                collected.append(value)
                print(f"Text {position}: {value}")  # debugging aid

        return collected

    except TimeoutException:
        print(f"Timed out waiting for elements matching XPath: {xpath}")
        return []

    except NoSuchElementException:
        print(f"No such element found for the XPath: {xpath}")
        return []

    except Exception as e:
        print(f"An unexpected error occurred: {str(e)}")
        return []


def store_texts_in_mysql(orgin, destination, busname, onboardtime, travel_time, arrival_time, rating, price, seatavail, table_name="bus_data_table"):
    """Insert one row per scraped bus into a MySQL table, creating it if needed.

    The per-bus lists may have different lengths; shorter ones are padded so
    that every row is complete (``""`` for name/time fields, ``"0"`` for
    rating, price and seat availability).

    Args:
        orgin: List whose first item is the origin place; repeated on every
            row. An empty list is stored as ``""``.
        destination: List whose first item is the destination place; repeated
            on every row. An empty list is stored as ``""``.
        busname: List of bus names.
        onboardtime: List of boarding times.
        travel_time: List of journey durations.
        arrival_time: List of arrival times.
        rating: List of ratings.
        price: List of prices.
        seatavail: List of seat-availability texts.
        table_name: Target table. It is interpolated directly into the SQL
            text, so it must be a trusted identifier, never user input.

    Errors raised by ``mysql.connector`` are not caught here and propagate
    to the caller; the cursor and connection are closed in either case.
    """
    # (values, filler) for every per-bus column, in table-column order.
    per_bus_columns = [
        (busname, ""),
        (onboardtime, ""),
        (travel_time, ""),
        (arrival_time, ""),
        (rating, "0"),
        (price, "0"),
        (seatavail, "0"),
    ]
    max_length = max(len(values) for values, _ in per_bus_columns)

    # Pad every column to the longest one so zip() below yields full rows.
    padded = [
        list(values) + [filler] * (max_length - len(values))
        for values, filler in per_bus_columns
    ]

    # A failed scrape hands over an empty list; fall back to "" instead of
    # raising IndexError on orgin[0] / destination[0].
    origin_place = orgin[0] if orgin else ""
    destination_place = destination[0] if destination else ""

    rows = [(origin_place, destination_place, *fields) for fields in zip(*padded)]

    # NOTE(review): credentials are hard-coded; consider moving them to
    # configuration or environment variables.
    conn = mysql.connector.connect(
        host="127.0.0.1",
        user="root",
        password="vicky",
        database="test"
    )
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(f'''
                CREATE TABLE IF NOT EXISTS {table_name} (
                    id INT AUTO_INCREMENT PRIMARY KEY,
                    Orgin_Place VARCHAR(255) NOT NULL,
                    Destination_Place VARCHAR(255) NOT NULL,
                    Bus_Name VARCHAR(255) NOT NULL,
                    Onboard_Time VARCHAR(255) NOT NULL,
                    Travel_Time VARCHAR(255) NOT NULL,
                    Arrival_Time VARCHAR(255) NOT NULL,
                    Rating VARCHAR(255) NOT NULL,
                    Price VARCHAR(255) NOT NULL,
                    Seat_Avail VARCHAR(255) NOT NULL
                )
            ''')

            if rows:
                # One batched call instead of one execute() per row.
                cursor.executemany(f'''
                    INSERT INTO {table_name} (Orgin_Place, Destination_Place, Bus_Name, Onboard_Time, Travel_Time, Arrival_Time, Rating, Price, Seat_Avail)
                    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
                ''', rows)

            conn.commit()
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when a statement fails.
        conn.close()

    print(f"Stored {max_length} records in the '{table_name}' table.")

def first_Data_wrap():
    """Scrape the route reached via the 'apsrtc_click' tile and store it.

    Uses the module-level ``driver``, which must already show the RedBus
    home page. Rows are written to the ``RedBus_DataScraping_09`` table.
    """
    click_element_by_xpath(driver, 'apsrtc_click')
    click_element_by_xpath(driver, 'route_clcik')
    scroll_to_end(driver, timeout=10, max_scrolls=30)

    # Ordered to match the positional parameters of store_texts_in_mysql:
    # orgin, destination, busname, onboardtime, travel_time, arrival_time,
    # rating, price, seatavail.
    field_keys = (
        'orgin_place',
        'destination_place',
        'busname_text',
        'bustime_text',
        'duration_text',
        'offboard_text',
        'rating_text',
        'price_text',
        'seatavail_text',
    )
    scraped = [retrieve_bus_data(driver, key, timeout=10) for key in field_keys]

    store_texts_in_mysql(*scraped, table_name="RedBus_DataScraping_09")

def second_Data_wrap():
    """Scrape the route reached via the 'krstc_click' tile and store it.

    Uses the module-level ``driver``. The RedBus home page is reloaded first,
    then the scraped rows are written to the ``RedBus_DataScraping_09`` table.
    """
    launch_redbus(driver)
    click_element_by_xpath(driver, 'krstc_click')
    click_element_by_xpath(driver, 'route_clcik')
    scroll_to_end(driver, timeout=10, max_scrolls=30)

    # Ordered to match the positional parameters of store_texts_in_mysql:
    # orgin, destination, busname, onboardtime, travel_time, arrival_time,
    # rating, price, seatavail.
    field_keys = (
        'orgin_place',
        'destination_place',
        'busname_text',
        'bustime_text',
        'duration_text',
        'offboard_text',
        'rating_text',
        'price_text',
        'seatavail_text',
    )
    scraped = [retrieve_bus_data(driver, key, timeout=10) for key in field_keys]

    store_texts_in_mysql(*scraped, table_name="RedBus_DataScraping_09")

def close_browser(driver, delay=5):
    """Pause briefly, then end the WebDriver session.

    Args:
        driver: WebDriver session to shut down.
        delay: Seconds to sleep before shutting down (default 5, the pause
            this function has always applied).

    ``quit()`` is used rather than ``close()``: ``close()`` only closes the
    current window and leaves the session and driver process running.
    """
    time.sleep(delay)
    driver.quit()

    

# Script entry point: scrape both routes with one shared browser session.
# first_Data_wrap() and second_Data_wrap() read the module-level `driver`
# assigned here.
if __name__ == "__main__":
    driver = initialize_driver()
    try:
        launch_redbus(driver)
        first_Data_wrap()
        second_Data_wrap()
        time.sleep(10)
    finally:
        # Shut the browser down even when a scraping step raises, so no
        # Chrome window is left behind.
        close_browser(driver)
    
    
    

    
    
   


Successfully clicked the element with XPath: //*[@id='Carousel']/div[2]/div[1]/div[2]
Successfully clicked the element with XPath: //div[@id='root']/div/div[4]/div[2]/div[1]/a
Timeout: Content did not load within 10 seconds after scroll.
Scroll attempt 1: New content loaded.
Scroll attempt 2: New content loaded.
Scroll attempt 3: New content loaded.
Scroll attempt 4: New content loaded.
Scroll attempt 5: New content loaded.
Scroll attempt 6: New content loaded.
Scroll attempt 7: New content loaded.
Scroll attempt 8: New content loaded.
Scroll attempt 9: New content loaded.
Scroll attempt 10: New content loaded.
Scroll attempt 11: New content loaded.
Scroll attempt 12: New content loaded.
Scroll attempt 13: New content loaded.
Scroll attempt 14: New content loaded.
Scroll attempt 15: New content loaded.
Scroll attempt 16: New content loaded.
Scroll attempt 17: New content loaded.
Scroll attempt 18: New content loaded.
Scroll attempt 19: New content loaded.
Scroll attempt 20: New content