# Redbus details

In [1]:
state_links=["https://www.redbus.in/online-booking/ksrtc-kerala/?utm_source=rtchometile",
             "https://www.redbus.in/online-booking/apsrtc/?utm_source=rtchometile",
             "https://www.redbus.in/online-booking/tsrtc/?utm_source=rtchometile",
             "https://www.redbus.in/online-booking/ktcl/?utm_source=rtchometile",
             "https://www.redbus.in/online-booking/rsrtc/?utm_source=rtchometile",
             "https://www.redbus.in/online-booking/south-bengal-state-transport-corporation-sbstc/?utm_source=rtchometile",
             "https://www.redbus.in/online-booking/hrtc/?utm_source=rtchometile",
             "https://www.redbus.in/online-booking/astc/?utm_source=rtchometile",
             "https://www.redbus.in/online-booking/uttar-pradesh-state-road-transport-corporation-upsrtc/?utm_source=rtchometile",
             "https://www.redbus.in/online-booking/wbtc-ctc/?utm_source=rtchometile"]

In [2]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
import time 
import pandas as pd

In [3]:
# Setup the reusable code
def setup(url):
    driver = webdriver.Chrome()
    driver.maximize_window()
    driver.get(url)
    time.sleep(5)
    return driver


def teardown(driver):
    driver.quit()
    

In [5]:
# scrape the route and links for redbus
# def scrape_redbus_routes(driver):
#     route_links = []
#     route_name = []
#     route_element = driver.find_elements(by=By.XPATH, value="//a[@class='route']")  
#     for route in route_element:
#         if  route.is_enabled():
#             route_links.append(route.get_attribute('href'))
#             route_name.append(route.text.strip())
#     return (route_name, route_links)

In [19]:
# scrape redbus details 
def scrape_redbus_details(driver, url, name):
    try:
        driver.get(url)
        time.sleep(5)  
        
        # Click the "View Buses" button if it exists
        try:
            view_buses_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "button"))
            )
            print(name)
            driver.execute_script("arguments[0].click();", view_buses_button)
            time.sleep(5)  # Wait for buses to load
            
            # Scroll down to load all bus items
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(5)  # Wait for the page to load more content

            # Find bus item details
            bus_name_elements = driver.find_elements(By.CLASS_NAME, "travels.lh-24.f-bold.d-color")
            bus_type_elements = driver.find_elements(By.CLASS_NAME, "bus-type.f-12.m-top-16.l-color.evBus")
            departing_time_elements = driver.find_elements(By.CLASS_NAME, "dp-time.f-19.d-color.f-bold")
            duration_elements = driver.find_elements(By.CLASS_NAME, "dur.l-color.lh-24")
            reaching_time_elements = driver.find_elements(By.CLASS_NAME, "bp-time.f-19.d-color.disp-Inline")
            star_rating_elements = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
            price_elements = driver.find_elements(By.CLASS_NAME, "fare.d-block")
            

            # Use XPath to handle both seat availability classes
            seat_availability_elements = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left m-top-30') or contains(@class, 'seat-left m-top-16')]")

            bus_details = []
            for i in range(len(bus_name_elements)):
                bus_detail = {
                    "Route_Name": name,
                    "Route_Link": url,
                    "Bus_Name": bus_name_elements[i].text,
                    "Bus_Type": bus_type_elements[i].text,
                    "Departing_Time": departing_time_elements[i].text,
                    "Duration": duration_elements[i].text,
                    "Reaching_Time": reaching_time_elements[i].text,
                    "Star_Rating": star_rating_elements[i].text if i < len(star_rating_elements) else '0',
                    "Price": price_elements[i].text,
                    "Seat_Availability": seat_availability_elements[i].text if i < len(seat_availability_elements) else '0'
                }
                bus_details.append(bus_detail)
            return bus_details
        
        except Exception as e:
            print(f"Error occurred while scraping bus details for {url}: {str(e)}")
            return []

    except Exception as e:
        print(f"Error occurred while accessing {url}: {str(e)}")
        return []

        
        
    

In [20]:
# scrape the bus details 

def scrape_redbus(URL):
    # initialize 
    all_redbus_details = []
    
    driver = setup(URL)  
    
    # initialize webdriverwait object
    wait_instance = WebDriverWait(driver, 10)
    
    # pagination_element = driver.find_element(by=By.XPATH, value="//div[@class='DC_117_paginationTable']")
    pagination_element = wait_instance.until(EC.presence_of_element_located((By.XPATH, "//div[@class='DC_117_paginationTable']")))
    total_pages = len(pagination_element.text.split("\n"))
    print(total_pages)
    
    route_names = []
    route_links = []
    # loop through the pagination
    for page in range(1, total_pages+1):
        # navigate to the page
                
        # names, links = scrape_redbus_routes(driver) # load the 1st pagination first
        
        route_element = driver.find_elements(by=By.XPATH, value="//a[@class='route']")  
        for route in route_element:
            if  route.is_enabled():
                route_links.append(route.get_attribute('href'))
                route_names.append(route.text.strip())
       
        if page < total_pages:
            try:
                
                page = str(page+1)
                print(f"Moving to page {page}")
                
                next_page_data = wait_instance.until(EC.presence_of_element_located((By.XPATH, f"//div[@class='DC_117_paginationTable']//*[text()='{page}']")))
            
                if next_page_data.is_enabled():
                    driver.execute_script("arguments[0].click();", next_page_data)
                    time.sleep(3)
                
                             
            except Exception as e:
                print(f"Error occurred while navigating to page {page}: {str(e)}")
                # print(traceback.format_exc())
                # driver.quit()
                break
            
    # print(route_links)
    # print(route_names)
    # get all redbus details 
    for name, link in zip(route_names, route_links):
            redbus_details = scrape_redbus_details(driver, link, name)
            if redbus_details:
                all_redbus_details.extend(redbus_details)    
                
                
    return all_redbus_details    
    
                

In [None]:
result = scrape_redbus(state_links[0]) # Kerala bus details

## Kerala Bus details

In [None]:
data = result
k_data = pd.DataFrame(data)
# data formatting
# k_data
k_data.to_csv('./redbus_details/kerala_bus_details.csv',index=False)

## Andhra pradesh bus details

In [89]:
result = scrape_redbus(state_links[1]) # andhra pradesh 

5
Moving to page 2
Moving to page 3
Moving to page 4
Moving to page 5
Error occurred while scraping bus details for https://www.redbus.in/bus-tickets/madanapalli-to-bangalore: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF71A23FDA5+29557]
	(No symbol) [0x00007FF71A1B2240]
	(No symbol) [0x00007FF71A06B6EA]
	(No symbol) [0x00007FF71A0BFA15]
	(No symbol) [0x00007FF71A0BFC6C]
	(No symbol) [0x00007FF71A10BB07]
	(No symbol) [0x00007FF71A0E753F]
	(No symbol) [0x00007FF71A1088A3]
	(No symbol) [0x00007FF71A0E72A3]
	(No symbol) [0x00007FF71A0B12DF]
	(No symbol) [0x00007FF71A0B2451]
	GetHandleVerifier [0x00007FF71A56DCBD+3363469]
	GetHandleVerifier [0x00007FF71A5B9B47+3674391]
	GetHandleVerifier [0x00007FF71A5AEAEB+3629243]
	GetHandleVerifier [0x00007FF71A2FFC66+815670]
	(No symbol) [0x00007FF71A1BD6EF]
	(No symbol) [0x00007FF71A1B92B4]
	(No symbol) [0x00007FF71A1B9450]
	(No symbol) [0x00007FF71A1A81FF]
	BaseThreadInitThunk [0x00007FFEE1467374+20]
	RtlUserThreadStart [0x00007FFEE291CC91+33]


In [98]:
ap_data = pd.DataFrame(result)
ap_data.to_csv('./redbus_details/ap_bus_details.csv',index=False)

## Telangana bus details

In [13]:
result = scrape_redbus(state_links[2]) # telangana  

3
Moving to page 2
Moving to page 3
VIEW BUSES
VIEW BUSES
VIEW BUSES
VIEW SEATS
VIEW BUSES
VIEW BUSES
VIEW BUSES
VIEW BUSES
VIEW BUSES
VIEW BUSES
VIEW BUSES
VIEW BUSES
VIEW BUSES
VIEW BUSES
VIEW BUSES
VIEW BUSES
VIEW SEATS
VIEW BUSES
VIEW BUSES
VIEW BUSES
VIEW BUSES
VIEW SEATS
VIEW BUSES
VIEW BUSES
VIEW BUSES
VIEW BUSES


In [18]:
telangana_data = pd.DataFrame(result)
telangana_data.to_csv('./redbus_details/telangana_bus_details.csv',index=False)

## Kadamba bus details

In [21]:
result = scrape_redbus(state_links[3]) # Kadamba

4
Moving to page 2
Moving to page 3
Moving to page 4
Pune to Goa
Goa to Pune
Mumbai to Goa
Bangalore to Goa
Goa to Bangalore
Goa to Mumbai
Pandharpur to Goa
Goa to Pandharpur
Solapur to Goa
Calangute (goa) to Goa Airport
Goa to Kolhapur(Maharashtra)
Goa to Solapur
Goa to Sangli
Goa Airport to Calangute (goa)
Goa to Sangola (Solapur)
Sangola (Solapur) to Goa
Mopa Airport to Calangute (goa)
Calangute (goa) to Mopa Airport
Goa to Mopa Airport
Goa to Miraj
Panaji to Goa Airport
Mopa Airport to Goa
Shivamogga to Goa
Margao to Mopa Airport
Goa Airport to Goa
Goa to Satara
Goa to Goa Airport
Goa Airport to Panaji
Goa to Shirdi
Calangute (goa) to Panaji
Mopa Airport to Margao
Goa to Shivamogga
Calangute (goa) to Goa
Belagavi to Goa


In [25]:
kadamba_data = pd.DataFrame(result)
kadamba_data.to_csv('./redbus_details/kadamba_bus_details.csv',index=False)

## Rajasthan bus details

In [26]:
result = scrape_redbus(state_links[4]) # rajasthan 

2
Moving to page 2
Udaipur to Jodhpur
Jodhpur to Ajmer
Beawar (Rajasthan) to Jaipur (Rajasthan)
Sikar to Jaipur (Rajasthan)
Jaipur (Rajasthan) to Jodhpur
Aligarh (uttar pradesh) to Jaipur (Rajasthan)
Jaipur (Rajasthan) to Aligarh (uttar pradesh)
Jodhpur to Beawar (Rajasthan)
Jaipur (Rajasthan) to Pilani
Kishangarh to Jaipur (Rajasthan)
Pali (Rajasthan) to Udaipur
Udaipur to Pali (Rajasthan)
Kota(Rajasthan) to Udaipur
Jaipur (Rajasthan) to Bhilwara
Sikar to Bikaner
Jaipur (Rajasthan) to Bharatpur
Jaipur (Rajasthan) to Mathura
Jaipur (Rajasthan) to Kota(Rajasthan)


In [28]:
rajasthan_data = pd.DataFrame(result)
rajasthan_data.to_csv('./redbus_details/rajasthan_bus_details.csv',index=False)

## South bengal bus details 

In [29]:
result = scrape_redbus(state_links[5]) # south bengal

5
Moving to page 2
Moving to page 3
Moving to page 4
Moving to page 5
Durgapur to Calcutta
Kolkata to Burdwan
Haldia to Calcutta
Kolkata to Haldia
Kolkata to Durgapur (West Bengal)
Kolkata to Arambagh (West Bengal)
Midnapore to Kolkata
Kolkata to Digha
Digha to Calcutta
Kolkata to Bankura
Kolkata to Midnapore
Kolkata to Asansol (West Bengal)
Kolkata to Nimtouri
Jhargram to Kolkata
Kolkata to Contai (Kanthi)
Kolkata to Kolaghat
Kolkata to Nandakumar (west bengal)
Kolkata to Mecheda (West Bengal)
Digha to Durgapur (West Bengal)
Midnapore to Barasat (West Bengal)
Durgapur (West Bengal) to Digha
Kolkata to Chandipur (West Bengal)
Error occurred while scraping bus details for https://www.redbus.in/bus-tickets/barasat-west-bengal-to-midnapore: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF7773BFDA5+29557]
	(No symbol) [0x00007FF777332240]
	(No symbol) [0x00007FF7771EB6EA]
	(No symbol) [0x00007FF77723FA15]
	(No symbol) [0x00007FF77723FC6C]
	(No symbol) [0x00007FF77728BB07]
	(No symbol) [

In [32]:
bengal_data = pd.DataFrame(result)
bengal_data.to_csv('./redbus_details/south_bengal_bus_details.csv',index=False)

## himachal bus details

In [33]:
result = scrape_redbus(state_links[6]) # himachal

4
Moving to page 2
Moving to page 3
Moving to page 4
Delhi to Shimla
Chandigarh to Hamirpur (Himachal Pradesh)
Hamirpur (Himachal Pradesh) to Chandigarh
Shimla to Delhi
Delhi to Chandigarh
Hamirpur (Himachal Pradesh) to Delhi
Chamba (Himachal Pradesh) to Chandigarh
Delhi to Hamirpur (Himachal Pradesh)
Chandigarh to Dharamshala (Himachal Pradesh)
Delhi to Chamba (Himachal Pradesh)
Chamba (Himachal Pradesh) to Delhi
Kangra to Chandigarh
Shimla to Chandigarh
Error occurred while scraping bus details for https://www.redbus.in/bus-tickets/delhi-to-baddi-himachal-pradesh: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF7773BFDA5+29557]
	(No symbol) [0x00007FF777332240]
	(No symbol) [0x00007FF7771EB6EA]
	(No symbol) [0x00007FF77723FA15]
	(No symbol) [0x00007FF77723FC6C]
	(No symbol) [0x00007FF77728BB07]
	(No symbol) [0x00007FF77726753F]
	(No symbol) [0x00007FF7772888A3]
	(No symbol) [0x00007FF7772672A3]
	(No symbol) [0x00007FF7772312DF]
	(No symbol) [0x00007FF777232451]
	GetHandleVerifier 

In [35]:
himachal_data = pd.DataFrame(result)
himachal_data.to_csv('./redbus_details/himachal_bus_details.csv',index=False)

##  Assam bus details

In [36]:
result = scrape_redbus(state_links[7])  # assam

5
Moving to page 2
Moving to page 3
Moving to page 4
Moving to page 5
Tezpur to Guwahati
Guwahati to Tezpur
Guwahati to Nagaon (Assam)
Nagaon (Assam) to Guwahati
Goalpara to Guwahati
Jorhat to North Lakhimpur
Dhubri to Guwahati
Jorhat to Dibrugarh
Error occurred while scraping bus details for https://www.redbus.in/bus-tickets/north-lakhimpur-to-jorhat: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF7773BFDA5+29557]
	(No symbol) [0x00007FF777332240]
	(No symbol) [0x00007FF7771EB6EA]
	(No symbol) [0x00007FF77723FA15]
	(No symbol) [0x00007FF77723FC6C]
	(No symbol) [0x00007FF77728BB07]
	(No symbol) [0x00007FF77726753F]
	(No symbol) [0x00007FF7772888A3]
	(No symbol) [0x00007FF7772672A3]
	(No symbol) [0x00007FF7772312DF]
	(No symbol) [0x00007FF777232451]
	GetHandleVerifier [0x00007FF7776EDCBD+3363469]
	GetHandleVerifier [0x00007FF777739B47+3674391]
	GetHandleVerifier [0x00007FF77772EAEB+3629243]
	GetHandleVerifier [0x00007FF77747FC66+815670]
	(No symbol) [0x00007FF77733D6EF]
	(No symbol)

In [40]:
assam_data = pd.DataFrame(result)
assam_data.to_csv('./redbus_details/assam_bus_details.csv',index=False)

## uttar pradesh  bus details

In [41]:
result = scrape_redbus(state_links[8]) # uttar pradesh

5
Moving to page 2
Moving to page 3
Moving to page 4
Moving to page 5
Delhi to Bareilly
Bareilly to Delhi
Aligarh (uttar pradesh) to Delhi
Delhi to Aligarh (uttar pradesh)
Lucknow to Allahabad
Lucknow to Delhi
Delhi to Farrukhabad (Uttar Pradesh)
Farrukhabad (Uttar Pradesh) to Delhi
Badaun to Delhi
Allahabad to Lucknow
Lucknow to Agra
Sitapur (Uttar Pradesh) to Delhi
Delhi to Badaun
Delhi to Sitapur (Uttar Pradesh)
Agra to Delhi
Delhi to Moradabad
Delhi to Lucknow
Agra to Lucknow
Lucknow to Varanasi
Agra to Bareilly
Lucknow to Bareilly
Bareilly to Agra
Delhi to Agra
Varanasi to Lucknow
Moradabad to Delhi
Kanpur (Uttar Pradesh) to Jhansi
Delhi to Shahjahanpur (Uttar Pradesh)
Shahjahanpur (Uttar Pradesh) to Delhi
Gorakhpur (uttar pradesh) to Lucknow
Kanpur (Uttar Pradesh) to Bareilly
Error occurred while scraping bus details for https://www.redbus.in/bus-tickets/lucknow-to-aligarh: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF7773BFDA5+29557]
	(No symbol) [0x00007FF777332240]
	(No 

In [44]:
uttar_data = pd.DataFrame(result)
uttar_data.to_csv('./redbus_details/uttar_bus_details.csv',index=False)

## West Bengal bus details

In [45]:
resutl = scrape_redbus(state_links[9]) # West Bengal

4
Moving to page 2
Moving to page 3
Moving to page 4
Digha to Barasat (West Bengal)
Durgapur to Calcutta
Digha to Calcutta
Kolkata to Digha
Barasat (West Bengal) to Digha
Kolkata to Suri
Error occurred while scraping bus details for https://www.redbus.in/bus-tickets/barasat-west-bengal-to-midnapore: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF7773BFDA5+29557]
	(No symbol) [0x00007FF777332240]
	(No symbol) [0x00007FF7771EB6EA]
	(No symbol) [0x00007FF77723FA15]
	(No symbol) [0x00007FF77723FC6C]
	(No symbol) [0x00007FF77728BB07]
	(No symbol) [0x00007FF77726753F]
	(No symbol) [0x00007FF7772888A3]
	(No symbol) [0x00007FF7772672A3]
	(No symbol) [0x00007FF7772312DF]
	(No symbol) [0x00007FF777232451]
	GetHandleVerifier [0x00007FF7776EDCBD+3363469]
	GetHandleVerifier [0x00007FF777739B47+3674391]
	GetHandleVerifier [0x00007FF77772EAEB+3629243]
	GetHandleVerifier [0x00007FF77747FC66+815670]
	(No symbol) [0x00007FF77733D6EF]
	(No symbol) [0x00007FF7773392B4]
	(No symbol) [0x00007FF777339450

In [47]:
west_b = pd.DataFrame(result)
west_b.to_csv('./redbus_details/wst_bengal_bus_details.csv',index=False)