In [13]:
import re
import time
from pathlib import Path

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import ElementClickInterceptedException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

In [14]:
# Start a Chrome session and open the Uber ride-booking page.
# ChromeDriverManager().install() supplies the value handed to Service.
url = "https://www.uber.com/in/en/ride/"
chrome_service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=chrome_service)
driver.get(url)

In [49]:
# Routes to scrape: each key is a pickup point and its list holds the
# drop-off points to price from there, in the order they will be visited.
# All entries are Bangalore locations.
location_mapping = {
    "Indiranagar": [
        "ITPL, Whitefield",
        "Electronic City",
        "HSR Layout",
        "Banashankari",
    ],
    "ITPL, Whitefield": [
        "4th Block, Jayanagar",
        "Terminal 1, Kempegowda International Airport",
        "KSR SBC Bengaluru",
        "MG Road",
        "Nexus Mall Koramangala",
        "Indiranagar",
        "Electronic City",
        "HSR Layout",
        "Banashankari",
    ],
    "Electronic City": [
        "4th Block, Jayanagar",
        "Terminal 1, Kempegowda International Airport",
        "KSR SBC Bengaluru",
        "MG Road",
        "Nexus Mall Koramangala",
        "Indiranagar",
        "ITPL, Whitefield",
        "HSR Layout",
        "Banashankari",
    ],
    "HSR Layout": [
        "4th Block, Jayanagar",
        "Terminal 1, Kempegowda International Airport",
        "KSR SBC Bengaluru",
        "MG Road",
        "Nexus Mall Koramangala",
        "Indiranagar",
        "ITPL, Whitefield",
        "Banashankari",
        "Electronic City",
    ],
    "Banashankari": [
        "4th Block, Jayanagar",
        "Terminal 1, Kempegowda International Airport",
        "KSR SBC Bengaluru",
        "MG Road",
        "Nexus Mall Koramangala",
        "Indiranagar",
        "ITPL, Whitefield",
        "Electronic City",
        "HSR Layout",
    ],
}

In [50]:
def check_and_click():
    """Click the `_css-hUNeqW` button twice using the module-level `driver`.

    The button is looked up again before each click rather than reusing the
    first element handle. `find_element` raises if no matching button exists
    at either step, so callers check for its presence first.
    """
    button_selector = "button[class='_css-hUNeqW']"
    for _ in range(2):
        driver.find_element(By.CSS_SELECTOR, button_selector).click()

def wait_and_click(driver, css_selector, timeout=10, retries=3):
    """Wait for an element to become clickable, scroll to it and click it.

    Args:
        driver: WebDriver used for the wait, the scroll script and the click.
        css_selector: CSS selector of the element to click.
        timeout: Seconds passed to WebDriverWait for each attempt.
        retries: Maximum number of attempts.

    Returns:
        True once a click succeeds; False only when no attempt was made
        (``retries`` is zero or negative).

    Raises:
        ElementClickInterceptedException: re-raised when the final attempt is
            still intercepted. Any other exception (including a wait timeout)
            propagates immediately without a retry.
    """
    locator = (By.CSS_SELECTOR, css_selector)
    final_attempt = retries - 1
    attempt = 0
    while attempt < retries:
        try:
            target = WebDriverWait(driver, timeout).until(EC.element_to_be_clickable(locator))
            driver.execute_script("arguments[0].scrollIntoView(true);", target)
            # Pause after scrolling before the click is attempted.
            time.sleep(2)
            target.click()
        except ElementClickInterceptedException:
            if attempt == final_attempt:
                raise
            # Short back-off before the next attempt.
            time.sleep(1)
        else:
            return True
        attempt += 1
    return False

In [51]:
def clear_and_fill_location(field_selector, location):
    """Type a location into a field and pick the first suggestion.

    Despite the name, nothing is cleared here: the text is sent to the field
    as-is. Uses the module-level `driver`.

    Args:
        field_selector: CSS selector of the input to type into.
        location: Text to type.
    """
    driver.find_element(By.CSS_SELECTOR, field_selector).send_keys(location)
    # Fixed pause before the suggestion list is queried.
    time.sleep(2)
    # find_element returns the first matching list item.
    driver.find_element(By.CSS_SELECTOR, "div[data-tracking-name='list-item']").click()
    

def clear_and_fill_next_location(field_selector, location):
    """Replace a field's current text with a new location and pick the first suggestion.

    Sends Ctrl+A followed by Delete to wipe the existing text, then types the
    new location. Uses the module-level `driver`.

    Args:
        field_selector: CSS selector of the input to overwrite.
        location: Text to type after the field has been emptied.
    """
    field = driver.find_element(By.CSS_SELECTOR, field_selector)
    # Select all, delete, then type the replacement text.
    for keystrokes in (Keys.CONTROL + "a", Keys.DELETE, location):
        field.send_keys(keystrokes)
    # Fixed pause before the suggestion list is queried.
    time.sleep(2)
    driver.find_element(By.CSS_SELECTOR, "div[data-tracking-name='list-item']").click()

In [52]:
def select_start_location(start_location):
    """Open the pickup control and enter the first pickup location.

    Clicks the `pudo-button-pickup` button, then types into the
    'Pickup location' input via `clear_and_fill_location`.
    """
    driver.find_element(By.CSS_SELECTOR, "button[data-testid='pudo-button-pickup']").click()
    pickup_input = "input[placeholder='Pickup location']"
    clear_and_fill_location(pickup_input, start_location)

def select_drop_location(drop_location):
    """Enter the first drop-off location into the 'Dropoff location' input."""
    dropoff_input = "input[placeholder='Dropoff location']"
    clear_and_fill_location(dropoff_input, drop_location)

def select_next_start_location(start_location):
    """Re-open the pickup field and overwrite it with a new pickup location.

    Clicks `div._css-eenitn` (presumably the already-filled pickup row --
    TODO confirm against the page), then replaces the text of the
    'Pickup location' input via `clear_and_fill_next_location`.
    """
    driver.find_element(By.CSS_SELECTOR, "div._css-eenitn").click()
    pickup_input = "input[placeholder='Pickup location']"
    clear_and_fill_next_location(pickup_input, start_location)

def select_next_drop_location(drop_location):
    """Overwrite the 'Dropoff location' input with a new drop-off location."""
    dropoff_input = "input[placeholder='Dropoff location']"
    clear_and_fill_next_location(dropoff_input, drop_location)

In [53]:
def _parse_rupee_price(price_text):
    """Convert a fare label such as '₹1,234.50' to a float, or return None if it is not one."""
    if not price_text.startswith("₹"):
        return None
    numeric_part = price_text.replace("₹", "").replace(",", "")
    if not numeric_part.replace(".", "").isdigit():
        return None
    try:
        return float(numeric_part)
    except ValueError:
        # Digits-and-dots text that is still not a number, e.g. "1.2.3".
        return None


def get_prices_and_count(driver):
    """Summarise the fares of the listed vehicles that seat three or more.

    Every `div._css-kgVjoa` block is inspected. The digits found in its
    `p._css-jsRibq` text are read as the seat count, and the `p._css-jeMYle`
    text as the fare. Blocks with fewer than three seats are ignored, and
    blocks whose fare text is not a valid rupee amount are skipped with a
    message instead of aborting the whole extraction.

    Args:
        driver: WebDriver positioned on a page listing the ride options.

    Returns:
        A 6-tuple ``(vehicle_count, average_price, min_price, max_price,
        min_price_seater_type, max_price_seater_type)``. Prices are floats and
        seater types are digit strings. Six ``None`` values are returned when
        no blocks are found, when no block yields a usable fare, or when any
        exception occurs during extraction.
    """
    no_result = (None, None, None, None, None, None)
    try:
        car_blocks = driver.find_elements(By.CSS_SELECTOR, "div._css-kgVjoa")
        if not car_blocks:
            print("Car blocks not found")
            return no_result

        prices = []
        seater_types = []
        for block in car_blocks:
            seater_text = block.find_element(By.CSS_SELECTOR, "p._css-jsRibq").text.strip()
            seater_type = ''.join(filter(str.isdigit, seater_text))
            if not (seater_type.isdigit() and int(seater_type) >= 3):
                continue

            price_text = block.find_element(By.CSS_SELECTOR, "p._css-jeMYle").text.strip()
            price_number = _parse_rupee_price(price_text)
            if price_number is None:
                print(f"Skipping invalid price text: {price_text}")
                continue
            prices.append(price_number)
            seater_types.append(seater_type)

        # Without this guard the statistics below would divide by zero.
        if not prices:
            print("No vehicles with 3 or more seats and a valid price were found.")
            return no_result

        total_available_vehicles = len(prices)
        average_price = sum(prices) / total_available_vehicles
        min_price = min(prices)
        max_price = max(prices)

        # index() picks the first vehicle at each extreme when fares tie.
        min_price_seater_type = seater_types[prices.index(min_price)]
        max_price_seater_type = seater_types[prices.index(max_price)]

        return (total_available_vehicles, average_price, min_price, max_price,
                min_price_seater_type, max_price_seater_type)

    except Exception as e:
        # Best-effort scrape: report the problem and let the caller skip this route.
        print(f"Error extracting prices: {e}")
        return no_result



In [54]:
def get_wait_times(driver, fallback_wait_min=15):
    """Average the wait times shown for the listed ride options.

    Reads every `p[data-testid='product_selector.list_item.eta_string']`
    element. Text containing ``<number> min`` contributes that number; text
    containing "Unavailable" or "Longer wait" contributes
    ``fallback_wait_min``; anything else is ignored.

    Args:
        driver: WebDriver positioned on a page listing the ride options.
        fallback_wait_min: Minutes assumed for "Unavailable" and
            "Longer wait" entries.

    Returns:
        ``(average_wait_time, unavailable_count)``. When no elements are
        found, none of them has a recognised format, or an exception occurs,
        ``(None, None)`` is returned so that callers can always unpack two
        values.
    """
    try:
        wait_time_elements = driver.find_elements(
            By.CSS_SELECTOR, "p[data-testid='product_selector.list_item.eta_string']"
        )
        if not wait_time_elements:
            print("Wait Time elements not found.")
            return None, None

        wait_times = []
        unavailable_count = 0
        for element in wait_time_elements:
            wait_time_text = element.text.strip()

            # Capture only the number immediately before 'min'.
            match = re.search(r'(\d+)\s*min', wait_time_text)
            if match:
                wait_times.append(int(match.group(1)))
            elif "Unavailable" in wait_time_text:
                wait_times.append(fallback_wait_min)
                unavailable_count += 1
            elif "Longer wait" in wait_time_text:
                wait_times.append(fallback_wait_min)
            # Any other text is an unexpected format and is left out.

        if not wait_times:
            print("No valid wait times found.")
            return None, None

        return sum(wait_times) / len(wait_times), unavailable_count

    except Exception as e:
        # Best-effort scrape: report the problem and let the caller skip this route.
        print(f"Error extracting wait times: {e}")
        return None, None


In [55]:
def _strip_leading_label(text, label):
    """Remove `label` from the start of `text` only, then trim whitespace."""
    text = text.lstrip()
    if text.startswith(label):
        text = text[len(label):]
    return text.strip()


def get_locations(driver):
    """Read the pickup and drop-off names currently shown on the page.

    Only a leading "From " / "To " label is removed, so an address that
    itself contains those words (e.g. "Road To Airport") is left intact.

    Args:
        driver: WebDriver positioned on a page showing the chosen route.

    Returns:
        ``(from_location, to_location)`` as strings, or ``(None, None)`` if
        either element cannot be read.
    """
    try:
        # NOTE(review): 'From' is read from the 2nd child and 'To' from the
        # 1st -- confirm this matches the order of the rows in the page DOM.
        from_location_element = driver.find_element(
            By.CSS_SELECTOR, "div._css-dAHUka:nth-child(2) div._css-gLtoBF"
        )
        from_location = _strip_leading_label(from_location_element.text, "From ")

        to_location_element = driver.find_element(
            By.CSS_SELECTOR, "div._css-dAHUka:nth-child(1) div._css-gLtoBF"
        )
        to_location = _strip_leading_label(to_location_element.text, "To ")

        return from_location, to_location

    except Exception as e:
        # Best-effort scrape: report the problem and let the caller skip this route.
        print(f"Error extracting locations: {e}")
        return None, None


In [56]:
# Accumulator for the scraped results; create_dataframe() appends one row
# per successfully scraped start/destination pair.
df = pd.DataFrame(
    columns=[
        'Total Vehicles',
        'Start Point',
        'End Point',
        'Unavailable Count',
        'Average Wait Time',
        'Average Price',
        'Minimum Price',
        'Seater with Minimum Price',
        'Maximum Price',
        'Seater with Maximum Price',
    ]
)

def create_dataframe(driver):
    """Scrape the current route and append it as one row to the global `df`.

    Collects the locations, wait times and price summary from the page. The
    row is appended only when both locations are non-empty and both the
    average wait time and the average price are available; otherwise a
    message is printed and `df` is left untouched.

    Args:
        driver: WebDriver positioned on a page showing the route's ride options.
    """
    global df  # Rebound below when a new row is appended.

    from_location, to_location = get_locations(driver)

    # get_wait_times may hand back a bare None instead of a pair on failure;
    # unpacking that directly would raise TypeError and abort the whole run.
    wait_result = get_wait_times(driver)
    if wait_result is None:
        average_wait_time, unavailable_count = None, None
    else:
        average_wait_time, unavailable_count = wait_result

    (total_available_vehicles, average_price, min_price, max_price,
     min_price_seater_type, max_price_seater_type) = get_prices_and_count(driver)

    has_all_data = (
        from_location and to_location
        and average_wait_time is not None
        and average_price is not None
    )
    if not has_all_data:
        print("Some data could not be extracted.")
        return

    new_df = pd.DataFrame({
        'Total Vehicles': [total_available_vehicles],
        'Start Point': [from_location],
        'End Point': [to_location],
        'Unavailable Count': [unavailable_count],
        'Average Wait Time': [average_wait_time],
        'Average Price': [average_price],
        'Minimum Price': [min_price],
        'Seater with Minimum Price': [min_price_seater_type],
        'Maximum Price': [max_price],
        'Seater with Maximum Price': [max_price_seater_type]
    })

    # Append the new row to the global DataFrame.
    df = pd.concat([df, new_df], ignore_index=True)

In [57]:
def _confirm_if_prompted(driver):
    """Run check_and_click() when the `_css-hUNeqW` button is present on the page."""
    if driver.find_elements(By.CSS_SELECTOR, "button[class='_css-hUNeqW']"):
        check_and_click()


def _open_drop_field(driver):
    """Click `div._css-eInHRS` so the drop-off text can be replaced."""
    try:
        wait_and_click(driver, "div._css-eInHRS")
    except Exception:
        # Waiting/retrying failed: try one direct click. find_element returns
        # a single element (find_elements returns a list, which has no click()).
        driver.find_element(By.CSS_SELECTOR, "div._css-eInHRS").click()


def _record_route(driver):
    """Pause, store the current route's data via create_dataframe(), pause again."""
    time.sleep(2)
    create_dataframe(driver)
    time.sleep(2)


def automate_locations(driver, location_mapping):
    """Visit every start/destination pair and record its ride data.

    The first start and its first destination are typed into fields that are
    expected to be empty; every later start or destination overwrites the
    text already in the field. After each destination is chosen the route is
    recorded with create_dataframe().

    Args:
        driver: WebDriver positioned on the ride-booking page.
        location_mapping: Dict mapping each start location to the list of
            destinations to price from it. Iteration order is the visiting
            order.
    """
    if not location_mapping:
        print("No locations to process.")
        return

    for start_index, (start_location, end_locations) in enumerate(location_mapping.items()):
        if start_index == 0:
            select_start_location(start_location)
        else:
            select_next_start_location(start_location)
        time.sleep(3)
        _confirm_if_prompted(driver)

        for end_index, end_location in enumerate(end_locations):
            # Decide by position, not by value, so a repeated destination
            # name is not mistaken for the very first entry.
            if start_index == 0 and end_index == 0:
                select_drop_location(end_location)
            else:
                _open_drop_field(driver)
                select_next_drop_location(end_location)
            time.sleep(5)
            _confirm_if_prompted(driver)
            _record_route(driver)

In [None]:
# Run the automation over every configured route, then show the collected rows.
automate_locations(driver=driver, location_mapping=location_mapping)
df

In [None]:
# Save the collected df as CSV. The target folder is created first so the
# write does not fail on a missing directory.
output_path = Path(r"D:\Data Scraping\Metro Cities\Bangalore\Blr.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)