In [3]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import time


def _text_or_none(parent, tag, css_class):
    """Return the stripped text of the first ``tag`` with ``css_class`` under ``parent``.

    BeautifulSoup's ``find`` returns ``None`` when nothing matches; this helper
    passes that ``None`` through instead of raising ``AttributeError`` on ``.text``.
    """
    node = parent.find(tag, class_=css_class)
    return None if node is None else node.text.strip()


def _parse_placard(placard):
    """Build the details dict for one ``placard-container`` element.

    Returns ``None`` when the placard carries neither a price nor an address,
    i.e. there is nothing that identifies it as a listing.
    """
    price = _text_or_none(placard, 'p', 'price-container')
    address = _text_or_none(placard, 'p', 'property-name')
    if price is None and address is None:
        return None

    property_details = {'price': price}

    # Each feature <li> is classified by the first label it contains.
    details = placard.find('ul', class_='detailed-info-container')
    if details:
        for feature in details.find_all('li'):
            text = feature.text.strip()
            if "Beds" in text:
                property_details['beds'] = text
            elif "Baths" in text:
                property_details['baths'] = text
            elif "Sq Ft" in text:
                property_details['sq_ft'] = text

    property_details['address'] = address
    property_details['description'] = _text_or_none(
        placard, 'p', 'property-description')

    # Agent keys are only added when the placard has an agent container.
    if placard.find('div', class_='agent-info-container'):
        property_details['Agent Name'] = _text_or_none(
            placard, 'p', 'agent-name')
        # NOTE(review): this key is named 'Agency Number' but holds the agency
        # *name*; the key is kept unchanged so existing consumers keep working.
        property_details['Agency Number'] = _text_or_none(
            placard, 'p', 'agency-name')
        property_details['Agent Number'] = _text_or_none(
            placard, 'p', 'agency-number')

    return property_details


def scrape_homes(county_to_search, headless=False):
    """Search homes.com for ``county_to_search`` and scrape the first results page.

    Args:
        county_to_search: Text typed into the site's search box, followed by RETURN.
        headless: When True, start Chrome with ``--headless=new``. Defaults to
            False, which opens a visible browser window.

    Returns:
        dict mapping ``"property0"``, ``"property1"``, ... to one dict per listing.
        Each listing dict always has ``'price'``, ``'address'`` and
        ``'description'`` (``None`` when the element is missing), has ``'beds'``,
        ``'baths'`` and ``'sq_ft'`` when the matching feature is present, and has
        ``'Agent Name'``, ``'Agency Number'`` and ``'Agent Number'`` when the
        placard contains an agent section.

    Raises:
        selenium.common.exceptions.TimeoutException: if the page body, the search
            box, or the results container does not appear within its wait.
    """
    chrome_options = Options()
    if headless:
        chrome_options.add_argument("--headless=new")
    chrome_options.add_argument(
        "--disable-blink-features=AutomationControlled")  # Prevent bot detection
    chrome_options.add_argument(
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36")

    driver = webdriver.Chrome(service=Service(), options=chrome_options)
    try:
        driver.get("https://www.homes.com/")

        # Wait for the body tag so the page is ready before looking for the search box.
        WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.TAG_NAME, "body"))
        )

        search_box = WebDriverWait(driver, 20).until(
            EC.element_to_be_clickable((By.CLASS_NAME, "multiselect-search"))
        )

        # Ensure the input is in view and interactable
        driver.execute_script("arguments[0].scrollIntoView();", search_box)

        search_box.send_keys(county_to_search)
        search_box.send_keys(Keys.RETURN)

        # Wait for the results container before snapshotting the DOM.
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "placardContainer"))
        )

        page_source = driver.page_source
    finally:
        # Always shut the browser down, even when a wait times out.
        driver.quit()

    soup = BeautifulSoup(page_source, 'html.parser')

    property_list = []
    for placard in soup.find_all('li', class_='placard-container'):
        property_details = _parse_placard(placard)
        if property_details is not None:
            property_list.append(property_details)

    return {
        "property" + str(index): details
        for index, details in enumerate(property_list)
    }


# Script entry point: prompt for a county, scrape it, and print the result dict.
if __name__ == "__main__":
    requested_county = input("Enter the county to search for: ")
    # The query is passed with a single leading space in front of the typed text.
    search_text = " " + requested_county
    scraped_listings = scrape_homes(search_text)
    print(scraped_listings)
    

Enter the county to search for:  Boulder County


{'property0': {'price': '$550,000', 'beds': '2 Beds', 'baths': '1.5 Baths', 'sq_ft': '1,056 Sq Ft', 'address': '1170 Monroe Dr Unit C, Boulder, CO 80303', 'description': 'Charming Townhome - Offers a perfect blend of convenience and nature, with shopping, dining, and a creek with bike path steps away - Enjoy nearby green spaces and breathtaking Flatirons views - Newer appliances - Open bright floor plan for relaxing/entertaining - Well managed amenities, featuring a garden, outdoor pool, and stunning mountain views - Offers easy access to public transportation -', 'Agent Name': 'David Scott', 'Agency Number': 'Slifer Smith & Frampton-Bldr', 'Agent Number': '(720) 664-6265'}, 'property1': {'price': '$110,000', 'beds': '3 Beds', 'baths': '2 Baths', 'sq_ft': '1,078 Sq Ft', 'address': '3003 Valmont Rd Unit 84, Boulder, CO 80301', 'description': 'Welcome to this well loved 3 bed 2 bath home in Boulder! Enter into this cozy open living area ready to make your own and a built in perfect for y

In [None]:
# NOTE(review): scrape_property_info_redfin is not defined in the cells visible
# here, and this cell has no execution count (In [None]) — confirm the function
# is defined earlier in the notebook before running this on a fresh kernel.
scrape_property_info_redfin("Boulder")