In [45]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
from time import sleep
import pandas as pd

driver = webdriver.Chrome()

# Navigate to the website
driver.get("https://registrantsearch.reco.on.ca/")

# Wait for the page to load and elements to be clickable
wait = WebDriverWait(driver, 10)

# Click the "Brokerage Name" button
brokerage_name_button = wait.until(EC.element_to_be_clickable((By.ID, "btnBrokerage")))
brokerage_name_button.click()

# Ensure "Unregistered/Formerly Registered" checkbox is unchecked
unregistered_checkbox = driver.find_element(By.ID, "IsTerminated")
if unregistered_checkbox.is_selected():
    unregistered_checkbox.click()

# Enter 'inc' in the "Brokerage Name" text field
brokerage_name_field = wait.until(EC.presence_of_element_located((By.ID, "b-name")))
brokerage_name_field.clear()
brokerage_name_field.send_keys("inc")

# Click the "Search" button
search_button = driver.find_element(By.ID, "searchBrokerage")
search_button.click()

# Initialize a list to store data
data = []

while True:
    # Wait for the results to load and be present
    cards = wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, "card.mt-2")))

    # Loop through each card to extract details
    for card in cards:
        header = card.find_element(By.CLASS_NAME, "card-header")
        
        # Scroll the card into view
        driver.execute_script("arguments[0].scrollIntoView(true);", header)
        sleep(1)  # Small delay to ensure the element is fully scrolled into view
        
        # Click to expand the card
        header.click()
        sleep(2)  # Adding a sleep to ensure the card content is fully loaded
        
        # Extract the information
        legal_name = card.find_element(By.XPATH, ".//h4[contains(@class, 'mb-0')]").text.strip()
        try:
            # Attempt to find the Broker of Record
            broker_of_record_element = card.find_element(By.XPATH, ".//p[contains(., 'Broker of Record :')]")
            broker_of_record = broker_of_record_element.text.split(":")[1].strip()
        except (NoSuchElementException, IndexError):
            try:
                # If Broker of Record is not found, attempt to find the Branch Manager
                branch_manager_element = card.find_element(By.XPATH, ".//p[contains(., 'Branch Manager :')]")
                broker_of_record = branch_manager_element.text.split(":")[1].strip()
            except (NoSuchElementException, IndexError):
                broker_of_record = "N/A"

        brokerage_email = card.find_element(By.XPATH, ".//p[contains(., 'Brokerage Email:')]").text.split(":")[1].strip()
        brokerage_phone = card.find_element(By.XPATH, ".//p[contains(., 'Brokerage Phone :')]").text.split(":")[1].strip()

        # Append data to list
        data.append({
            "Legal Name": legal_name,
            "Broker of Record/Branch Manager": broker_of_record,
            "Brokerage Email": brokerage_email,
            "Brokerage Phone": brokerage_phone
        })

    # Check if there's a "Next" button and click it to go to the next page
    try:
        next_button = driver.find_element(By.CLASS_NAME, "next_link")
        next_button.click()
        sleep(2)  # Wait for the next page to load
    except NoSuchElementException:
        print("No more pages left.")
        break  # Exit the loop if there are no more pages

# Convert the list to a DataFrame
df = pd.DataFrame(data)

# Export DataFrame to a CSV file
df.to_csv("reco_brokerage.csv", index=False)

# Close the browser
driver.quit()


ElementNotInteractableException: Message: element not interactable
  (Session info: chrome=128.0.6613.85)
Stacktrace:
	GetHandleVerifier [0x00007FF7B228B632+29090]
	(No symbol) [0x00007FF7B21FE6E9]
	(No symbol) [0x00007FF7B20BAFF9]
	(No symbol) [0x00007FF7B2110432]
	(No symbol) [0x00007FF7B2102D61]
	(No symbol) [0x00007FF7B21366EA]
	(No symbol) [0x00007FF7B21026A6]
	(No symbol) [0x00007FF7B2136900]
	(No symbol) [0x00007FF7B21565D9]
	(No symbol) [0x00007FF7B2136493]
	(No symbol) [0x00007FF7B21009B1]
	(No symbol) [0x00007FF7B2101B11]
	GetHandleVerifier [0x00007FF7B25A881D+3294093]
	GetHandleVerifier [0x00007FF7B25F4403+3604339]
	GetHandleVerifier [0x00007FF7B25EA2C7+3563063]
	GetHandleVerifier [0x00007FF7B2346F16+797318]
	(No symbol) [0x00007FF7B220986F]
	(No symbol) [0x00007FF7B2205454]
	(No symbol) [0x00007FF7B22055E0]
	(No symbol) [0x00007FF7B21F4A7F]
	BaseThreadInitThunk [0x00007FF929DD257D+29]
	RtlUserThreadStart [0x00007FF92B7CAF28+40]


In [57]:
from bs4 import BeautifulSoup
import pandas as pd

# Read the HTML content from the text file
with open('C:/Users/nemes/python juypter/reco_brokerage.txt', 'r', encoding='utf-8') as file:
    html_content = file.read()

# Parse the HTML content using BeautifulSoup
soup = BeautifulSoup(html_content, 'html.parser')

# Initialize a list to store data
data = []

# Find all the cards
cards = soup.find_all('div', class_='card mt-2')

total_cards = len(cards)
print(f"Total number of cards found: {total_cards}")

# Loop through each card to extract details
for index, card in enumerate(cards, start=1):
    print(f"Processing card {index} of {total_cards}...")

    # Extract the legal name from the h4 element
    legal_name = card.find('h4', class_='mb-0').get_text(strip=True)
    print(f"Legal Name: {legal_name}")

    # Initialize empty fields
    broker_of_record = "N/A"
    brokerage_email = "N/A"
    brokerage_phone = "N/A"

    # Extract the Broker of Record or Branch Manager
    broker_record_element = card.find('strong', text='Broker of Record :')
    if broker_record_element:
        broker_of_record = broker_record_element.next_sibling.strip()
    else:
        branch_manager_element = card.find('strong', text='Branch Manager :')
        if branch_manager_element:
            broker_of_record = branch_manager_element.next_sibling.strip()

    print(f"Broker of Record/Branch Manager: {broker_of_record}")

    # Extract the Brokerage Email
    brokerage_email_element = card.find('strong', text='Brokerage Email:')
    if brokerage_email_element:
        brokerage_email = brokerage_email_element.next_sibling.strip()

    # Extract the Brokerage Phone
    brokerage_phone_element = card.find('strong', text='Brokerage Phone :')
    if brokerage_phone_element:
        brokerage_phone = brokerage_phone_element.next_sibling.strip()

    print(f"Brokerage Email: {brokerage_email}")
    print(f"Brokerage Phone: {brokerage_phone}")

    # Append data to list
    data.append({
        "Legal Name": legal_name,
        "Broker of Record/Branch Manager": broker_of_record,
        "Brokerage Email": brokerage_email,
        "Brokerage Phone": brokerage_phone
    })

    print(f"Card {index} processed successfully.")

# Convert the list to a DataFrame
df = pd.DataFrame(data)

# Export DataFrame to a CSV file
df.to_csv("reco_brokerage.csv", index=False)

print("Data extraction complete and saved to reco_brokerage.csv")


Total number of cards found: 4121
Processing card 1 of 4121...
Legal Name: 100 ACRES REALTY INC.
Broker of Record/Branch Manager: Rajwinder KAUR Dhillon
Brokerage Email: info@100acres.ca
Brokerage Phone: 905-929-3000
Card 1 processed successfully.
Processing card 2 of 4121...
Legal Name: RE/MAX DASH REALTY
Broker of Record/Branch Manager: Dror DEAN Aronovici
Brokerage Email: sharon@thedashgroup.ca
Brokerage Phone: 
Card 2 processed successfully.
Processing card 3 of 4121...
Legal Name: SAVE MAX PIONEER REALTY
Broker of Record/Branch Manager: Ashutosh KUMAR Pandey
Brokerage Email: info@savemaxpioneer.com
Brokerage Phone: 905-909-3333
Card 3 processed successfully.
Processing card 4 of 4121...
Legal Name: SAVE MAX ACE REAL ESTATE
Broker of Record/Branch Manager: Abhishek Grover
Brokerage Email: admin@savemax.com
Brokerage Phone: 905-819-5393
Card 4 processed successfully.
Processing card 5 of 4121...
Legal Name: HOMELIFE PRINCIPLE REAL ESTATE
Broker of Record/Branch Manager: Luigi D Tass

  broker_record_element = card.find('strong', text='Broker of Record :')
  brokerage_email_element = card.find('strong', text='Brokerage Email:')
  brokerage_phone_element = card.find('strong', text='Brokerage Phone :')
  branch_manager_element = card.find('strong', text='Branch Manager :')


Broker of Record/Branch Manager: Bruno Zaffino
Brokerage Email: brunozaffino1@gmail.com
Brokerage Phone: 905-897-3388
Card 732 processed successfully.
Processing card 733 of 4121...
Legal Name: CUSHMAN & WAKEFIELD OTTAWA
Broker of Record/Branch Manager: Alain RHEAULT Desmarais
Brokerage Email: adesmarais@cwottawa.com
Brokerage Phone: 613-780-1566
Card 733 processed successfully.
Processing card 734 of 4121...
Legal Name: C. ASHLEY REALTY INC.
Broker of Record/Branch Manager: Andjelka Granic
Brokerage Email: agranic@trebnet.com
Brokerage Phone: 905-897-8777
Card 734 processed successfully.
Processing card 735 of 4121...
Legal Name: CAINE REAL ESTATE SERVICES INC
Broker of Record/Branch Manager: Leslie J Caine
Brokerage Email: lesliecaine@rogers.com
Brokerage Phone: 905-451-7111
Card 735 processed successfully.
Processing card 736 of 4121...
Legal Name: CAKE REALTY
Broker of Record/Branch Manager: Rose Marie Callaghan
Brokerage Email: callaghanworks@gmail.com
Brokerage Phone: 905-939-932