In [1]:
!pip install selenium
!pip install beautifulsoup4
!pip install pandas
!pip install matplotlib



In [2]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup

# Initialize the Chrome WebDriver
driver = webdriver.Chrome()
driver.get("https://apps.douglas.co.us/assessor/web#/")
data_set = {}

# Wait for the search box to be visible and locate it
search_box = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, 'app-input-debounce input[type="text"]'))
)

# Send search query
search_box.send_keys("2719 Castle Glen Dr")
search_box.send_keys(Keys.RETURN)

# Wait for the results to load (you may need to adjust this depending on the page)
driver.implicitly_wait(5)

# Wait until the table rows are loaded (anchor tags with 'table-row' class)
WebDriverWait(driver, 10).until(
    EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'a.table-row'))
)

# Locate the first row (anchor tag with class 'table-row') and click it
first_row = driver.find_element(By.CSS_SELECTOR, 'a.table-row')
first_row.click()

# Get page source to parse the HTML after clicking the first row
page_source = driver.page_source

# BeautifulSoup to parse the HTML for further scraping
soup = BeautifulSoup(page_source, 'html.parser')

In [5]:
# Wait for the pop-up dialog to appear and then click the "Close" button
wait = WebDriverWait(driver, 10)

# Wait for the "Close" button to be clickable
close_button = wait.until(EC.element_to_be_clickable((By.XPATH, "//button/span[text()='Close']")))

# Click the "Close" button to dismiss the pop-up
close_button.click()

# Wait for the account summary section to be loaded
wait.until(EC.presence_of_element_located((By.XPATH, "//div[@class='dropdown-content']")))

# Extract HTML content
html_content = driver.page_source

# Use BeautifulSoup to parse the HTML content
soup = BeautifulSoup(html_content, 'html.parser')

In [7]:
# Extract Toggle Button and Links before Account Summary
html_content = driver.page_source
soup = BeautifulSoup(html_content, 'html.parser')

# Initialize a dictionary to store toggle button and links
key_value_pairs = {}

# Extract the toggle button text (key) and status (value)
toggle_button = soup.find('span', class_='ui-button-text')
if toggle_button:
    key_value_pairs["Toggle Button"] = toggle_button.text.strip()

# Extract the anchor tags (links) and their href attributes
links = soup.find_all('a', href=True)
for link in links:
    link_text = link.get_text(strip=True)
    link_url = link['href']
    key_value_pairs[link_text] = f'<a href="{link_url}">{link_text}</a>'

# Now proceed with the Account Summary logic
# Target the dropdown for Account Summary by using its ID
dropdown_button = wait.until(EC.element_to_be_clickable((By.XPATH, "//div[@id='SummaryAccountInfo']//span[@class='bar faux-button']")))

# Click the dropdown to expand it
dropdown_button.click()

# Wait for the Account Summary content to load
wait.until(EC.presence_of_element_located((By.XPATH, "//div[@id='SummaryAccountInfo']//div[@class='dropdown-content']")))

# Extract HTML content again after the dropdown is expanded
html_content = driver.page_source

# Parse the HTML with BeautifulSoup
soup = BeautifulSoup(html_content, 'html.parser')

# Find the dropdown content specifically under the 'SummaryAccountInfo' ID
dropdown_content = soup.find('div', id='SummaryAccountInfo').find('div', class_='dropdown-content')

# Extract the key-value pairs from Account Summary
account_summary_pairs = {}

# Find all the rows with the class 'skinny-row' which contain the label and value pairs
rows = dropdown_content.find_all('div', class_='skinny-row')

for row in rows:
    # Extract the label (key) and value (value)
    label = row.find('div', class_='col-xs-4').text.strip() if row.find('div', class_='col-xs-4') else None
    value = row.find('div', class_='col-xs-8').text.strip() if row.find('div', class_='col-xs-8') else None
    
    # Add to dictionary if both label and value exist
    if label and value:
        account_summary_pairs[label] = value

# Print the Account Summary key-value pairs
print(f"Account Summary:")
for key, value in account_summary_pairs.items():
    # Skip the "Update Mailing Address" entry
    if key != 'Update Mailing Address':
        print(f"{key}: {value}")
        data_set[key] = value

# Extract additional data (Location Description, Owner Info, PLSS Location)
location_description = soup.find('div', string='Location Description').find_next('div').text.strip()

# For Owner Info, we need to extract and clean it
owner_info_div = soup.find('div', string='Owner Info').find_next('div')

# Extract owner name and address
owner_info_raw = owner_info_div.text.strip()

# Split owner info into lines
owner_info_parts = owner_info_raw.split("\n")

# Clean up and extract the name and address properly
owner_name = owner_info_parts[0].strip() 
owner_address = " ".join(owner_info_parts[1:]).strip()  

# If "Update Mailing Address" appears in the address, remove it
if "Update Mailing Address" in owner_address:
    owner_address = owner_address.split("Update Mailing Address")[0].strip()

# Extract PLSS Location
plss_location = soup.find('div', string='Public Land Survey System (PLSS) Location').find_next('div').text.strip()

# Clean the PLSS Location
plss_location_cleaned = ' '.join(plss_location.split())

# Optionally, reformat for better readability (if you want to format it neatly)
plss_location_cleaned = plss_location_cleaned.replace("Quarter:", "\nQuarter:").replace("Section:", "\nSection:").replace("Township:", "\nTownship:").replace("Range:", "\nRange:")

# Extract the Section PDF Map link (if it exists)
section_pdf_map = None

# Find all the div elements with class 'skinny-row'
pdf_map_rows = soup.find_all('div', class_='skinny-row')

for row in pdf_map_rows:
    # Look for an anchor tag within the row
    link = row.find('a', href=True)
    if link and "SectionMap" in link['href']:  # Check if the href contains "SectionMap"
        section_pdf_map = link['href']
        break 

# Print other extracted information
print(f"\nLocation Description: {location_description}")
data_set["Location Description"] = location_description
print(f"\nOwner Info:")
print(f"Owner Name: {owner_name}")
data_set["Owner Name"] = owner_name
print(f"Owner Address: {owner_address}")
data_set["Owner Address"] = owner_address
print(f"\nPublic Land Survey System (PLSS) Location: {plss_location_cleaned}")
data_set["PLSS Location"] = {plss_location_cleaned}
print(f"\nSection PDF Map Link: {section_pdf_map}")
data_set["PDF Map Link"] = {section_pdf_map}

print(data_set)

# Close the browser
# driver.quit()

Account Summary:
Account #:: R0396965
State Parcel #:: 2351-163-01-115
Account Type:: Residential
Tax District:: 3473
Neighborhood-Ext:: 410-I
Building Count:: 1
Building Permit Authority:: Douglas County (website )
Phone:: 303-660-7497
Name:: CASTLE PINES
Reception No:: 9607889

Location Description: LOT 19A CASTLE PINES # 1A 16TH AMENDMENT    0.23 AM/L

Owner Info:
Owner Name: DENNIS R HENDRIX 2020 COLORADO PERSONAL RESIDENCE TRUST & JENNIE M HENDRIX 2020 COLORADO PERSONAL RESIDENCE TRUST
Owner Address: 2719 CASTLE GLEN DRCASTLE ROCK, CO 80108

Public Land Survey System (PLSS) Location: 
Quarter: SW; 
Section: 16; 
Township: 7; 
Range: 67

Section PDF Map Link: /realware/SectionMaps/TWP2351/DC_2351_16.pdf
