In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
import time

# Configure Chrome options
chrome_options = Options()
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
# ChromeDriver "detach" option: leave the browser running after the driver
# process goes away, so the window survives between notebook cells.
chrome_options.add_experimental_option("detach", True)
chrome_options.add_argument('--disable-blink-features=AutomationControlled')

# Initialize WebDriver, then minimize the window so it stays out of the way
driver_chrome = webdriver.Chrome(options=chrome_options)
driver_chrome.minimize_window()

# Navigate to FBI Crime Data Explorer
str_url = 'https://cde.ucr.cjis.gov/LATEST/webapp/#/pages/explorer/crime/crime-trend'
driver_chrome.get(str_url)

# Short-timeout wait object reused by later cells (e.g. for dropdown options)
wait_obj = WebDriverWait(driver_chrome, 3)

# The page is rendered by JavaScript, so `get()` returning does not mean the
# controls exist yet. Instead of sleeping for a fixed time, block until the
# buttons of both dropdowns used by the following cells are in the DOM.
# `until` raises TimeoutException if they never appear, so the success message
# below is only printed when the page is actually usable.
int_page_load_timeout = 30  # seconds; generous upper bound for a slow first load
wait_page_load = WebDriverWait(driver_chrome, int_page_load_timeout)
for str_css_selector in ("#locsel-defaultloctype button", "#yr-month-dz-select button"):
    wait_page_load.until(
        EC.presence_of_element_located((By.CSS_SELECTOR, str_css_selector))
    )

print("Page loaded successfully")
print(f"Page title: {driver_chrome.title}")
print(f"Current URL: {driver_chrome.current_url}")


Page loaded successfully
Page title: CDE
Current URL: https://cde.ucr.cjis.gov/LATEST/webapp/#/pages/explorer/crime/crime-trend


In [2]:
# Exploratory cell: discover the page controls that the download loop relies on.
# It (1) lists the Location dropdown options into `list_states`, (2) lists the
# Time Frame dropdown options, (3) opens the hamburger menu and prints what
# appears, and (4) locates (without clicking) the "Download as CSV" entry.
# Requires `driver_chrome`, `wait_obj`, `By`, `EC` and `time` from the setup cell.

# Find Location Select dropdown and show all options

# Locate the label by its visible text, then take the element right after it;
# the print below reports which tag/id that sibling actually is.
elem_location_text = driver_chrome.find_element(By.XPATH, "//*[contains(text(), 'Location Select')]")
elem_location_select = elem_location_text.find_element(By.XPATH, "./following-sibling::*[1]")
print(f"Found: <{elem_location_select.tag_name}> id='{elem_location_select.get_attribute('id')}'")

# Find button inside and click to open dropdown
elem_location_button = elem_location_select.find_element(By.TAG_NAME, "button")
elem_location_button.click()
time.sleep(2)

# Wait for options to appear and find them
# NOTE: the lookup is document-wide (every <nb-option> on the page), not scoped
# to the dropdown that was just opened.
list_options = wait_obj.until(EC.presence_of_all_elements_located((By.TAG_NAME, "nb-option")))
# Snapshot the option texts as plain strings; `list_states` is what the
# download loop iterates over later.
list_states = [elem_option.text for elem_option in list_options]

print(f"\nFound {len(list_states)} states:")
for idx, str_state in enumerate(list_states):
    print(f"{idx}: {str_state}")

# Find Time Frame dropdown and show all options

# NOTE(review): no option was chosen above, so the Location dropdown is not
# explicitly closed before this next click — presumably the page dismisses it
# on an outside click; confirm.
elem_timeframe_select = driver_chrome.find_element(By.ID, "yr-month-dz-select")
print(f"\nFound: <{elem_timeframe_select.tag_name}> id='{elem_timeframe_select.get_attribute('id')}'")

# Find button inside and click to open dropdown
elem_timeframe_button = elem_timeframe_select.find_element(By.TAG_NAME, "button")
elem_timeframe_button.click()
time.sleep(2)

# Wait for options to appear and find them
# Same document-wide <nb-option> lookup as above, so the result is only the
# time-frame list if the location options are no longer in the DOM.
list_timeframe_options = wait_obj.until(EC.presence_of_all_elements_located((By.TAG_NAME, "nb-option")))
print(f"\nFound {len(list_timeframe_options)} time frame options:")
for idx, elem_option in enumerate(list_timeframe_options):
    # `.text` can be empty for an option, which prints as a bare index.
    print(f"{idx}: {elem_option.text}")

# Find hamburger menu button and click it

elem_menu_button = driver_chrome.find_element(By.CLASS_NAME, "hamburger")
print(f"\n\nFound hamburger button: <{elem_menu_button.tag_name}>")

# Click the hamburger button
# NOTE(review): nothing in this cell clicks it a second time, so the menu is
# presumably still open when the cell finishes — confirm this does not affect
# the next cell, which clicks the same button again.
elem_menu_button.click()
time.sleep(2)

# Find what appears in the menu
# Broad match on any class containing 'menu' or 'popover'; only the first 10
# hits are inspected and entries with no visible text are skipped.
list_menu_items = driver_chrome.find_elements(By.XPATH, "//*[contains(@class, 'menu') or contains(@class, 'popover')]")
print(f"\nClicked menu, found {len(list_menu_items)} menu-related elements")
for idx, elem in enumerate(list_menu_items[:10]):
    str_text = elem.text.strip()[:50]  # truncate long text for readable output
    str_tag = elem.tag_name
    if str_text:
        print(f"{idx}: <{str_tag}> '{str_text}'")

# Locate the Download as CSV element
# Only located and described here; the element is not clicked in this cell.
elem_download_csv = driver_chrome.find_element(By.XPATH, "//li[@title='Download as CSV']")
print(f"\n\nFound Download as CSV element:")
print(f"  Tag: <{elem_download_csv.tag_name}>")
print(f"  Text: '{elem_download_csv.text}'")
print(f"  Title: '{elem_download_csv.get_attribute('title')}'")
print(f"  Is displayed: {elem_download_csv.is_displayed()}")

Found: <nb-select> id='locsel-defaultloctype'

Found 57 states:
0: United States
1: Alabama
2: Alaska
3: Arizona
4: Arkansas
5: California
6: Colorado
7: Connecticut
8: Delaware
9: District of Columbia
10: Florida
11: Georgia
12: Hawaii
13: Idaho
14: Illinois
15: Indiana
16: Iowa
17: Kansas
18: Kentucky
19: Louisiana
20: Maine
21: Maryland
22: Massachusetts
23: Michigan
24: Minnesota
25: Mississippi
26: Missouri
27: Montana
28: Nebraska
29: Nevada
30: New Hampshire
31: New Jersey
32: New Mexico
33: New York
34: North Carolina
35: North Dakota
36: Ohio
37: Oklahoma
38: Oregon
39: Pennsylvania
40: Rhode Island
41: South Carolina
42: South Dakota
43: Tennessee
44: Texas
45: Utah
46: Vermont
47: Virginia
48: Washington
49: West Virginia
50: Wisconsin
51: Wyoming
52: American Samoa
53: Guam
54: Mariana Islands
55: Puerto Rico
56: U.S. Virgin Islands

Found: <nb-select> id='yr-month-dz-select'

Found 7 time frame options:
0: 3 Months
1: 6 Months
2: 1 Year
3: 2 Years
4: 5 Years
5: 10 Years
6:

In [3]:
# Cell 2: Iterate through all regions and download CSV for each

from tqdm.auto import tqdm

# Download one CSV per location name in `list_states` (collected earlier from
# the Location dropdown), always with the "10 Years" time frame.
#
# For each location the loop opens the Location dropdown, clicks the matching
# option, opens the Time Frame dropdown, clicks "10 Years", opens the hamburger
# menu and clicks "Download as CSV".
#
# Raises:
#     LookupError: if the location or the "10 Years" option is not present in
#         its dropdown. Without this check the loop would carry on and export
#         whatever was previously selected, while reporting it under the
#         current location's name.

# Seconds to pause after each click so the JavaScript UI can react.
int_ui_pause = 2
# Seconds to pause after requesting the CSV so the browser can start the download.
int_download_pause = 3

with tqdm(total=len(list_states), position=0, leave=True) as pbar:
    for str_state in list_states:
        # An option whose text was empty when it was listed cannot be matched
        # by name; skip it rather than clicking an arbitrary blank option.
        if not str_state.strip():
            pbar.update(1)
            continue

        print(f"\n=== Processing {str_state} ===")

        # 1. Click location drop menu
        elem_location_select = driver_chrome.find_element(By.ID, "locsel-defaultloctype")
        elem_location_button = elem_location_select.find_element(By.TAG_NAME, "button")
        elem_location_button.click()
        time.sleep(int_ui_pause)

        # 2. Select state
        list_location_options = driver_chrome.find_elements(By.TAG_NAME, "nb-option")
        for elem_option in list_location_options:
            if elem_option.text == str_state:
                elem_option.click()
                print(f"Selected {str_state}")
                break
        else:
            # No break: nothing matched, so the previous selection is still active.
            raise LookupError(
                f"Location option '{str_state}' not found among "
                f"{len(list_location_options)} dropdown options"
            )
        time.sleep(int_ui_pause)

        # 3. Click time drop menu
        elem_timeframe_select = driver_chrome.find_element(By.ID, "yr-month-dz-select")
        elem_timeframe_button = elem_timeframe_select.find_element(By.TAG_NAME, "button")
        elem_timeframe_button.click()
        time.sleep(int_ui_pause)

        # 4. Select 10 Years
        list_timeframe_options = driver_chrome.find_elements(By.TAG_NAME, "nb-option")
        for elem_option in list_timeframe_options:
            if elem_option.text == "10 Years":
                elem_option.click()
                print("Selected 10 Years")
                break
        else:
            # No break: exporting now would use some other time frame.
            raise LookupError(
                f"Time frame option '10 Years' not found for '{str_state}'"
            )
        time.sleep(int_ui_pause)

        # 5. Click hamburger button
        elem_hamburger = driver_chrome.find_element(By.CLASS_NAME, "hamburger")
        elem_hamburger.click()
        print("Clicked hamburger menu")
        time.sleep(int_ui_pause)

        # 6. Click Download CSV
        # The click only requests the export; the pause gives the browser time
        # to start it. The resulting file is not checked here.
        elem_csv_download = driver_chrome.find_element(By.XPATH, "//li[@title='Download as CSV']")
        elem_csv_download.click()
        print(f"Downloaded CSV for {str_state}")
        time.sleep(int_download_pause)

        # Update progress bar
        pbar.update(1)

print("\n=== All downloads completed ===")


  0%|          | 0/57 [00:00<?, ?it/s]


=== Processing United States ===
Selected United States
Selected 10 Years
Clicked hamburger menu
Downloaded CSV for United States

=== Processing Alabama ===
Selected Alabama
Selected 10 Years
Clicked hamburger menu
Downloaded CSV for Alabama

=== Processing Alaska ===
Selected Alaska
Selected 10 Years
Clicked hamburger menu
Downloaded CSV for Alaska

=== Processing Arizona ===
Selected Arizona
Selected 10 Years
Clicked hamburger menu
Downloaded CSV for Arizona

=== Processing Arkansas ===
Selected Arkansas
Selected 10 Years
Clicked hamburger menu
Downloaded CSV for Arkansas

=== Processing California ===
Selected California
Selected 10 Years
Clicked hamburger menu
Downloaded CSV for California

=== Processing Colorado ===
Selected Colorado
Selected 10 Years
Clicked hamburger menu
Downloaded CSV for Colorado

=== Processing Connecticut ===
Selected Connecticut
Selected 10 Years
Clicked hamburger menu
Downloaded CSV for Connecticut

=== Processing Delaware ===
Selected Delaware
Selecte