In [3]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd

# Path to the Chrome executable; change it to match where Chrome is
# installed on your system.
chrome_binary_path = '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome'

# Shared Chrome configuration used by every WebDriver session in this script.
options = webdriver.ChromeOptions()
options.binary_location = chrome_binary_path

# Command-line switches handed to Chrome, added in the order listed.
for chrome_flag in (
    '--headless',               # Run Chrome in headless mode
    '--no-sandbox',             # Bypass OS security model
    '--disable-dev-shm-usage',  # Overcome limited resource problems
    '--disable-gpu',            # Applicable to Windows OS only; disable GPU hardware acceleration
):
    options.add_argument(chrome_flag)

def extract_park_data(city):
    """Scrape park statistics for one city from parksforcalifornia.org.

    Opens the site's community page for ``city`` in a fresh headless Chrome
    session (configured by the module-level ``options``), parses the
    rendered HTML, and reads the text of the ``<span>`` elements whose
    ``data-field`` attribute is ``parks_acres_total`` and ``pp1k``.

    Args:
        city: Value placed in the ``address`` query parameter, e.g.
            ``"Fontana,California"``. It is converted to ``str`` and
            URL-encoded before being put in the URL.

    Returns:
        A dict with the keys ``'City'``, ``'Parks Area'`` and
        ``'Parks per 1000 People'``. A statistic is ``None`` when its span
        is not present in the page.

    Raises:
        Whatever Selenium / webdriver_manager raise when the driver cannot
        be installed or started, or the page cannot be loaded. The browser
        is shut down before such an exception propagates.
    """
    from urllib.parse import quote

    # Encode the city so characters such as '&', '#' or '+' cannot corrupt
    # the query string. Commas are kept literal, matching the plain
    # "City,State" form used in the input data.
    encoded_city = quote(str(city), safe=',')
    url = f"https://www.parksforcalifornia.org/communities/?address={encoded_city}"

    driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options)
    try:
        driver.get(url)
        page_html = driver.page_source
    finally:
        # Always shut the browser down, even if loading fails or the user
        # interrupts the run; otherwise every failure leaks a Chrome process.
        driver.quit()

    soup = BeautifulSoup(page_html, 'html.parser')

    def _span_text(field_name):
        """Return the stripped text of the span for ``field_name``, or None."""
        span = soup.find('span', {'data-field': field_name})
        return span.text.strip() if span else None

    parks_area = _span_text('parks_acres_total')
    parks_per_1000_people = _span_text('pp1k')

    # Report missing data explicitly. The lookups above never raise
    # AttributeError, so an exception handler would never fire here.
    if parks_area is None or parks_per_1000_people is None:
        print(f"Data not found for {city}")

    return {
        'City': city,
        'Parks Area': parks_area,
        'Parks per 1000 People': parks_per_1000_people
    }

# Read the list of cities from the CSV file hosted on GitHub
df_cities = pd.read_csv('https://raw.githubusercontent.com/wanning-lu/MATH-189-Final-Project/master/park_access/parks_data.csv')
city_list = df_cities['City'].tolist()

# List to store the results (one dict per city that was processed)
results = []

try:
    for city in city_list:
        print(f"Extracting data for {city}...")
        results.append(extract_park_data(city))
finally:
    # Each city needs a full browser session, so a run can take a long time.
    # Persist whatever has been collected even if the loop is interrupted
    # (e.g. Ctrl-C) or a city fails, instead of losing all progress.
    df_results = pd.DataFrame(results)
    if results:
        # Save the results to an Excel file
        df_results.to_excel('parks_data.xlsx', index=False)
        print(f"Saved {len(results)} of {len(city_list)} cities to 'parks_data.xlsx'.")

# Only reached when every city was processed without an exception
print("Data extraction complete. Results saved to 'parks_data.xlsx'.")


Extracting data for Fontana,California...
Extracting data for Stockton,California...


KeyboardInterrupt: 