In [12]:
import requests
from bs4 import BeautifulSoup

def fetch_event_urls(page_url):
    response = requests.get(page_url)
    soup = BeautifulSoup(response.text, 'html.parser')
    
    # Extracting event detail page URLs
    event_urls = [a['href'] for a in soup.find_all('a', href=True) if 'events' in a['href']]
    
    return event_urls

# Example usage
page_url = 'https://visitseattle.org/events/page/1'
event_urls = fetch_event_urls(page_url)
print(event_urls[:9])  # Print the first 9 URLs to verify




['https://visitseattle.org/things-to-do/events/', 'https://visitseattle.org/things-to-do/events/', 'https://visitseattle.org/things-to-do/events/festivals/', 'https://visitseattle.org/things-to-do/events/submit-your-event/', '/?s=&frm=events', 'https://visitseattle.org/events/an-evening-with-lucia-micarelli/', 'https://visitseattle.org/events/an-evening-with-lucia-micarelli/', 'https://visitseattle.org/events/dylan-leblanc/', 'https://visitseattle.org/events/dylan-leblanc/']


In [17]:
def scrape_all_event_urls(base_url, start_page=1, end_page=41):
    all_event_urls = []
    for page in range(start_page, end_page + 1):
        page_url = f"{base_url}page/{page}"
        event_urls = fetch_event_urls(page_url)
        all_event_urls.extend(event_urls)
        # print(f"Scraped {len(event_urls)} URLs from {page_url}")
    
    return all_event_urls

# Base URL for pagination
base_url = 'https://visitseattle.org/events/'
all_event_urls = scrape_all_event_urls(base_url)
print(f"Total URLs scraped: {len(all_event_urls)}")


Total URLs scraped: 1336


In [14]:
def fetch_event_details(url):
    if not url.startswith('http'):
        url = 'https://visitseattle.org' + url
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    
    name = soup.find('h1').get_text(strip=True) if soup.find('h1') else 'N/A'
    
    # Example of adjusting based on actual page structure; these will need to be customized
    date_element = soup.find(class_='event-date')
    date = date_element.get_text(strip=True) if date_element else 'N/A'
    
    location_element = soup.find(class_='event-location')
    location = location_element.get_text(strip=True) if location_element else 'N/A'
    
    event_type_element = soup.find(class_='event-type')
    event_type = event_type_element.get_text(strip=True) if event_type_element else 'N/A'
    
    region_element = soup.find(class_='event-region')
    region = region_element.get_text(strip=True) if region_element else 'N/A'
    
    return {
        'Name': name,
        'Date': date,
        'Location': location,
        'Type': event_type,
        'Region': region
    }



In [15]:
import pandas as pd

def compile_events_to_csv(event_urls, csv_filename):
    events_data = [fetch_event_details(url) for url in event_urls]
    
    # Convert list of dictionaries to a DataFrame
    df = pd.DataFrame(events_data)
    
    # Save DataFrame to CSV
    df.to_csv(csv_filename, index=False)
    print(f"Data saved to {csv_filename}")

# Compile and save data
csv_filename = 'events.csv'
compile_events_to_csv(all_event_urls[:9], csv_filename)  # Limiting to first 9 for demonstration


Data saved to events.csv


In [16]:
import requests

def lookup_location_coordinates(location):
    nominatim_url = "https://nominatim.openstreetmap.org/search"
    params = {
        'q': location,
        'format': 'json',
        'limit': 1
    }
    response = requests.get(nominatim_url, params=params)
    data = response.json()
    if data:
        latitude = data[0]['lat']
        longitude = data[0]['lon']
        return latitude, longitude
    else:
        return None, None


In [None]:
import requests

def lookup_location_coordinates(location):
    nominatim_url = "https://nominatim.openstreetmap.org/search"
    params = {
        'q': location,
        'format': 'json',
        'limit': 1
    }
    response = requests.get(nominatim_url, params=params)
    data = response.json()
    if data:
        latitude = data[0]['lat']
        longitude = data[0]['lon']
        return latitude, longitude
    else:
        return None, None
