<a href="https://colab.research.google.com/github/yshgboop/Canadian-Federal-Election-Data-Analysis-Project/blob/master/pythonwebscraping.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import requests
import pandas as pd

# Download every record of the HI_State_Ignitions_2000_2020_Trim feature layer
# through paged ArcGIS REST queries and write the feature attributes to a CSV.

# Define the API URL
url = "https://services1.arcgis.com/x4h61KaW16vFs7PM/arcgis/rest/services/HI_State_Ignitions_2000_2020_Trim/FeatureServer/0/query"

# Seconds to wait for the server before giving up; without a timeout,
# requests can block forever on a stalled connection.
REQUEST_TIMEOUT = 30

# Page size used when the layer metadata cannot be read or is unusable.
DEFAULT_MAX_RECORD_COUNT = 1000

# Initialize parameters
# NOTE(review): geometry is requested (returnGeometry/outSR) but only the
# 'attributes' of each feature are written to the CSV below. Confirm whether
# coordinates should be exported too, or whether geometry can be turned off.
# NOTE(review): no 'orderByFields' is sent; confirm the service returns rows
# in a stable order across pages.
params = {
    'where': '1=1',          # Select all records
    'outFields': '*',        # Return all fields
    'f': 'json',             # Output format
    'returnGeometry': 'true', # Include geometry
    'outSR': '4326',         # Spatial reference system (WGS84)
}

# Retrieve maxRecordCount from layer metadata
metadata_url = "https://services1.arcgis.com/x4h61KaW16vFs7PM/arcgis/rest/services/HI_State_Ignitions_2000_2020_Trim/FeatureServer/0?f=json"

max_record_count = DEFAULT_MAX_RECORD_COUNT
try:
    metadata_response = requests.get(metadata_url, timeout=REQUEST_TIMEOUT)
    if metadata_response.status_code == 200:
        metadata = metadata_response.json()
        max_record_count = metadata.get('maxRecordCount', DEFAULT_MAX_RECORD_COUNT)
    else:
        print("Failed to retrieve metadata. Using default maxRecordCount of 1000.")
except (requests.RequestException, ValueError, AttributeError):
    # Network failure, non-JSON body, or a JSON body that is not an object.
    print("Failed to retrieve metadata. Using default maxRecordCount of 1000.")

# A missing, zero or non-integer page size would make the paging loop
# meaningless, so fall back to the default in that case.
if (
    not isinstance(max_record_count, int)
    or isinstance(max_record_count, bool)
    or max_record_count <= 0
):
    max_record_count = DEFAULT_MAX_RECORD_COUNT

# Prepare to collect all features
all_features = []
offset = 0
fetch_complete = False  # Set only when the server reports no more records

while True:
    print(f"Fetching records {offset} to {offset + max_record_count}...")
    # Update parameters with paging
    params.update({
        'resultOffset': offset,
        'resultRecordCount': max_record_count
    })

    # Make the API request
    try:
        response = requests.get(url, params=params, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        print(f"Error fetching data: {exc}")
        break

    if response.status_code != 200:
        print(f"Error fetching data: {response.status_code}")
        break

    try:
        data = response.json()
    except ValueError as exc:
        print(f"Error fetching data: {exc}")
        break

    # The service can answer HTTP 200 with an error object in the body;
    # treating that as "no features" would silently end the download early.
    if not isinstance(data, dict) or 'error' in data:
        detail = data.get('error') if isinstance(data, dict) else data
        print(f"Error fetching data: {detail}")
        break

    features = data.get('features', [])
    if not features:
        fetch_complete = True
        break  # No more records to fetch

    all_features.extend(features)
    # Advance by what was actually returned: the server may hand back fewer
    # rows than requested, and stepping by the requested size would skip rows.
    offset += len(features)

print(f"Total records fetched: {len(all_features)}")
if not fetch_complete:
    print("Warning: download stopped early; the saved data may be incomplete.")

# Extract attributes
attributes_list = [feature.get('attributes', {}) for feature in all_features]

# Convert to DataFrame
df = pd.DataFrame(attributes_list)

# Save to CSV
df.to_csv('HI_State_Ignitions_All.csv', index=False)

print("Data saved to HI_State_Ignitions_All.csv")


Fetching records 0 to 2000...
Fetching records 2000 to 4000...
Fetching records 4000 to 6000...
Fetching records 6000 to 8000...
Fetching records 8000 to 10000...
Fetching records 10000 to 12000...
Fetching records 12000 to 14000...
Fetching records 14000 to 16000...
Fetching records 16000 to 18000...
Fetching records 18000 to 20000...
Total records fetched: 16117
Data saved to HI_State_Ignitions_All.csv
