#### Setup

In [None]:
# setup
import os
import requests
import pandas as pd
from pathlib import Path
from datetime import datetime, timedelta
# Folder (relative to the working directory) that receives the CSV exports
out_dir = Path("Data")

# Base URL of the DERQ external API
derq_api_url = 'https://api-external.cloud.derq.com'

# Request headers: the API key is read from the 'derq-api-key' environment variable
# (the value is None when that variable is not set)
headers = {"x-api-key": os.environ.get('derq-api-key')}

# Defaults shared by the request helpers
default_speed_buckets = '5,10,15,20,25'
all_event_types = 'IC, WWD, STPV, TV, LCV, RLV, NM-VV, NM-VRU, CRSH'


In [None]:
### DERQ API FUNCTIONS ###

# Get DERQ locations for Tahoe
def get_derq_locations():
    """
    Fetch the DERQ location list from the '/locations' endpoint.

    Returns:
        Decoded JSON body of the response (parse_locations_response reads its
        "body" key)

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status
    """
    url = derq_api_url + '/locations'
    response = requests.get(url, headers=headers)
    # Fail loudly on HTTP errors (for example a missing or rejected API key).
    # Without this, an error payload with no "body" key is parsed into an empty
    # location table and every per-location loop silently does nothing.
    response.raise_for_status()
    return response.json()
# parse as dataframe
def parse_locations_response(response: dict) -> pd.DataFrame:
    """
    Turn a '/locations' response into a table.

    Parameters:
        response (dict): Decoded JSON response; records are read from its "body" key

    Returns:
        pd.DataFrame: One row per record in "body" (empty when the key is absent)
    """
    records = response.get("body", [])
    location_table = pd.DataFrame(records)
    return location_table
# get veh counts for a location
def get_derq_veh_counts(location_id: str, start_date: str, end_date: str) -> dict:
    """
    Request vehicle counts for one location over a date range.

    NOTE(review): a second `get_derq_veh_counts` is defined later in this cell and
    rebinds the name, so this version (the one that raises on HTTP errors) is not
    the one callers end up using.

    Parameters:
        location_id (str): Location ID placed in the locationId query value
        start_date (str): Start date placed verbatim in the startDate query value
        end_date (str): End date placed verbatim in the endDate query value
            (dates were originally documented as 'YYYY-MM-DD', but the "Update All"
            cell passes 'MM/DD/YYYY' -- TODO confirm what the API accepts)

    Returns:
        dict: Decoded JSON body of the '/counts/vehicle' response

    Raises:
        requests.HTTPError: If the response status is 4xx/5xx
    """
    query = f"locationId={location_id}&startDate={start_date}&endDate={end_date}"
    reply = requests.get(f"{derq_api_url}/counts/vehicle?{query}", headers=headers)
    reply.raise_for_status()
    return reply.json()

# Parse a multi-location vehicle counts response (payloads keyed by location name)
def parse_vehicle_counts_response(response: dict) -> pd.DataFrame:
    """
    Flatten a multi-location vehicle-count response into a single table.

    Parameters:
        response (dict): Mapping of location name -> payload, where each payload
            carries a "statusCode" and a "body" list of count records

    Returns:
        pd.DataFrame: All records from payloads whose "statusCode" is "200", each
            tagged with a "LocationName" column; empty if nothing qualifies
    """
    rows = [
        # Copy each record so tagging it does not mutate the caller's response
        {**entry, "LocationName": location_name}
        for location_name, payload in response.items()
        if payload.get("statusCode") == "200"
        for entry in payload.get("body", [])
    ]
    return pd.DataFrame(rows)

def get_derq_safety_insights(location_id: str, start_date: str, end_date: str) -> dict:
    """
    Request safety insights for one location over a date range.

    Parameters:
        location_id (str): Location ID placed in the locationId query value
        start_date (str): Start date placed verbatim in the startDate query value
        end_date (str): End date placed verbatim in the endDate query value
            (dates were originally documented as 'YYYY-MM-DD', but the "Update All"
            cell passes 'MM/DD/YYYY' -- TODO confirm what the API accepts)

    Returns:
        dict: Decoded JSON body of the '/safety-insights' response

    Raises:
        requests.HTTPError: If the response status is 4xx/5xx
    """
    query = f"locationId={location_id}&startDate={start_date}&endDate={end_date}"
    reply = requests.get(f"{derq_api_url}/safety-insights?{query}", headers=headers)
    reply.raise_for_status()
    return reply.json()

def parse_safety_insights_response(response: dict) -> pd.DataFrame:
    """
    Convert a safety-insights response into a table.

    Parameters:
        response (dict): Decoded JSON response with "statusCode" and "body" keys

    Returns:
        pd.DataFrame: Records from "body" when "statusCode" is the string "200";
            an empty DataFrame for any other status
    """
    # Guard clause: anything other than the string "200" counts as a failure
    if response.get("statusCode") != "200":
        return pd.DataFrame()
    return pd.DataFrame(response.get("body", []))
# get derq vru counts function
def get_derq_vru_counts(location_id: str, start_date: str, end_date: str) -> dict:
    """
    Request VRU counts for one location over a date range.

    NOTE(review): a second `get_derq_vru_counts` is defined later in this cell and
    rebinds the name, so this version (the one that raises on HTTP errors) is not
    the one callers end up using.

    Parameters:
        location_id (str): Location ID placed in the locationId query value
        start_date (str): Start date placed verbatim in the startDate query value
        end_date (str): End date placed verbatim in the endDate query value
            (dates were originally documented as 'YYYY-MM-DD', but the "Update All"
            cell passes 'MM/DD/YYYY' -- TODO confirm what the API accepts)

    Returns:
        dict: Decoded JSON body of the '/counts/vru' response

    Raises:
        requests.HTTPError: If the response status is 4xx/5xx
    """
    query = f"locationId={location_id}&startDate={start_date}&endDate={end_date}"
    reply = requests.get(f"{derq_api_url}/counts/vru?{query}", headers=headers)
    reply.raise_for_status()
    return reply.json()

# parse vru counts response
def parse_vru_counts_response(response: dict) -> pd.DataFrame:
    """
    Convert a VRU-counts response into a table.

    Parameters:
        response (dict): Decoded JSON response with "statusCode" and "body" keys

    Returns:
        pd.DataFrame: Records from "body" when "statusCode" is the string "200";
            an empty DataFrame for any other status
    """
    succeeded = response.get("statusCode") == "200"
    return pd.DataFrame(response.get("body", [])) if succeeded else pd.DataFrame()

# parse a single-location vehicle counts response
def parse_single_location_vehicle_counts(response: dict) -> pd.DataFrame:
    """
    Convert a single-location counts response into a table.

    Parameters:
        response (dict): Decoded JSON response with "statusCode" and "body" keys

    Returns:
        pd.DataFrame: Records from "body" when "statusCode" is the string "200";
            an empty DataFrame for any other status
    """
    status = response.get("statusCode")
    if status == "200":
        records = response.get("body", [])
        return pd.DataFrame(records)
    # Error (or missing status): hand back an empty table rather than raising
    return pd.DataFrame()

# get derq speed counts function
def get_derq_speeds(location_id: str, start_date: str, end_date: str, buckets=default_speed_buckets, unit='mph') -> dict:
    """
    Request the speed distribution for one location over a date range.

    NOTE(review): a second `get_derq_speeds` is defined later in this cell and
    rebinds the name, so this version (the one that raises on HTTP errors) is not
    the one callers end up using.

    Parameters:
        location_id (str): Location ID placed in the locationId query value
        start_date (str): Start date placed verbatim in the startDate query value
        end_date (str): End date placed verbatim in the endDate query value
            (dates were originally documented as 'YYYY-MM-DD', but the "Update All"
            cell passes 'MM/DD/YYYY' -- TODO confirm what the API accepts)
        buckets (str): Value sent as speedBuckets; defaults to default_speed_buckets
        unit (str): Value sent as speedUnit; defaults to 'mph'

    Returns:
        dict: Decoded JSON body of the '/speed-distribution' response

    Raises:
        requests.HTTPError: If the response status is 4xx/5xx
    """
    query = (
        f"locationId={location_id}&startDate={start_date}&endDate={end_date}"
        f"&speedBuckets={buckets}&speedUnit={unit}"
    )
    reply = requests.get(f"{derq_api_url}/speed-distribution?{query}", headers=headers)
    reply.raise_for_status()
    return reply.json()
# parse speed response
def parse_speed_response(response: dict) -> pd.DataFrame:
    """
    Convert a speed-distribution response into a table.

    Parameters:
        response (dict): Decoded JSON response with "statusCode" and "body" keys

    Returns:
        pd.DataFrame: Records from "body" when "statusCode" is the string "200";
            an empty DataFrame for any other status
    """
    request_failed = response.get("statusCode") != "200"
    if request_failed:
        return pd.DataFrame()
    speed_rows = response.get("body", [])
    return pd.DataFrame(speed_rows)
    
## Reed's work ###
def get_derq_events(location, start, end, event_types=all_event_types):
    """
    Request safety-insight events for one location, filtered by event type.

    Calls the same '/safety-insights' endpoint as get_derq_safety_insights but adds
    an eventTypes query value and does not call raise_for_status().

    Parameters:
        location: Value sent as locationId
        start: Value sent as startDate
        end: Value sent as endDate
        event_types: Value sent as eventTypes; defaults to all_event_types

    Returns:
        Decoded JSON body of the response
    """
    endpoint = derq_api_url + '/safety-insights'
    query = f'?locationId={location}&startDate={start}&endDate={end}&eventTypes={event_types}'
    reply = requests.get(endpoint + query, headers=headers)
    return reply.json()

def get_derq_veh_counts(location, start, end):
    """
    Request vehicle counts for one location and return the decoded JSON.

    NOTE(review): this redefines the `get_derq_veh_counts` declared earlier in this
    cell and, unlike that version, does not call raise_for_status(), so HTTP error
    statuses are not raised as exceptions here.

    Parameters:
        location: Value sent as locationId
        start: Value sent as startDate
        end: Value sent as endDate

    Returns:
        Decoded JSON body of the '/counts/vehicle' response
    """
    endpoint = derq_api_url + '/counts/vehicle'
    query = f'?locationId={location}&startDate={start}&endDate={end}'
    reply = requests.get(endpoint + query, headers=headers)
    return reply.json()

def get_derq_vru_counts(location, start, end):
    """
    Request VRU counts for one location and return the decoded JSON.

    NOTE(review): this redefines the `get_derq_vru_counts` declared earlier in this
    cell and, unlike that version, does not call raise_for_status(), so HTTP error
    statuses are not raised as exceptions here.

    Parameters:
        location: Value sent as locationId
        start: Value sent as startDate
        end: Value sent as endDate

    Returns:
        Decoded JSON body of the '/counts/vru' response
    """
    endpoint = derq_api_url + '/counts/vru'
    query = f'?locationId={location}&startDate={start}&endDate={end}'
    reply = requests.get(endpoint + query, headers=headers)
    return reply.json()

def get_derq_speeds(location, start, end, buckets=default_speed_buckets, unit='mph'):
    """
    Request the speed distribution for one location and return the decoded JSON.

    NOTE(review): this redefines the `get_derq_speeds` declared earlier in this
    cell and, unlike that version, does not call raise_for_status(), so HTTP error
    statuses are not raised as exceptions here.

    Parameters:
        location: Value sent as locationId
        start: Value sent as startDate
        end: Value sent as endDate
        buckets: Value sent as speedBuckets; defaults to default_speed_buckets
        unit: Value sent as speedUnit; defaults to 'mph'

    Returns:
        Decoded JSON body of the '/speed-distribution' response
    """
    endpoint = derq_api_url + '/speed-distribution'
    query = (
        f'?locationId={location}&startDate={start}&endDate={end}'
        f'&speedBuckets={buckets}&speedUnit={unit}'
    )
    reply = requests.get(endpoint + query, headers=headers)
    return reply.json()

def process_response_derq(response, intersection_id, df_list):
    """
    Append a response's records, tagged with an intersection id, to df_list.

    Nothing is appended when `response` is falsy or its "body" is missing or empty.

    Parameters:
        response: Decoded JSON response whose "body" key holds the records
        intersection_id: Value written to the 'intersection_id' column of every row
        df_list (list): List that receives the resulting DataFrame (mutated in place)

    Returns:
        None
    """
    if not response:
        return
    records = response.get('body', [])
    if not records:
        return
    frame = pd.DataFrame(records)
    frame['intersection_id'] = intersection_id
    df_list.append(frame)


### Update All

In [None]:
# Fetch the DERQ location list, parse it into a DataFrame, and display it as the
# cell output (df_locations is also read by the per-location loops in later cells)
df_locations = parse_locations_response(get_derq_locations())
df_locations

In [None]:
# Fetch location data (one row per location; the loops below read the
# 'LocationId' and 'LocationName' columns)
df_locations = parse_locations_response(get_derq_locations())

### API will only return 30 days of data at a time, so set a date range accordingly ###

# date range for DERQ API calls, written as MM/DD/YYYY: the save steps below parse
# these strings with "%m/%d/%Y" to build the CSV file names
start_date = "06/28/2025"
end_date   = "07/14/2025"

### Fetch DERQ Vehicle Counts ###

# Retrieve vehicle counts for every location over start_date..end_date
veh_data = []
for _, row in df_locations.iterrows():
    location_id = row['LocationId']
    location_name = row['LocationName']
    print(f"Fetching traffic count data for: {location_name}")
    veh_resp = get_derq_veh_counts(location_id, start_date, end_date)
    df_counts = parse_single_location_vehicle_counts(veh_resp)
    # Tag the rows so they stay attributable after concatenation
    df_counts['LocationId'] = location_id
    df_counts['LocationName'] = location_name
    veh_data.append(df_counts)
# Combine all results into a single DataFrame
df_vehicle_counts = pd.concat(veh_data, ignore_index=True)
# Optional: Convert timestamp column to datetime if it exists
if 'timestamp' in df_vehicle_counts.columns:
    df_vehicle_counts['timestamp'] = pd.to_datetime(df_vehicle_counts['timestamp'])
# Save to CSV, named after the requested date range
# (the pattern is called file_date_fmt so the builtin `format` is not shadowed)
file_date_fmt = "%m_%d_%Y"
start_date_f = datetime.strptime(start_date, "%m/%d/%Y").strftime(file_date_fmt)
end_date_f = datetime.strptime(end_date, "%m/%d/%Y").strftime(file_date_fmt)
csv_name = f'vehicle_counts_{start_date_f}_to_{end_date_f}.csv'
# to_csv does not create missing folders, so make sure the output folder exists
out_dir.mkdir(parents=True, exist_ok=True)
df_vehicle_counts.to_csv(out_dir / csv_name, index=False)


### Fetch DERQ Safety Insights ###

# Retrieve safety insights for every location over start_date..end_date
safety_data = []
for _, row in df_locations.iterrows():
    location_id = row['LocationId']
    location_name = row['LocationName']
    print(f"Fetching safety insights for: {location_name}")
    safety_resp = get_derq_safety_insights(location_id, start_date, end_date)
    df_safety = parse_safety_insights_response(safety_resp)
    # Tag the rows so they stay attributable after concatenation
    df_safety['LocationId'] = location_id
    df_safety['LocationName'] = location_name
    safety_data.append(df_safety)
# Combine all results into a single DataFrame
df_safety_insights = pd.concat(safety_data, ignore_index=True)
# Optional: Convert datetime columns if present
for col in ['timestamp', 'event_time']:
    if col in df_safety_insights.columns:
        df_safety_insights[col] = pd.to_datetime(df_safety_insights[col])
# Save to CSV, named after the requested date range
# (the pattern is called file_date_fmt so the builtin `format` is not shadowed)
file_date_fmt = "%m_%d_%Y"
start_date_f = datetime.strptime(start_date, "%m/%d/%Y").strftime(file_date_fmt)
end_date_f = datetime.strptime(end_date, "%m/%d/%Y").strftime(file_date_fmt)
csv_name = f'safety_insights_{start_date_f}_to_{end_date_f}.csv'
# to_csv does not create missing folders, so make sure the output folder exists
out_dir.mkdir(parents=True, exist_ok=True)
df_safety_insights.to_csv(out_dir / csv_name, index=False)


### Fetch DERQ VRU Counts ###

# Get VRU counts for every location over start_date..end_date
vru_data = []
for _, row in df_locations.iterrows():
    location_id = row['LocationId']
    location_name = row['LocationName']
    print(f"Fetching VRU data for: {location_name}")
    vru_resp = get_derq_vru_counts(location_id, start_date, end_date)
    # Use the VRU parser rather than the vehicle-named one (both apply the same
    # "statusCode"/"body" handling, so the result is unchanged)
    df_vru = parse_vru_counts_response(vru_resp)
    df_vru['LocationId'] = location_id
    df_vru['LocationName'] = location_name
    vru_data.append(df_vru)
# Combine all results into a single DataFrame
df_vru_counts = pd.concat(vru_data, ignore_index=True)
# Optional: Convert timestamp column to datetime if it exists
if 'timestamp' in df_vru_counts.columns:
    df_vru_counts['timestamp'] = pd.to_datetime(df_vru_counts['timestamp'])
# Save to CSV, named after the requested date range
# (the pattern is called file_date_fmt so the builtin `format` is not shadowed)
file_date_fmt = "%m_%d_%Y"
start_date_f = datetime.strptime(start_date, "%m/%d/%Y").strftime(file_date_fmt)
end_date_f = datetime.strptime(end_date, "%m/%d/%Y").strftime(file_date_fmt)
csv_name = f'vru_counts_{start_date_f}_to_{end_date_f}.csv'
# to_csv does not create missing folders, so make sure the output folder exists
out_dir.mkdir(parents=True, exist_ok=True)
df_vru_counts.to_csv(out_dir / csv_name, index=False)


### Fetch DERQ Speed Counts ###
# Speed buckets sent with every speed request in this section
buckets = '5,10,15,20,25'

# get speed data for every location over start_date..end_date
speed_data = []
for _, row in df_locations.iterrows():
    location_id = row['LocationId']
    location_name = row['LocationName']
    print(f"Fetching speed data for: {location_name}")
    # Pass `buckets` explicitly: it used to be assigned but never sent, so editing
    # it had no effect on the request
    speed_resp = get_derq_speeds(location_id, start_date, end_date, buckets=buckets)
    df_speed = parse_speed_response(speed_resp)
    df_speed['LocationId'] = location_id
    df_speed['LocationName'] = location_name
    speed_data.append(df_speed)
# Combine all results into a single DataFrame
df_speed_distribution = pd.concat(speed_data, ignore_index=True)
# Optional: Convert datetime columns if present
for col in ['timestamp']:
    if col in df_speed_distribution.columns:
        df_speed_distribution[col] = pd.to_datetime(df_speed_distribution[col])
# Save to CSV, named after the requested date range
# (the pattern is called file_date_fmt so the builtin `format` is not shadowed)
file_date_fmt = "%m_%d_%Y"
start_date_f = datetime.strptime(start_date, "%m/%d/%Y").strftime(file_date_fmt)
end_date_f   = datetime.strptime(end_date, "%m/%d/%Y").strftime(file_date_fmt)
csv_name = f'speed_distribution_{start_date_f}_to_{end_date_f}.csv'
# to_csv does not create missing folders, so make sure the output folder exists
out_dir.mkdir(parents=True, exist_ok=True)
df_speed_distribution.to_csv(out_dir / csv_name, index=False)

print("DERQ data retrieval and processing complete.")

In [None]:
# Print the column/dtype/non-null summary of whatever `df` currently holds.
# NOTE(review): no cell above this one assigns `df`; it is only set by later cells,
# so this depends on the order in which the notebook cells were executed.
df.info()

In [None]:
# build daily average table by location
import pandas as pd
from arcgis.features import FeatureLayer

# get traffic volume data
traffic_service = "https://maps.trpa.org/server/rest/services/Transportation_SMART/FeatureServer/3"


def get_fs_data(service_url):
    """
    Query an ArcGIS feature layer and return its features as a table.

    Parameters:
        service_url (str): URL of the feature layer to query

    Returns:
        pd.DataFrame: One row per returned feature, built from each feature's
            `attributes` mapping
    """
    # query() is called with no arguments, so the layer's default query applies
    features = FeatureLayer(service_url).query().features
    return pd.DataFrame([feat.attributes for feat in features])

# get fs data as a df
# Load the feature-service records into a DataFrame
df = get_fs_data(traffic_service)

# Daily total per location: sum 'counts' within each (Date, LocationName) pair
daily_totals = (
    df.groupby(['Date', 'LocationName'])['counts']
    .sum()
    .reset_index()
)

# Mean of those daily totals for each location, reported as 'Avg_Daily_Count'
daily_avg = (
    daily_totals.groupby('LocationName')['counts']
    .mean()
    .reset_index()
    .rename(columns={'counts': 'Avg_Daily_Count'})
)

print(daily_avg)


### Dev

#### Location Data

In [None]:
# Fetch the location list, tabulate it, save it as locations.csv and display it
# (the original note lists the columns as LocationID, LocationName, Latitude, Longitude)
locations = get_derq_locations()
df = parse_locations_response(locations)
# to_csv does not create missing folders, so make sure the output folder exists
out_dir.mkdir(parents=True, exist_ok=True)
df.to_csv(out_dir/'locations.csv', index=False)
df

#### Vehicle Count Data

In [None]:
# Build an ISO (YYYY-MM-DD) date window: today in UTC and 30 days earlier
from datetime import timezone  # local import: the setup cell only imports datetime/timedelta

# datetime.utcnow() is deprecated since Python 3.12; take one timezone-aware reading
# so both ends of the window come from the same instant
today_utc = datetime.now(timezone.utc).date()
end_date = today_utc.isoformat()
start_date = (today_utc - timedelta(days=30)).isoformat()
location_id = '6786b290ddb02855b751179d'  # Example location ID
veh_data = get_derq_veh_counts(location_id, start_date, end_date)
df = parse_single_location_vehicle_counts(veh_data)
df

In [None]:
# Fetch location data
df_locations = parse_locations_response(get_derq_locations())

# Retrieve vehicle counts for every location over start_date..end_date
data = []
for _, row in df_locations.iterrows():
    location_id = row['LocationId']
    location_name = row['LocationName']
    print(f"Fetching data for: {location_name}")
    veh_data = get_derq_veh_counts(location_id, start_date, end_date)
    df_counts = parse_single_location_vehicle_counts(veh_data)
    df_counts['LocationId'] = location_id
    df_counts['LocationName'] = location_name
    data.append(df_counts)

# Combine all results into a single DataFrame
df_vehicle_counts = pd.concat(data, ignore_index=True)

# Optional: Convert timestamp column to datetime if it exists
if 'timestamp' in df_vehicle_counts.columns:
    df_vehicle_counts['timestamp'] = pd.to_datetime(df_vehicle_counts['timestamp'])

# Save to CSV, named after the requested date range.
# pd.to_datetime accepts both the 'MM/DD/YYYY' strings set in the "Update All" cell
# and the ISO 'YYYY-MM-DD' strings set in the cell above; the previous
# strptime(..., "%m/%d/%Y") raised ValueError on the ISO form.
# (the pattern is called file_date_fmt so the builtin `format` is not shadowed)
file_date_fmt = "%m_%d_%Y"
start_date_f = pd.to_datetime(start_date).strftime(file_date_fmt)
end_date_f = pd.to_datetime(end_date).strftime(file_date_fmt)
csv_name = f'vehicle_counts_{start_date_f}_to_{end_date_f}.csv'
# to_csv does not create missing folders, so make sure the output folder exists
out_dir.mkdir(parents=True, exist_ok=True)
df_vehicle_counts.to_csv(out_dir / csv_name, index=False)


####  Safety Data

In [None]:
# # get locations from csv
# df_locations = pd.read_csv(out_dir / 'locations.csv')
# # last 30 days
# start_date = (datetime.utcnow() - timedelta(days=30)).date().isoformat()
# end_date = datetime.utcnow().date().isoformat()

# Retrieve safety insights for every location over start_date..end_date
# (df_locations, start_date and end_date come from previously executed cells)
safety_data = []
for _, row in df_locations.iterrows():
    location_id = row['LocationId']
    location_name = row['LocationName']
    print(f"Fetching safety insights for: {location_name}")
    safety_resp = get_derq_safety_insights(location_id, start_date, end_date)
    df_safety = parse_safety_insights_response(safety_resp)
    df_safety['LocationId'] = location_id
    df_safety['LocationName'] = location_name
    safety_data.append(df_safety)

# Combine all results into a single DataFrame
df_safety_insights = pd.concat(safety_data, ignore_index=True)

# Optional: Convert datetime columns if present
for col in ['timestamp', 'event_time']:
    if col in df_safety_insights.columns:
        df_safety_insights[col] = pd.to_datetime(df_safety_insights[col])

# Save to CSV, named after the requested date range.
# pd.to_datetime accepts both the 'MM/DD/YYYY' strings set in the "Update All" cell
# and the ISO 'YYYY-MM-DD' strings set in the vehicle-count dev cell; the previous
# strptime(..., "%m/%d/%Y") raised ValueError on the ISO form.
# (the pattern is called file_date_fmt so the builtin `format` is not shadowed)
file_date_fmt = "%m_%d_%Y"
start_date_f = pd.to_datetime(start_date).strftime(file_date_fmt)
end_date_f = pd.to_datetime(end_date).strftime(file_date_fmt)
csv_name = f'safety_insights_{start_date_f}_to_{end_date_f}.csv'
# to_csv does not create missing folders, so make sure the output folder exists
out_dir.mkdir(parents=True, exist_ok=True)
df_safety_insights.to_csv(out_dir / csv_name, index=False)


#### VRU Counts

In [None]:
# Single-location check of the VRU counts endpoint, using whatever location_id,
# start_date and end_date are currently set in the notebook; the raw response is
# shown as the cell output.
# NOTE(review): vru_url is built here but is not passed to get_derq_vru_counts,
# which assembles its own URL.
vru_url = derq_api_url + f'/counts/vru?locationId={location_id}&startDate={start_date}&endDate={end_date}'
vru_data = get_derq_vru_counts(location_id, start_date, end_date)
vru_data

In [None]:

# Get VRU counts for every location over start_date..end_date
# (df_locations, start_date and end_date come from previously executed cells)
data = []
for _, row in df_locations.iterrows():
    location_id = row['LocationId']
    location_name = row['LocationName']
    print(f"Fetching VRU data for: {location_name}")
    vru_data = get_derq_vru_counts(location_id, start_date, end_date)
    # Use the VRU parser rather than the vehicle-named one (both apply the same
    # "statusCode"/"body" handling, so the result is unchanged)
    df_vru = parse_vru_counts_response(vru_data)
    df_vru['LocationId'] = location_id
    df_vru['LocationName'] = location_name
    data.append(df_vru)

# Combine all results into a single DataFrame
df_vru_counts = pd.concat(data, ignore_index=True)
# Optional: Convert timestamp column to datetime if it exists
if 'timestamp' in df_vru_counts.columns:
    df_vru_counts['timestamp'] = pd.to_datetime(df_vru_counts['timestamp'])

# Save to CSV, named after the requested date range.
# pd.to_datetime accepts both the 'MM/DD/YYYY' strings set in the "Update All" cell
# and the ISO 'YYYY-MM-DD' strings set in the vehicle-count dev cell; the previous
# strptime(..., "%m/%d/%Y") raised ValueError on the ISO form.
# (the pattern is called file_date_fmt so the builtin `format` is not shadowed)
file_date_fmt = "%m_%d_%Y"
start_date_f = pd.to_datetime(start_date).strftime(file_date_fmt)
end_date_f = pd.to_datetime(end_date).strftime(file_date_fmt)
csv_name = f'vru_counts_{start_date_f}_to_{end_date_f}.csv'
# to_csv does not create missing folders, so make sure the output folder exists
out_dir.mkdir(parents=True, exist_ok=True)
df_vru_counts.to_csv(out_dir / csv_name, index=False)


#### Speed Data

In [None]:
# Single-location check of the speed-distribution endpoint, using whatever
# location_id, start_date and end_date are currently set in the notebook; the raw
# response is shown as the cell output.
# NOTE(review): speed_url is built here but is not passed to get_derq_speeds, which
# assembles its own URL (and, unlike this one, adds speedBuckets and speedUnit).
speed_url = derq_api_url + f'/speed-distribution?locationId={location_id}&startDate={start_date}&endDate={end_date}'
speed_data = get_derq_speeds(location_id, start_date, end_date)
speed_data

In [None]:
# Speed buckets sent with every speed request in this cell
buckets = '5,10,15,20,25'

# get speed data for every location over start_date..end_date
# (df_locations, start_date and end_date come from previously executed cells)
data = []
for _, row in df_locations.iterrows():
    location_id = row['LocationId']
    location_name = row['LocationName']
    print(f"Fetching speed data for: {location_name}")
    # Pass `buckets` explicitly: it used to be assigned but never sent, so editing
    # it had no effect on the request
    speed_data = get_derq_speeds(location_id, start_date, end_date, buckets=buckets)
    df_speed = parse_speed_response(speed_data)
    df_speed['LocationId'] = location_id
    df_speed['LocationName'] = location_name
    data.append(df_speed)
# Combine all results into a single DataFrame
df_speed_distribution = pd.concat(data, ignore_index=True)
# Optional: Convert datetime columns if present
for col in ['timestamp']:
    if col in df_speed_distribution.columns:
        df_speed_distribution[col] = pd.to_datetime(df_speed_distribution[col])

# Save to CSV, named after the requested date range.
# pd.to_datetime accepts both the 'MM/DD/YYYY' strings set in the "Update All" cell
# and the ISO 'YYYY-MM-DD' strings set in the vehicle-count dev cell; the previous
# strptime(..., "%m/%d/%Y") raised ValueError on the ISO form.
# (the pattern is called file_date_fmt so the builtin `format` is not shadowed)
file_date_fmt = "%m_%d_%Y"
start_date_f = pd.to_datetime(start_date).strftime(file_date_fmt)
end_date_f   = pd.to_datetime(end_date).strftime(file_date_fmt)
csv_name = f'speed_distribution_{start_date_f}_to_{end_date_f}.csv'
# to_csv does not create missing folders, so make sure the output folder exists
out_dir.mkdir(parents=True, exist_ok=True)
df_speed_distribution.to_csv(out_dir / csv_name, index=False)