# NYC Street Camera Analytics Notebook

This notebook analyzes NYC traffic camera data streamed via Snowpipe Streaming High Speed v2.

**Data Sources:**
- 511NY API (https://511ny.org/)
- Snowpipe Streaming REST API

**Tables:**
- `DEMO.DEMO.NYC_CAMERA_DATA` - Standard table
- `DEMO.DEMO.NYC_CAMERA_ICEBERG` - Iceberg table

In [None]:
import snowflake.snowpark as snowpark
from snowflake.snowpark.context import get_active_session
import pandas as pd

# Reuse the Snowpark session that is already active for this notebook, then
# report the database and schema the session is currently pointed at.
session = get_active_session()
print(
    "Connected to Snowflake",
    f"Current database: {session.get_current_database()}",
    f"Current schema: {session.get_current_schema()}",
    sep="\n",
)

## 1. Data Overview

In [None]:
# Headline statistics for the standard camera table: total rows, distinct
# cameras, distinct roadways, and the earliest/latest image timestamps.
# The query aggregates without GROUP BY, so it yields a single summary row.
overview_sql = """
SELECT 
    COUNT(*) as total_records,
    COUNT(DISTINCT camera_id) as unique_cameras,
    COUNT(DISTINCT roadway_name) as unique_roadways,
    MIN(image_timestamp) as first_capture,
    MAX(image_timestamp) as last_capture
FROM DEMO.DEMO.NYC_CAMERA_DATA
"""

# Run the query in Snowflake, materialise the result as a pandas DataFrame,
# and render it with the notebook's rich display.
overview = (
    session
    .sql(overview_sql)
    .to_pandas()
)
display(overview)

In [None]:
# Most recent capture per camera within the last hour.
# QUALIFY keeps only the newest row of each camera_id partition; the outer
# ORDER BY / LIMIT then returns the 20 freshest of those rows.
recent_sql = """
SELECT 
    camera_id, name, roadway_name, direction_of_travel,
    latitude, longitude, image_timestamp
FROM DEMO.DEMO.NYC_CAMERA_DATA
WHERE image_timestamp >= DATEADD('hour', -1, CURRENT_TIMESTAMP())
QUALIFY ROW_NUMBER() OVER (PARTITION BY camera_id ORDER BY image_timestamp DESC) = 1
ORDER BY image_timestamp DESC
LIMIT 20
"""

# Execute in Snowflake, pull the rows into pandas, and show them.
recent = (
    session
    .sql(recent_sql)
    .to_pandas()
)
display(recent)

## 2. Camera Distribution by Roadway

In [None]:
# Per-roadway summary: number of distinct cameras, total capture rows, and the
# mean latitude/longitude of the roadway's rows (averaged over every capture
# row in the group, rounded to 4 decimals). Rows with a NULL or empty
# roadway_name are excluded.
#
# The ORDER BY carries explicit tie-breakers (total_captures, then
# roadway_name). With camera_count alone, roadways sharing the same count have
# no defined order, so the LIMIT 25 cut-off -- and any later head(N) taken
# from this frame -- could differ between runs.
roadway_sql = """
SELECT 
    roadway_name,
    COUNT(DISTINCT camera_id) as camera_count,
    COUNT(*) as total_captures,
    ROUND(AVG(latitude), 4) as avg_latitude,
    ROUND(AVG(longitude), 4) as avg_longitude
FROM DEMO.DEMO.NYC_CAMERA_DATA
WHERE roadway_name IS NOT NULL AND roadway_name != ''
GROUP BY roadway_name
ORDER BY camera_count DESC, total_captures DESC, roadway_name
LIMIT 25
"""

# Run the query in Snowflake and materialise the top 25 roadways in pandas.
roadways = session.sql(roadway_sql).to_pandas()
display(roadways)

In [None]:
import altair as alt

# Horizontal bar chart of the first 15 rows of `roadways`: bar length and
# colour both encode CAMERA_COUNT, and bars are sorted by descending x value.
chart = (
    alt.Chart(roadways.head(15))
    .mark_bar()
    .encode(
        x=alt.X('CAMERA_COUNT:Q', title='Number of Cameras'),
        y=alt.Y('ROADWAY_NAME:N', sort='-x', title='Roadway'),
        color=alt.Color('CAMERA_COUNT:Q', scale=alt.Scale(scheme='blues')),
    )
    .properties(title='Top 15 Roadways by Camera Count', width=600, height=400)
)
# Bare last expression so the notebook renders the chart.
chart

## 3. Geographic Analysis - Camera Locations

In [None]:
# One row per camera with usable coordinates.
#
# The table holds many capture rows per camera. SELECT DISTINCT over
# (camera_id, name, roadway_name, latitude, longitude) would emit several rows
# for a single camera whenever any of those attributes differs between
# captures, inflating the count printed below and over-plotting the camera.
# QUALIFY ROW_NUMBER() keeps only the most recent row per camera_id instead
# (the same pattern the "recent captures" query uses).
#
# The BETWEEN filters discard rows whose coordinates fall outside a coarse
# bounding box (latitude 40..45, longitude -75..-72).
locations_sql = """
SELECT
    camera_id, name, roadway_name, latitude, longitude
FROM DEMO.DEMO.NYC_CAMERA_DATA
WHERE latitude IS NOT NULL AND longitude IS NOT NULL
    AND latitude BETWEEN 40 AND 45
    AND longitude BETWEEN -75 AND -72
QUALIFY ROW_NUMBER() OVER (PARTITION BY camera_id ORDER BY image_timestamp DESC) = 1
"""
locations = session.sql(locations_sql).to_pandas()
# Counts every camera returned by the query, including any that fall outside
# the narrower axis domain used by the scatter plot below.
print(f"Total unique camera locations: {len(locations)}")

# Scatter of camera positions, zoomed to a fixed longitude/latitude window.
# clip=True is required because the axis domains are narrower than the SQL
# bounding box: without it, marks outside the domain are still drawn, spilling
# beyond the plot frame.
scatter = alt.Chart(locations).mark_circle(size=60, clip=True).encode(
    x=alt.X('LONGITUDE:Q', scale=alt.Scale(domain=[-74.5, -73.5])),
    y=alt.Y('LATITUDE:Q', scale=alt.Scale(domain=[40.5, 41.2])),
    color=alt.Color('ROADWAY_NAME:N', legend=None),
    tooltip=['NAME', 'ROADWAY_NAME', 'LATITUDE', 'LONGITUDE']
).properties(title='NYC Camera Locations', width=600, height=500)
scatter