# SQL Queries: NYC Collisions Sample

**Author:** Yougi Jain  
**Date:** June 11, 2025  
**Project:** ds-fundamentals-ingest-clean  


## Setup

In [1]:
import sqlite3
import glob                 # To find and grab SQL files easily
import pandas as pd
from pathlib import Path
from IPython.display import display, Markdown


# Location of the cleaned SQLite database, relative to the notebook's working directory.
db_path = Path("../data/clean/data.db")       # Path to DB

# sqlite3.connect() creates a new, empty database file when the target does not
# exist, which would only surface later as a confusing "no such table" error.
# Fail immediately with a clear message instead.
if not db_path.is_file():
    raise FileNotFoundError(f"SQLite database not found: {db_path.resolve()}")

# Shared connection used by the query cells below; closed once validation finishes.
connection = sqlite3.connect(db_path)

## SQL Script Validation

In [2]:
# Run every SQL script in ../sql against the open connection, render a preview
# of each result, and fail loudly if a script returns no rows.
sql_dir   = Path("../sql")
sql_files = sorted(sql_dir.glob("*.sql"))    # sorted -> deterministic, filename order

# Without this guard an empty or mis-pathed directory would make the loop run
# zero times and the validation would "pass" without checking anything.
assert sql_files, f"No .sql files found in {sql_dir.resolve()}"

try:
    for sql_file in sql_files:
        name = sql_file.name
        display(Markdown(f"## `{name}`"))        # in-line header

        # Explicit encoding so the scripts read the same way on every platform.
        query = sql_file.read_text(encoding="utf-8")
        df = pd.read_sql_query(query, connection)

        assert not df.empty, f"`{name}` returned no rows"
        display(df.head(7))                      # preview only; never dump the full frame
finally:
    # Always release the database handle, even when a query or assertion fails.
    connection.close()

## `01_filter.sql`

Unnamed: 0,borough,zip_code,latitude,longitude,on_street_name,cross_street_name,off_street_name,number_of_persons_injured,number_of_persons_killed,number_of_pedestrians_injured,...,number_of_motorist_injured,number_of_motorist_killed,contributing_factor_vehicle_1,contributing_factor_vehicle_2,contributing_factor_vehicle_3,collision_id,vehicle_type_code_1,vehicle_type_code_2,vehicle_type_code_3,crash_datetime
0,,,,,LINDEN BOULEVARD,SOUTH CONDUIT AVENUE,,1,0,0,...,1,0,Traffic Control Disregarded,Unspecified,,4508554,Station Wagon/Sport Utility Vehicle,Station Wagon/Sport Utility Vehicle,,2022-03-08 23:00:00
1,BROOKLYN,11221.0,40.68942,-73.92197,,,1366 BROADWAY,1,0,0,...,1,0,Steering Failure,Unspecified,Unspecified,4601423,Sedan,Station Wagon/Sport Utility Vehicle,Station Wagon/Sport Utility Vehicle,2023-01-22 16:16:00
2,,,40.751526,-73.859184,108 STREET,,,1,0,0,...,0,0,Traffic Control Disregarded,Unspecified,,4441397,E-Bike,Station Wagon/Sport Utility Vehicle,,2021-07-24 21:20:00
3,QUEENS,11377.0,40.756298,-73.89893,32 AVENUE,68 STREET,,1,0,1,...,0,0,Driver Inattention/Distraction,,,4514997,Station Wagon/Sport Utility Vehicle,,,2022-03-31 09:00:00
4,BROOKLYN,11236.0,40.655132,-73.899506,,,10624 AVENUE D,1,0,0,...,1,0,Brakes Defective,Unspecified,Unspecified,4529541,Station Wagon/Sport Utility Vehicle,Station Wagon/Sport Utility Vehicle,Sedan,2022-05-19 15:51:00
5,QUEENS,11377.0,40.746483,-73.894936,BROOKLYN QUEENS EXPRESSWAY,ROOSEVELT AVENUE,,1,0,0,...,1,0,Driver Inattention/Distraction,Unspecified,,4445295,Sedan,PK,,2021-08-09 11:30:00
6,MANHATTAN,10019.0,40.76709,-73.97962,,,210 CENTRAL PARK SOUTH,1,0,0,...,0,0,Other Vehicular,Unspecified,,4484570,Station Wagon/Sport Utility Vehicle,Bike,,2021-12-07 20:30:00


## `02_aggregate.sql`

Unnamed: 0,borough,crash_count,total_injuries,avg_injuries_per_crash
0,,8739,4837,0.553496
1,BROOKLYN,6406,3276,0.511396
2,QUEENS,4904,2302,0.469413
3,BRONX,3224,1517,0.470533
4,MANHATTAN,3179,1283,0.403586
5,STATEN ISLAND,712,293,0.411517


## `03_time_analysis.sql`

Unnamed: 0,hour_label,crash_count
0,12 AM,1197
1,1 AM,626
2,2 AM,475
3,3 AM,456
4,4 AM,486
5,5 AM,521
6,6 AM,731
