# 🧠 Day 3 – SQL via Python: NYC School Data Exploration
In this notebook, you'll connect to a PostgreSQL database and execute SQL queries to explore NYC school data.

## 🔌 Step 1: Import Libraries

In [23]:
import pandas as pd
import psycopg2
import matplotlib.pyplot as plt

## 🔐 Step 2: Connect to the Database

In [24]:
# DB connection setup using hardcoded credentials (for onboarding only)
conn = psycopg2.connect(
    dbname="neondb",
    user="neondb_owner",
    password="npg_CeS9fJg2azZD",
    host="ep-falling-glitter-a5m0j5gk-pooler.us-east-2.aws.neon.tech",
    port="5432",
    sslmode="require"
)
cur = conn.cursor()

## 🔍 Step 3: Run a Test Query

In [25]:
query = "SELECT * FROM nyc_schools.high_school_directory LIMIT 5;"
df = pd.read_sql(query, conn)
df.head()

  df = pd.read_sql(query, conn)


Unnamed: 0,dbn,school_name,borough,building_code,phone_number,fax_number,grade_span_min,grade_span_max,expgrade_span_min,expgrade_span_max,...,number_programs,Location 1,Community Board,Council District,Census Tract,Zip Codes,Community Districts,Borough Boundaries,City Council Districts,Police Precincts
0,27Q260,Frederick Douglass Academy VI High School,Queens,Q465,718-471-2154,718-471-2890,9.0,12,,,...,1,"{'latitude': '40.601989336', 'longitude': '-73...",14,31,100802,20529,51,3,47,59
1,21K559,Life Academy High School for Film and Music,Brooklyn,K400,718-333-7750,718-333-7775,9.0,12,,,...,1,"{'latitude': '40.593593811', 'longitude': '-73...",13,47,306,17616,21,2,45,35
2,16K393,Frederick Douglass Academy IV Secondary School,Brooklyn,K026,718-574-2820,718-574-2821,9.0,12,,,...,1,"{'latitude': '40.692133704', 'longitude': '-73...",3,36,291,18181,69,2,49,52
3,08X305,Pablo Neruda Academy,Bronx,X450,718-824-1682,718-824-1663,9.0,12,,,...,1,"{'latitude': '40.822303765', 'longitude': '-73...",9,18,16,11611,58,5,31,26
4,03M485,Fiorello H. LaGuardia High School of Music & A...,Manhattan,M485,212-496-0700,212-724-5748,9.0,12,,,...,6,"{'latitude': '40.773670507', 'longitude': '-73...",7,6,151,12420,20,4,19,12


## ✅ Task Queries Below

In [26]:
# Example: Count schools by borough
query_schools_per_borough = """
SELECT borough, COUNT(DISTINCT dbn) AS school_count
FROM nyc_schools.high_school_directory
GROUP BY borough
ORDER BY school_count DESC;
"""
df_schools_per_borough = pd.read_sql(query_schools_per_borough, conn)

print("Schools per Borough (Unique DBNs)")
display(df_schools_per_borough)


Schools per Borough (Unique DBNs)


  df_schools_per_borough = pd.read_sql(query_schools_per_borough, conn)


Unnamed: 0,borough,school_count
0,Brooklyn,121
1,Bronx,118
2,Manhattan,106
3,Queens,80
4,Staten Island,10


In [19]:
# How many schools are there in each borough?

query_schools_per_borough = """
SELECT borough, COUNT(*) AS school_count
FROM nyc_schools.high_school_directory
GROUP BY borough
ORDER BY school_count DESC;
"""
df_schools_per_borough = pd.read_sql(query_schools_per_borough, conn)
print("Schools per Borough")
display(df_schools_per_borough)

Schools per Borough


  df_schools_per_borough = pd.read_sql(query_schools_per_borough, conn)


Unnamed: 0,borough,school_count
0,Brooklyn,121
1,Bronx,118
2,Manhattan,106
3,Queens,80
4,Staten Island,10


In [20]:
# What is the average % of English Language Learners (ELL) per borough?
query_avg_ell_per_borough = """
SELECT d.borough, ROUND(AVG(s.ell_percent::numeric), 2) AS avg_ell_percent
FROM nyc_schools.school_demographics s
JOIN nyc_schools.high_school_directory d
    ON s.dbn = d.dbn
GROUP BY d.borough
ORDER BY avg_ell_percent DESC;
"""
df_avg_ell_per_borough = pd.read_sql(query_avg_ell_per_borough, conn)
df_avg_ell_per_borough

  df_avg_ell_per_borough = pd.read_sql(query_avg_ell_per_borough, conn)


Unnamed: 0,borough,avg_ell_percent
0,Manhattan,7.57


In [28]:
query_top_sped_schools = """
WITH latest_year_per_borough AS (
    SELECT d.borough, MAX(s.schoolyear) AS latest_year
    FROM nyc_schools.school_demographics s
    JOIN nyc_schools.high_school_directory d
        ON d.dbn = s.dbn
    GROUP BY d.borough
),
ranked_sped AS (
    SELECT 
        d.borough, 
        d.school_name, 
        s.sped_percent,
        ROW_NUMBER() OVER (
            PARTITION BY d.borough
            ORDER BY s.sped_percent::numeric DESC
        ) AS rn
    FROM nyc_schools.school_demographics s
    JOIN nyc_schools.high_school_directory d
        ON d.dbn = s.dbn
    JOIN latest_year_per_borough l
        ON d.borough = l.borough
       AND s.schoolyear = l.latest_year
)
SELECT borough, school_name, sped_percent
FROM ranked_sped
WHERE rn <= 3
ORDER BY borough, rn;
"""

df_top_sped = pd.read_sql(query_top_sped_schools, conn)

print("Top 3 Schools per Borough (Latest Year) - Special Education %")
display(df_top_sped)

Top 3 Schools per Borough (Latest Year) - Special Education %


  df_top_sped = pd.read_sql(query_top_sped_schools, conn)


Unnamed: 0,borough,school_name,sped_percent
0,Manhattan,East Side Community School,26.4
1,Manhattan,Marta Valle High School,25.9
2,Manhattan,Henry Street School for International Studies,24.9


## 🧠 Insights

Write your observations, findings, and answers to the task questions here.