In [35]:
# Required imports
import os
import warnings

import pandas as pd
from sqlalchemy import create_engine
warnings.filterwarnings("ignore")

In [36]:
# SQLAlchemy connection string format:
# postgresql+psycopg2://user:password@host:port/dbname
#
# SECURITY: the URL embeds the database password, so it must never be written
# into the notebook. It is read from the DATABASE_URL environment variable
# instead (set it before launching Jupyter).
# NOTE(review): the credential that used to be hardcoded in this cell should be
# treated as leaked and rotated.

DATABASE_URL = os.environ.get("DATABASE_URL")
if not DATABASE_URL:
    raise RuntimeError(
        "DATABASE_URL is not set. Export it before starting the kernel, e.g. "
        "postgresql+psycopg2://user:password@host:5432/dbname?sslmode=require"
    )

# Create the engine. No connection is opened here; SQLAlchemy connects lazily
# when the first query runs (see the connection test in the next cell).
engine = create_engine(DATABASE_URL)

In [37]:
# Smoke-test the connection with a trivial query before running the real ones.
probe_sql = "SELECT 1 as test;"
try:
    test_df = pd.read_sql(sql=probe_sql, con=engine)
except Exception as e:
    # Report the failure instead of raising so the message stays readable.
    print(f"❌ Connection failed: {e}")
else:
    print("✅ Connected successfully!")
    print(test_df)

✅ Connected successfully!
   test
0     1


In [38]:
# Preview the high school directory: all columns, first 5 rows only (LIMIT 5).
query_schools = "SELECT * FROM nyc_schools.high_school_directory LIMIT 5;"
df_schools = pd.read_sql(sql=query_schools, con=engine)

print("School details:")
df_schools.head()

School details:


Unnamed: 0,dbn,school_name,borough,building_code,phone_number,fax_number,grade_span_min,grade_span_max,expgrade_span_min,expgrade_span_max,...,number_programs,Location 1,Community Board,Council District,Census Tract,Zip Codes,Community Districts,Borough Boundaries,City Council Districts,Police Precincts
0,27Q260,Frederick Douglass Academy VI High School,Queens,Q465,718-471-2154,718-471-2890,9.0,12,,,...,1,"{'latitude': '40.601989336', 'longitude': '-73...",14,31,100802,20529,51,3,47,59
1,21K559,Life Academy High School for Film and Music,Brooklyn,K400,718-333-7750,718-333-7775,9.0,12,,,...,1,"{'latitude': '40.593593811', 'longitude': '-73...",13,47,306,17616,21,2,45,35
2,16K393,Frederick Douglass Academy IV Secondary School,Brooklyn,K026,718-574-2820,718-574-2821,9.0,12,,,...,1,"{'latitude': '40.692133704', 'longitude': '-73...",3,36,291,18181,69,2,49,52
3,08X305,Pablo Neruda Academy,Bronx,X450,718-824-1682,718-824-1663,9.0,12,,,...,1,"{'latitude': '40.822303765', 'longitude': '-73...",9,18,16,11611,58,5,31,26
4,03M485,Fiorello H. LaGuardia High School of Music & A...,Manhattan,M485,212-496-0700,212-724-5748,9.0,12,,,...,6,"{'latitude': '40.773670507', 'longitude': '-73...",7,6,151,12420,20,4,19,12


In [57]:
# Q1: Number of high schools in each borough, largest borough first.
query = """
SELECT borough, COUNT(*) as school_count
FROM nyc_schools.high_school_directory
GROUP BY borough
ORDER BY school_count DESC;
"""

df_q1 = pd.read_sql(sql=query, con=engine)

# Heading and table are emitted on separate lines.
print("Schools per borough:", df_q1, sep="\n")

Schools per borough:
         borough  school_count
0       Brooklyn           121
1          Bronx           118
2      Manhattan           106
3         Queens            80
4  Staten Island            10


In [55]:
# Q2: Average % of English Language Learners (ELL) per borough.
# The CTE keeps, for each school (dbn), the most recent school year that has a
# non-NULL ell_percent (rn = 1); the outer query averages those per borough.
# The inner JOIN means boroughs with no matching demographics rows are absent
# from the result.
query_q2 = """
WITH latest_ell AS (
    SELECT
        hsd.borough,
        sd.ell_percent,
        ROW_NUMBER() OVER (PARTITION BY sd.dbn ORDER BY sd.schoolyear DESC) as rn
    FROM nyc_schools.high_school_directory hsd
    JOIN nyc_schools.school_demographics sd ON hsd.dbn = sd.dbn
    WHERE sd.ell_percent IS NOT NULL
)
SELECT
    borough,
    AVG(ell_percent) as avg_ell_percent,
    COUNT(*) as schools_with_data
FROM latest_ell
WHERE rn = 1
GROUP BY borough
ORDER BY avg_ell_percent DESC;
"""
df_q2 = pd.read_sql(sql=query_q2, con=engine)

# Heading, table, then one trailing blank line.
print("Q2: Average ELL percentage per borough:", df_q2, "", sep="\n")

Q2: Average ELL percentage per borough:
     borough  avg_ell_percent  schools_with_data
0  Manhattan         8.814286                  7



In [56]:
# Q3: Top 3 schools per borough with highest special education percentage.
# Step 1 (latest_sped_data): per school (dbn), number the school years from the
#         newest down, considering only rows with a non-NULL sped_percent.
# Step 2 (ranked_schools): keep each school's newest row (rn = 1) and number
#         schools within a borough by sped_percent, highest first.
# Step 3: keep positions 1-3 in every borough.
query_q3 = """
WITH latest_sped_data AS (
    SELECT
        hsd.borough,
        hsd.school_name,
        sd.sped_percent,
        ROW_NUMBER() OVER (PARTITION BY sd.dbn ORDER BY sd.schoolyear DESC) as rn
    FROM nyc_schools.high_school_directory hsd
    JOIN nyc_schools.school_demographics sd ON hsd.dbn = sd.dbn
    WHERE sd.sped_percent IS NOT NULL
),
ranked_schools AS (
    SELECT
        borough,
        school_name,
        sped_percent,
        ROW_NUMBER() OVER (PARTITION BY borough ORDER BY sped_percent DESC) as rank
    FROM latest_sped_data
    WHERE rn = 1
)
SELECT
    borough,
    school_name,
    sped_percent
FROM ranked_schools
WHERE rank <= 3
ORDER BY borough, rank;
"""

# Load the result and round the percentage to 1 decimal in a single expression.
df_q3 = pd.read_sql(sql=query_q3, con=engine).assign(
    sped_percent=lambda frame: frame["sped_percent"].round(1)
)

print(
    "Q3: Top 3 Schools per Borough by Special Education Percentage:",
    df_q3.to_string(index=False),
    sep="\n",
)

Q3: Top 3 Schools per Borough by Special Education Percentage:
  borough                                   school_name  sped_percent
Manhattan                    East Side Community School          26.4
Manhattan                       Marta Valle High School          25.9
Manhattan Henry Street School for International Studies          24.9


In [58]:
# Final summary, built from the query results (df_q1, df_q2, df_q3) rather than
# hand-typed numbers, so the printed figures cannot drift from what the queries
# actually returned.
print("=== DAY 3 SQL ANALYSIS SUMMARY ===")

print("\n1. School Distribution by Borough:")
for borough, school_count in zip(df_q1["borough"], df_q1["school_count"]):
    print(f"   {borough}: {school_count} schools")

# Boroughs that appear in the Q2 result, i.e. that had demographics rows to join.
demo_boroughs = df_q2["borough"].tolist()
single_demo_borough = len(demo_boroughs) == 1

print("\n2. Average ELL % by Borough:")
for borough, avg_ell in zip(df_q2["borough"], df_q2["avg_ell_percent"]):
    # Only flag the coverage caveat when exactly one borough has data.
    caveat = " (only borough with demographic data)" if single_demo_borough else ""
    print(f"   {borough}: {float(avg_ell):.2f}%{caveat}")

print("\n3. Top Special Education Schools:")
sped_boroughs = df_q3["borough"].drop_duplicates().tolist()
for borough in sped_boroughs:
    if len(sped_boroughs) == 1:
        print(f"   {borough} only (demographic data limitation):")
    else:
        print(f"   {borough}:")
    top_schools = df_q3[df_q3["borough"] == borough]
    for school_name, sped in zip(top_schools["school_name"], top_schools["sped_percent"]):
        print(f"   - {school_name}: {float(sped):.1f}%")

if demo_boroughs:
    # NOTE(review): the 2005-2012 range is hardcoded, not queried in this
    # notebook — confirm against the schoolyear values in school_demographics.
    print(
        "\nNote: Demographic data only available for "
        f"{', '.join(demo_boroughs)} schools (2005-2012)"
    )
else:
    print("\nNote: No demographic data matched any school in the directory")

=== DAY 3 SQL ANALYSIS SUMMARY ===

1. School Distribution by Borough:
   Brooklyn: 121 schools
   Bronx: 118 schools
   Manhattan: 106 schools
   Queens: 80 schools
   Staten Island: 10 schools

2. Average ELL % by Borough:
   Manhattan: 8.81% (only borough with demographic data)

3. Top Special Education Schools:
   Manhattan only (demographic data limitation):
   - East Side Community School: 26.4%
   - Marta Valle High School: 25.9%
   - Henry Street School for International Studies: 24.9%

Note: Demographic data only available for Manhattan schools (2005-2012)


### Key Findings:
- **Top school:** East Side Community School (26.4% special education students)
- **Second highest:** Marta Valle High School (25.9%)
- **Third highest:** Henry Street School for International Studies (24.9%)
- **Data limitation:** Only Manhattan schools have special education demographic data

---

## Summary and Insights

### 📊 Data Availability Assessment:
1. **High School Directory:** Complete data for all 435 schools across 5 boroughs
2. **Demographics Data:** Limited to Manhattan schools only (historical 2005-2012)
3. **Join Success:** Only 7 schools have matching demographic data (the `schools_with_data` count in the Q2 output)

### 🔍 Key Insights:

**1. Borough Distribution:**
- Brooklyn and Bronx dominate the NYC high school landscape with ~55% of all schools
- Staten Island has significantly fewer schools, reflecting its smaller population

**2. English Language Learners:**
- Manhattan schools average 8.81% ELL students
- Cannot compare across boroughs due to data limitations
- Suggests need for targeted ELL support programs in Manhattan

**3. Special Education Services:**
- Top Manhattan schools serve 24-26% special education students
- May reflect inclusive education practices in these schools, although an enrollment share alone cannot establish this
- Data suggests some schools specialize in special education services

### 🚧 Data Quality Considerations:
- **Incomplete demographic coverage:** Only Manhattan represented
- **Historical data:** Demographics from 2005-2012 may not reflect current conditions  
- **Missing boroughs:** Brooklyn, Bronx, Queens, Staten Island lack demographic data
- **Recommendation:** Additional data sources needed for comprehensive borough analysis

---

## Technical Notes

- **Database:** PostgreSQL with `nyc_schools` schema
- **Tables used:** `high_school_directory` and `school_demographics`
- **Join key:** `dbn` (District Borough Number)
- **SQL techniques:** CTEs, window functions, aggregation
- **Data processing:** Python pandas for post-query formatting

In [None]:
# NOTE: these are reference notes, not executable code. They are kept as
# comments so that "Restart Kernel -> Run All" does not fail with a SyntaxError.
#
# Schema: a folder-like namespace that organizes tables in a database
# Database
# ├── public schema (default)
# ├── nyc_schools schema 📁
# │   ├── high_school_directory (table)
# │   ├── school_demographics (table)
# │   └── other tables...
# └── other schemas...
#
# Visual Explanation
# Database Structure:
# ├── public schema (default)
# │   └── (no high_school_directory table here)
# ├── nyc_schools schema
# │   └── high_school_directory ✅ (table is HERE)
#
# Database Instructor
# NYC Database:
# ├── nyc_schools schema
# │   ├── high_school_directory
# │   ├── elementary_schools
# │   └── school_demographics
# ├── nyc_transport schema
# │   ├── subway_stations
# │   └── bus_routes
# ├── nyc_housing schema
# │   ├── rentals
# │   └── sales