In [1]:
import pandas as pd
import psycopg2

In [2]:
# DB connection setup using hardcoded credentials (for onboarding only)
# Using a context manager (`with` statement) to ensure the connection is closed properly
try:
    with psycopg2.connect(
        dbname="neondb",
        user="neondb_owner",
        password="npg_CeS9fJg2azZD",
        host="ep-falling-glitter-a5m0j5gk-pooler.us-east-2.aws.neon.tech",
        port="5432",
        sslmode="require"
    ) as conn:
        cur = conn.cursor()

except psycopg2.Error as e:
    print(f"Database error occurred: {e}")
except Exception as e:
    print(f"An unexpected error occurred: {e}")

You’ll be querying these tables:

high_school_directory – School names, locations, types, programs

school_demographics – Enrollment data, ELL, FRPL, disabilities, etc.

school_safety_report – Reported incidents by type and location.

In [3]:
query = "SELECT * FROM nyc_schools.high_school_directory LIMIT 5;"
df = pd.read_sql(query, conn)
df.head()

  df = pd.read_sql(query, conn)


Unnamed: 0,dbn,school_name,borough,building_code,phone_number,fax_number,grade_span_min,grade_span_max,expgrade_span_min,expgrade_span_max,...,number_programs,Location 1,Community Board,Council District,Census Tract,Zip Codes,Community Districts,Borough Boundaries,City Council Districts,Police Precincts
0,27Q260,Frederick Douglass Academy VI High School,Queens,Q465,718-471-2154,718-471-2890,9.0,12,,,...,1,"{'latitude': '40.601989336', 'longitude': '-73...",14,31,100802,20529,51,3,47,59
1,21K559,Life Academy High School for Film and Music,Brooklyn,K400,718-333-7750,718-333-7775,9.0,12,,,...,1,"{'latitude': '40.593593811', 'longitude': '-73...",13,47,306,17616,21,2,45,35
2,16K393,Frederick Douglass Academy IV Secondary School,Brooklyn,K026,718-574-2820,718-574-2821,9.0,12,,,...,1,"{'latitude': '40.692133704', 'longitude': '-73...",3,36,291,18181,69,2,49,52
3,08X305,Pablo Neruda Academy,Bronx,X450,718-824-1682,718-824-1663,9.0,12,,,...,1,"{'latitude': '40.822303765', 'longitude': '-73...",9,18,16,11611,58,5,31,26
4,03M485,Fiorello H. LaGuardia High School of Music & A...,Manhattan,M485,212-496-0700,212-724-5748,9.0,12,,,...,6,"{'latitude': '40.773670507', 'longitude': '-73...",7,6,151,12420,20,4,19,12


🧮 School Distribution

How many schools are there in each borough?

In [4]:
# Count schools by borough
query1 = """
SELECT borough, COUNT(*) AS school_count
FROM nyc_schools.high_school_directory
GROUP BY borough
ORDER BY school_count DESC;
"""
df_result = pd.read_sql(query1, conn)
df_result

  df_result = pd.read_sql(query1, conn)


Unnamed: 0,borough,school_count
0,Brooklyn,121
1,Bronx,118
2,Manhattan,106
3,Queens,80
4,Staten Island,10


In [5]:
query = "SELECT * FROM nyc_schools.school_demographics LIMIT 5;"
df = pd.read_sql(query, conn)
df.info()

  df = pd.read_sql(query, conn)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 38 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   dbn                5 non-null      object 
 1   Name               5 non-null      object 
 2   schoolyear         5 non-null      int64  
 3   fl_percent         5 non-null      object 
 4   frl_percent        1 non-null      float64
 5   total_enrollment   5 non-null      int64  
 6   prek               5 non-null      object 
 7   k                  5 non-null      object 
 8   grade1             5 non-null      object 
 9   grade2             5 non-null      object 
 10  grade3             5 non-null      object 
 11  grade4             5 non-null      object 
 12  grade5             5 non-null      object 
 13  grade6             5 non-null      object 
 14  grade7             5 non-null      object 
 15  grade8             5 non-null      object 
 16  grade9             5 non-null 

🎓 Language Learners

What is the average % of English Language Learners (ELL) per borough?

In [6]:
query2 = """
SELECT di.borough, AVG(de.ell_percent) AS avg_pct_ell
FROM nyc_schools.high_school_directory di
JOIN nyc_schools.school_demographics de
ON di.dbn = de.dbn
GROUP BY di.borough;
"""
df_result = pd.read_sql(query2, conn)
df_result

  df_result = pd.read_sql(query2, conn)


Unnamed: 0,borough,avg_pct_ell
0,Manhattan,7.5725


🔗School supporting special needs

Using the data from the school demographics and high school directory, write a query to find the top 3 schools in each borough with the highest percentage of special education students (sped_percent)


In [7]:
query3 = """
WITH sped_data AS (
    SELECT
        di.borough,
        di.school_name,
        de.sped_percent,
        ROW_NUMBER() OVER (PARTITION BY de.dbn ORDER BY de.schoolyear DESC) as rn
    FROM nyc_schools.high_school_directory di
    JOIN nyc_schools.school_demographics de ON di.dbn = de.dbn
    WHERE de.sped_percent IS NOT NULL -- Filter out rows with NULL sped_percent
),
ranked_schools AS (
    SELECT
        borough,
        school_name,
        sped_percent,
        -- Use DENSE_RANK to handle ties correctly within the top 3
        ROW_NUMBER() OVER (PARTITION BY borough ORDER BY sped_percent DESC) as rank
    FROM sped_data
    WHERE rn = 1 -- Only consider the most recent year for each school
)
SELECT
    borough,
    school_name,
    sped_percent
FROM ranked_schools
WHERE rank <= 3 -- Select the top 3 schools in each borough
ORDER BY borough, rank;
"""
df_result = pd.read_sql(query3, conn)
df_result.index = df_result.index + 1
df_result

  df_result = pd.read_sql(query3, conn)


Unnamed: 0,borough,school_name,sped_percent
1,Manhattan,East Side Community School,26.4
2,Manhattan,Marta Valle High School,25.9
3,Manhattan,Henry Street School for International Studies,24.9


Summary of insights:

1. Need to re-establish the database connection again and again.
2. Brooklyn has highest number of schools.
3. English language learners and schools with highest % of special education students are in Manhatten borough.