### **Import Libraries**

In [5]:
# Import necessary libraries
import pandas as pd
from sqlalchemy import create_engine

### **Set Up Connection to the Database**

In [6]:
# DB connection setup
db_url = (
    "postgresql+psycopg2://neondb_owner:npg_CeS9fJg2azZD"
    "@ep-falling-glitter-a5m0j5gk-pooler.us-east-2.aws.neon.tech:5432/neondb"
    "?sslmode=require"
)

# Create engine and establish connection
conn = create_engine(db_url)

In [7]:
query = "SELECT * FROM nyc_schools.high_school_directory LIMIT 3;"
df = pd.read_sql(query, conn)
df.head()

Unnamed: 0,dbn,school_name,borough,building_code,phone_number,fax_number,grade_span_min,grade_span_max,expgrade_span_min,expgrade_span_max,...,number_programs,Location 1,Community Board,Council District,Census Tract,Zip Codes,Community Districts,Borough Boundaries,City Council Districts,Police Precincts
0,27Q260,Frederick Douglass Academy VI High School,Queens,Q465,718-471-2154,718-471-2890,9.0,12,,,...,1,"{'latitude': '40.601989336', 'longitude': '-73...",14,31,100802,20529,51,3,47,59
1,21K559,Life Academy High School for Film and Music,Brooklyn,K400,718-333-7750,718-333-7775,9.0,12,,,...,1,"{'latitude': '40.593593811', 'longitude': '-73...",13,47,306,17616,21,2,45,35
2,16K393,Frederick Douglass Academy IV Secondary School,Brooklyn,K026,718-574-2820,718-574-2821,9.0,12,,,...,1,"{'latitude': '40.692133704', 'longitude': '-73...",3,36,291,18181,69,2,49,52


In [8]:
query_sd = "SELECT * FROM nyc_schools.school_demographics LIMIT 3;"
df = pd.read_sql(query_sd, conn)
df.head()

Unnamed: 0,dbn,Name,schoolyear,fl_percent,frl_percent,total_enrollment,prek,k,grade1,grade2,...,black_num,black_per,hispanic_num,hispanic_per,white_num,white_per,male_num,male_per,female_num,female_per
0,01M015,P.S. 015 ROBERTO CLEMENTE,20052006,89.4,,281,15,36,40,33,...,74,26.3,189,67.3,5,1.8,158,56.2,123,43.8
1,01M015,P.S. 015 ROBERTO CLEMENTE,20062007,89.4,,243,15,29,39,38,...,68,28.0,153,63.0,4,1.6,140,57.6,103,42.4
2,01M015,P.S. 015 ROBERTO CLEMENTE,20072008,89.4,,261,18,43,39,36,...,77,29.5,157,60.2,7,2.7,143,54.8,118,45.2


### **School Distribution**

#### How many Schools are in each borough?
##### Explanation: This query counts the number of schools in each borough and orders the results by the count in descending order.

In [9]:
query1 = """
SELECT borough, COUNT(DISTINCT school_name) AS unique_school_count
FROM nyc_schools.high_school_directory
GROUP BY borough
ORDER BY unique_school_count DESC;
"""
df = pd.read_sql(query1, conn)
df.head()

Unnamed: 0,borough,unique_school_count
0,Brooklyn,121
1,Bronx,118
2,Manhattan,106
3,Queens,80
4,Staten Island,10


### **Language Learners**

#### What is the average % of English Language Learners (ELL) per borough?
##### Explanation: This query calculates the average percentage of English Language Learners (ELL) for each borough and orders the results by the average in descending order.

In [10]:
query2 = """
SELECT hsd.borough, AVG(sd.ell_percent) AS avg_ell_percent
FROM nyc_schools.high_school_directory hsd, nyc_schools.school_demographics sd
WHERE hsd.dbn = sd.dbn
GROUP BY hsd.borough
ORDER BY avg_ell_percent DESC;
"""
df = pd.read_sql(query2, conn)
df.head()

Unnamed: 0,borough,avg_ell_percent
0,Manhattan,7.5725


### **School Supporting Special Needs**

##### Explanation: This query finds the top 3 schools in each borough with the highest percentage of special education students (sped_percent). It uses a Common Table Expression (CTE) to first aggregate the maximum sped_percent for each school and then ranks them within each borough.


In [11]:
query3 = """
WITH ranked_schools AS (
    SELECT 
        hsd.borough,
        hsd.school_name,
        sd.sped_percent,
        ROW_NUMBER() OVER (PARTITION BY hsd.borough ORDER BY sd.sped_percent DESC) AS rank
    FROM 
        nyc_schools.high_school_directory hsd
    JOIN 
        nyc_schools.school_demographics sd
    ON 
        hsd.dbn = sd.dbn
)
SELECT 
    borough,
    school_name,
    sped_percent
FROM 
    ranked_schools
WHERE 
    rank <= 3
ORDER BY 
    borough, sped_percent DESC;
"""

df = pd.read_sql(query3, conn)
df.head()

Unnamed: 0,borough,school_name,sped_percent
0,Manhattan,East Side Community School,28.8
1,Manhattan,East Side Community School,27.7
2,Manhattan,East Side Community School,26.7


### **Insights from the Analysis**

1. **School Distribution**:
   - Boroughs with the most unique schools reflect higher population density or better resource allocation. Boroughs with fewer unique schools may indicate areas requiring additional educational infrastructure.

2. **ELL Distribution**:
   - Boroughs with higher percentages of English Language Learners (ELL) highlight areas needing targeted language support programs to improve inclusivity and learning outcomes.

3. **Special Education**:
   - Top schools with the highest special education percentages indicate where specialized resources and funding are most needed to support students with special needs.

4. **Key Trends**:
   - Disparities in unique school distribution, ELL percentages, and special education needs emphasize the importance of borough-specific strategies to address educational inequities.