## Set up a connection to the training database using Python

In [52]:
# Import necessary libraries
import pandas as pd
from sqlalchemy import create_engine

In [53]:
# SQLAlchemy connection string format:

DATABASE_URL = (
    "postgresql+psycopg2://neondb_owner:npg_CeS9fJg2azZD"
    "@ep-falling-glitter-a5m0j5gk-pooler.us-east-2.aws.neon.tech:5432/neondb"
    "?sslmode=require"
)

# Create engine and establish connection
engine = create_engine(DATABASE_URL)

## How many schools are there in each borough?

In [54]:
# SQL query
query = "SELECT borough,COUNT (DISTINCT dbn) FROM nyc_schools.high_school_directory GROUP BY borough;"

In [55]:
df = pd.read_sql(query, engine)
df.head()

Unnamed: 0,borough,count
0,Bronx,118
1,Brooklyn,121
2,Manhattan,106
3,Queens,80
4,Staten Island,10


## What is the average % of English Language Learners (ELL) per borough?

In [56]:
# SQL query
query = """SELECT dir.borough,AVG(dem.ell_percent) AS avg_ell_percent 
FROM nyc_schools.school_demographics AS dem 
JOIN nyc_schools.high_school_directory AS dir 
ON dem.dbn = dir.dbn
GROUP BY dir.borough;"""

In [57]:
df = pd.read_sql(query, engine)
df.head()

Unnamed: 0,borough,avg_ell_percent
0,Manhattan,7.5725


## Top 3 schools in each borough with the highest percentage of special education students (sped_percent)

In [58]:
# SQL query

query = """WITH ranked AS (
    SELECT h.borough,
           h.school_name,
           d.sped_percent,

        -- Assign a row number (rank) within each borough
        -- Ordered by sped_percent in descending order (highest first)

           ROW_NUMBER() OVER (PARTITION BY h.borough ORDER BY d.sped_percent DESC) AS rn

    FROM nyc_schools.school_demographics d
    JOIN nyc_schools.high_school_directory h
         ON d.dbn = h.dbn
) 
--  Select only the top 3 schools per borough

SELECT borough,
       school_name,
       sped_percent
FROM ranked 
WHERE rn <= 3
ORDER BY borough, rn;"""

In [59]:
df = pd.read_sql(query, engine)
df.head()

Unnamed: 0,borough,school_name,sped_percent
0,Manhattan,East Side Community School,28.8
1,Manhattan,East Side Community School,27.7
2,Manhattan,East Side Community School,26.7


## Summary

- Successfully connected to the SQL database via Python using `pandas` and `SQLAlchemy`.  
- Created a Jupyter Notebook in VS Code and loaded the dataset for analysis.  
- Explored the data and wrote queries to answer questions; however, **questions 2, 3, and the assignment could not be fully answered**, because the dataset only contains information for the **Manhattan** area.  
- Basic analyses, such as average percentages and top schools by special education, were completed for the available data.