In [None]:
# Import functions from project1.py
from project1 import read_air_quality, read_uhf

# Load data.
# Both paths are relative, so they resolve against the kernel's current
# working directory.
#
# uhf_dict is consumed elsewhere in this notebook as a mapping from a UHF geo id
# to a list of measurement records; date_dict as a mapping from a date string
# to a list of measurement records.
# NOTE(review): the records are unpacked as 4-item tuples with the date at
# index 2 and the PM2.5 reading at index 3 — confirm against project1.py.
uhf_dict, date_dict = read_air_quality("air_quality.csv")
# zip_to_uhf / borough_to_uhf are consumed elsewhere in this notebook as
# mappings from a ZIP code / borough name to an iterable of UHF geo ids.
zip_to_uhf, borough_to_uhf = read_uhf("uhf.csv")

# Quick sanity summary of what was loaded.
print("Data successfully loaded!")
print(f"Number of UHF regions: {len(uhf_dict)}")
print(f"Number of unique dates: {len(date_dict)}")

In [None]:
def _measurements_for_uhf_ids(uhf_ids):
    """Concatenate the measurement lists of every UHF id known to uhf_dict.

    Ids that are absent from ``uhf_dict`` are skipped silently. The result is
    a new list, so callers may modify it without touching ``uhf_dict``.
    """
    return [
        measurement
        for uid in uhf_ids
        if uid in uhf_dict
        for measurement in uhf_dict[uid]
    ]


def get_measurements_for_zip(zip_code):
    """Return all air quality measurements for a given ZIP.

    Args:
        zip_code: Key to look up in ``zip_to_uhf`` (the notebook passes a
            string such as "10463").

    Returns:
        A list with the measurements of every UHF region mapped to the ZIP,
        in mapping order. Empty when the ZIP is unknown.
    """
    return _measurements_for_uhf_ids(zip_to_uhf.get(zip_code, []))


def get_measurements_for_borough(borough):
    """Return all air quality measurements for a given borough.

    Args:
        borough: Key to look up in ``borough_to_uhf``.

    Returns:
        A list with the measurements of every UHF region mapped to the
        borough, in mapping order. Empty when the borough is unknown.
    """
    return _measurements_for_uhf_ids(borough_to_uhf.get(borough, []))


def get_measurements_for_year(year):
    """Collect the measurements of every date whose text contains ``year``.

    The match is a plain substring test of ``str(year)`` against each key of
    ``date_dict``; matching dates contribute their entries in dictionary order.
    """
    year_text = str(year)
    return [
        entry
        for day, day_entries in date_dict.items()
        if year_text in day
        for entry in day_entries
    ]


def avg_pm(measurements):
    """Return the average PM2.5 for a list of measurements.

    Args:
        measurements: Iterable of 4-item records whose last item is the PM2.5
            reading, given as a number or a numeric string. May be empty or
            ``None``.

    Returns:
        The mean of all readings that convert to ``float``, or ``None`` when
        there are no measurements or none of the readings is usable.
    """
    if not measurements:
        return None
    total = 0.0
    count = 0
    for (_, _, _, pm) in measurements:
        try:
            value = float(pm)
        except (TypeError, ValueError):
            # float() raises ValueError for non-numeric text and TypeError for
            # a missing (None) reading; both mean "no usable value", so skip.
            continue
        total += value
        count += 1
    return total / count if count > 0 else None

In [None]:
# Analysis cells: each cell below answers one question using the helper functions defined above.

In [None]:
zip_code = "10463"  # example ZIP

data = get_measurements_for_zip(zip_code)

# Pair every record with its parsed PM2.5 value (index 3), dropping records
# whose reading is missing or non-numeric. avg_pm already tolerates such rows;
# without this filter a single bad reading would crash max()/min() below.
numeric_data = []
for entry in data:
    try:
        numeric_data.append((float(entry[3]), entry))
    except (TypeError, ValueError):
        continue

if numeric_data:
    # On ties max()/min() return the first matching record, as before.
    _, max_entry = max(numeric_data, key=lambda pair: pair[0])
    _, min_entry = min(numeric_data, key=lambda pair: pair[0])

    # Index 2 is the date; index 1 is printed in parentheses
    # (presumably the region description — confirm against project1.py).
    print(f"ZIP {zip_code} → Highest PM2.5: {max_entry[3]} mcg/m³ on {max_entry[2]} ({max_entry[1]})")
    print(f"ZIP {zip_code} → Lowest PM2.5: {min_entry[3]} mcg/m³ on {min_entry[2]} ({min_entry[1]})")
else:
    print(f"No data found for ZIP {zip_code}.")

In [None]:
year = 2019
# NOTE(review): year_data is not referenced again within this cell.
year_data = get_measurements_for_year(year)

# Mean PM2.5 per UHF region, restricted to records whose date text (index 2)
# contains the year. Regions with no usable reading for the year are left out.
uhf_averages = {}
for geo_id in uhf_dict:
    in_year = [record for record in uhf_dict[geo_id] if str(year) in record[2]]
    yearly_mean = avg_pm(in_year)
    if yearly_mean is None:
        continue
    uhf_averages[geo_id] = yearly_mean

if not uhf_averages:
    print(f"No UHF data found for {year}.")
else:
    # Region with the highest yearly mean; ties resolve to the first one seen.
    worst_uhf = max(uhf_averages, key=lambda gid: uhf_averages[gid])
    print(f"Worst UHF ID in {year}: {worst_uhf} with average PM2.5 = {uhf_averages[worst_uhf]:.2f} mcg/m³")

In [None]:
# Average PM2.5 per borough for two reference years. A record belongs to a
# year when its date text (index 2) contains that year.
# Note: the loop rebinds the notebook-level name `year`.
for year in [2008, 2019]:
    print(f"\nAverage PM2.5 by borough in {year}:")
    for borough in borough_to_uhf.keys():
        measurements = [
            m for m in get_measurements_for_borough(borough) if str(year) in m[2]
        ]
        avg = avg_pm(measurements)
        # avg_pm signals "no usable data" with None. Compare against None
        # explicitly so a genuine average of 0.0 is still reported.
        if avg is not None:
            print(f"  {borough}: {avg:.2f} mcg/m³")
        else:
            print(f"  {borough}: No data")

In [None]:
# Overall average PM2.5 per borough, pooling every measurement of every UHF
# region mapped to that borough.
borough_avgs = {}
for borough in borough_to_uhf.keys():
    measurements = get_measurements_for_borough(borough)
    avg = avg_pm(measurements)
    # avg_pm returns None when there is no usable data. Test for None rather
    # than truthiness: an average of 0.0 is valid and would be the cleanest.
    if avg is not None:
        borough_avgs[borough] = avg

if borough_avgs:
    # Lowest average wins; ties resolve to the first borough encountered.
    cleanest = min(borough_avgs, key=borough_avgs.get)
    print(f"Cleanest borough overall: {cleanest} ({borough_avgs[cleanest]:.2f} mcg/m³)")
else:
    print("No borough data found.")

In [None]:
# Average PM2.5 per date across all measurements recorded for that date.
date_avgs = {}
for date, entries in date_dict.items():
    avg = avg_pm(entries)
    # avg_pm returns None when a date has no usable reading. Test for None
    # rather than truthiness so a 0.0 average is not discarded.
    if avg is not None:
        date_avgs[date] = avg

if date_avgs:
    # Highest average wins; ties resolve to the first date encountered.
    worst_date = max(date_avgs, key=date_avgs.get)
    print(f"Most polluted day: {worst_date} with citywide average {date_avgs[worst_date]:.2f} mcg/m³")
else:
    print("No date averages found.")