# 02 · EDA & Trends — Toronto 311
Loads the CSV exports to explore top complaint types, compare year-to-date (YTD) volumes against last year, and produce quick visuals.

In [None]:
import os, pandas as pd, numpy as np
import matplotlib.pyplot as plt
from datetime import date

# Input/output locations: CSV exports are read from EXPORT_DIR,
# and FIG_DIR is created up front so later cells can save figures into it.
EXPORT_DIR = "export"
FIG_DIR = "figures"
os.makedirs(FIG_DIR, exist_ok=True)


def _export_path(filename):
    """Return the path of *filename* inside EXPORT_DIR."""
    return os.path.join(EXPORT_DIR, filename)


# Counts broken down per the file name (year / type / division / ward).
df_counts = pd.read_csv(_export_path("311_counts_year_type_division_ward_2021_2025.csv"))
top_by_year = pd.read_csv(_export_path("311_top_types_by_year_2021_2025.csv"))
# 'day' is parsed as datetime so the .dt accessors used further down work.
daily = pd.read_csv(
    _export_path("311_daily_totals_2021_2025.csv"),
    parse_dates=["day"],
)

# Helper: current year & YTD cutoff (midnight of today's date)
today = pd.Timestamp(date.today())
this_year = today.year
cutoff = today.normalize()

# Year-to-date totals per year.
# Every year is truncated at the same day-of-year as the cutoff so that prior
# years are compared like-for-like with the (partial) current year. Filtering
# on `day <= cutoff` alone would keep the *full* totals of all earlier years.
# NOTE: day-of-year is shifted by one after February in leap years, so the
# alignment across leap/non-leap years can be off by a single day.
daily['year'] = daily['day'].dt.year
daily['yday'] = daily['day'].dt.dayofyear
ytd_mask = (daily['day'] <= cutoff) & (daily['yday'] <= cutoff.dayofyear)
ytd = (daily[ytd_mask]
       .groupby(['year'], as_index=False)['n'].sum()
       .sort_values('year'))
ytd


In [None]:
# Horizontal bar chart of the ten request types with the highest total count,
# summed over every row of the counts export.
type_totals = df_counts.groupby('type', as_index=False)['n'].sum()
overall_top = type_totals.sort_values('n', ascending=False).head(10)

fig, ax = plt.subplots()
ax.barh(overall_top['type'], overall_top['n'])
ax.set_title("Top 10 Toronto 311 Request Types (2021–2025 YTD)")
ax.set_xlabel("Count")
ax.invert_yaxis()  # put the largest bar at the top
fig.tight_layout()
fig.savefig(os.path.join(FIG_DIR, "top10_types_overall.png"), dpi=160)
plt.show()


In [None]:
# Plot: cumulative requests, current year to date vs. last year.
# Both series are drawn against day-of-year so the two curves overlap and can
# be compared directly; on a calendar-date axis each year occupies its own
# disjoint x-range and the lines never line up.
last_year = this_year - 1


def _cumulative_by_yday(frame, year):
    """Return *year*'s rows sorted by day, with `yday` and running-total `cum` columns."""
    rows = frame[frame['year'] == year].sort_values('day')
    return rows.assign(yday=rows['day'].dt.dayofyear, cum=rows['n'].cumsum())


df_cur = _cumulative_by_yday(daily, this_year)
df_prev = _cumulative_by_yday(daily, last_year)

plt.figure()
if not df_prev.empty:
    plt.plot(df_prev['yday'], df_prev['cum'], label=str(last_year))
if not df_cur.empty:
    plt.plot(df_cur['yday'], df_cur['cum'], label=str(this_year))
plt.title("Cumulative 311 Requests — YTD vs Last Year")
plt.xlabel("Day of Year"); plt.ylabel("Cumulative Requests")
# Only build a legend when at least one line was drawn; an empty legend
# makes matplotlib emit a "no artists with labels" warning.
if plt.gca().get_legend_handles_labels()[0]:
    plt.legend()
plt.tight_layout()
plt.savefig(os.path.join(FIG_DIR, "cumulative_ytd_vs_last_year.png"), dpi=160)
plt.show()


In [None]:
# Heatmap-style table: YoY % change per type for the three most recent years
# (this_year-2 .. this_year). One row per type, one count column per year
# found in the data, plus a "YoY%_<prev>-><year>" column per consecutive pair.
# NOTE(review): if the current year's counts are partial (YTD), the latest YoY
# column compares a partial year against a full one — confirm before quoting it.
recent_years = [this_year - 2, this_year - 1, this_year]
by_type_year = (df_counts[df_counts['year'].isin(recent_years)]
                .groupby(['year', 'type'], as_index=False)['n'].sum())
# Types with no rows in a given year get a count of 0.
pivot = by_type_year.pivot(index='type', columns='year', values='n').fillna(0).astype(int)
for y in recent_years[1:]:
    prev = y - 1
    # The pivot only has columns for years actually present in the export;
    # skip pairs with a missing year instead of failing with a KeyError.
    if prev not in pivot.columns or y not in pivot.columns:
        continue
    base = pivot[prev]
    # A zero base has no meaningful percentage change, so report NaN there.
    pivot[f"YoY%_{prev}->{y}"] = np.where(base == 0, np.nan, (pivot[y] - base) * 100.0 / base)
pivot.to_csv(os.path.join(EXPORT_DIR, "types_yoy_change_recent.csv"))
pivot.head(15)
