In [4]:
# --- installs ---
import sys, subprocess, importlib
def _ensure(pkg, import_name=None):
    """Install ``pkg`` with pip only when its module cannot be imported.

    Parameters
    ----------
    pkg : str
        A pip requirement string such as ``"pandas>=2.0"`` or ``"foo==1.2"``.
        It is passed to ``pip install`` unchanged.
    import_name : str, optional
        Module name to probe when it differs from the distribution name.
        Defaults to ``pkg`` with any version specifier, extras or marker removed.

    Raises
    ------
    subprocess.CalledProcessError
        If the ``pip install`` subprocess exits with a non-zero status.

    Notes
    -----
    Only importability is checked; an already-installed version that does not
    satisfy the specifier in ``pkg`` is left as-is.
    """
    import re  # local import keeps this helper self-contained

    # Cut the requirement at the first specifier/extras/marker character so that
    # "pandas>=2.0" probes the module "pandas" (splitting on "==" alone left
    # ">=" requirements intact, so the import always failed and pip always ran).
    module = import_name or re.split(r"[\s<>=!~\[;]", pkg.strip(), maxsplit=1)[0]
    try:
        importlib.import_module(module)
    except ImportError:
        subprocess.check_call([sys.executable, "-m", "pip", "install", pkg])
# Requirement strings handed to _ensure one at a time, in this order.
requirements = [
    "pandas>=2.0",
    "numpy>=1.25",
    "matplotlib>=3.7",
    "requests>=2.31",
    "ruptures>=1.1.9",
    "jinja2>=3.1",
]
for requirement in requirements:
    _ensure(requirement)

# --- imports ---
from pathlib import Path
from datetime import date
import pandas as pd
import numpy as np
import requests, time
import ruptures as rpt
import matplotlib
matplotlib.use("Agg")  # avoid GUI backend issues in some kernels
import matplotlib.pyplot as plt
from jinja2 import Template

# --- config/paths ---
# Today's date in ISO format; embedded in output filenames and the report text.
REPORT_DATE = date.today().isoformat()
# Source CSV and the country whose rows are extracted from it.
OWID_URL = "https://raw.githubusercontent.com/owid/co2-data/master/owid-co2-data.csv"
COUNTRY = "Canada"

# Working directories, relative to the notebook's current working directory.
RAW = Path("data/raw")
PROC = Path("data/processed")
CHARTS = Path("outputs/charts")
REPORTS = Path("outputs/reports")

# Create every directory up front; already-existing ones are left alone.
for folder in (RAW, PROC, CHARTS, REPORTS):
    folder.mkdir(parents=True, exist_ok=True)

# --- fetch (with retry + offline guidance) ---
# Download the OWID CSV once and cache it under RAW; later runs reuse the cached file.
raw_csv = RAW / "owid_co2_data.csv"
if not raw_csv.exists():
    max_attempts = 3
    last_err = None
    for i in range(max_attempts):
        try:
            r = requests.get(OWID_URL, timeout=60)
            r.raise_for_status()
            # Write to a sibling ".part" file and rename it into place, so an
            # interrupted write never leaves a truncated raw_csv that the
            # exists() check above would trust on the next run.
            part_csv = raw_csv.with_name(raw_csv.name + ".part")
            part_csv.write_bytes(r.content)
            part_csv.replace(raw_csv)
            break
        except Exception as e:
            # Remember the failure for the final message and retry.
            last_err = e
            if i < max_attempts - 1:
                # Pause only between attempts, not after the last one.
                time.sleep(1.5)
    if not raw_csv.exists():
        raise SystemExit(
            f"Couldn't download OWID CSV ({last_err}). "
            f"If your environment is offline, manually upload the file to: {raw_csv}"
        )
df = pd.read_csv(raw_csv)

# --- select Canada + essential cols ---
# Columns of interest; any that the downloaded CSV lacks are skipped.
cols = [
    "country",
    "year",
    "co2",
    "co2_per_capita",
    "gdp",
    "population",
    "co2_growth_prct",
    "primary_energy_consumption",
    "energy_per_capita",
]
keep = [name for name in cols if name in df.columns]

# Rows for COUNTRY only, without missing years, ordered by year with a fresh 0..n-1 index.
is_country = df["country"].eq(COUNTRY)
can = df.loc[is_country, keep]
can = can.dropna(subset=["year"])
can = can.sort_values("year").reset_index(drop=True)

# Persist the filtered subset before any derived columns are added.
proc_csv = PROC / f"canada_co2_{REPORT_DATE}.csv"
can.to_csv(proc_csv, index=False)

# --- features ---
# Derived columns are added to a copy, so `can` keeps only the selected raw columns.
out = can.copy()
if "co2" in out.columns:
    # fill_method=None: do not forward-fill gaps before differencing. This matches
    # the YoY figures computed for the report further down and avoids the pandas
    # FutureWarning about the deprecated default padding.
    out["co2_yoy_pct"] = out["co2"].pct_change(fill_method=None) * 100
if "co2_per_capita" in out.columns:
    # 5-row rolling mean; a value is produced once at least 2 observations are in the window.
    out["co2_pc_rolling"] = out["co2_per_capita"].rolling(5, min_periods=2).mean()
if {"gdp","co2"}.issubset(out.columns):
    # Zero GDP becomes NaN so the division yields NaN instead of inf.
    denom = (out["gdp"] / 1e6).replace(0, np.nan)
    # NOTE(review): the column name implies tonnes per million USD, which presumes
    # `co2` is in million tonnes and `gdp` in dollars — confirm against the OWID codebook.
    out["co2_intensity_t_per_musd"] = (out["co2"] * 1e6 / denom)
    out["co2_intensity_rolling"] = out["co2_intensity_t_per_musd"].rolling(5, min_periods=2).mean()

# --- changepoints on per-capita (mapped back across NaNs) ---
def detect_cpd(series, max_k=5, min_years=15):
    """Return positions in ``series`` where PELT (rbf cost) reports a changepoint.

    Missing values are dropped before detection, and the detected breakpoints
    are translated back to positions in the original (NaN-containing) series.

    Parameters
    ----------
    series : pandas.Series
        Values to analyse; may contain NaN.
    max_k : int
        When more breakpoints than this are found, only the last ``max_k`` are kept.
    min_years : int
        Minimum number of non-missing observations; below it ``[]`` is returned.

    Returns
    -------
    list of int
        Zero-based positions within ``series``.
    """
    present = series.notna().to_numpy()
    values = series.dropna().to_numpy()
    n_obs = values.size
    if n_obs < min_years:
        return []

    penalty = np.log(n_obs) * 3
    detector = rpt.Pelt(model="rbf").fit(values)
    # Keep only breakpoints strictly inside the series (drops any value equal to n_obs).
    breakpoints = [b for b in detector.predict(pen=penalty) if b < n_obs]
    if len(breakpoints) > max_k:
        breakpoints = breakpoints[-max_k:]

    # positions[k] is where the k-th non-missing value sits in the original series.
    positions = np.flatnonzero(present)
    return [int(positions[b]) for b in breakpoints if b < len(positions)]

# Start with no changepoints, then flag the rows reported by detect_cpd.
out["changepoint"] = False
if "co2_per_capita" in out.columns:
    cp_rows = [i for i in detect_cpd(out["co2_per_capita"]) if 0 <= i < len(out)]
    if cp_rows:
        out.loc[cp_rows, "changepoint"] = True

# Save the frame with all derived columns alongside the plain country subset.
anal_csv = PROC / f"canada_co2_analyzed_{REPORT_DATE}.csv"
out.to_csv(anal_csv, index=False)

# --- charts (each in its own figure, no explicit colors) ---
def line(df, x, y, title, path, mark=True):
    """Plot ``df[y]`` against ``df[x]`` as a line chart and save it to ``path``.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data.
    x, y : str
        Column names for the horizontal and vertical axes; also used as axis labels.
    title : str
        Figure title.
    path : pathlib.Path
        Output image file; its parent directory is created when missing.
    mark : bool
        When true and ``df`` has a ``changepoint`` column, a dashed vertical
        line is drawn at the ``x`` value of every row flagged ``True``.
    """
    fig, ax = plt.subplots()
    try:
        ax.plot(df[x], df[y])
        if mark and "changepoint" in df.columns:
            for yr in df.loc[df["changepoint"] == True, x].tolist():
                ax.axvline(yr, linestyle="--", linewidth=1)
        ax.set_title(title)
        ax.set_xlabel(x)
        ax.set_ylabel(y)
        fig.tight_layout()
        path.parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(path, dpi=160)
    finally:
        # Close this figure even when plotting or saving fails, so a failed
        # chart does not stay registered with pyplot.
        plt.close(fig)

# (column to plot, chart title, output filename); a chart is skipped when its column is absent.
chart_specs = [
    ("co2", f"{COUNTRY}: CO₂ (Mt)", "co2_total.png"),
    ("co2_per_capita", f"{COUNTRY}: CO₂ per capita (t)", "co2_per_capita.png"),
    ("co2_intensity_rolling", f"{COUNTRY}: CO₂ intensity (t/MUSD, 5y avg)", "co2_intensity.png"),
]
for column, chart_title, filename in chart_specs:
    if column in out.columns:
        line(out, "year", column, chart_title, CHARTS / filename)

# --- markdown report ---
def fmt(x, nd=2):
    """Format ``x`` as text rounded to ``nd`` decimals, or ``"n/a"``.

    ``"n/a"`` is returned for ``None``, for missing values as judged by
    ``pd.isna``, and for anything that cannot be converted with ``float``.
    """
    try:
        if x is None or pd.isna(x):
            return "n/a"
        return str(round(float(x), nd))
    except Exception:
        # Any conversion problem is reported as "n/a" rather than raised.
        return "n/a"

# Last row of the year-sorted frame; supplies the "latest year" figures in the report.
latest = out.tail(1).iloc[0]

# Explicit fill_method=None to avoid FutureWarning
# Each YoY is None when the column is absent or has fewer than two observed values.
yoy_total_num = (
    out["co2"].pct_change(fill_method=None).iloc[-1] * 100
    if "co2" in out.columns and out["co2"].notna().sum() >= 2 else None
)
yoy_pc_num = (
    out["co2_per_capita"].pct_change(fill_method=None).iloc[-1] * 100
    if "co2_per_capita" in out.columns and out["co2_per_capita"].notna().sum() >= 2 else None
)

# Decide trend in Python to avoid str<int comparisons inside Jinja.
# A missing or zero YoY must not be reported as "increasing".
if pd.isna(yoy_total_num):  # also true for None
    trend_word = "of unknown trend"
elif float(yoy_total_num) < 0:
    trend_word = "decreasing"
elif float(yoy_total_num) > 0:
    trend_word = "increasing"
else:
    trend_word = "unchanged"

# Pre-format strings for display
yoy_total = fmt(yoy_total_num)
yoy_pc = fmt(yoy_pc_num)
# Years of the rows flagged as changepoints, as plain ints for the template.
cps = out.loc[out["changepoint"] == True, "year"].astype(int).tolist()

from jinja2 import Template
# Markdown template for the briefing.
# Variables read by the template:
#   d                          report date, shown in the title and the footer
#   y                          latest year
#   co2, yoy                   total CO₂ and its year-over-year change (pre-formatted text)
#   pc, yoypc                  per-capita CO₂ and its year-over-year change (pre-formatted text)
#   inten                      5-year-average CO₂ intensity (pre-formatted text)
#   cps                        list of changepoint years; an empty list selects the "no changepoints" wording
#   trend_word                 word or phrase inserted into the narrative sentence
#   has_total, has_pc, has_int booleans that switch the three figure links on or off
# Figure links are relative (../charts/...).
T = Template("""# 🇨🇦 Canada Weekly Environmental Briefing – {{ d }}

**Scope:** CO₂ trends from the OWID dataset. Figures auto-generated.

## Highlights
- Latest year: **{{ y }}**.
- Total CO₂: **{{ co2 }} Mt** (YoY: {{ yoy }}%).
- CO₂ per capita: **{{ pc }} t/person** (YoY: {{ yoypc }}%).
- CO₂ intensity (5y avg): **{{ inten }} t/MUSD**.

## Notable changes
{% if cps %}- Changepoints detected around: **{{ cps|join(', ') }}**.
{% else %}- No strong changepoints detected (series stable or short).
{% endif %}

## Narrative summary
CO₂ totals are {{ trend_word }} in the latest year.
{{ "Multiple changepoints were detected." if cps|length>1 else ("A changepoint was detected." if cps else "No clear changepoints detected.") }}
Per-capita emissions include a 5-year smoothing for stability.

## Figures
{% if has_total %}![CO2 total](../charts/co2_total.png){% endif %}
{% if has_pc %}![CO2 per capita](../charts/co2_per_capita.png){% endif %}
{% if has_int %}![CO2 intensity](../charts/co2_intensity.png){% endif %}

*Data: Our World in Data (owid-co2-data.csv). Generated {{ d }}.*
""")

# Values substituted into the template; numeric fields are pre-formatted with fmt().
report_context = {
    "d": REPORT_DATE,
    "y": int(latest["year"]),
    "co2": fmt(latest.get("co2")),
    "yoy": yoy_total,
    "pc": fmt(latest.get("co2_per_capita")),
    "yoypc": yoy_pc,
    "inten": fmt(latest.get("co2_intensity_rolling")),
    "cps": cps,
    "trend_word": trend_word,
    # A figure link is emitted only when the column behind that chart exists.
    "has_total": "co2" in out.columns,
    "has_pc": "co2_per_capita" in out.columns,
    "has_int": "co2_intensity_rolling" in out.columns,
}
md = T.render(**report_context)

# Write the rendered Markdown into the reports directory and echo its location.
out_md = REPORTS / f"canada_weekly_briefing_{REPORT_DATE}.md"
out_md.write_text(md, encoding="utf-8")
print(f"Report -> {out_md}")

Report -> outputs/reports/canada_weekly_briefing_2025-10-28.md


In [5]:
# -----------------------------
# Initialize Git and push repo
# -----------------------------

# 1️⃣ Create .gitignore
%%writefile .gitignore
__pycache__/
.ipynb_checkpoints/
*.zip
outputs/
data/

# 2️⃣ Initialize Git
!git init

# 3️⃣ Add remote
!git remote add origin https://github.com/zareenrahman/canada-co2-report.git

# 4️⃣ Stage and commit all files
!git add .
!git commit -m "Initial commit"

# 5️⃣ Rename branch to main
!git branch -M main

# 6️⃣ Push to GitHub
!git push -u origin main

SyntaxError: invalid syntax (74615016.py, line 7)

In [5]:
import shutil

# Pack the contents of the 'data' folder into data.zip; the archive path is the cell's output.
shutil.make_archive(base_name='data', format='zip', root_dir='data')

'/home/jovyan/data.zip'

In [None]:
import shutil
import tempfile
from pathlib import Path

# Zip all files/folders in the current directory.
# The archive is built in a temporary directory and moved into place afterwards:
# creating workspace_backup.zip directly inside the directory being archived
# would pull the partly written zip into itself.
# NOTE(review): zip files already present in the directory (e.g. an earlier
# workspace_backup.zip or data.zip) are still included — delete them first if unwanted.
with tempfile.TemporaryDirectory() as staging_dir:
    staged_zip = shutil.make_archive(str(Path(staging_dir) / 'workspace_backup'), 'zip', '.')
    backup_zip = shutil.move(staged_zip, str(Path('workspace_backup.zip').resolve()))

# Display the final archive path as the cell output.
backup_zip