# Weather Data Retrieval
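This notebook retrieves surface weather observations for the Vancouver area from the Environment Canada API (`api.weather.gc.ca`, `swob-realtime` collection) and saves them as parquet files.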

## 1. Setup

In [1]:
import os
import time
import pandas as pd
import requests

In [2]:
# Folder that receives the raw parquet files written by the retrieval cell.
# It is created up front (including parents); an existing folder is left as is.
RAW_OUTPUT_DIR = 'data/weather_data/raw'
os.makedirs(name=RAW_OUTPUT_DIR, exist_ok=True)

In [3]:
# Retrieval period, given as calendar dates in Vancouver local time.
# The retrieval cell requests one window per pair of consecutive dates in this
# range, so the last window ends at 00:00 local time on weather_end_date.
weather_start_date, weather_end_date = '2025-12-23', '2025-12-29'

## 2. Weather Data Retrieval

In [4]:
# Retrieve SWOB real-time observations inside the bounding box, one window per
# pair of consecutive dates, and write them to parquet in chunks of up to
# SAVE_EVERY_N_DAYS windows.
#
# Reads (from earlier cells): weather_start_date, weather_end_date, RAW_OUTPUT_DIR.
# Writes: one parquet file per chunk under RAW_OUTPUT_DIR; `weather` is bound
#         to the most recently saved chunk.
# NOTE: `weather` is only assigned when a chunk is actually saved, so a run
# that saves nothing leaves any value from a previous run in the kernel.

REQUEST_TIMEOUT_S = 60   # without a timeout, requests.get() can block indefinitely
REQUEST_PAUSE_S = 3      # pause between consecutive requests (rate limiting)
PAGE_LIMIT = 10000       # maximum number of features requested per call
SAVE_EVERY_N_DAYS = 7    # flush accumulated windows to disk after this many

# Generate date range (Vancouver time)
dates = pd.date_range(start=weather_start_date, end=weather_end_date, freq='D', tz='America/Vancouver')
if len(dates) == 0:
    raise ValueError(
        f"Empty date range: weather_start_date={weather_start_date!r} "
        f"is after weather_end_date={weather_end_date!r}"
    )
n_windows = len(dates) - 1

weekly_data = []
failed_windows = []     # windows whose request failed (missing from the saved files)
truncated_windows = []  # windows that may have hit PAGE_LIMIT
week_start_date = dates[0]

base_url = "https://api.weather.gc.ca/collections/swob-realtime/items"
bbox = "-123.35,49.00,-122.40,49.40"
properties = "date_tm-value,stn_nam-value,air_temp,air_temp-qa,dwpt_temp,dwpt_temp-qa,rel_hum,rel_hum-qa,avg_wnd_spd_10m_pst10mts,avg_wnd_spd_10m_pst1hr,rnfl_amt_pst1mt,rnfl_amt_pst1mt-qa,rnfl_amt_pst1hr,rnfl_amt_pst1hr-qa,vis,vis-qa,avg_vis_pst10mts"

print(f"Fetching data from {dates[0]} to {dates[-1]} (Vancouver Time)...")

for i in range(n_windows):
    start_dt = dates[i]
    end_dt = dates[i + 1]
    is_last_window = i == n_windows - 1

    # Convert to UTC for API
    start_utc = start_dt.tz_convert('UTC').strftime('%Y-%m-%dT%H:%M:%SZ')
    end_utc = end_dt.tz_convert('UTC').strftime('%Y-%m-%dT%H:%M:%SZ')
    window = f"{start_utc}/{end_utc}"

    print(f"Processing: {start_utc} -> {end_utc}")

    params = {
        "f": "json",
        "bbox": bbox,
        "datetime": window,
        "properties": properties,
        "sortby": "date_tm-value",
        "limit": str(PAGE_LIMIT)
    }

    try:
        response = requests.get(base_url, params=params, timeout=REQUEST_TIMEOUT_S)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # Best effort: keep going with the remaining windows, but remember the
        # gap so it is reported at the end instead of passing unnoticed.
        print(f"  Error: {e}")
        failed_windows.append(window)
    else:
        features = data.get('features') if isinstance(data, dict) else None
        if features:
            df = pd.json_normalize(features)
            # Clean column names
            df.columns = [c.replace('properties.', '') for c in df.columns]
            weekly_data.append(df)
            print(f"  Fetched {len(df)} records")

            # A full page, or a reported match count above what was returned,
            # means the server had more records than this request received.
            n_matched = data.get('numberMatched')
            if len(features) >= PAGE_LIMIT or (isinstance(n_matched, int) and n_matched > len(features)):
                print(f"  WARNING: result may be truncated at limit={PAGE_LIMIT}")
                truncated_windows.append(window)
        else:
            print("  No data found")

    # Rate limiting (nothing follows the final request, so no pause there)
    if not is_last_window:
        time.sleep(REQUEST_PAUSE_S)

    # Save weekly or at the end of the loop
    if (i + 1) % SAVE_EVERY_N_DAYS == 0 or is_last_window:
        if weekly_data:
            weather_chunk = pd.concat(weekly_data, ignore_index=True)

            # Consecutive windows share their boundary instant, so the same
            # feature may be returned by two requests.
            # NOTE(review): assumes the API treats the datetime interval as
            # inclusive at both ends — confirm. Removing repeated ids is
            # harmless either way; rows without an id are always kept.
            if 'id' in weather_chunk.columns:
                repeated = weather_chunk['id'].notna() & weather_chunk.duplicated(subset='id')
                if repeated.any():
                    print(f"  Dropped {int(repeated.sum())} duplicate records")
                    weather_chunk = weather_chunk.loc[~repeated].reset_index(drop=True)

            # Drop unnecessary columns
            cols_to_drop = ['id', 'type', 'geometry.type']
            weather_chunk = weather_chunk.drop(columns=[c for c in cols_to_drop if c in weather_chunk.columns])

            # Generate filename (start_date_end_date)
            filename = f"{RAW_OUTPUT_DIR}/weather_vancouver_{week_start_date.strftime('%Y%m%d')}_{end_dt.strftime('%Y%m%d')}.parquet"
            weather_chunk.to_parquet(filename, index=False)

            print(f"Saved {len(weather_chunk)} records to {filename}")

            # Reset for next week
            weekly_data = []
            weather = weather_chunk

        # Set start date for next week
        if not is_last_window:
            week_start_date = dates[i + 1]

if failed_windows:
    print(f"WARNING: {len(failed_windows)} window(s) failed and are missing from the saved files:")
    for window in failed_windows:
        print(f"  {window}")
if truncated_windows:
    print(f"WARNING: {len(truncated_windows)} window(s) may be truncated at limit={PAGE_LIMIT}:")
    for window in truncated_windows:
        print(f"  {window}")

print("Data collection complete.")

Fetching data from 2025-12-23 00:00:00-08:00 to 2025-12-29 00:00:00-08:00 (Vancouver Time)...
Processing: 2025-12-23T08:00:00Z -> 2025-12-24T08:00:00Z
  Fetched 8968 records
Processing: 2025-12-24T08:00:00Z -> 2025-12-25T08:00:00Z
  Fetched 9056 records
Processing: 2025-12-25T08:00:00Z -> 2025-12-26T08:00:00Z
  Fetched 9121 records
Processing: 2025-12-26T08:00:00Z -> 2025-12-27T08:00:00Z
  Fetched 9234 records
Processing: 2025-12-27T08:00:00Z -> 2025-12-28T08:00:00Z
  Fetched 9237 records
Processing: 2025-12-28T08:00:00Z -> 2025-12-29T08:00:00Z
  Fetched 9125 records
Saved 54741 records to data/weather_data/raw/weather_vancouver_20251223_20251229.parquet
Data collection complete.


In [5]:
# Preview the most recently saved chunk, if the retrieval cell bound `weather`.
if 'weather' not in locals():
    print("No data collected.")
else:
    print(f"Last chunk shape: {weather.shape}")
    display(weather.head())

Last chunk shape: (54741, 18)


Unnamed: 0,geometry.coordinates,date_tm-value,stn_nam-value,air_temp,air_temp-qa,dwpt_temp,rel_hum,rel_hum-qa,avg_wnd_spd_10m_pst10mts,avg_wnd_spd_10m_pst1hr,rnfl_amt_pst1mt,rnfl_amt_pst1mt-qa,rnfl_amt_pst1hr,rnfl_amt_pst1hr-qa,dwpt_temp-qa,avg_vis_pst10mts,vis,vis-qa
0,"[-123.264704, 49.330352, 15.0]",2025-12-23T08:00:00.000Z,POINT ATKINSON,6.0,100.0,1.6,73.0,100.0,22.6,24.5,,,,,,,,
1,"[-123.002436, 49.125992, 4.6]",2025-12-23T08:00:00.000Z,DELTA BURNS BOG,1.9,100.0,1.9,100.0,100.0,3.9,1.5,0.0,100.0,0.0,,,,,
2,"[-123.193392, 49.347059, 170.885]",2025-12-23T08:00:00.000Z,WEST VANCOUVER AUT,3.2,100.0,2.7,97.0,100.0,2.6,2.1,0.0,100.0,0.0,,,,,
3,"[-123.121687, 49.295348, 4.0]",2025-12-23T08:00:00.000Z,VANCOUVER HARBOUR CS,5.7,100.0,2.3,79.0,100.0,,,,,0.0,100.0,,,,
4,"[-122.690076, 49.208307, 6.3]",2025-12-23T08:00:00.000Z,PITT MEADOWS CS,5.3,100.0,1.6,77.0,100.0,3.7,2.5,0.0,100.0,0.0,,,,,
