Over the next **2-3 weeks**, you will practice:
- Opening netCDF files
- Understanding dimensions and variables
- Making the *same plot many times*
- Improving plots step by step


In [None]:
# Exercise 4.1 helper: summarize every data variable in the dataset.
# For each variable, report its dimensions, shape, units, and long name.
for name, data_array in ds.data_vars.items():
    metadata = data_array.attrs
    print("Variable:", name)
    print("  Dimensions:", data_array.dims)
    print("  Shape:", data_array.shape)
    # Show a visible marker when the variable has no "units" attribute.
    print("  Units:", metadata.get("units", "MISSING"))
    print("  Long name:", metadata.get("long_name", ""))
    print("")


In [None]:
# Exercise 4.3 helper: pull out a single temperature value and print its context.
# Integer positions (not coordinate labels) pick one time step and one grid cell.
time_idx, lat_idx, lon_idx = 100, 1, 2
sel = ds["temperature"].isel(time=time_idx, lat=lat_idx, lon=lon_idx)
print("Value:", sel.values)
# NOTE: this line prints the time coordinate value of the selected step,
# not the integer position used above.
print("Time index:", sel.time.values)
lat_value = float(ds['lat'].isel(lat=lat_idx).values)
lon_value = float(ds['lon'].isel(lon=lon_idx).values)
print("Location (lat, lon):", lat_value, lon_value)
# Now explain this value in 2-3 sentences in a Markdown cell below.


In [None]:
# Exercise 4.4 helper: count the temperature values and estimate their storage,
# assuming each value is stored as a 32-bit float.
temp = ds["temperature"]
n_values = temp.size
bytes_per_value = 4  # a 32-bit float occupies 4 bytes
bytes_per_mb = 1024**2
approx_bytes = bytes_per_value * n_values
print("Number of temperature values:", n_values)
print("Approx file size (bytes):", approx_bytes)
print("Approx file size (MB):", approx_bytes / bytes_per_mb)


In [None]:
# Exercise 4.5 helper: contrast two grid points (edit the indices to explore).
# Point 1 uses the first lat/lon coordinate entries; point 2 uses the last ones.
first_corner = {"lat": ds.lat[0], "lon": ds.lon[0]}
last_corner = {"lat": ds.lat[-1], "lon": ds.lon[-1]}
p1 = ds.sel(first_corner, method="nearest")
p2 = ds.sel(last_corner, method="nearest")

# Where each point sits on the grid
print("Point 1 location:", float(p1.lat), float(p1.lon))
print("Point 2 location:", float(p2.lat), float(p2.lon))

# Mean over everything that remains after the point selection
print("Mean temp p1:", float(p1["temperature"].mean()))
print("Mean temp p2:", float(p2["temperature"].mean()))

print("Mean precip p1:", float(p1["precipitation"].mean()))
print("Mean precip p2:", float(p2["precipitation"].mean()))


In [None]:
# Exercise 4.6 helper: time slicing examples
# Each slice selects a date range by label on the "time" coordinate, then
# reports how many time steps fall inside it.
# Lengths are read from Dataset.sizes (dimension name -> length) because the
# mapping behaviour of Dataset.dims is deprecated in xarray.

# One-month slice (January)
jan = ds.sel(time=slice('2024-01-01', '2024-01-31'))
print('January days:', jan.sizes['time'])

# Seasonal slice (Jun-Aug)
jja = ds.sel(time=slice('2024-06-01', '2024-08-31'))
print('JJA days:', jja.sizes['time'])

# Note: adjust the years/dates to match the dataset if needed.



## 5. Selecting a subset of the data

We almost never plot the full dataset directly.
We first reduce it to something manageable.


In [None]:

# Pick the grid point nearest to a target latitude/longitude
target = {"lat": 41, "lon": -113}
point = ds.sel(target, method="nearest")

# Turn the single-point dataset into a pandas DataFrame
df = point.to_dataframe()

# Preview the first rows (shown as the cell output)
df.head()



## 6. Time series plots

Time series plots answer questions like:
- How does a variable change over time?
- Is there seasonality?


In [None]:

# Basic temperature time series, drawn on an explicit Axes object
fig, ax = plt.subplots(figsize=(10, 3))
ax.plot(df.index, df["temperature"])

# Label the axes and title the plot
ax.set_xlabel("Date")
ax.set_ylabel("Temperature (°C)")
ax.set_title("Daily Temperature at One Location")

fig.tight_layout()
plt.show()



### Practice
- Change line color and thickness
- Plot precipitation instead of temperature
- Zoom into one month



## 7. Histograms

Histograms show how values are distributed.
The number of bins changes how the story looks.


In [None]:

# Histogram of temperature, drawn on an explicit Axes object
n_bins = 20
fig, ax = plt.subplots(figsize=(6, 3))
ax.hist(df["temperature"], bins=n_bins)

# Label the axes and title the plot
ax.set_xlabel("Temperature (°C)")
ax.set_ylabel("Count")
ax.set_title("Temperature Distribution")

fig.tight_layout()
plt.show()



### Practice
- Try 10 bins, then 40 bins
- Make a histogram of precipitation



## 8. Scatter plots

Scatter plots show relationships between two variables.
Each point is one day.


In [None]:

# Scatter plot: temperature (x) against precipitation (y), one marker per row of df
fig, ax = plt.subplots(figsize=(6, 4))
# Partial transparency keeps overlapping markers readable
ax.scatter(df["temperature"], df["precipitation"], alpha=0.4)

ax.set_xlabel("Temperature (°C)")
ax.set_ylabel("Precipitation (mm/day)")
ax.set_title("Temperature vs Precipitation")

fig.tight_layout()
plt.show()


In [None]:

# Helper: Exercise 5.1 - temperature and precipitation time series at one grid point
# Pick the point (edit the lat/lon indices to try another location)
point = ds.sel(lat=ds.lat[1], lon=ds.lon[1], method="nearest")
df_point = point.to_dataframe()

# One figure per variable: (column, y-axis label, title)
panels = [
    ("temperature", "Temperature (°C)", "Daily Temperature at chosen grid point"),
    ("precipitation", "Precipitation (mm/day)", "Daily Precipitation at chosen grid point"),
]
for column, y_label, title in panels:
    fig, ax = plt.subplots(figsize=(10, 3))
    ax.plot(df_point.index, df_point[column], linewidth=0.8)
    ax.set_xlabel("Date")
    ax.set_ylabel(y_label)
    ax.set_title(title)
    fig.tight_layout()
    plt.show()


In [None]:

# Helper: Exercise 5.2 - three time series views: raw, 7-day rolling mean, monthly mean
# Use df_point from previous cell
import pandas as pd  # local import so this cell does not depend on an earlier one

# Raw daily
plt.figure(figsize=(10, 3))
plt.plot(df_point.index, df_point["temperature"], label="Daily", alpha=0.6)
plt.xlabel("Date")
plt.ylabel("Temperature (°C)")
plt.title("Raw daily temperature")
plt.tight_layout()
plt.show()

# 7-day rolling mean (centered window: each value averages the row with 3 rows either side)
temp_7 = df_point["temperature"].rolling(7, center=True).mean()
plt.figure(figsize=(10, 3))
plt.plot(df_point.index, temp_7, color='C1', label="7-day mean")
plt.xlabel("Date")
plt.ylabel("Temperature (°C)")
plt.title("7-day rolling mean temperature")
plt.tight_layout()
plt.show()

# Monthly mean - resample by month
# An explicit MonthEnd offset gives the same month-end bins as the 'M' alias,
# which newer pandas deprecates in favour of 'ME' (an alias older pandas rejects).
temp_month = df_point["temperature"].resample(pd.offsets.MonthEnd()).mean()
plt.figure(figsize=(8, 3))
plt.plot(temp_month.index, temp_month.values, marker='o')
plt.xlabel("Month")
plt.ylabel("Temperature (°C)")
plt.title("Monthly mean temperature")
plt.tight_layout()
plt.show()


In [None]:

# Helper: Exercise 5.3 - the same series at two zoom levels
# The entire year was plotted earlier; here: one month, then one week inside it
one_month = df_point.loc['2024-07-01':'2024-07-31']  # adjust if needed
one_week = df_point.loc['2024-07-10':'2024-07-16']   # adjust if needed

# (data subset, title, extra line options) - the week view adds point markers
zoom_views = [
    (one_month, "Temperature - One Month", {}),
    (one_week, "Temperature - One Week", {"marker": 'o'}),
]
for subset, title, line_options in zoom_views:
    fig, ax = plt.subplots(figsize=(10, 3))
    ax.plot(subset.index, subset["temperature"], **line_options)
    ax.set_xlabel("Date")
    ax.set_ylabel("Temperature (°C)")
    ax.set_title(title)
    fig.tight_layout()
    plt.show()


In [None]:

# Helper: Exercise 5.4 - how the bin count changes a histogram
# Three temperature histograms (10, 25, 50 bins), then one for precipitation.
# Each entry: (column, number of bins, title, x-axis label)
histogram_specs = [
    ("temperature", 10, "Temperature histogram - 10 bins", "Temperature (°C)"),
    ("temperature", 25, "Temperature histogram - 25 bins", "Temperature (°C)"),
    ("temperature", 50, "Temperature histogram - 50 bins", "Temperature (°C)"),
    ("precipitation", 25, "Precipitation histogram - 25 bins", "Precipitation (mm/day)"),
]
for column, n_bins, title, x_label in histogram_specs:
    fig, ax = plt.subplots(figsize=(6, 3))
    # Missing values are dropped before binning
    ax.hist(df_point[column].dropna(), bins=n_bins, alpha=0.7)
    ax.set_title(title)
    ax.set_xlabel(x_label)
    ax.set_ylabel("Count")
    fig.tight_layout()
    plt.show()


In [None]:

# Helper: Exercise 5.5 - overlay the temperature distributions of two locations
# Point 1 uses the first lat/lon coordinate entries; point 2 uses the last ones.
p1 = ds.sel(lat=ds.lat[0], lon=ds.lon[0], method="nearest").to_dataframe()
p2 = ds.sel(lat=ds.lat[-1], lon=ds.lon[-1], method="nearest").to_dataframe()

# Shared bin edges spanning both series, so the two histograms are comparable
lowest = min(p1["temperature"].min(), p2["temperature"].min())
highest = max(p1["temperature"].max(), p2["temperature"].max())
bins = np.linspace(lowest, highest, 25)

fig, ax = plt.subplots(figsize=(6, 3))
ax.hist(p1["temperature"].dropna(), bins=bins, alpha=0.5, label='Point 1')
ax.hist(p2["temperature"].dropna(), bins=bins, alpha=0.5, label='Point 2')
ax.legend()
ax.set_title("Overlaid temperature histograms")
ax.set_xlabel("Temperature (°C)")
ax.set_ylabel("Count")
fig.tight_layout()
plt.show()


In [None]:

# Helper: Exercise 5.6 - scatter with color by month
# Create month column and plot colored scatter
df_point = df_point.copy()  # work on a copy so the month column is added to a fresh frame
df_point['month'] = df_point.index.month

plt.figure(figsize=(6, 4))
# 'Paired' has 12 colours, one per month; a 10-colour map such as 'tab10'
# would make some months share a colour. Limits of 0.5..12.5 put each
# integer month in the middle of its own colour band.
scatter = plt.scatter(
    df_point['temperature'],
    df_point['precipitation'],
    c=df_point['month'],
    cmap='Paired',
    vmin=0.5,
    vmax=12.5,
    alpha=0.6,
)
plt.colorbar(scatter, label='Month', ticks=list(range(1, 13)))
plt.xlabel("Temperature (°C)")
plt.ylabel("Precipitation (mm/day)")
plt.title("Temp vs Precip colored by month")
plt.tight_layout()
plt.show()


In [None]:

# Helper: Exercise 5.9 - plotting answers to three common questions
import pandas as pd  # local import so this cell does not depend on an earlier one

# 1. How does temperature vary over the year? -> Time series (use monthly mean)
# An explicit MonthEnd offset gives the same month-end bins as the 'M' alias,
# which newer pandas deprecates in favour of 'ME' (an alias older pandas rejects).
monthly = df_point['temperature'].resample(pd.offsets.MonthEnd()).mean()
plt.figure(figsize=(8, 3))
plt.plot(monthly.index, monthly.values, marker='o')
plt.title('Monthly mean temperature')
plt.xlabel('Month')
plt.ylabel('Temperature (°C)')
plt.tight_layout()
plt.show()

# 2. What temperatures are most common? -> Histogram
plt.figure(figsize=(6, 3))
plt.hist(df_point['temperature'].dropna(), bins=25)
plt.title('Temperature histogram')
plt.xlabel('Temperature (°C)')
plt.ylabel('Count')
plt.tight_layout()
plt.show()

# 3. Is precipitation related to temperature? -> Scatter colored by month
# 'Paired' has 12 colours, one per month; a 10-colour map such as 'tab10'
# would make some months share a colour. Limits of 0.5..12.5 put each
# integer month in the middle of its own colour band.
plt.figure(figsize=(6, 4))
month_scatter = plt.scatter(
    df_point['temperature'],
    df_point['precipitation'],
    c=df_point.index.month,
    cmap='Paired',
    vmin=0.5,
    vmax=12.5,
)
# The colorbar is the key that says which colour is which month
plt.colorbar(month_scatter, label='Month', ticks=list(range(1, 13)))
plt.title('Temp vs Precip (colored by month)')
plt.xlabel('Temperature (°C)')
plt.ylabel('Precipitation (mm/day)')
plt.tight_layout()
plt.show()
