In [1]:
import xarray as xr
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
from pyproj import Transformer

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)


YEAR = 1991
SOURCE_DIR = Path(f"/beegfs/CMIP6/wrf_era5/04km/{YEAR}")
PROCESSED_FILE = Path(f"/beegfs/CMIP6/cparr4/daily_downscaled_era5_for_rasdaman/t2_mean/t2_mean_{YEAR}_daily_era5_4km_3338.nc")
SELECTED_LOCATION = "Cordova"

ak_locations = {
    "Anchorage": (61.2181, -149.9003),
    "Fairbanks": (64.8378, -147.7164),
    "Utqiaġvik": (71.2906, -156.7886),
    "Bethel": (60.7922, -161.7558),
    "Cordova": (60.5438, -145.7573),
    "Nome": (64.5011, -165.4064),
    "Seward": (60.1044, -149.4458),
}


def find_nearest_grid_indices(ds, locations):
    """Find the nearest grid indices for a set of lat/lon locations."""
    ak_grid_indices = {}
    lats = ds['XLAT'].values
    lons = ds['XLONG'].values
    for name, (lat, lon) in locations.items():
        # Compute squared distance for all grid points
        dist2 = (lats - lat)**2 + (lons - lon)**2
        idx = np.unravel_index(np.argmin(dist2), lats.shape)
        ak_grid_indices[name] = {'south_north': idx[0], 'west_east': idx[1]}
    return ak_grid_indices

def project_locations(locations_lat_lon):
    """Project lat/lon coordinates to EPSG:3338."""
    transformer = Transformer.from_crs("EPSG:4326", "EPSG:3338", always_xy=True)
    projected_locs = {name: transformer.transform(lon, lat) for name, (lat, lon) in locations_lat_lon.items()}
    return projected_locs

ak_locations_3338 = project_locations(ak_locations)


print("Loading datasets...")
source_files = sorted(SOURCE_DIR.glob(f"era5_wrf_dscale_4km_{YEAR}-*.nc"))
ds_processed = xr.open_dataset(PROCESSED_FILE)
print("Datasets loaded.")

def main():
    """Main function to run the comparison."""
    print(f"Processing comparison for location: {SELECTED_LOCATION}")

    print("Finding nearest grid cell in the first source file...")
    with xr.open_dataset(source_files[0]) as sample_ds:
        grid_indices = find_nearest_grid_indices(sample_ds, ak_locations)
    
    source_loc_indices = grid_indices[SELECTED_LOCATION]
    
    daily_means_dict = { (di,dj): [] for di in [-2,-1,0,1,2] for dj in [-2,-1,0,1,2] }
    print(f"Processing {len(source_files)} source files in a loop for multiple offsets...")
    for f in source_files:
        with xr.open_dataset(f) as ds:
            for di,dj in daily_means_dict.keys():
                wn = source_loc_indices['west_east'] + di
                sn = source_loc_indices['south_north'] + dj
                # guard bounds
                wn = max(0, min(wn, ds.dims['west_east']-1))
                sn = max(0, min(sn, ds.dims['south_north']-1))
                source_raw = ds['T2'].isel(west_east=wn, south_north=sn)
                daily_mean = source_raw.resample(Time="1D").mean() - 273.15
                daily_means_dict[(di,dj)].append(daily_mean)
    print("Combining daily means for each offset...")
    offset_series = {}
    for key, lst in daily_means_dict.items():
        series = xr.concat(lst, dim="Time").rename({'Time':'time'}).rename("t2_mean_source")
        offset_series[key] = series

    print("Extracting data from processed file...")
    processed_loc_coords = ak_locations_3338[SELECTED_LOCATION]
    processed_daily_mean = ds_processed["t2_mean"].sel(
        x=processed_loc_coords[0],
        y=processed_loc_coords[1],
        method="nearest"
    )

    print("\nOffset summary (mean absolute delta °C):")
    delta_dict = {}
    for key, src_series in offset_series.items():
        aligned_src, aligned_proc = xr.align(src_series, processed_daily_mean, join="inner")
        d = aligned_proc - aligned_src
        delta_dict[key] = float(np.abs(d).mean())
        print(f"  offset {key}: {delta_dict[key]:.3f}")

    # pick best offset (minimum mean abs delta)
    best_offset = min(delta_dict, key=delta_dict.get)
    print(f"\nBest offset: {best_offset} with mean abs delta {delta_dict[best_offset]:.3f} °C")

    # Use best offset for detailed plotting
    aligned_source = offset_series[best_offset]
    aligned_source, aligned_processed = xr.align(aligned_source, processed_daily_mean, join="inner")
    delta = aligned_processed - aligned_source
    delta = delta.rename("t2_mean_delta")

    print("Generating plot for best offset...")
    
    aligned_source = aligned_source.reset_coords(drop=True)
    aligned_processed = aligned_processed.reset_coords(drop=True)
    delta = delta.reset_coords(drop=True)

    fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(12, 10), sharex=True)
    
    aligned_source.plot(ax=axes[0], label='Source (Daily Mean)')
    axes[0].set_title(f'Source Daily Mean Temperature at {SELECTED_LOCATION}')
    axes[0].set_ylabel('Temperature (°C)')
    axes[0].grid(True)
    
    aligned_processed.plot(ax=axes[1], color='orange', label='Processed (Daily Mean)')
    axes[1].set_title(f'Processed Daily Mean Temperature at {SELECTED_LOCATION}')
    axes[1].set_ylabel('Temperature (°C)')
    axes[1].grid(True)
    
    delta.plot(ax=axes[2], color='green', label='Delta (Processed - Source)')
    axes[2].axhline(0, color='red', linestyle='--')
    axes[2].set_title('Difference (Processed - Source)')
    axes[2].set_ylabel('Temperature Delta (°C)')
    axes[2].grid(True)
    
    fig.suptitle(f'Temperature Comparison for {SELECTED_LOCATION} - {YEAR}', fontsize=16)
    plt.tight_layout(rect=[0, 0.03, 1, 0.95])
    
    output_fig_path = f"qc_comparison_{SELECTED_LOCATION}_{YEAR}.png"
    plt.savefig(output_fig_path)
    print(f"Plot saved to {output_fig_path}")
    plt.close() # Close figure to avoid displaying in non-interactive environments

    print("\\n--- Delta Summary ---")
    print(f"Mean Difference: {delta.mean().item():.4f} °C")
    print(f"Standard Deviation: {delta.std().item():.4f} °C")
    print(f"Max Difference: {delta.max().item():.4f} °C")
    print(f"Min Difference: {delta.min().item():.4f} °C")
    print("---------------------\\n")
    
    # Clean up datasets from memory
    ds_processed.close()

if __name__ == "__main__":
    main() 

Loading datasets...
Datasets loaded.
Processing comparison for location: Cordova
Finding nearest grid cell in the first source file...
Processing 365 source files in a loop for multiple offsets...
Combining daily means for each offset...
Extracting data from processed file...

Offset summary (mean absolute delta °C):
  offset (-2, -2): 1.208
  offset (-2, -1): 0.525
  offset (-2, 0): 1.820
  offset (-2, 1): 1.896
  offset (-2, 2): 0.696
  offset (-1, -2): 1.403
  offset (-1, -1): 1.482
  offset (-1, 0): 1.821
  offset (-1, 1): 1.965
  offset (-1, 2): 1.831
  offset (0, -2): 0.578
  offset (0, -1): 2.327
  offset (0, 0): 1.791
  offset (0, 1): 1.832
  offset (0, 2): 2.173
  offset (1, -2): 0.000
  offset (1, -1): 2.403
  offset (1, 0): 2.206
  offset (1, 1): 1.780
  offset (1, 2): 1.657
  offset (2, -2): 1.392
  offset (2, -1): 1.455
  offset (2, 0): 1.912
  offset (2, 1): 2.409
  offset (2, 2): 2.864

Best offset: (1, -2) with mean abs delta 0.000 °C
Generating plot for best offset...
