In [1]:
import os
import zarr
from glob import glob

import numpy as np
import xarray as xr

In [2]:
import pandas as pd 

In [3]:
import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
# Station identifiers; each one is substituted into the input glob pattern and
# the output store name used by the conversion loop in this notebook.
station_names = [
    'Pituffik',
    'Fairbanks',
    'Guam',
    'Yuma_PG',
    'Fort_Bragg',
]

## ERA5 hourly to daily

In [5]:
# Convert hourly ERA5 station stores to daily values and write one zarr store
# per station and year.
#
# For every station the hourly stores are opened, the selected variables are
# renamed, and for each year after the first one:
#   * TREFHTMX / TREFHTMN / TREFHT are the daily max / min / mean of the
#     corresponding hourly series;
#   * PRECT is a 24-hour sum of the hourly series (see the labelling note in
#     the precipitation section);
#   * the merged daily dataset is written to '<station>_<year>.zarr'.

# ERA5 variables read from each hourly store.
varname_pick = [
    '2m_temperature',
    'maximum_2m_temperature_since_previous_post_processing',
    'minimum_2m_temperature_since_previous_post_processing',
    'total_precipitation'
]

# Short variable names used in the daily output.
varname_rename = {
    '2m_temperature': 'TREFHT',
    'maximum_2m_temperature_since_previous_post_processing': 'TREFHTMX',
    'minimum_2m_temperature_since_previous_post_processing': 'TREFHTMN',
    'total_precipitation': 'PRECT'
}

for station in station_names:
    # Hourly stores whose path contains the station name, in sorted order.
    fn_all = sorted(glob(f'/glade/campaign/ral/hap/ksha/EPRI_data/ERA5_hourly/*{station}*'))

    # Lazily opened hourly datasets, one entry per matched store.
    ds_collection = []

    for fn in fn_all:
        ds = xr.open_zarr(fn)[varname_pick].rename(varname_rename)
        ds_collection.append(ds)

    # NOTE(review): ds_collection is indexed by position below, which assumes
    # the sorted stores are one per year, contiguous, and start at 1957.
    # A missing or extra store would silently misalign years -- confirm
    # against the directory contents.
    for i_year, year in enumerate(range(1957, 2025)):

        # The first year is skipped: the precipitation step needs the tail of
        # the previous year's store.
        if i_year > 0:
            ds = ds_collection[i_year]
            # ======================================================== #
            # t2
            ds_t2 = xr.Dataset()
            # Daily maximum of the hourly maxima and daily minimum of the
            # hourly minima (the reductions were previously swapped, which
            # stored min-of-max in TREFHTMX and max-of-min in TREFHTMN).
            ds_t2['TREFHTMX'] = ds['TREFHTMX'].resample(time="1D").max(keep_attrs=True)
            ds_t2['TREFHTMN'] = ds['TREFHTMN'].resample(time="1D").min(keep_attrs=True)
            ds_t2['TREFHT'] = ds['TREFHT'].resample(time="1D").mean(keep_attrs=True)

            # ======================================================== #
            # precip
            # Prepend the last 48 time steps of the previous year so the
            # window can start on Dec 31 of year-1.
            ds_previous = ds_collection[i_year-1].isel(time=slice(-48, None))
            ds_precip = xr.concat([ds_previous, ds], dim='time')

            time_start = '{}-12-31T00'.format(year-1)
            time_start_save = '{}-01-01T00'.format(year)
            time_end = '{}-12-31T23'.format(year)

            ds_hourly = ds_precip.sel(time=slice(time_start, time_end))
            ds_hourly = ds_hourly[['PRECT']]
            # shift(-1) moves every value one step earlier, so (assuming
            # hourly steps) the 24h bin labelled day D sums the values
            # originally stamped D 01:00 ... D+1 00:00.
            ds_hourly_shifted = ds_hourly.shift(time=-1)
            ds_daily = ds_hourly_shifted.resample(time='24h').sum()
            # Relabel each bin one day later: the value stored on day D+1 is
            # the sum described above for day D.
            # NOTE(review): presumably an intentional end-of-period labelling
            # convention; the temperature fields are not shifted the same
            # way -- confirm this offset is wanted.
            ds_daily['time'] = ds_daily['time'] + pd.Timedelta(hours=24)
            # Keep only labels inside the target year; the final bin (now
            # labelled Jan 1 of year+1) is dropped here.
            ds_daily = ds_daily.sel(time=slice(time_start_save, time_end))

            # ======================================================== #
            # combine & save
            ds_daily = xr.merge([ds_daily, ds_t2])

            save_name = f'/glade/campaign/ral/hap/ksha/EPRI_data/ERA5_daily/{station}_{year}.zarr'
            # mode='w' overwrites an existing store at the same path.
            ds_daily.to_zarr(save_name, mode='w', consolidated=True, compute=True)
            print(save_name)

In [6]:
# Display the most recent output path assigned by the conversion loop.
# `save_name` only exists once that loop has reached its save step at least
# once in the current kernel; otherwise this cell raises NameError.
save_name

NameError: name 'save_name' is not defined