In [1]:
import xarray as xr
from dask.distributed import Client
import time
import datetime as dt

In [2]:
# ---- Root directories: observations, forward simulation, DA simulation ----
myobsroot = '/Volumes/TO_1/roms4dvar_ecs/i4dvar_outputs/NOAA_GHRSST/'
mynlroot = '/Volumes/WD_3/'
mydaroot = '/Volumes/TO_1/roms4dvar_ecs/i4dvar_outputs/'

# ---- Workspace sub-directories, appended to the matching root above ----
obs_workspace = '2012/'
nl_workspace = 'outputs_SCORRECTION/outputs_201205/'
da_workspace = 'workspace_geopolarsst/'

# ---- File names / glob patterns inside each workspace ----
obs_file = 'noaacwBLENDEDsstDaily_2012.nc'
nl_files = "*avg*.nc"
# This is an SST validation, so only the qck files of the DA run are read.
prior_files = "/STORAGE/prior/*qck*.nc"
posterior_files = "/STORAGE/posterior/*qck*.nc"

In [3]:
# ---- Validation period (inclusive), written as YYYY-MM-DD-HH followed by 'H' ----
start_date = '2012-05-01-12H'
end_date = '2012-07-21-12H'

# Parse both stamps with the same format string.
start_datetime, end_datetime = (
    dt.datetime.strptime(stamp, "%Y-%m-%d-%HH")
    for stamp in (start_date, end_date)
)
# Whole days between the two stamps, counting both end points.
data_len = (end_datetime - start_datetime).days + 1

# ---- Variables to compare ----
obs_var = 'analysed_sst'
# 2-D case: surface temperature variable.
model_var = 'temp_sur'
# 3-D case: switch to the full temperature field instead.
# model_var = 'temp'

# ---- Dask chunk sizes ----
x_chunk = 262 // 2
y_chunk = 362 // 2
z_chunk = 10

# Echo the configuration so the run is self-describing.
print('''duration to be validated: from %s to %s, total of %i days.
         target obs variable: %s
         target model variable: %s'''
      % (start_date, end_date, data_len, obs_var, model_var))



duration to be validated: from 2012-05-01-12H to 2012-07-21-12H, total of 82 days.
         target obs variable: analysed_sst
         target model variable: temp_sur


In [54]:
# Launch the dask client: 4 workers with 8 threads each and a '4 GiB' memory limit.
client = Client(
    n_workers=4,
    threads_per_worker=8,
    memory_limit='4 GiB',
)
# Show where the diagnostics dashboard can be reached.
print(client.dashboard_link)

http://127.0.0.1:8787/status


In [53]:
# Shut down the dask client created in the previous cell.
# NOTE(review): read top to bottom, this runs before the loading and
# daily-mean cells below, so a Restart & Run All would close the client
# before any of that work starts - confirm whether this cell is meant to be
# run last (or only when restarting the client).
client.close()

In [None]:
# Load the observation file lazily; longitude/latitude are chunked and the
# time axis is then merged into a single chunk.
# Fix: the file-name variable defined in the configuration cell is
# `obs_file`; the previous `obsfile` was never defined and raised NameError.
Obs_ds = xr.open_mfdataset(myobsroot + obs_workspace + obs_file,
                           engine='netcdf4',
                           chunks={'longitude': 260, 'latitude': 210},
                           parallel=True).chunk(dict(time=-1))
# Fix: select the validation period by date label instead of by position.
# `isel(time=slice(0, data_len))` returned the first `data_len` records of the
# file whatever `start_date` was. Date-only bounds make the slice inclusive of
# both the first and the last day regardless of the time of day stored in the
# file.
# NOTE(review): assumes the `time` coordinate is decoded to datetimes and is
# sorted - confirm against the file.
Obs_data = Obs_ds[obs_var].sel(
    time=slice(start_datetime.strftime('%Y-%m-%d'),
               end_datetime.strftime('%Y-%m-%d')))
Obs_data = Obs_data - 273.15  # convert to Celsius
Obs_data

In [None]:
# Load the forward-run files lazily and report how long the open took.
# Fix: the timer was stored in `strt` but read back as `start`, which raised
# NameError on a fresh kernel (or silently reused a stale `start` from
# another cell).
start = time.time()
fwd_ds = xr.open_mfdataset(mynlroot+nl_workspace+nl_files,
                                 engine='netcdf4',coords='minimal',
                                 chunks={'eta_rho':y_chunk,'xi_rho':x_chunk,
                                         # 's_rho':z_chunk, # only 3d needed
                                           'eta_u':y_chunk,'xi_u':x_chunk,\
                                           'eta_v':y_chunk,'xi_v':x_chunk,\
                                           'eta_psi':y_chunk,'xi_psi':x_chunk,},
                                ).chunk(dict(ocean_time=-1))
end = time.time()
print('loading costing %f min'%((end-start)/60))
# The forward run has no qck file, so the only temperature variable is the
# 3-D 'temp'; the last s_rho index (s_rho=-1) is taken as the surface.
fwd_data = fwd_ds['temp'].sel(ocean_time=slice(start_date,end_date)).isel(s_rho=-1)
# Where an ocean_time stamp occurs more than once, keep only its first
# occurrence.
fwd_data = fwd_data.drop_duplicates(dim='ocean_time',keep='first')
fwd_data


In [55]:
# Open the prior `*qck*` files lazily and time how long the open takes.
start = time.time()
prior_ds = xr.open_mfdataset(
    mydaroot + da_workspace + prior_files,
    engine='netcdf4',
    coords='minimal',
    # Identical horizontal chunking on the rho, u, v and psi grids;
    # add 's_rho': z_chunk when 3-D fields are read.
    chunks={
        axis + '_' + grid: size
        for grid in ('rho', 'u', 'v', 'psi')
        for axis, size in (('eta', y_chunk), ('xi', x_chunk))
    },
).chunk({'ocean_time': -1})
end = time.time()
print('loading costing %f min' % ((end - start) / 60))

# Restrict to the validation period, then keep only the first record of any
# repeated ocean_time stamp.
prior_data = (
    prior_ds[model_var]
    .sel(ocean_time=slice(start_date, end_date))
    .drop_duplicates(dim='ocean_time', keep='first')
)
prior_data


loading costing 0.702259 min


Unnamed: 0,Array,Chunk
Bytes,1.27 GiB,351.85 MiB
Shape,"(1945, 362, 242)","(1945, 181, 131)"
Dask graph,4 chunks in 57 graph layers,4 chunks in 57 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.27 GiB 351.85 MiB Shape (1945, 362, 242) (1945, 181, 131) Dask graph 4 chunks in 57 graph layers Data type float64 numpy.ndarray",242  362  1945,

Unnamed: 0,Array,Chunk
Bytes,1.27 GiB,351.85 MiB
Shape,"(1945, 362, 242)","(1945, 181, 131)"
Dask graph,4 chunks in 57 graph layers,4 chunks in 57 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,684.41 kiB,185.24 kiB
Shape,"(362, 242)","(181, 131)"
Dask graph,4 chunks in 132 graph layers,4 chunks in 132 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 684.41 kiB 185.24 kiB Shape (362, 242) (181, 131) Dask graph 4 chunks in 132 graph layers Data type float64 numpy.ndarray",242  362,

Unnamed: 0,Array,Chunk
Bytes,684.41 kiB,185.24 kiB
Shape,"(362, 242)","(181, 131)"
Dask graph,4 chunks in 132 graph layers,4 chunks in 132 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,684.41 kiB,185.24 kiB
Shape,"(362, 242)","(181, 131)"
Dask graph,4 chunks in 132 graph layers,4 chunks in 132 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 684.41 kiB 185.24 kiB Shape (362, 242) (181, 131) Dask graph 4 chunks in 132 graph layers Data type float64 numpy.ndarray",242  362,

Unnamed: 0,Array,Chunk
Bytes,684.41 kiB,185.24 kiB
Shape,"(362, 242)","(181, 131)"
Dask graph,4 chunks in 132 graph layers,4 chunks in 132 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [None]:
# Open the posterior `*qck*` files lazily.
post_ds = xr.open_mfdataset(
    mydaroot + da_workspace + posterior_files,
    engine='netcdf4',
    coords='minimal',
    # Identical horizontal chunking on the rho, u, v and psi grids;
    # add 's_rho': z_chunk when 3-D fields are read.
    chunks={
        axis + '_' + grid: size
        for grid in ('rho', 'u', 'v', 'psi')
        for axis, size in (('eta', y_chunk), ('xi', x_chunk))
    },
).chunk({'ocean_time': -1})

# Restrict to the validation period, then keep only the first record of any
# repeated ocean_time stamp.
post_data = (
    post_ds[model_var]
    .sel(ocean_time=slice(start_date, end_date))
    .drop_duplicates(dim='ocean_time', keep='first')
)
post_data


In [None]:
# Interpolate the observations linearly onto the model grid, using the
# lon_rho / lat_rho coordinates of the posterior data as target points.
# Fix: the posterior array is named `post_data` in the loading cell; the
# previous `posterior_data` was never defined and raised NameError.
Obs_modgrd = Obs_data.interp(longitude=post_data.lon_rho,
                             latitude=post_data.lat_rho,
                             method='linear')
Obs_modgrd

In [None]:
# Daily mean of the forward-run data: resample ocean_time into 1-day bins and
# average each bin.
fwd_dailymean = fwd_data.resample({'ocean_time': '1d'}).mean()
fwd_dailymean

In [None]:
# Quick look: plot the forward daily mean at ocean_time index 1.
fwd_dailymean.isel({'ocean_time': 1}).plot()

In [57]:
# Daily mean of the prior data, computed immediately; the elapsed wall time is
# printed in minutes.
start = time.time()
prior_dailymean = (
    prior_data
    .resample(ocean_time='1d')
    .mean()
    .data        # underlying array of the DataArray
    .compute()   # evaluate now
)
end = time.time()
print('calculating costing %f min' % ((end - start) / 60))
prior_dailymean.shape

calculating costing 0.663568 min


(82, 362, 242)

In [56]:
# Daily mean of the posterior data: resample ocean_time into 1-day bins and
# average each bin. Requires `post_data` from the posterior loading cell.
post_dailymean = post_data.resample({'ocean_time': '1d'}).mean()
post_dailymean

NameError: name 'post_data' is not defined