In [66]:
# Libraries used by the loading / resampling cells below.
import xarray as xr
from dask.distributed import Client
import time
import datetime as dt
import warnings
# Silence every warning for the rest of the session (this also hides
# deprecation notices raised by the libraries used below).
warnings.filterwarnings('ignore')
import sys
# Raise Python's recursion limit well above the interpreter default.
# NOTE(review): the reason is not visible in this notebook — confirm it is still needed.
sys.setrecursionlimit(100000)

In [2]:
# --- Root directories: observations, forward (free) run, data-assimilation run ---
myobsroot = "/Volumes/TO_1/roms4dvar_ecs/i4dvar_outputs/NOAA_GHRSST/"
mynlroot = "/Volumes/WD_3/"
mydaroot = "/Volumes/TO_1/roms4dvar_ecs/i4dvar_outputs/"

# --- Sub-directories (appended to the roots above) ---
nl_workspace = "outputs_SCORRECTION/outputs_201205/"
da_workspace = "workspace_geopolarsst/"
obs_workspace = "2012/"

# --- File names / glob patterns ---
obs_file = "noaacwBLENDEDsstDaily_2012.nc"
nl_files = "*avg*.nc"
# SST validation only needs the quick-save (qck) files of the DA run.
prior_files = "/STORAGE/prior/*qck*.nc"
posterior_files = "/STORAGE/posterior/*qck*.nc"

In [3]:
# Validation window, written as YYYY-MM-DD-HH followed by a literal "H".
start_date = '2012-05-01-12H'
end_date = '2012-08-08-12H'

# Parse both stamps with the same format; the window length counts both end days.
date_format = "%Y-%m-%d-%HH"
start_datetime, end_datetime = (
    dt.datetime.strptime(stamp, date_format) for stamp in (start_date, end_date)
)
data_len = (end_datetime - start_datetime).days + 1

# Variable names: observation product vs. model output.
obs_var = 'analysed_sst'
model_var = 'temp_sur'   # 2-D case
# model_var = 'temp'     # 3-D case

# Chunk sizes offered to the (currently commented-out) `chunks=` options below.
x_chunk = 262 // 2
y_chunk = 362 // 2
z_chunk = 10

summary_template = '''duration to be validated: from %s to %s, total of %i days.
         target obs variable: %s
         target model variable: %s'''
print(summary_template % (start_date, end_date, data_len, obs_var, model_var))



duration to be validated: from 2012-05-01-12H to 2012-08-08-12H, total of 100 days.
         target obs variable: analysed_sst
         target model variable: temp_sur


In [4]:
# launching the dask workstation (optional; left disabled)
# client = Client(threads_per_worker=8, n_workers=4,memory_limit= '8 GiB')
# print(client.dashboard_link)

In [5]:
# client.close()

In [6]:
# Load the observed SST product and cut it to the validation window.
Obs_ds = xr.open_mfdataset(myobsroot+obs_workspace+obs_file,
                           engine='netcdf4',
                           # chunks={'longitude':260,'latitude':210},
                           parallel=True).chunk(dict(time=-1))
# Select by calendar date, not by position: a positional slice of the first
# `data_len` records only matches the model window if the file happens to
# start on `start_date`. Day-resolution strings keep both end days whole.
obs_window = slice(start_datetime.strftime('%Y-%m-%d'),
                   end_datetime.strftime('%Y-%m-%d'))
Obs_data = Obs_ds[obs_var].sel(time=obs_window)
# Report (without aborting) when the file does not hold one record per day
# of the window, e.g. missing days or an undecoded time axis.
if Obs_data.sizes['time'] != data_len:
    print('observation window holds %i time steps, expected %i'
          % (Obs_data.sizes['time'], data_len))
# Kelvin -> Celsius (assumes the product stores SST in kelvin — TODO confirm)
Obs_data = Obs_data-273.15
# Obs_data

In [7]:
# Forward (free-run) SST: open every averaged output file lazily as one dataset.
start = time.time()
fwd_pattern = mynlroot + nl_workspace + nl_files
# Optional chunking (disabled): chunks={'eta_rho': y_chunk, 'xi_rho': x_chunk,
# 's_rho': z_chunk, ...} with the same sizes for the u / v / psi grids.
fwd_ds = xr.open_mfdataset(
    fwd_pattern,
    engine='netcdf4',
    coords='minimal',
    parallel=True,
)
end = time.time()
print('loading costing %f min' % ((end - start) / 60))

# The forward run has no qck file, so only the full 'temp' field is available;
# the last s_rho index is taken as the surface level.
# Repeated time stamps keep their first occurrence only (original note: the
# initial record of a cycle is dropped because it shows a jump).
validation_window = slice(start_date, end_date)
fwd_data = (
    fwd_ds['temp']
    .sel(ocean_time=validation_window)
    .isel(s_rho=-1)
    .drop_duplicates(dim='ocean_time', keep='first')
)
# fwd_data


loading costing 0.755714 min


In [8]:
# Prior SST: open the qck files of the prior run lazily as one dataset.
start = time.time()
prior_pattern = mydaroot + da_workspace + prior_files
# Optional chunking (disabled): chunks={'eta_rho': y_chunk, 'xi_rho': x_chunk, ...}
# with the same sizes for the u / v / psi grids ('s_rho' only for 3-D fields).
prior_ds = xr.open_mfdataset(
    prior_pattern,
    engine='netcdf4',
    coords='minimal',
    parallel=True,
)
end = time.time()
print('loading costing %f min' % ((end - start) / 60))

# Cut to the validation window; repeated time stamps keep their first
# occurrence only (original note: the initial record of a cycle is dropped
# because it shows a jump).
validation_window = slice(start_date, end_date)
prior_data = (
    prior_ds[model_var]
    .sel(ocean_time=validation_window)
    .drop_duplicates(dim='ocean_time', keep='first')
)
# prior_data


loading costing 0.511330 min


In [9]:
# Posterior SST: open the qck files of the posterior run lazily as one dataset.
start = time.time()
post_pattern = mydaroot + da_workspace + posterior_files
# Optional chunking (disabled): chunks={'eta_rho': y_chunk, 'xi_rho': x_chunk, ...}
# with the same sizes for the u / v / psi grids ('s_rho' only for 3-D fields).
post_ds = xr.open_mfdataset(
    post_pattern,
    engine='netcdf4',
    coords='minimal',
    parallel=True,
)
# The window selection is part of the timed section here.
validation_window = slice(start_date, end_date)
post_data = post_ds[model_var].sel(ocean_time=validation_window)
end = time.time()
print('loading costing %f min' % ((end - start) / 60))

# Repeated time stamps keep their first occurrence only (original note: the
# initial record of a cycle is dropped because it shows a jump).
post_data = post_data.drop_duplicates(dim='ocean_time', keep='first')
# post_data


loading costing 0.467244 min


In [10]:
# Regrid the observations onto the model grid: linear interpolation at the
# lon_rho / lat_rho coordinates carried by the posterior data, then load
# the result into memory.
start = time.time()
target_grid = dict(longitude=post_data.lon_rho, latitude=post_data.lat_rho)
Obs_modgrd = Obs_data.interp(method='linear', **target_grid).compute()
end = time.time()
print('calculating costing %f min' % ((end - start) / 60))
# Obs_modgrd

calculating costing 0.034880 min


In [11]:
# Monthly mean of the regridded observations.
# NOTE(review): '1m' is meant as a monthly bin, not minutes; recent pandas
# releases change the accepted month aliases — confirm on upgrade.
obs_by_month = Obs_modgrd.resample(time='1m')
Obs_monthmean = obs_by_month.mean()

# Obs_monthmean

In [13]:
# Daily mean of the forward-run surface temperature, loaded into memory.
start = time.time()
fwd_daily_lazy = fwd_data.resample(ocean_time='1d').mean()
fwd_dailymean = fwd_daily_lazy.compute()
end = time.time()
print('calculating costing %f min' % ((end - start) / 60))
# fwd_dailymean

calculating costing 0.008451 min


In [14]:
# Display the daily-mean forward SST as this cell's output for inspection.
fwd_dailymean

In [16]:
# Monthly mean of the forward-run surface temperature (computed from the
# original time series, not from the daily means), loaded into memory.
start = time.time()
fwd_monthly_lazy = fwd_data.resample(ocean_time='1m').mean()
fwd_monthmean = fwd_monthly_lazy.compute()
end = time.time()
print('calculating costing %f min' % ((end - start) / 60))
# fwd_monthmean

calculating costing 0.006240 min


In [17]:
# Display the monthly-mean forward SST as this cell's output for inspection.
fwd_monthmean

In [18]:
# Daily mean of the prior SST, loaded into memory.
start = time.time()
prior_daily_lazy = prior_data.resample(ocean_time='1d').mean()
prior_dailymean = prior_daily_lazy.compute()
end = time.time()
print('calculating costing %f min' % ((end - start) / 60))
# prior_dailymean

calculating costing 0.621891 min


In [19]:
# Monthly mean of the prior SST, loaded into memory.
start = time.time()
prior_monthly_lazy = prior_data.resample(ocean_time='1m').mean()
prior_monthmean = prior_monthly_lazy.compute()
end = time.time()
print('calculating costing %f min' % ((end - start) / 60))
# prior_monthmean

calculating costing 0.353371 min


In [20]:
# Daily mean of the posterior SST, loaded into memory.
start = time.time()
post_daily_lazy = post_data.resample(ocean_time='1d').mean()
post_dailymean = post_daily_lazy.compute()
end = time.time()
print('calculating costing %f min' % ((end - start) / 60))
# post_dailymean

calculating costing 0.730097 min


In [21]:
# Monthly mean of the posterior SST, loaded into memory.
start = time.time()
post_monthly_lazy = post_data.resample(ocean_time='1m').mean()
post_monthmean = post_monthly_lazy.compute()
end = time.time()
print('calculating costing %f min' % ((end - start) / 60))
# post_monthmean

calculating costing 0.461681 min


In [71]:
# monthly data contrast
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
import scipy.io as sio
import cartopy.crs as ccrs
%matplotlib
my_sst_color = sio.loadmat(mydaroot+'LYG_rainbow.mat')['rainbow']
my_sst = LinearSegmentedColormap.from_list('sst',my_sst_color, N = 256)
imonth = 3 # 0st month = 5
fig, axs = plt.subplots(ncols=2, nrows=2)
fig.figsize=(20,20)
Obs_monthmean.isel(time=imonth).plot(vmin=5,vmax=30,
                                     cmap=my_sst,
                                     add_colorbar=True,
                                     cbar_kwargs={'label':'Celsius'},
                                     ax=axs[0,0])
axs[0,0].set_title('geopolar sst')
fwd_monthmean.isel(ocean_time=imonth).plot(vmin=5,vmax=30,
                                           cmap=my_sst,
                                           add_colorbar=True,
                                           cbar_kwargs={'label':'Celsius'},
                                           ax=axs[0,1])
axs[0,1].set_title('forward')
prior_monthmean.isel(ocean_time=imonth).plot(vmin=5,vmax=30,
                                           cmap=my_sst,
                                           add_colorbar=True,
                                             cbar_kwargs={'label':'Celsius'},
                                           ax=axs[1,0])
axs[1,0].set_title('prior')
post_monthmean.isel(ocean_time=imonth).plot(vmin=5,vmax=30,
                                           cmap=my_sst,
                                           add_colorbar=True,
                                            cbar_kwargs={'label':'Celsius'},
                                           ax=axs[1,1],
                                           )
axs[1,1].set_title('posterior')
plt.tight_layout()


Using matplotlib backend: MacOSX


<bound method Axes.set_title of <AxesSubplot:title={'center':'geopolar sst'}, xlabel='xi_rho', ylabel='eta_rho'>>