# CCS reference dataset 

This notebook creates the reference dataset for the North Pacific OSSE. It concatenates the variables from the **/MITgcm_filtered_final** and **/MITgcm_it** folders. 

In [1]:
import xarray as xr
import numpy as np 
import glob
#for parallelizing
from joblib import Parallel
from joblib import delayed as jb_delayed

Choice of the OSSE zone between *crossover_CCS*, *crossover_centerpacific* and *crossover_hawaii*.

In [2]:
# Select the OSSE zone by leaving exactly one `zone` assignment uncommented.
# `zone` is used further down as the MITgcm_it sub-folder to read and as the
# key into `name_folder` when building the output path.
#zone = "crossover_CCS"
#zone = "crossover_centerpacific"
zone = "crossover_hawaii"


### 1) - Data Loading 

Total SSH and Balanced Motion SSH : 

In [3]:
# Files for 2012-05 to 2012-07, sorted so that the positional concatenation
# along `time` (combine='nested') follows the file-name order.
list_files = sorted(
    glob.glob("/bettik/bellemva/MITgcm/MITgcm_filtered_final/MITgcm_filt_201205*.nc")
    + glob.glob("/bettik/bellemva/MITgcm/MITgcm_filtered_final/MITgcm_filt_201206*.nc")
    + glob.glob("/bettik/bellemva/MITgcm/MITgcm_filtered_final/MITgcm_filt_201207*.nc")
)
if not list_files:
    # Fail fast: retrying cannot help when there is nothing to open.
    raise FileNotFoundError("No MITgcm_filt_2012{05,06,07}*.nc file found in MITgcm_filtered_final.")

# Opening the files in parallel can fail transiently, so the open is retried.
# The retry wraps the call that actually builds `ds_ssh`; only `Exception` is
# caught (Ctrl-C still interrupts) and the last failure is re-raised instead of
# being silently ignored.
max_tries = 10
for n_try in range(1, max_tries + 1):
    try:
        ds_ssh = xr.open_mfdataset(
            list_files,
            parallel=True,
            combine='nested',
            concat_dim='time',
            drop_variables=["mask", "ssh_igw", "ssh_bar", "ssh_hf", "ssh_dedac"],
        )
        break
    except Exception:
        if n_try == max_tries:
            raise
        print("Opening netcdf file failed, trying again...")

ds_ssh

Opening netcdf file failed, trying again...


Unnamed: 0,Array,Chunk
Bytes,86.31 GiB,0.94 GiB
Shape,"(2208, 1681, 3121)","(24, 1681, 3121)"
Dask graph,92 chunks in 185 graph layers,92 chunks in 185 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 86.31 GiB 0.94 GiB Shape (2208, 1681, 3121) (24, 1681, 3121) Dask graph 92 chunks in 185 graph layers Data type float64 numpy.ndarray",3121  1681  2208,

Unnamed: 0,Array,Chunk
Bytes,86.31 GiB,0.94 GiB
Shape,"(2208, 1681, 3121)","(24, 1681, 3121)"
Dask graph,92 chunks in 185 graph layers,92 chunks in 185 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,86.31 GiB,0.94 GiB
Shape,"(2208, 1681, 3121)","(24, 1681, 3121)"
Dask graph,92 chunks in 185 graph layers,92 chunks in 185 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 86.31 GiB 0.94 GiB Shape (2208, 1681, 3121) (24, 1681, 3121) Dask graph 92 chunks in 185 graph layers Data type float64 numpy.ndarray",3121  1681  2208,

Unnamed: 0,Array,Chunk
Bytes,86.31 GiB,0.94 GiB
Shape,"(2208, 1681, 3121)","(24, 1681, 3121)"
Dask graph,92 chunks in 185 graph layers,92 chunks in 185 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


Internal tide SSH : 

In [4]:
# Internal-tide files of the selected zone for 2012-05 to 2012-07.
# glob returns paths in arbitrary order and combine='nested' concatenates the
# files in list order, so the list is sorted (as for the SSH files) to keep the
# `time` axis in file-name order.
list_files = sorted(
    glob.glob(f"/bettik/bellemva/MITgcm/MITgcm_it/{zone}/MITgcm_it_201205*.nc")
    + glob.glob(f"/bettik/bellemva/MITgcm/MITgcm_it/{zone}/MITgcm_it_201206*.nc")
    + glob.glob(f"/bettik/bellemva/MITgcm/MITgcm_it/{zone}/MITgcm_it_201207*.nc")
)

# The variables listed in drop_variables are skipped when reading the files.
ds_it = xr.open_mfdataset(
    list_files,
    parallel=True,
    combine='nested',
    concat_dim='time',
    drop_variables=["ssh_it2", "ssh_it3", "ssh_it_tot"],
)
ds_it 

Unnamed: 0,Array,Chunk
Bytes,15.19 GiB,169.10 MiB
Shape,"(2208, 961, 961)","(24, 961, 961)"
Dask graph,92 chunks in 185 graph layers,92 chunks in 185 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 15.19 GiB 169.10 MiB Shape (2208, 961, 961) (24, 961, 961) Dask graph 92 chunks in 185 graph layers Data type float64 numpy.ndarray",961  961  2208,

Unnamed: 0,Array,Chunk
Bytes,15.19 GiB,169.10 MiB
Shape,"(2208, 961, 961)","(24, 961, 961)"
Dask graph,92 chunks in 185 graph layers,92 chunks in 185 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


### 2) - Data Processing 

Cropping the **ds_ssh** dataset to only keep the selected OSSE zone (the latitude/longitude extent of **ds_it**) :

In [5]:
# Restrict ds_ssh to the latitude/longitude bounding box spanned by ds_it.
ds_ssh = ds_ssh.sel(
    {
        "latitude": slice(ds_it["latitude"].values.min(), ds_it["latitude"].values.max()),
        "longitude": slice(ds_it["longitude"].values.min(), ds_it["longitude"].values.max()),
    },
    drop=True,
)

Concatenating the datasets to create the reference dataset : 

In [6]:
# Gather the cropped SSH variables and the internal-tide variable in a single dataset.
dc_ref = xr.merge((ds_ssh, ds_it))

# The two inputs are not used again in the cells below: remove their names.
del ds_it, ds_ssh

Renaming the variables of the reference dataset : 
- *ssh* for *ssh_cor* 
- *ssh_it* for *ssh_it1* 

In [7]:
# Public variable names of the reference dataset: ssh_cor -> ssh, ssh_it1 -> ssh_it.
dc_ref = dc_ref.rename(ssh_cor="ssh", ssh_it1="ssh_it")

Updating the attributes of each of the variables, including the time coordinate :

In [8]:
# Attach a description and a unit to each of the three SSH variables.
# NOTE(review): only ssh, ssh_bm and ssh_it are touched here; the attributes of
# the time coordinate are not modified in this cell -- confirm whether they should be.
dc_ref["ssh"] = dc_ref["ssh"].assign_attrs(
    description="Reference Sea Surface Height (without barotropic tide).",
    units="[m]",
)
dc_ref["ssh_bm"] = dc_ref["ssh_bm"].assign_attrs(
    description="Balanced Motion signal of the reference Sea Surface Height.",
    units="[m]",
)
dc_ref["ssh_it"] = dc_ref["ssh_it"].assign_attrs(
    description="First Mode Internal Tide signal of the reference Sea Surface Height.",
    units="[m]",
)

Optional : coarsening the resolution of the dataset to reduce the storage memory.

In [9]:
# Switch for the optional coarsening; set to False to keep the native resolution.
# It is kept separate from `dc_ref_coarse` so that this name only ever holds the
# coarsened dataset (or None), never a boolean.
coarsen_dataset = True

if coarsen_dataset:
    # Average non-overlapping 2x2 (latitude x longitude) blocks.
    # With boundary='trim', side="left" keeps the windows aligned on the first grid
    # point and drops the leftover point at the END of an odd-sized dimension.
    # side="right" would drop the FIRST point instead, which would not match the
    # labels assigned below.
    dc_ref_coarse = dc_ref.coarsen(longitude=2, latitude=2, boundary='trim', side="left").mean()
    # Label every coarse cell with the first fine-grid point of the pair it averages
    # (indices 0, 2, 4, ...); `[:-1:2]` has the right length for odd and even sizes.
    dc_ref_coarse["latitude"] = dc_ref.latitude[:-1:2]
    dc_ref_coarse["longitude"] = dc_ref.longitude[:-1:2]
else:
    # Falsy placeholder: `save_date` then writes the full-resolution dataset.
    dc_ref_coarse = None

    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array.reshape(shape)

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    >>> array.reshape(shape, limit='128 MiB')
  dc_ref_coarse = dc_ref.coarsen(longitude=2,latitude=2,boundary='trim',side = "right").mean()
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array.reshape(shape)

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    >>> array.reshape(shape, limit='128 MiB')
  dc_ref_coarse = dc_ref.coarsen(longitude=2,latitude=2,boundary='trim',side = "right").mean()
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array.reshape(shape)

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    

### 3) - Data Saving 

In [10]:
# Output folder name of the data challenge associated with each OSSE zone.
name_folder = {
    "crossover_CCS": "2023b_SSHmapping_HF_California",
    "crossover_centerpacific": "2023c_SSHmapping_HF_CenterPacific",
    "crossover_hawaii": "2023d_SSHmapping_HF_Hawaii",
}

def save_date(dc_ref,date):
    """Write one day of the reference dataset to a netCDF file.

    The time steps between ``<date>T00`` and ``<date>T23`` (both included) are
    selected and written under the folder of the current ``zone``.

    Relies on the notebook globals ``dc_ref_coarse``, ``name_folder`` and
    ``zone``: when ``dc_ref_coarse`` is truthy it is the dataset that gets
    written (into ``dc_ref_eval_coarse``); otherwise the ``dc_ref`` argument is
    written (into ``dc_ref_eval``).

    Parameters
    ----------
    dc_ref : dataset used when no coarsened dataset is available.
    date : numpy.datetime64 day to export.

    Returns
    -------
    bool
        Always True.
    """
    day = date.astype('str')
    first_step = np.datetime64(day+"T00")
    last_step = np.datetime64(day+"T23")

    # Pick the dataset to export and its destination file.
    if dc_ref_coarse :
        source = dc_ref_coarse
        target = f"/bettik/bellemva/ocean_data_challenge/{name_folder[zone]}/dc_ref_eval_coarse/{name_folder[zone]}_eval_"+day+".nc"
    else :
        source = dc_ref
        target = f"/bettik/bellemva/ocean_data_challenge/{name_folder[zone]}/dc_ref_eval/{name_folder[zone]}_eval_"+day+".nc"

    source.sel(time=slice(first_step,last_step)).to_netcdf(target)
    return True

In [11]:
# Every day from 2012-05-01 (included) to 2012-08-01 (excluded).
array_date = np.arange("2012-05-01", "2012-08-01", dtype="datetime64[D]")

# Export the days one after the other and report each completed day.
for day in array_date :
    save_date(dc_ref,day)
    print(f"{day.astype('str')}  : ok !")

2012-05-01  : ok !
2012-05-02  : ok !
2012-05-03  : ok !
2012-05-04  : ok !
2012-05-05  : ok !
2012-05-06  : ok !
2012-05-07  : ok !
2012-05-08  : ok !
2012-05-09  : ok !
2012-05-10  : ok !
2012-05-11  : ok !
2012-05-12  : ok !
2012-05-13  : ok !
2012-05-14  : ok !
2012-05-15  : ok !
2012-05-16  : ok !
2012-05-17  : ok !
2012-05-18  : ok !
2012-05-19  : ok !
2012-05-20  : ok !
2012-05-21  : ok !
2012-05-22  : ok !
2012-05-23  : ok !
2012-05-24  : ok !
2012-05-25  : ok !
2012-05-26  : ok !
2012-05-27  : ok !
2012-05-28  : ok !
2012-05-29  : ok !
2012-05-30  : ok !
2012-05-31  : ok !
2012-06-01  : ok !
2012-06-02  : ok !
2012-06-03  : ok !
2012-06-04  : ok !
2012-06-05  : ok !
2012-06-06  : ok !
2012-06-07  : ok !
2012-06-08  : ok !
2012-06-09  : ok !
2012-06-10  : ok !
2012-06-11  : ok !
2012-06-12  : ok !
2012-06-13  : ok !
2012-06-14  : ok !
2012-06-15  : ok !
2012-06-16  : ok !
2012-06-17  : ok !
2012-06-18  : ok !
2012-06-19  : ok !
2012-06-20  : ok !
2012-06-21  : ok !
2012-06-22  