In [1]:
import os                               # to set current directory

import xarray as xr                     # to work with labeled multi-dimensional arrays
import numpy as np                      # to work with numerical data
import pandas as pd                     # to work with labeled data
import matplotlib.pyplot as plt         # to create plots
import xskillscore                      # to calculate skill scores
from scipy.interpolate import griddata  # to interpolate data
from scipy import signal                # to work with signal processing
 
import cartopy.crs as ccrs              # to work with cartographic projections
import cartopy.feature as cfeature      # to work with natural features
import cartopy                          # to work with cartographic data

import scipy.fftpack as fp              # to work with Fourier transforms

from astropy.convolution import Gaussian2DKernel, interpolate_replace_nans # to work with convolution

#from dask import delayed,compute       #### PORQUE ESTA COMENTADO? ####

import sys                              # to add the 'src' directory to the path
import gc                               # to work with the garbage collector 

In [2]:
# Loading CALEDO60_dedac data
#dir_input = '/bettik/PROJECTS/pr-data-ocean/riverama/Datos/CALEDO60_dedac'  # directory where the input data is stored
#pattern = 'ssh_dedac_masked_'
#ds_ssh = xr.open_mfdataset(os.path.join(dir_input,pattern+'*.nc'))
# Arranging the dataset
# Loading SSH
ds_ssh = xr.open_mfdataset("/bettik/PROJECTS/pr-data-ocean/riverama/Datos/CALEDO60_dedac/ssh_dedac_masked_01.nc", parallel=True)
ds_ssh = ds_ssh.drop_vars(['nav_lat','nav_lon'])
ds_ssh = ds_ssh.rename({
    'longitude': 'nav_lon',
    'latitude': 'nav_lat'
})   

In [3]:
ds_ssh.load()

In [4]:
# Adjust chunking strategy for the 'ds_ssh' dataset
ds_ssh = ds_ssh.chunk({'x': 788, 'y': 853, 'time_counter': -1})

# We assume 'ssh_dedac' is the variable of interest in the 'ds_ssh' dataset
# Note: Actual adaptation will require understanding of how to apply the specific geographical selections
# and calculations based on the 2D 'nav_lat' and 'nav_lon'

# Coriolis period calculation (adjust calculation as per the 2D coordinates if needed)
# Here, an approximation is made by averaging latitudes, but a more precise method may be needed
average_latitude = ds_ssh.nav_lat.mean().values
f = 2*2*np.pi/86164*np.sin(np.deg2rad(average_latitude))
T = np.abs(2*np.pi/f)

In [5]:
# Define time-related variables as per the new dataset's structure
dt = 3600  # Time step in seconds
window_len = int(2*T//dt)  # Window length for the filter
time_window = np.arange(-window_len, window_len + 1)
exp_window = np.exp(-np.square(time_window / (T/dt)))

In [6]:
# Create a DataArray for weights
weight = xr.DataArray(exp_window, dims=['window'])

In [7]:
# Adapt the rolling window calculation for 'ssh_bm'
# This step assumes 'time_counter' is the correct dimension for rolling
ssh_bm = (
    ds_ssh.ssh_dedac
    .rolling(time_counter=window_len*2+1, center=True)
    .construct('window')
    .dot(weight) / weight.sum()
)

In [8]:
# Example function adapted for saving output, illustrating handling of 2D coordinates
def save_bm_hf_updated(ds_ssh, dir_output):
    # Ensure that calculations for ssh_bm and ssh_hf are included here

    # Sample calculation (simplified) - Replace with actual computation logic
    # Assuming ssh_dedac is a DataArray in ds_ssh
    ssh_dedac = ds_ssh['ssh_dedac']

    # Example placeholders for ssh_bm and ssh_hf calculation
    # Replace or adjust with actual calculations for ssh_bm
    # For demonstration, using ssh_dedac directly; in practice, replace with real computation
    ssh_bm = ssh_dedac.mean(dim='time_counter')
    ssh_hf = ssh_dedac - ssh_bm

    # Creating a new dataset for saving
    ds_out = xr.Dataset({
        'ssh_bm': ssh_bm,
        'ssh_hf': ssh_hf
    }, coords={
        'time_counter': ds_ssh.time_counter,
        'nav_lat': ds_ssh.nav_lat,
        'nav_lon': ds_ssh.nav_lon
    })

    # Example file name generation and saving
    file_name = "MITgcm_filtered.nc"
    ds_out.to_netcdf(os.path.join(dir_output, file_name))

    print(f"File saved as {os.path.join(dir_output, file_name)}")
    del ds_ssh, ds_out
    gc.collect()


In [9]:
# Define output directory (ensure this exists or is created)
dir_output = "/bettik/PROJECTS/pr-data-ocean/riverama/Datos/Varios"

In [17]:
from dask import delayed, compute
import dask

def process_and_save_chunk(ssh_dedac_chunk, ssh_bm_chunk, output_directory):
    """
    Processes and saves chunks of data to the specified output directory.

    Parameters:
    - ssh_dedac_chunk: xarray DataArray or Dataset representing a chunk of ssh_dedac data.
    - ssh_bm_chunk: xarray DataArray or Dataset representing a chunk of ssh_bm data.
    - output_directory: String specifying the directory where output files should be saved.

    Returns:
    A string message confirming the processing and saving of the chunk.
    """
    # Create the output directory if it doesn't exist
    os.makedirs(output_directory, exist_ok=True)

    # Define file names based on some identifier (e.g., longitude range). Adjust as needed.
    dedac_file_name = os.path.join(output_directory, "ssh_dedac_chunk.nc")
    bm_file_name = os.path.join(output_directory, "ssh_bm_chunk.nc")
    
    # Save ssh_dedac_chunk and ssh_bm_chunk to NetCDF files
    if ssh_dedac_chunk is not None:
        ssh_dedac_chunk.to_netcdf(dedac_file_name)
        
    if ssh_bm_chunk is not None:
        ssh_bm_chunk.to_netcdf(bm_file_name)
    
    # Return a confirmation message
    return f"Saved: {dedac_file_name}, {bm_file_name}"


# Ensuring the loop and the use of delayed_results is outside and after the function definition
delayed_results = []
for lon in np.arange(180, 245, 5):
    # Compute the condition first
    condition = ((ds_ssh.nav_lon >= lon) & (ds_ssh.nav_lon < lon + 5)).compute()

    ssh_dedac_chunk = ds_ssh['ssh_dedac'].where(condition, drop=True)
    
    # Check if 'ssh_bm' is a variable within 'ds_ssh' before attempting to select it
    if 'ssh_bm' in ds_ssh.data_vars:
        ssh_bm_chunk = ds_ssh['ssh_bm'].where(condition, drop=True)
    else:
        ssh_bm_chunk = None  # Or some appropriate action if 'ssh_bm' does not exist
    
    # Ensure your processing function and dir_output are properly defined and used here
    delayed_result = delayed(process_and_save_chunk)(ssh_dedac_chunk, ssh_bm_chunk, dir_output)
    delayed_results.append(delayed_result)

# Compute all delayed tasks
dask.compute(*delayed_results)

HDF5-DIAG: Error detected in HDF5 (1.12.2) thread 1:
  #000: H5F.c line 532 in H5Fcreate(): unable to create file
    major: File accessibility
    minor: Unable to open file
  #001: H5VLcallback.c line 3282 in H5VL_file_create(): file create failed
    major: Virtual Object Layer
    minor: Unable to create file
  #002: H5VLcallback.c line 3248 in H5VL__file_create(): file create failed
    major: Virtual Object Layer
    minor: Unable to create file
  #003: H5VLnative_file.c line 63 in H5VL__native_file_create(): unable to create file
    major: File accessibility
    minor: Unable to open file
  #004: H5Fint.c line 1858 in H5F_open(): unable to truncate a file which is already open
    major: File accessibility
    minor: Unable to open file
HDF5-DIAG: Error detected in HDF5 (1.12.2) thread 2:
  #000: H5F.c line 532 in H5Fcreate(): unable to create file
    major: File accessibility
    minor: Unable to open file
  #001: H5VLcallback.c line 3282 in H5VL_file_create(): file create fa

PermissionError: [Errno 13] Permission denied: '/bettik/PROJECTS/pr-data-ocean/riverama/Datos/Varios/ssh_dedac_chunk.nc'