# This notebook is meant to be a demo of what can be done with intake on ref-marc data

In [None]:
import intake
import dask_hpcconfig
from distributed import Client
import xarray as xr
import hvplot.xarray
import geoviews.feature as gf
import os

Detail of the imports:
-----------------------------------------------------------------------------------------------  
    Intake    
>- Handle the catalog contained in the yaml files.

    dask_hpcconfig    
>- Parallelizes your tasks on an HPC cluster like datarmor.

    Distributed import Client    
>- Client creates the dask client

    Xarray    
>- Xarray performs operations on datasets and creates labelled multidimensional arrays

    hvplot.xarray
>- Plotting 

    geoviews.feature
>- Plotting that enables some features on maps 

    os
>- Handles operating-system-dependent functionality 

In [None]:
# Create a dask cluster from the "datarmor-local" dask_hpcconfig configuration.
cluster = dask_hpcconfig.cluster("datarmor-local")
# Request 14 workers from the cluster.
cluster.scale(14)
# Connect a distributed client to that cluster.
client = Client(cluster)
# Last expression of the cell: the notebook displays the client summary.
client

To visualize the dask-lab extension you need to go to the dask-lab tab and fill it with:
>- /user/{ JUPYTERHUB-USERNAME }/proxy/{ PORT-NUMBER }

In [None]:
def list_param(cat, param):
    """Return the allowed values declared for one catalog parameter.

    Parameters
    ----------
    cat
        An opened catalog exposing a ``metadata`` mapping.
    param
        Name of the parameter to look up.

    Returns
    -------
    The object stored under ``metadata["parameters"][param]["allowed"]``.

    Raises
    ------
    KeyError
        If the metadata has no such parameter or no ``"allowed"`` entry.
    """
    parameters = cat.metadata["parameters"]
    entry = parameters[param]
    return entry["allowed"]

def allowed_param(cat):
    """Return the whole ``"parameters"`` section of the catalog metadata.

    Parameters
    ----------
    cat
        An opened catalog exposing a ``metadata`` mapping.

    Returns
    -------
    The object stored under ``metadata["parameters"]``.
    """
    metadata = cat.metadata
    return metadata["parameters"]

def allowed_years(
    data,
    base_path="/home/datawork-lops-iaocea/catalog/kerchunk/ref-marc/",
):
    """Return the sorted names of the files stored for one region.

    Parameters
    ----------
    data
        Name of the region, i.e. the sub-directory of ``base_path`` to list.
    base_path
        Directory that contains one sub-directory per region. Defaults to
        the ref-marc location, so existing calls keep working unchanged;
        pass another directory to list a different set of files.

    Returns
    -------
    list of str
        Names (not full paths) of the regular files found directly in the
        region directory, in lexicographic order. Sub-directories are
        skipped.

    Raises
    ------
    OSError
        If the region directory does not exist or cannot be read.
    """
    # os.path.join copes with a base_path given with or without a trailing
    # separator, which plain string concatenation did not.
    path = os.path.join(base_path, data)
    return sorted(
        name
        for name in os.listdir(path)
        if os.path.isfile(os.path.join(path, name))
    )

def year_concat(cat, source, region):
    """Open every file of *region* and concatenate them along ``time``.

    Parameters
    ----------
    cat
        An opened catalog.
    source
        Name of the catalog entry to open (looked up as ``cat[source]``).
    region
        Region name; passed to the catalog entry and used to list the
        available files with ``allowed_years``.

    Returns
    -------
    The result of ``xr.concat`` over the per-file objects returned by
    ``to_dask()``, joined on the ``"time"`` dimension.
    """
    yearly_datasets = []
    for filename in allowed_years(region):
        entry = cat[source](region=region, filename=filename)
        yearly_datasets.append(entry.to_dask())

    # "minimal" + "override": only variables/coordinates that already have
    # the "time" dimension are concatenated; the others are not compared
    # and are taken from the first dataset.
    concat_options = {
        "dim": "time",
        "coords": "minimal",
        "data_vars": "minimal",
        "compat": "override",
    }
    return xr.concat(yearly_datasets, **concat_options)

Detail of the functions:
-----------------------------------------------------------------------------------------------  
    list_param(cat, param)    
>- Takes 2 arguments: cat = An opened catalog, param = The name of a parameter.
>- Returns a Python list containing all the allowed values of that parameter.

    allowed_param(cat)    
>- Takes 1 argument: cat = An opened catalog
>- Returns a Python dict containing the allowed values of each parameter and their descriptions

    allowed_years(data)    
>- Takes 1 argument: data = A region of data.
>- Returns a Python list containing all the files stored in the directory. The combined JSON files are stored in a directory named after the region (same name as in the ref-marc directory)  
/!\ In this notebook, it will only work with ref-marc data because the JSON files are located at /home/datawork-lops-iaocea/catalog/kerchunk/ref-marc/. To use it with another directory, update the variable "path" in the function

    year_concat(cat, source, region)    
>- Takes 3 arguments: cat = An opened catalog, source = the source from this catalog, region = the region where the data comes from
>- Returns: A dataset concatenated over the time dimension


In [None]:
# Open the intake catalog stored in marc.yaml; `cat` is reused by the cells below.
cat = intake.open_catalog("/home/datawork-lops-iaocea/catalog/intake/marc.yaml")

#### To open the catalog you can use the cell above; you just need to give the path to an intake catalog.

In [None]:
# Allowed values of the catalog's "region" parameter, read from its metadata.
regions = list_param(
    cat, "region"
)

### This table is here to test the files: to check that everything works as it should, to see how long they take to be concatenated, and to measure the performance

Tested on datarmor with the spark notebook







|    REGION                               |  OPEN DATASET  |      CONCAT     |   COMPUTE   |     PLOT      |Concat time (s) |Nbr of Years |
|    :------------------------------------|:--------------:|:---------------:|:-----------:|:-------------:|:--------------:|:-----------:|
|   ANTILLES-UG                           |Works           |Missing variable |             |               |                |             |
|   ATNE-10M                              |Works           |Missing variable |             |               |                |             |
|   CALEDONIE-3MIN                        |Works           |Missing variable |             |               |                |             |
|   GLOBAL-30MIN                          |Works           |Missing variable |             |               |                |             |
|   MARC_F1-MARS3D-ADOUR                  |Works           |Works            |Works        |Works          |23.6            |9            |
|   MARC_F1-MARS3D-ARMOR                  |Works           |Works            |Works        |Works          |23              |9            |
|   MARC_F1-MARS3D-FINIS                  |Works           |Works            |Works        |Works          |20.9            |9            |
|   MARC_F1-MARS3D-GIRONDE                |Works           |Works            |Works        |Works          |21.1            |9            |
|   MARC_F1-MARS3D-LOIRE                  |Works           |Works            |Works        |Works          |20              |9            |
|   MARC_F1-MARS3D-MANGAE2500-AGRIF       |Works           |Works            |Works        |Works          |563 (ms)        |2            |
|   MARC_F1-MARS3D-MORBIHAN               |Works           |Works            |Works        |Works          |17              |6            |
|   MARC_F1-MARS3D-PDC                    |Works           |Works            |Works        |Works          |117 (1m57)      |9            |
|   MARC_F1-MARS3D-SEINE                  |Works           |Works            |Works        |Works          |29.8            |9            |
|   MED-6MIN                              |Works           |Missing variable |             |               |                |12           |
|   MENOR-2MIN                            |Works           |Missing variable |             |               |                |12           |
|   MENOR-UG                              |Works           |Diff  dims size  |             |               |                |12           |
|   NORGAS-2MIN                           |Works           |Missing variable |             |               |                |12           |
|   NORGAS-UG                             |Works           |Missing variable |             |               |                |15           |
|   NORGAS-UG_V3                          |Works           |Works            |Works        |               |20.5            |12           |
|   POLYNESIE-3MIN                        |Works           |Missing variable |             |               |                |             |
|   REUNION-180M                          |Works           |Works            |Works        |Works          |453 (ms)        |2            |
|   REUNION-UG                            |Works           |Missing variable |             |               |                |12           |
|   b1                                    |Works           |Works            |Works        |fails          |1.95            |5            |
|   b1_v10                                |Works           |Missing variable |             |               |                |11           |
|   b1_v10r2019                           |Works           |Works            |Works        |Works          |2.43            |11           |
|   f1_e2500                              |Works           |Works            |Works        |Works          |127 (2m07)      |11           |
|   f1_e4000                              |Works           |Works            |Works        |Works          |128 (2m08)      |17           |
|   f2_1200                               |Works           |Works            |Works        |Works          |23.4            |10           |
|   f2_1200_sn                            |Works           |Works            |Works        |Works          |2.84            |4            |
|   f2_1200_v10                           |Works           |Works            |Works        |Works          |18.2            |8            |
|   l2_v10                                |Works           |Works            |Works        |Works          |8.17            |9            |
|                                         |                |                 |             |               |                |             |


In [None]:
# Region used by the cells below (presumably one of the values in `regions`).
region = "REUNION-180M"

In [None]:
# Sorted list of the files available for the selected region.
filename = allowed_years(data=region)
# Open a single file as a quick check; as the last expression of the cell,
# the result is displayed by the notebook.
# NOTE(review): the file name is hard-coded and `filename` above is not used
# in this cell — confirm "2021.json.zst" exists for the chosen region.
cat.marc(region=region,filename="2021.json.zst").to_dask()

In [None]:
# Number of files found for the region (presumably one per year).
len(allowed_years(region))

In [None]:
%%time
# Open every file of the region and concatenate them along "time";
# the %%time magic above reports how long the cell takes.
ds = year_concat(
    cat, "marc", region
)

In [None]:
%%time
# Select `cge` from the concatenated dataset (presumably one of its data variables).
da = ds.cge

In [None]:
# Plot `da` as a rasterized quadmesh on longitude/latitude axes with geographic
# projection enabled, overlaid with geoviews land and coastline features at
# the "10m" scale.
da.unify_chunks().hvplot.quadmesh(
     x="longitude", y="latitude", rasterize=True, geo=True, cmap="BuPu"
) * gf.land.opts(scale="10m") * gf.coastline.opts(scale="10m")