In [None]:
from erddapy import ERDDAP
import netCDF4 as nc
import pandas as pd
from connec_functions import GDB

### EMSO ERIC ERDDAP server

In [None]:
# Base URL of the ERDDAP endpoint analysed in this notebook
# (passed as `server=` when the ERDDAP client is created below).
endpoint_url = "https://erddap.emso.eu/erddap"

### Analysis results:

- datasets offered by the ERDDAP server return data directly  
(compared to other ERDDAP servers, where an additional step is needed to retrieve the actual files)
- requires knowledge of netCDF files 
- content of netCDF files:
    - in some cases, ORCID iDs and URLs are used for the creator  
        --> good, but inconsistent and incomplete (e.g. ROR IDs for institutes)  
        --> why not for contributors, ...?  
- the `allDatasets` dataset contains metadata about all the datasets offered by the ERDDAP server; the properties (i.e. columns) with which this metadata is described are the same across ERDDAP servers (cf. comparison with the EMSO-ERIC ERDDAP server)  

In [None]:
# Create the ERDDAP client for the analysed endpoint, configured for the
# "tabledap" protocol. All later cells go through this one client object.
emsoERDDAP = ERDDAP(server=endpoint_url, protocol="tabledap")

In [None]:
# Get the list of dataset IDs: build the search URL with a CSV response,
# load it with pandas and print the 'Dataset ID' column, one ID per line.
datasets = emsoERDDAP.get_search_url(response='csv')
df = pd.read_csv(datasets)
for dataset_id in df['Dataset ID'].tolist():
    print(dataset_id)

**Exploration of the allDatasets dataset**  
~ contains metadata about the datasets offered by the ERDDAP server

In [None]:
# Select the "allDatasets" dataset on the client. The to_pandas() call in the
# next cell takes no arguments and works on whatever ID is set here.
emsoERDDAP.dataset_id = "allDatasets"

In [None]:
# Get the currently selected dataset ("allDatasets") as a 2D DataFrame.
# The bare name on the last line makes the frame the cell's displayed output.
EMSO_alldatasets_df = emsoERDDAP.to_pandas()
EMSO_alldatasets_df

In [None]:
# Explore the columns: show the column labels of the allDatasets frame
# (each column is one metadata property describing a dataset).
EMSO_alldatasets_df.columns

In [None]:
# Print the free-text 'summary' of every dataset listed in allDatasets
# to see whether it carries any additional metadata.
for line in EMSO_alldatasets_df["summary"]:
    print(line)

In [None]:
# List the unique values of each column, skipping identifiers, free text and
# the spatial/temporal extent columns (their values differ per dataset).
skipped_columns = {
    'datasetID', 'summary', 'title',
    'minLongitude (degrees_east)', 'maxLongitude (degrees_east)', 'longitudeSpacing (degrees_east)',
    'minLatitude (degrees_north)', 'maxLatitude (degrees_north)', 'latitudeSpacing (degrees_north)',
    'minAltitude (m)', 'maxAltitude (m)',
    'minTime (UTC)', 'maxTime (UTC)', 'timeSpacing (seconds)',
}

for col in EMSO_alldatasets_df.columns:
    if col in skipped_columns:
        continue
    print(col)
    print(EMSO_alldatasets_df[col].unique())

**Exploration of the EMSO_OBSEA_CTD_30min dataset**

In [None]:
# Select the "EMSO_OBSEA_CTD_30min" dataset on the client. The URL helpers,
# to_pandas() and to_ncCF() calls in the following cells rely on this ID.
emsoERDDAP.dataset_id = "EMSO_OBSEA_CTD_30min"

In [None]:
# Ask the client for the three kinds of URL it can build (search, download,
# info) and print them together for inspection.
search_url = emsoERDDAP.get_search_url()
download_url = emsoERDDAP.get_download_url()
info_url = emsoERDDAP.get_info_url()

print(
    f"search_url: {search_url},\n"
    f"download_url: {download_url},\n"
    f"info_url: {info_url}"
)

In [None]:
# Get the currently selected dataset (EMSO_OBSEA_CTD_30min) as a 2D DataFrame.
# The bare name on the last line makes the frame the cell's displayed output.
EMSO_OBSEA_CTD_30min_df = emsoERDDAP.to_pandas()
EMSO_OBSEA_CTD_30min_df

In [None]:
# Explore the columns: show the column labels of the EMSO_OBSEA_CTD_30min frame.
EMSO_OBSEA_CTD_30min_df.columns

In [None]:
# List the unique values of each column: column label first, then the
# array of distinct values found in it.
for col, column_values in EMSO_OBSEA_CTD_30min_df.items():
    print(col)
    print(column_values.unique())

In [None]:
# Get the same dataset as a netCDF object via to_ncCF() and display its repr.
# The following cells read ncattrs(), dimensions and variables from it.
EMSO_OBSEA_CTD_30min_ncCF = emsoERDDAP.to_ncCF()
EMSO_OBSEA_CTD_30min_ncCF

In [None]:
# Overview of the netCDF object: names of its global attributes,
# its dimensions and its variables.
print(f"GLOBAL ATTRIBUTES: {list(EMSO_OBSEA_CTD_30min_ncCF.ncattrs())}")
print(f"DIMENSIONS: {list(EMSO_OBSEA_CTD_30min_ncCF.dimensions)}")
print(f"VARIABLES: {list(EMSO_OBSEA_CTD_30min_ncCF.variables)}")

In [None]:
# For the given list of variables-of-interest, list the dimension(s) on which
# they are defined, their shape, their units (when declared) and their values.
variables_of_interest = ['time', 'latitude', 'depth', 'PSAL', 'TEMP', 'PSAL_QC']

for var in variables_of_interest:
    EMSO_OBSEA_CTD_30min_ncCF_var = EMSO_OBSEA_CTD_30min_ncCF.variables[var]
    print(var.upper())

    print(f"dimensions: {EMSO_OBSEA_CTD_30min_ncCF_var.dimensions}")
    print(f"shape: {EMSO_OBSEA_CTD_30min_ncCF_var.shape}")
    # `units` is an optional attribute: a variable that does not declare it
    # (QC flag variables such as PSAL_QC commonly do not) raises
    # AttributeError on plain attribute access, which would abort the loop.
    units = getattr(EMSO_OBSEA_CTD_30min_ncCF_var, "units", "<not declared>")
    print(f"units: {units}")

    # [:] reads the full contents of the variable into memory.
    data = EMSO_OBSEA_CTD_30min_ncCF_var[:]
    print(f"data: {data}")


**Exploration of the E2M3A_SAMI dataset**

In [None]:
# Select the "E2M3A_SAMI" dataset on the client. The to_pandas() and to_ncCF()
# calls in the following cells rely on this ID.
emsoERDDAP.dataset_id = "E2M3A_SAMI"

In [None]:
# Get the currently selected dataset (E2M3A_SAMI) as a DataFrame.
# The bare name on the last line makes the frame the cell's displayed output.
E2M3A_SAMI_df = emsoERDDAP.to_pandas()
E2M3A_SAMI_df

In [None]:
# Explore the columns: show the column labels of the E2M3A_SAMI frame.
E2M3A_SAMI_df.columns

In [None]:
# List the unique values of each column, leaving out the time column.
for col in E2M3A_SAMI_df.columns:
    if col == 'time (UTC)':
        continue
    print(col)
    print(E2M3A_SAMI_df[col].unique())

In [None]:
# Get the same dataset as a netCDF object via to_ncCF() and display its repr.
# The next cell reads ncattrs(), dimensions and variables from it.
E2M3A_SAMI_nc = emsoERDDAP.to_ncCF()
E2M3A_SAMI_nc

In [None]:
# Overview of the netCDF object: names of its global attributes,
# its dimensions and its variables.
print(f"GLOBAL ATTRIBUTES: {list(E2M3A_SAMI_nc.ncattrs())}")
print(f"DIMENSIONS: {list(E2M3A_SAMI_nc.dimensions)}")
print(f"VARIABLES: {list(E2M3A_SAMI_nc.variables)}")


**Exploration of the EMSO-AZORES_TCM3-1_2016-2017 dataset**

In [None]:
# Select the "EMSO-AZORES_TCM3-1_2016-2017" dataset on the client. The
# to_pandas() and to_ncCF() calls in the following cells rely on this ID.
emsoERDDAP.dataset_id = "EMSO-AZORES_TCM3-1_2016-2017"

In [None]:
# Get the currently selected dataset (EMSO-AZORES_TCM3-1_2016-2017) as a DataFrame.
# The bare name on the last line makes the frame the cell's displayed output.
EMSO_AZORES_TCM3_1_2016_2017_df = emsoERDDAP.to_pandas()
EMSO_AZORES_TCM3_1_2016_2017_df

In [None]:
# Explore the columns: show the column labels of the EMSO-AZORES_TCM3-1_2016-2017 frame.
EMSO_AZORES_TCM3_1_2016_2017_df.columns

In [None]:
# List the unique values of each column: column label first, then the
# array of distinct values found in it.
for col, column_values in EMSO_AZORES_TCM3_1_2016_2017_df.items():
    print(col)
    print(column_values.unique())

In [None]:
# Get the same dataset as a netCDF object via to_ncCF() and display its repr.
# The following cells read ncattrs(), dimensions and variables from it.
EMSO_AZORES_TCM3_1_2016_2017_nc = emsoERDDAP.to_ncCF()
EMSO_AZORES_TCM3_1_2016_2017_nc

In [None]:
# Overview of the netCDF object: names of its global attributes,
# its dimensions and its variables.
print(f"GLOBAL ATTRIBUTES: {list(EMSO_AZORES_TCM3_1_2016_2017_nc.ncattrs())}")
print(f"DIMENSIONS: {list(EMSO_AZORES_TCM3_1_2016_2017_nc.dimensions)}")
print(f"VARIABLES: {list(EMSO_AZORES_TCM3_1_2016_2017_nc.variables)}")


In [None]:
# For every variable in the netCDF object (not a hand-picked subset),
# print its name and the dimension(s) on which it is defined.
for var, variable in EMSO_AZORES_TCM3_1_2016_2017_nc.variables.items():
    print("Variable: ", var)
    print("    In dimensions: ", variable.dimensions)


**Exploration of the W1M3A_deploy07 dataset**

In [None]:
# Select the "W1M3A_deploy07" dataset on the client. The to_pandas() and
# to_ncCF() calls in the following cells rely on this ID.
emsoERDDAP.dataset_id = "W1M3A_deploy07"

In [None]:
# Get the currently selected dataset (W1M3A_deploy07) as a DataFrame.
# The bare name on the last line makes the frame the cell's displayed output.
W1M3A_deploy07_df = emsoERDDAP.to_pandas()
W1M3A_deploy07_df

In [None]:
# Explore the columns: show the column labels of the W1M3A_deploy07 frame.
W1M3A_deploy07_df.columns

In [None]:
# Compare the column labels of the four explored datasets by printing
# them one list per line, in the order the datasets were loaded above.
print(E2M3A_SAMI_df.columns.tolist())
print(EMSO_OBSEA_CTD_30min_df.columns.tolist())
print(EMSO_AZORES_TCM3_1_2016_2017_df.columns.tolist())
print(W1M3A_deploy07_df.columns.tolist())

In [None]:
# Check whether the data table carries other metadata in a 'summary' column.
# Nothing guarantees that a dataset's data table has such a column (only the
# allDatasets table is known to), and attribute-style access to a missing
# column raises AttributeError. Test for it first so the notebook keeps running.
if "summary" in W1M3A_deploy07_df.columns:
    for line in W1M3A_deploy07_df["summary"]:
        print(line)
else:
    print("W1M3A_deploy07 has no 'summary' column")

In [None]:
# Get the same dataset as a netCDF object via to_ncCF() and display its repr.
# The following cells read ncattrs(), dimensions and variables from it.
W1M3A_deploy07_nc = emsoERDDAP.to_ncCF()
W1M3A_deploy07_nc

In [None]:
# Overview of the netCDF object: names of its global attributes,
# its dimensions and its variables.
print(f"GLOBAL ATTRIBUTES: {list(W1M3A_deploy07_nc.ncattrs())}")
print(f"DIMENSIONS: {list(W1M3A_deploy07_nc.dimensions)}")
print(f"VARIABLES: {list(W1M3A_deploy07_nc.variables)}")

In [None]:
# For every variable in the netCDF object (not a hand-picked subset),
# print its name and the dimension(s) on which it is defined.
for var, variable in W1M3A_deploy07_nc.variables.items():
    print("Variable: ", var)
    print("    In dimensions: ", variable.dimensions)
