In [1]:
from erddapy import ERDDAP
import netCDF4 as nc
import pandas as pd
from connec_functions import GDB
from collections import defaultdict

from accessibility import check_endpoint
from summarize import * #json_keys, data_frame

### EMSO ERIC ERDDAP server

In [2]:
# Base URL of the ERDDAP server analysed in this notebook.
endpoint_url = "https://erddap.emso.eu/erddap"

# Create the erddapy client for that server using the tabledap protocol.
# Later cells set `dataset_id` on this object before each request.
emsoERDDAP = ERDDAP(server=endpoint_url, protocol="tabledap")

In [3]:
# Report accessibility based on the truthiness of check_endpoint(), a helper
# imported from the local `accessibility` module.
# NOTE(review): in the recorded output the helper warns that the content type
# (text/html) may not be machine-readable, yet the truthy branch is still taken;
# confirm that "machine-accessible" is the intended message in that case.
if check_endpoint(endpoint_url):
    print("The endpoint is machine-accessible.")
else:
    print("The endpoint is not machine-accessible.")

Checking endpoint: https://erddap.emso.eu/erddap
Endpoint is online: 200
Content type may not be machine-readable: text/html;charset=UTF-8
The endpoint is machine-accessible.


**Exploration of the allDatasets dataset**  
~ retrieving a dataset that lists all available datasets on the ERDDAP server. The returned DataFrame will contain metadata for each dataset available on that server  
~ essentially a catalog of all datasets hosted on the server, including essential metadata that allows you to identify and filter the datasets of interest  

In [4]:
# Point the client at the special "allDatasets" dataset, i.e. the server's
# catalogue listing every dataset it hosts.
emsoERDDAP.dataset_id = "allDatasets"

# Download the catalogue as a 2D DataFrame (one row per dataset, one column
# per catalogue metadata field).
EMSO_alldatasets_df = emsoERDDAP.to_pandas()

In [5]:
# List the metadata columns available in the allDatasets catalogue.
EMSO_alldatasets_df.columns

Index(['datasetID', 'accessible', 'institution', 'dataStructure',
       'cdm_data_type', 'class', 'title', 'minLongitude (degrees_east)',
       'maxLongitude (degrees_east)', 'longitudeSpacing (degrees_east)',
       'minLatitude (degrees_north)', 'maxLatitude (degrees_north)',
       'latitudeSpacing (degrees_north)', 'minAltitude (m)', 'maxAltitude (m)',
       'minTime (UTC)', 'maxTime (UTC)', 'timeSpacing (seconds)', 'griddap',
       'subset', 'tabledap', 'MakeAGraph', 'sos', 'wcs', 'wms', 'files',
       'fgdc', 'iso19115', 'metadata', 'sourceUrl', 'infoUrl', 'rss', 'email',
       'testOutOfDate', 'outOfDate', 'summary'],
      dtype='object')

In [6]:
# Save the catalogue as CSV without the positional index.
# The relative "properties/" directory must already exist: pandas' to_csv
# does not create missing parent directories.
EMSO_alldatasets_df.to_csv("properties/EMSO_ERDDAP_overview_metadata.csv", index=False)
# Display the catalogue (the notebook truncates wide/long frames).
EMSO_alldatasets_df

Unnamed: 0,datasetID,accessible,institution,dataStructure,cdm_data_type,class,title,minLongitude (degrees_east),maxLongitude (degrees_east),longitudeSpacing (degrees_east),...,fgdc,iso19115,metadata,sourceUrl,infoUrl,rss,email,testOutOfDate,outOfDate,summary
0,allDatasets,public,Axiom Docker Install,table,Other,EDDTableFromAllDatasets,* The List of All Active Datasets in this ERDD...,,,,...,,,https://erddap.emso.eu/erddap/info/allDatasets...,https://localhost:8443/erddap,https://erddap.emso.eu/erddap,https://erddap.emso.eu/erddap/rss/allDatasets.rss,https://erddap.emso.eu/erddap/subscriptions/ad...,,,This dataset is a table which has a row of inf...
1,EMSO_OBSEA_CTD_30min,public,Polytechnic University of Catalonia,table,Point,EDDTableFromErddap,CTD data at OBSEA Underwater Observatory 30 mi...,1.752570,1.752570,,...,https://erddap.emso.eu/erddap/metadata/fgdc/xm...,https://erddap.emso.eu/erddap/metadata/iso1911...,https://erddap.emso.eu/erddap/info/EMSO_OBSEA_...,(local files),https://edmo.seadatanet.org/report/2150,https://erddap.emso.eu/erddap/rss/EMSO_OBSEA_C...,https://erddap.emso.eu/erddap/subscriptions/ad...,,,CTD data measured at OBSEA underwater observatory
2,EMSO_OBSEA_Besos_Buoy_Airmar_200WX_30min,public,Polytechnic University of Catalonia,table,Point,EDDTableFromErddap,Data from Airmar 200 WX weather station deploy...,1.752570,1.752570,,...,https://erddap.emso.eu/erddap/metadata/fgdc/xm...,https://erddap.emso.eu/erddap/metadata/iso1911...,https://erddap.emso.eu/erddap/info/EMSO_OBSEA_...,(local files),https://edmo.seadatanet.org/report/2150,https://erddap.emso.eu/erddap/rss/EMSO_OBSEA_B...,https://erddap.emso.eu/erddap/subscriptions/ad...,,,Weather station from an Airmar 200WX deployed...
3,EMSO_OBSEA_Besos_Buoy_Airmar_200WX_full,public,Polytechnic University of Catalonia,table,Point,EDDTableFromErddap,Data from Airmar 200 WX weather station deploy...,1.752570,1.752570,,...,https://erddap.emso.eu/erddap/metadata/fgdc/xm...,https://erddap.emso.eu/erddap/metadata/iso1911...,https://erddap.emso.eu/erddap/info/EMSO_OBSEA_...,(local files),https://edmo.seadatanet.org/report/2150,https://erddap.emso.eu/erddap/rss/EMSO_OBSEA_B...,https://erddap.emso.eu/erddap/subscriptions/ad...,,,Weather station from an Airmar 200WX deployed...
4,E2M3A_METEO,public,National Institute of Oceanography and Applied...,table,TimeSeries,EDDTableFromErddap,"E2M3A METEO timeSeries, NRT in situ Observations",18.082417,18.082417,,...,https://erddap.emso.eu/erddap/metadata/fgdc/xm...,https://erddap.emso.eu/erddap/metadata/iso1911...,https://erddap.emso.eu/erddap/info/E2M3A_METEO...,(source database),https://nodc.ogs.it,https://erddap.emso.eu/erddap/rss/E2M3A_METEO.rss,https://erddap.emso.eu/erddap/subscriptions/ad...,,,"E2M3A METEO timeSeries, NRT in situ Observations"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
166,W1M3A_deploy04,public,Consiglio Nazionale delle Ricerche,table,TimeSeries,EDDTableFromErddap,W1M3A data (201705-201806),9.111700,9.118163,,...,https://erddap.emso.eu/erddap/metadata/fgdc/xm...,https://erddap.emso.eu/erddap/metadata/iso1911...,https://erddap.emso.eu/erddap/info/W1M3A_deplo...,(local files),http://www.w1m3a.cnr.it,https://erddap.emso.eu/erddap/rss/W1M3A_deploy...,https://erddap.emso.eu/erddap/subscriptions/ad...,,,Data from W1M3A observatory (062015-062016)
167,W1M3A_deploy05,public,Consiglio Nazionale delle Ricerche,table,TimeSeries,EDDTableFromErddap,W1M3A data (202010-202107),9.111700,9.118163,,...,https://erddap.emso.eu/erddap/metadata/fgdc/xm...,https://erddap.emso.eu/erddap/metadata/iso1911...,https://erddap.emso.eu/erddap/info/W1M3A_deplo...,(local files),http://www.w1m3a.cnr.it,https://erddap.emso.eu/erddap/rss/W1M3A_deploy...,https://erddap.emso.eu/erddap/subscriptions/ad...,,,Data from W1M3A observatory (062015-062016)
168,W1M3A_deploy06,public,Consiglio Nazionale delle Ricerche,table,TimeSeries,EDDTableFromErddap,W1M3A data (202107-202204),9.111700,9.118163,,...,https://erddap.emso.eu/erddap/metadata/fgdc/xm...,https://erddap.emso.eu/erddap/metadata/iso1911...,https://erddap.emso.eu/erddap/info/W1M3A_deplo...,(local files),http://www.w1m3a.cnr.it,https://erddap.emso.eu/erddap/rss/W1M3A_deploy...,https://erddap.emso.eu/erddap/subscriptions/ad...,,,Data from W1M3A observatory (062015-062016)
169,W1M3A_deploy07,public,Consiglio Nazionale delle Ricerche,table,TimeSeries,EDDTableFromErddap,W1M3A data (202310-......),9.106570,9.135168,,...,https://erddap.emso.eu/erddap/metadata/fgdc/xm...,https://erddap.emso.eu/erddap/metadata/iso1911...,https://erddap.emso.eu/erddap/info/W1M3A_deplo...,(local files),http://www.w1m3a.cnr.it,https://erddap.emso.eu/erddap/rss/W1M3A_deploy...,https://erddap.emso.eu/erddap/subscriptions/ad...,,,Data from W1M3A observatory (062015-062016)


In [7]:
# Count distinct dataset IDs in the catalogue.
# NOTE(review): the catalogue's first row is the "allDatasets" pseudo-dataset
# itself, so the reported number includes it; confirm that is intended.
print(f"There are {len(EMSO_alldatasets_df['datasetID'].drop_duplicates())} datasets offered by the EMSO ERDDAP server")

There are 171 datasets offered by the EMSO ERDDAP server


In [8]:
# Print every dataset's free-text `summary` in full (the table view above
# truncates long strings) to check whether it carries additional metadata.
for line in EMSO_alldatasets_df.summary:
    print(line)

This dataset is a table which has a row of information for each dataset currently active in this ERDDAP.
CTD data measured at OBSEA underwater observatory
Weather station from  an Airmar 200WX deployed at OBSEA's Besos Buoy, NW mediterranean sea
Weather station from  an Airmar 200WX deployed at OBSEA's Besos Buoy, NW mediterranean sea full sensor data
E2M3A METEO timeSeries, NRT in situ Observations
E2M3A MRDT timeSeries, NRT in situ Observations
E2M3A PCO2PROA timeSeries, NRT in situ Observations
E2M3A PCO2PROW timeSeries, NRT in situ Observations
E2M3A SAMI timeSeries, NRT in situ Observations
E2M3A SBE16PLS timeSeries, NRT in situ Observations
E2M3A SBE37O timeSeries, NRT in situ Observations
E2M3A timeSeries ALL INSTRUMENTS, NRT in situ Observations
E2M3A timeSeries, data collected from 2021 to 2022
This dataset contains dissolved iron concentrations ((Fe(II) + Fe(III); \u00b5mol/l) acquired between September 2013 and July 2017 using the CHEMINI Fe, a CHEmical MINIaturized analyser

Exploration of **search information** for each dataset offered through the ERDDAP server

In [9]:
# Build the client's search URL with a CSV response (no search criteria are
# passed here) and load the result directly into a DataFrame with pandas.
searchinfo_df = pd.read_csv(emsoERDDAP.get_search_url(response="csv"))

In [10]:
# List the columns returned by the search endpoint.
searchinfo_df.columns

Index(['griddap', 'Subset', 'tabledap', 'Make A Graph', 'wms', 'files',
       'Title', 'Summary', 'FGDC', 'ISO 19115', 'Info', 'Background Info',
       'RSS', 'Email', 'Institution', 'Dataset ID'],
      dtype='object')

In [11]:
# Display the search results table.
searchinfo_df

Unnamed: 0,griddap,Subset,tabledap,Make A Graph,wms,files,Title,Summary,FGDC,ISO 19115,Info,Background Info,RSS,Email,Institution,Dataset ID
0,,https://erddap.emso.eu/erddap/tabledap/allData...,https://erddap.emso.eu/erddap/tabledap/allData...,https://erddap.emso.eu/erddap/tabledap/allData...,,,* The List of All Active Datasets in this ERDD...,This dataset is a table which has a row of inf...,,,https://erddap.emso.eu/erddap/info/allDatasets...,https://erddap.emso.eu/erddap,,,Axiom Docker Install,allDatasets
1,,https://erddap.emso.eu/erddap/tabledap/EMSO_OB...,https://erddap.emso.eu/erddap/tabledap/EMSO_OB...,https://erddap.emso.eu/erddap/tabledap/EMSO_OB...,,,CTD data at OBSEA Underwater Observatory 30 mi...,CTD data measured at OBSEA underwater observat...,https://erddap.emso.eu/erddap/metadata/fgdc/xm...,https://erddap.emso.eu/erddap/metadata/iso1911...,https://erddap.emso.eu/erddap/info/EMSO_OBSEA_...,https://edmo.seadatanet.org/report/2150,https://erddap.emso.eu/erddap/rss/EMSO_OBSEA_C...,https://erddap.emso.eu/erddap/subscriptions/ad...,Polytechnic University of Catalonia,EMSO_OBSEA_CTD_30min
2,,https://erddap.emso.eu/erddap/tabledap/EMSO_OB...,https://erddap.emso.eu/erddap/tabledap/EMSO_OB...,https://erddap.emso.eu/erddap/tabledap/EMSO_OB...,,,Data from Airmar 200 WX weather station deploy...,Weather station from an Airmar 200WX deployed...,https://erddap.emso.eu/erddap/metadata/fgdc/xm...,https://erddap.emso.eu/erddap/metadata/iso1911...,https://erddap.emso.eu/erddap/info/EMSO_OBSEA_...,https://edmo.seadatanet.org/report/2150,https://erddap.emso.eu/erddap/rss/EMSO_OBSEA_B...,https://erddap.emso.eu/erddap/subscriptions/ad...,Polytechnic University of Catalonia,EMSO_OBSEA_Besos_Buoy_Airmar_200WX_30min
3,,https://erddap.emso.eu/erddap/tabledap/EMSO_OB...,https://erddap.emso.eu/erddap/tabledap/EMSO_OB...,https://erddap.emso.eu/erddap/tabledap/EMSO_OB...,,,Data from Airmar 200 WX weather station deploy...,Weather station from an Airmar 200WX deployed...,https://erddap.emso.eu/erddap/metadata/fgdc/xm...,https://erddap.emso.eu/erddap/metadata/iso1911...,https://erddap.emso.eu/erddap/info/EMSO_OBSEA_...,https://edmo.seadatanet.org/report/2150,https://erddap.emso.eu/erddap/rss/EMSO_OBSEA_B...,https://erddap.emso.eu/erddap/subscriptions/ad...,Polytechnic University of Catalonia,EMSO_OBSEA_Besos_Buoy_Airmar_200WX_full
4,,,https://erddap.emso.eu/erddap/tabledap/E2M3A_M...,https://erddap.emso.eu/erddap/tabledap/E2M3A_M...,,,"E2M3A METEO timeSeries, NRT in situ Observations","E2M3A METEO timeSeries, NRT in situ Observatio...",https://erddap.emso.eu/erddap/metadata/fgdc/xm...,https://erddap.emso.eu/erddap/metadata/iso1911...,https://erddap.emso.eu/erddap/info/E2M3A_METEO...,https://nodc.ogs.it,https://erddap.emso.eu/erddap/rss/E2M3A_METEO.rss,https://erddap.emso.eu/erddap/subscriptions/ad...,National Institute of Oceanography and Applied...,E2M3A_METEO
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
166,,https://erddap.emso.eu/erddap/tabledap/W1M3A_d...,https://erddap.emso.eu/erddap/tabledap/W1M3A_d...,https://erddap.emso.eu/erddap/tabledap/W1M3A_d...,,,W1M3A data (201705-201806),Data from W1M3A observatory (062015-062016)\n\...,https://erddap.emso.eu/erddap/metadata/fgdc/xm...,https://erddap.emso.eu/erddap/metadata/iso1911...,https://erddap.emso.eu/erddap/info/W1M3A_deplo...,http://www.w1m3a.cnr.it,https://erddap.emso.eu/erddap/rss/W1M3A_deploy...,https://erddap.emso.eu/erddap/subscriptions/ad...,Consiglio Nazionale delle Ricerche,W1M3A_deploy04
167,,https://erddap.emso.eu/erddap/tabledap/W1M3A_d...,https://erddap.emso.eu/erddap/tabledap/W1M3A_d...,https://erddap.emso.eu/erddap/tabledap/W1M3A_d...,,,W1M3A data (202010-202107),Data from W1M3A observatory (062015-062016)\n\...,https://erddap.emso.eu/erddap/metadata/fgdc/xm...,https://erddap.emso.eu/erddap/metadata/iso1911...,https://erddap.emso.eu/erddap/info/W1M3A_deplo...,http://www.w1m3a.cnr.it,https://erddap.emso.eu/erddap/rss/W1M3A_deploy...,https://erddap.emso.eu/erddap/subscriptions/ad...,Consiglio Nazionale delle Ricerche,W1M3A_deploy05
168,,https://erddap.emso.eu/erddap/tabledap/W1M3A_d...,https://erddap.emso.eu/erddap/tabledap/W1M3A_d...,https://erddap.emso.eu/erddap/tabledap/W1M3A_d...,,,W1M3A data (202107-202204),Data from W1M3A observatory (062015-062016)\n\...,https://erddap.emso.eu/erddap/metadata/fgdc/xm...,https://erddap.emso.eu/erddap/metadata/iso1911...,https://erddap.emso.eu/erddap/info/W1M3A_deplo...,http://www.w1m3a.cnr.it,https://erddap.emso.eu/erddap/rss/W1M3A_deploy...,https://erddap.emso.eu/erddap/subscriptions/ad...,Consiglio Nazionale delle Ricerche,W1M3A_deploy06
169,,https://erddap.emso.eu/erddap/tabledap/W1M3A_d...,https://erddap.emso.eu/erddap/tabledap/W1M3A_d...,https://erddap.emso.eu/erddap/tabledap/W1M3A_d...,,,W1M3A data (202310-......),Data from W1M3A observatory (062015-062016)\n\...,https://erddap.emso.eu/erddap/metadata/fgdc/xm...,https://erddap.emso.eu/erddap/metadata/iso1911...,https://erddap.emso.eu/erddap/info/W1M3A_deplo...,http://www.w1m3a.cnr.it,https://erddap.emso.eu/erddap/rss/W1M3A_deploy...,https://erddap.emso.eu/erddap/subscriptions/ad...,Consiglio Nazionale delle Ricerche,W1M3A_deploy07


Exploration of **metadata information** available for each dataset offered through the ERDDAP server

In [12]:
# Metadata information for each dataset.
# For every dataset ID in the allDatasets catalogue, download that dataset's
# ERDDAP "info" table as CSV and tag each row with the dataset it came from.
metadata_columns = ["Row Type", "Variable Name", "Attribute Name", "Data Type", "Value", "DatasetID"]

# Collect the per-dataset frames in a list and concatenate once after the loop.
# Calling pd.concat inside the loop re-copies the growing frame on every
# iteration, and seeding it with an empty frame relies on concat behaviour
# that recent pandas versions warn about.
metadata_frames = []
for datasetID in EMSO_alldatasets_df['datasetID']:
    emsoERDDAP.dataset_id = datasetID
    try:
        _df = pd.read_csv(emsoERDDAP.get_info_url(response="csv"))  # metadata retrieved via info_url
        _df["DatasetID"] = datasetID
        metadata_frames.append(_df)
    except Exception as e:
        # Best effort: report the failing dataset and carry on with the rest.
        print(f"there was an error for {datasetID}: '{e}'")

if metadata_frames:
    metadatainfo_df = pd.concat(metadata_frames, ignore_index=True)
else:
    # Nothing could be downloaded: keep an empty frame with the expected
    # columns so the cells below still run.
    metadatainfo_df = pd.DataFrame(columns=metadata_columns)

# save to csv file
metadatainfo_df.to_csv("properties/EMSO_ERDDAP_dataset_metadata.csv", index=False)

metadatainfo_df

Unnamed: 0,Row Type,Variable Name,Attribute Name,Data Type,Value,DatasetID
0,attribute,NC_GLOBAL,cdm_data_type,String,Other,allDatasets
1,attribute,NC_GLOBAL,Conventions,String,"COARDS, CF-1.6, ACDD-1.3",allDatasets
2,attribute,NC_GLOBAL,creator_email,String,nobody@example.com,allDatasets
3,attribute,NC_GLOBAL,creator_name,String,Axiom Docker Install,allDatasets
4,attribute,NC_GLOBAL,creator_url,String,https://erddap.emso.eu/erddap,allDatasets
...,...,...,...,...,...,...
80364,attribute,depth_QC,actual_range,byte,"7, 7",EMSO_OBSEA_AWAC_waves_full
80365,attribute,depth_QC,conventions,String,OceanSITES QC Flags,EMSO_OBSEA_AWAC_waves_full
80366,attribute,depth_QC,flag_meanings,String,unknown;good_data;probably_good_data;potential...,EMSO_OBSEA_AWAC_waves_full
80367,attribute,depth_QC,flag_values,String,0;1;2;3;4;7;8;9,EMSO_OBSEA_AWAC_waves_full


In [13]:
# Explore the returned metadata: for each dataset and row type, count the
# number of distinct values in every remaining column.
metadatainfo_df.groupby(['DatasetID', 'Row Type']).nunique()

Unnamed: 0_level_0,Unnamed: 1_level_0,Variable Name,Attribute Name,Data Type,Value
DatasetID,Row Type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
BB_505_ADCP,attribute,17,71,4,140
BB_505_ADCP,variable,16,0,4,0
BB_567_SBE56,attribute,8,72,3,107
BB_567_SBE56,variable,7,0,3,0
BB_584_SBE56,attribute,8,72,3,105
...,...,...,...,...,...
smartbay_obs_fluorometer_ecofl,variable,12,0,4,0
smartbay_obs_hour_mean,attribute,59,136,3,515
smartbay_obs_hour_mean,variable,58,0,4,0
smartbay_obs_pco2_contros,attribute,11,129,3,180


In [14]:
# Explore the dimensions of each dataset (Row Type = 'dimension'):
# number of distinct values per column for each (dataset, dimension) pair.
# In the recorded run this selection is empty, i.e. no 'dimension' rows were
# returned for these datasets.
dim_info = metadatainfo_df[ metadatainfo_df['Row Type'] == 'dimension'].groupby(['DatasetID', 'Variable Name']).nunique()

# The group keys (DatasetID, Variable Name) live in the index after groupby,
# so writing with index=False alone would drop them from the file. Move them
# back into columns before saving.
dim_info.reset_index().to_csv("properties/EMSO_ERDDAP_dataset_metadata_dimensions.csv", index=False)

dim_info

Unnamed: 0_level_0,Unnamed: 1_level_0,Row Type,Attribute Name,Data Type,Value
DatasetID,Variable Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1


In [15]:
# Explore the variables of each dataset (Row Type = 'variable').
# Note: in netCDF files, attributes carry the additional information about a
# variable, which is why `Value` is empty (NaN) on the variable rows themselves.
variable_rows = metadatainfo_df.loc[metadatainfo_df['Row Type'].eq('variable')]
var_info = (
    variable_rows
    .groupby(['DatasetID', 'Variable Name'])
    .agg({'Data Type': 'unique', 'Value': 'unique'})
    .reset_index()
)

# NOTE(review): the double underscore in this file name differs from the
# sibling output files; confirm whether it is intentional before renaming.
var_info.to_csv("properties/EMSO_ERDDAP_dataset__metadata_variables.csv", index=False)

var_info

Unnamed: 0,DatasetID,Variable Name,Data Type,Value
0,BB_505_ADCP,CurrVelE_ADCP,[double],[nan]
1,BB_505_ADCP,CurrVelN_ADCP,[double],[nan]
2,BB_505_ADCP,CurrVelUp_ADCP,[double],[nan]
3,BB_505_ADCP,CurrVel_QC,[byte],[nan]
4,BB_505_ADCP,ECHO_BEAM_1,[double],[nan]
...,...,...,...,...
3922,smartbay_obs_pco2_contros,pco2_corrected,[float],[nan]
3923,smartbay_obs_pco2_contros,pco2_corrected_qc,[int],[nan]
3924,smartbay_obs_pco2_contros,site_bathy_depth,[double],[nan]
3925,smartbay_obs_pco2_contros,station_id,[String],[nan]


In [16]:
# Explore the attributes of each dataset (Row Type = 'attribute'): for each
# (dataset, attribute name) pair, count the distinct values in every other
# column (e.g. how many variables carry that attribute).
metadatainfo_df[ metadatainfo_df['Row Type'] == 'attribute'].groupby(['DatasetID', 'Attribute Name']).nunique()

Unnamed: 0_level_0,Unnamed: 1_level_0,Row Type,Variable Name,Data Type,Value
DatasetID,Attribute Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
BB_505_ADCP,Conventions,1,1,1,1
BB_505_ADCP,Easternmost_Easting,1,1,1,1
BB_505_ADCP,Northernmost_Northing,1,1,1,1
BB_505_ADCP,Southernmost_Northing,1,1,1,1
BB_505_ADCP,Westernmost_Easting,1,1,1,1
...,...,...,...,...,...
smartbay_obs_pco2_contros,update_interval,1,1,1,1
smartbay_obs_pco2_contros,valid_max,1,3,1,3
smartbay_obs_pco2_contros,valid_min,1,3,1,3
smartbay_obs_pco2_contros,wfd_waterbody_name,1,1,1,1


In [17]:
# Attributes together with the variables/dimensions they are attached to:
# one row per (dataset, attribute name, variable) listing the distinct data
# types and values found.
attribute_rows = metadatainfo_df.loc[metadatainfo_df['Row Type'].eq('attribute')]
attr_info = (
    attribute_rows
    .groupby(['DatasetID', 'Attribute Name', 'Variable Name'])
    .agg({'Data Type': 'unique', 'Value': 'unique'})
    .reset_index()
)

attr_info.to_csv("properties/EMSO_ERDDAP_dataset_metadata_attributes.csv", index=False)

attr_info

Unnamed: 0,DatasetID,Attribute Name,Variable Name,Data Type,Value
0,BB_505_ADCP,Conventions,NC_GLOBAL,[String],"[OceanSITES v1.4,SeaDataNet_1.0,COARDS,CF-1.6,..."
1,BB_505_ADCP,Easternmost_Easting,NC_GLOBAL,[double],[17.19352]
2,BB_505_ADCP,Northernmost_Northing,NC_GLOBAL,[double],[41.3413]
3,BB_505_ADCP,Southernmost_Northing,NC_GLOBAL,[double],[41.3413]
4,BB_505_ADCP,Westernmost_Easting,NC_GLOBAL,[double],[17.19352]
...,...,...,...,...,...
76437,smartbay_obs_pco2_contros,valid_min,depth,[double],[0.0]
76438,smartbay_obs_pco2_contros,valid_min,latitude,[double],[-90.0]
76439,smartbay_obs_pco2_contros,valid_min,longitude,[double],[-180.0]
76440,smartbay_obs_pco2_contros,wfd_waterbody_name,NC_GLOBAL,[String],[Outer Galway Bay]


Exploration of **data** from each dataset offered through the ERDDAP server, by accessing as pd.DataFrame  
(less efficient than accessing as ncCF)

In [4]:
# list datasets
print(f"there are {len(EMSO_alldatasets_df['datasetID'].drop_duplicates())} datasets available via EMSO ERDDAP")

# Connect to the same server with a separate client, so iterating over the
# datasets here does not change `emsoERDDAP.dataset_id`.
erddap = ERDDAP(server=endpoint_url, protocol="tabledap")

# first half (otherwise code takes too long to run)
first_half_ids = EMSO_alldatasets_df['datasetID'][:len(EMSO_alldatasets_df) // 2]

# Collect one summary frame per dataset and concatenate once after the loop,
# rather than re-copying a growing DataFrame on every iteration.
summary_frames = []
for datasetID in first_half_ids:
    print(f"Processing dataset: {datasetID}")

    try:
        # Set the dataset ID
        erddap.dataset_id = datasetID

        # Download the complete dataset as a 2D dataframe. No timeout is
        # configured here; any failure is caught and reported below.
        dataset_df = erddap.to_pandas()

        # Summarise the dataset's columns with the project helper `data_frame`
        # (presumably provided by the star import from `summarize`).
        summ_df = data_frame(dataset_df, datasetID)
        summary_frames.append(summ_df)

    except Exception as e:
        # Best effort: report the failing dataset and carry on with the rest.
        print(f"There was an error for {datasetID}: {e}")

# ignore_index=True gives the combined frame a unique running index, matching
# how the second-half cell concatenates.
summary_full_df = pd.concat(summary_frames, ignore_index=True) if summary_frames else pd.DataFrame()

# write first half to csv file
summary_full_df.to_csv("properties/EMSO_ERDDAP_dataset_data_metadata.csv", index=False)
# view
summary_full_df
#Property ~ variables (attributes & dimensions not clear)

there are 171 datasets available via EMSO ERDDAP
Processing dataset: allDatasets
Processing dataset: EMSO_OBSEA_CTD_30min
Processing dataset: EMSO_OBSEA_Besos_Buoy_Airmar_200WX_30min
Processing dataset: EMSO_OBSEA_Besos_Buoy_Airmar_200WX_full
Processing dataset: E2M3A_METEO
Processing dataset: E2M3A_MRDT
Processing dataset: E2M3A_PCO2PROA
Processing dataset: E2M3A_PCO2PROW
Processing dataset: E2M3A_SAMI
Processing dataset: E2M3A_SBE16PLS
Processing dataset: E2M3A_SBE37O
Processing dataset: E2M3A_CTD_meteo_CO2_pH_NRT
Processing dataset: E2M3A_2021_2022_TS
Processing dataset: Emso_Azores_Chemini_IRON
Processing dataset: EMSO-AZORES_TCM3-1_2016-2017
Processing dataset: EMSO-AZORES_TCM3-1_2017-2018
Processing dataset: EMSO-AZORES_TCM3-1_2018-2019
Processing dataset: EMSO-AZORES_TCM3-1_2021-2022
Processing dataset: EMSO-AZORES_TCM3-2_2017-2018
Processing dataset: EMSO-AZORES_TCM3-2_2018-2019
Processing dataset: EMSO-AZORES_TCM3-2_2019-2020
Processing dataset: EMSO-AZORES_TCM3-2_2020-2021
Pr

Unnamed: 0,DatasetID,Property,Count,Types,Example,UniqueValues
0,allDatasets,datasetID,171,object,allDatasets,{}
1,allDatasets,accessible,171,object,public,{public}
2,allDatasets,institution,171,object,Axiom Docker Install,{}
3,allDatasets,dataStructure,171,object,table,{table}
4,allDatasets,cdm_data_type,171,object,Other,"{Other, Point, TimeSeries}"
...,...,...,...,...,...,...
3,EMSO-AZORES_EGIM_Turbidity_2017-2018,longitude (degrees_east),25469,float64,-32.27562,{-32.27562}
4,EMSO-AZORES_EGIM_Turbidity_2017-2018,depth (m),25469,float64,1700.0,{1700.0}
5,EMSO-AZORES_EGIM_Turbidity_2017-2018,DEPH_QC,25469,int64,7,{7}
6,EMSO-AZORES_EGIM_Turbidity_2017-2018,TUR4 (NTU),25469,float64,294.0,{}


In [6]:
# Second half of the catalogue. Continues appending to `summary_full_df`
# produced by the first-half cell and reuses its `erddap` client.
second_half_ids = EMSO_alldatasets_df['datasetID'][(len(EMSO_alldatasets_df) // 2):]
checkpoint_csv = "properties/EMSO_ERDDAP_dataset_data_metadata2.csv"

for datasetID in second_half_ids:
    print(f"Processing dataset: {datasetID}")

    try:
        erddap.dataset_id = datasetID

        # Download the complete dataset as a 2D dataframe (no timeout is set).
        dataset_df = erddap.to_pandas()

        # Summarise the dataset and append it to the running result.
        summ_df = data_frame(dataset_df, datasetID)
        summary_full_df = pd.concat([summary_full_df, summ_df], ignore_index=True)

        # Checkpoint: overwrite the file after every successful dataset so that
        # as much information as possible is on disk if a later step fails.
        summary_full_df.to_csv(checkpoint_csv, index=False)
    except Exception as e:
        print(f"There was an error for {datasetID}: {e}")


# Final write (also creates the file when no dataset succeeded in the loop).
summary_full_df.to_csv(checkpoint_csv, index=False)
summary_full_df
#note: doesn't list netCDF attributes

Processing dataset: EMSO-AZORES_CHEMINI_Total-Iron_2011-2012
Processing dataset: EMSO-AZORES_CHEMINI_Total-Iron_2013-2014
Processing dataset: EMSO-AZORES_CHEMINI_Total-Iron_2014-2015
Processing dataset: EMSO-AZORES_CHEMINI_Total-Iron_2016-2017
Processing dataset: EMSO-AZORES_CHEMINI_Total-Iron_2017-2018
Processing dataset: EMSO-AZORES_CHEMINI_Total-Iron_2018-2019
Processing dataset: EMSO-AZORES_CHEMINI_Total-Iron_2019-2020
Processing dataset: EMSO-AZORES_Seamon-East_Optode-O2_2013-2014
Processing dataset: EMSO-AZORES_Seamon-East_Optode-O2_2015-2016
Processing dataset: EMSO-AZORES_Seamon-East_Optode-O2_2016-2017
Processing dataset: EMSO-AZORES_Seamon-East_Optode-O2_2012-2013
Processing dataset: EMSO-AZORES_Wetlabs_Turbidity_2011-2012
Processing dataset: EMSO-AZORES_Wetlabs_Turbidity_2012-2013
Processing dataset: EMSO-AZORES_Wetlabs_Turbidity_2013-2014
Processing dataset: EMSO-AZORES_Wetlabs_Turbidity_2015-2016
Processing dataset: EMSO-AZORES_Wetlabs_Turbidity_2016-2017
Processing datase

: 

: 

Comparing the properties between ERDDAP servers (ARGO & EMSO ERDDAP servers)

*~ similar properties but not 100% the same, which can lead to confusion & result in errors when combining data*

### Analysis results:

- datasets offered by the ERDDAP server directly return data  
(compared to other ERDDAP server where you need an additional step to retrieve actual files)
- requires knowledge of netCDF files 
- content of netCDF files:
    - in some cases, ORCID iDs and URLs are used for the creator  
        --> good, but inconsistent and incomplete (e.g. no ROR ID for institutes) 
        --> why not for contributors, ...?  
- the allDatasets dataset contains metadata about all the datasets offered by the ERDDAP server; the properties (i.e. columns) with which this metadata is described are the same between ERDDAP servers (cf. comparing to EMSO-ERIC ERDDAP server)  