In [2]:
from erddapy import ERDDAP
import pandas as pd
import requests
import netCDF4 as nc

### ARGO ERDDAP server

In [3]:
# analysed endpoint: 
endpoint_url = "https://erddap.ifremer.fr/erddap"

### Analysis results:

- data is findable, not always accessible 
    - for some listed datasets: an additional step is needed to open the actual netCDF files 
    (though the documentation mentions this is "OpenDAP on top of NetCDF files" still extra step needed)
    - at moment of running the code, some datasets were not accessible 
    - 'log in' column in 'allDatasets' dataset indicates that not all data is freely available

- data granularity
    - some datasets (e.g. ArgaFloats) contain file paths in 'file' column --> retrieval of actual data requires additional steps
    - other datasets directly return measurement data
      

- semantics:
    - interoperability at higher level --> allDatasets dataset offers metadata about the datasets that are available through the ERDDAP server. Properties/columns used to describe this metadata are the same as in other ERDDAP servers (eg. the EMSO-ERIC ERDDAP server).  
    - other datasets offered by ERDDAP server contain different properties --> similar columns/properties but not defined through external standard terms
        - more prone towards wrongly combining different data  



In [4]:
#connect to server
argoERDDAP = ERDDAP(
    server=endpoint_url,
    protocol="tabledap"
)

In [5]:
#get list of dataset IDs
datasets = argoERDDAP.get_search_url(response='csv')
df = pd.read_csv(datasets)
for dataset_id in df['Dataset ID']:
    print(dataset_id)

allDatasets
ArgoFloats
ArgoFloats-synthetic-BGC
ArgoFloats-reference
ArgoFloats-index
ArgoFloats-reference-CTD
drifter_hourly_qc
drifter_6hour_qc
copernicus-fos
ariane_trajectories_qualitative
coast_status
OceanGlidersGDACTrajectories
SDC_BAL_AGG_V2
SDC_BLS_AGG_V2
SDC_GLO_AGG_V2
SDC_MED_AGG_V2
SDC_NATL_AGG_V2
SDC_BLS_DATA_TS_V1


**Exploration of the allDatasets dataset**  
~ contains metadata about the datasets offered by the ERDDAP server 

In [6]:
#Set a dataset ID
argoERDDAP.dataset_id = "allDatasets"

In [7]:
#Get data (as 2D dataframe)
argoERDDAP_allDatasets_df = argoERDDAP.to_pandas()
argoERDDAP_allDatasets_df

Unnamed: 0,datasetID,accessible,institution,dataStructure,cdm_data_type,class,title,minLongitude (degrees_east),maxLongitude (degrees_east),longitudeSpacing (degrees_east),...,fgdc,iso19115,metadata,sourceUrl,infoUrl,rss,email,testOutOfDate,outOfDate,summary
0,allDatasets,public,Ifremer,table,Other,EDDTableFromAllDatasets,* The List of All Active Datasets in this ERDD...,,,,...,,,https://erddap.ifremer.fr/erddap/info/allDatas...,https://erddap.ifremer.fr/erddap,https://erddap.ifremer.fr/erddap,https://erddap.ifremer.fr/erddap/rss/allDatase...,https://erddap.ifremer.fr/erddap/subscriptions...,,,This dataset is a table which has a row of inf...
1,OACP-Argo-Global,public,LOPS/Ifremer,grid,Grid,EDDGridFromNcFiles,2000-2015 climatology of the Subtropical Mode ...,-242.156,129.844,1.0,...,https://erddap.ifremer.fr/erddap/metadata/fgdc...,https://erddap.ifremer.fr/erddap/metadata/iso1...,https://erddap.ifremer.fr/erddap/info/OACP-Arg...,(local files),https://doi.org/10.17882/56503,https://erddap.ifremer.fr/erddap/rss/OACP-Argo...,https://erddap.ifremer.fr/erddap/subscriptions...,,,Maps of properties from OAC-P estimates. Therm...
2,ArgoFloats,public,Argo,table,TrajectoryProfile,EDDTableFromMultidimNcFiles,Argo Float Measurements,-179.99942,180.0,,...,https://erddap.ifremer.fr/erddap/metadata/fgdc...,https://erddap.ifremer.fr/erddap/metadata/iso1...,https://erddap.ifremer.fr/erddap/info/ArgoFloa...,(local files),https://argo.ucsd.edu/,https://erddap.ifremer.fr/erddap/rss/ArgoFloat...,https://erddap.ifremer.fr/erddap/subscriptions...,,,Argo float vertical profiles from Coriolis Glo...
3,ArgoFloats-synthetic-BGC,public,Argo,table,TrajectoryProfile,EDDTableFromMultidimNcFiles,Argo float synthetic vertical profiles : BGC data,-999.999,181.706,,...,https://erddap.ifremer.fr/erddap/metadata/fgdc...,https://erddap.ifremer.fr/erddap/metadata/iso1...,https://erddap.ifremer.fr/erddap/info/ArgoFloa...,(local files),http://www.argodatamgt.org/Documentation,https://erddap.ifremer.fr/erddap/rss/ArgoFloat...,https://erddap.ifremer.fr/erddap/subscriptions...,now-5days,0.585791,Argo float synthetic vertical profiles : BGC data
4,ArgoFloats-reference,public,Argo,table,Other,EDDTableFromMultidimNcFiles,Argo Reference Measurements,-180.0,179.999487,,...,https://erddap.ifremer.fr/erddap/metadata/fgdc...,https://erddap.ifremer.fr/erddap/metadata/iso1...,https://erddap.ifremer.fr/erddap/info/ArgoFloa...,(local files),http://www.argodatamgt.org/DMQC/Reference-data...,https://erddap.ifremer.fr/erddap/rss/ArgoFloat...,https://erddap.ifremer.fr/erddap/subscriptions...,,,Argo float vertical profiles to be used in DMQ...
5,ArgoFloats-index,public,Argo,table,Other,EDDTableFromAsciiFiles,ArgoFloats index,-999.999,346.813,,...,https://erddap.ifremer.fr/erddap/metadata/fgdc...,https://erddap.ifremer.fr/erddap/metadata/iso1...,https://erddap.ifremer.fr/erddap/info/ArgoFloa...,ftp://ftp.ifremer.fr/ifremer/argo/ar_index_glo...,http://www.argodatamgt.org/DMQC/Reference-data...,https://erddap.ifremer.fr/erddap/rss/ArgoFloat...,https://erddap.ifremer.fr/erddap/subscriptions...,,,Argo detailed index. Gathers data available at...
6,ArgoFloats-reference-CTD,log in,Argo,table,Other,EDDTableFromMultidimNcFiles,CTD Reference Measurements,,,,...,,,,,,,,,,"Conductivity, Temperature, Depth (CTD) Referen..."
7,SST_Anomalies_Caledonie,public,Jet Propulsion Laboratory,grid,Grid,EDDGridFromNcFiles,"Daily MUR SST, Final product",155.01,175.01,0.01,...,https://erddap.ifremer.fr/erddap/metadata/fgdc...,https://erddap.ifremer.fr/erddap/metadata/iso1...,https://erddap.ifremer.fr/erddap/info/SST_Anom...,(local files),https://podaac.jpl.nasa.gov/ws/metadata/datase...,https://erddap.ifremer.fr/erddap/rss/SST_Anoma...,https://erddap.ifremer.fr/erddap/subscriptions...,now-109days,8.870765,"A merged, multi-sensor L4 Foundation Sea Surfa..."
8,OS_DYFAMED_1994-2014_D_TSO2,public,IMEV Villefranche-sur-mer,grid,Profile,EDDGridFromNcFiles,EMSO Ligure DYFAMED Time Series from 1994 to 2...,,,,...,,,https://erddap.ifremer.fr/erddap/info/OS_DYFAM...,(local files),???,https://erddap.ifremer.fr/erddap/rss/OS_DYFAME...,https://erddap.ifremer.fr/erddap/subscriptions...,,,EMSO Ligure DYFAMED Time Series from 1994 to 2...
9,drifter_hourly_qc,public,NOAA Atlantic Oceanographic and Meteorological...,table,Trajectory,EDDTableFromErddap,Global Drifter Program - 1 Hour Interpolated Q...,-180.0,180.0,,...,https://erddap.ifremer.fr/erddap/metadata/fgdc...,https://erddap.ifremer.fr/erddap/metadata/iso1...,https://erddap.ifremer.fr/erddap/info/drifter_...,(local files),https://www.aoml.noaa.gov/phod/dac/dirall.html,https://erddap.ifremer.fr/erddap/rss/drifter_h...,https://erddap.ifremer.fr/erddap/subscriptions...,,,Global Drifter Program hourly drifting buoy co...


In [9]:
#explore columns
argoERDDAP_allDatasets_df.columns

Index(['datasetID', 'accessible', 'institution', 'dataStructure',
       'cdm_data_type', 'class', 'title', 'minLongitude (degrees_east)',
       'maxLongitude (degrees_east)', 'longitudeSpacing (degrees_east)',
       'minLatitude (degrees_north)', 'maxLatitude (degrees_north)',
       'latitudeSpacing (degrees_north)', 'minAltitude (m)', 'maxAltitude (m)',
       'minTime (UTC)', 'maxTime (UTC)', 'timeSpacing (seconds)', 'griddap',
       'subset', 'tabledap', 'MakeAGraph', 'sos', 'wcs', 'wms', 'files',
       'fgdc', 'iso19115', 'metadata', 'sourceUrl', 'infoUrl', 'rss', 'email',
       'testOutOfDate', 'outOfDate', 'summary'],
      dtype='object')

In [10]:
#See if other metadata listed in allDatasets dataset --> metadata available in df.summary
for line in argoERDDAP_allDatasets_df.summary:
    print(line)

This dataset is a table which has a row of information for each dataset currently active in this ERDDAP.
Maps of properties from OAC-P estimates. Thermohaline computing using gsw oceanographic toolbox
Argo float vertical profiles from Coriolis Global Data Assembly Centres\n(GDAC). Argo is an international collaboration that collects high-quality\ntemperature and salinity profiles from the upper 2000m of the ice-free\nglobal ocean and currents from intermediate depths. The data come from\nbattery-powered autonomous floats that spend most of their life drifting\nat depth where they are stabilised by being neutrally buoyant at the\n"parking depth" pressure by having a density equal to the ambient pressure\nand a compressibility that is less than that of sea water. At present there\nare several models of profiling float used in Argo. All work in a similar\nfashion but differ somewhat in their design characteristics. At typically\n10-day intervals, the floats pump fluid into an external bla

In [11]:
# List unique values for each columns
for col in argoERDDAP_allDatasets_df.columns:
    if col not in ['datasetID', 'summary', 'title', 'minLongitude (degrees_east)','maxLongitude (degrees_east)', 'longitudeSpacing (degrees_east)','minLatitude (degrees_north)', 'maxLatitude (degrees_north)','latitudeSpacing (degrees_north)', 'minAltitude (m)', 'maxAltitude (m)','minTime (UTC)', 'maxTime (UTC)', 'timeSpacing (seconds)']:
        print(col)
        print(argoERDDAP_allDatasets_df[col].unique())

accessible
['public' 'log in']
institution
['Ifremer' 'LOPS/Ifremer' 'Argo' 'Jet Propulsion Laboratory'
 'IMEV Villefranche-sur-mer'
 'NOAA Atlantic Oceanographic and Meteorological Laboratory'
 'AZTI; Ifremer'
 'UAF; ONC; CORDC - Coastal Observing Research and Development Center'
 'IFREMER' 'IRD' 'SeaDataNet' 'GEBCO, BODC .']
dataStructure
['table' 'grid']
cdm_data_type
['Other' 'Grid' 'TrajectoryProfile' 'Profile' 'Trajectory' 'TimeSeries'
 'Point']
class
['EDDTableFromAllDatasets' 'EDDGridFromNcFiles'
 'EDDTableFromMultidimNcFiles' 'EDDTableFromAsciiFiles'
 'EDDTableFromErddap' 'EDDTableFromNcFiles' 'EDDGridSideBySide']
griddap
[nan 'https://erddap.ifremer.fr/erddap/griddap/OACP-Argo-Global'
 'https://erddap.ifremer.fr/erddap/griddap/SST_Anomalies_Caledonie'
 'https://erddap.ifremer.fr/erddap/griddap/OS_DYFAMED_1994-2014_D_TSO2'
 'https://erddap.ifremer.fr/erddap/griddap/HF_75c7_5b60_95d8'
 'https://erddap.ifremer.fr/erddap/griddap/HF_ac49_84ad_3eb6'
 'https://erddap.ifremer.fr/erdd

**Exploration of the ArgoFloats-index dataset**

In [12]:
#Set a different dataset ID
argoERDDAP.dataset_id = "ArgoFloats-index"

In [13]:
#try to get various kinds of info
search_url = argoERDDAP.get_search_url()
download_url = argoERDDAP.get_download_url()
info_url = argoERDDAP.get_info_url()

print(f"search_url: {search_url},\ndownload_url: {download_url},\ninfo_url: {info_url}")

search_url: https://erddap.ifremer.fr/erddap/search/advanced.html?page=1&itemsPerPage=1000&protocol=tabledap&cdm_data_type=(ANY)&institution=(ANY)&ioos_category=(ANY)&keywords=(ANY)&long_name=(ANY)&standard_name=(ANY)&variableName=(ANY)&minLon=(ANY)&maxLon=(ANY)&minLat=(ANY)&maxLat=(ANY)&minTime=&maxTime=,
download_url: https://erddap.ifremer.fr/erddap/tabledap/ArgoFloats-index.html?,
info_url: https://erddap.ifremer.fr/erddap/info/ArgoFloats-index/index.html


In [14]:
# get & view data (as 2D DataFrame)
argoERDDAP_argoFloats_idx_df = argoERDDAP.to_pandas()
argoERDDAP_argoFloats_idx_df

Unnamed: 0,file,date (UTC),latitude (degrees_north),longitude (degrees_east),ocean,profiler_type,institution,date_update (UTC)
0,aoml/13857/profiles/R13857_001.nc,1997-07-29T20:03:00Z,0.267,-16.032,A,845,AO,2018-10-11T18:05:20Z
1,aoml/13857/profiles/R13857_002.nc,1997-08-09T19:21:12Z,0.072,-17.659,A,845,AO,2018-10-11T18:05:21Z
2,aoml/13857/profiles/R13857_003.nc,1997-08-20T18:45:45Z,0.543,-19.622,A,845,AO,2018-10-11T18:05:21Z
3,aoml/13857/profiles/R13857_004.nc,1997-08-31T19:39:05Z,1.256,-20.521,A,845,AO,2018-10-11T18:05:21Z
4,aoml/13857/profiles/R13857_005.nc,1997-09-11T18:58:08Z,0.720,-20.768,A,845,AO,2018-10-11T18:05:21Z
...,...,...,...,...,...,...,...,...
3013088,nmdis/2901633/profiles/R2901633_067.nc,2013-05-01T04:38:45Z,27.462,139.107,P,841,NM,2013-05-07T10:34:43Z
3013089,nmdis/2901633/profiles/R2901633_068.nc,2013-05-11T04:33:38Z,27.432,138.840,P,841,NM,2013-05-11T16:57:23Z
3013090,nmdis/2901633/profiles/R2901633_069.nc,2013-05-21T04:26:31Z,27.692,138.677,P,841,NM,2013-05-21T17:01:39Z
3013091,nmdis/2901633/profiles/R2901633_070.nc,2013-05-31T04:45:25Z,27.895,138.465,P,841,NM,2013-05-31T18:15:16Z


In [15]:
#explore columns
argoERDDAP_argoFloats_idx_df.columns

Index(['file', 'date (UTC)', 'latitude (degrees_north)',
       'longitude (degrees_east)', 'ocean', 'profiler_type', 'institution',
       'date_update (UTC)'],
      dtype='object')

In [16]:
#getting an actual data file
base_url = "https://data-argo.ifremer.fr/dac/" #need to look up and hardcode 
filepath = argoERDDAP_argoFloats_idx_df.loc[0, 'file']
filename = filepath.split('/')[-1]
url = base_url+filepath

response = requests.get(url)
with open(filename, 'wb') as file:
    file.write(response.content)

dataset = nc.Dataset(filename, 'r')
dataset

<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF3_CLASSIC data model, file format NETCDF3):
    title: Argo float vertical profile
    institution: AOML
    source: Argo float
    history: 2018-10-11T18:05:20Z creation
    references: http://www.argodatamgt.org/Documentation
    comment: free text
    user_manual_version: 3.2
    Conventions: Argo-3.2 CF-1.6
    featureType: trajectoryProfile
    dimensions(sizes): DATE_TIME(14), STRING256(256), STRING64(64), STRING32(32), STRING16(16), STRING8(8), STRING4(4), STRING2(2), N_PROF(1), N_PARAM(2), N_LEVELS(112), N_CALIB(1), N_HISTORY(2)
    variables(dimensions): |S1 DATA_TYPE(STRING16), |S1 FORMAT_VERSION(STRING4), |S1 HANDBOOK_VERSION(STRING4), |S1 REFERENCE_DATE_TIME(DATE_TIME), |S1 DATE_CREATION(DATE_TIME), |S1 DATE_UPDATE(DATE_TIME), |S1 PLATFORM_NUMBER(N_PROF, STRING8), |S1 PROJECT_NAME(N_PROF, STRING64), |S1 PI_NAME(N_PROF, STRING64), |S1 STATION_PARAMETERS(N_PROF, N_PARAM, STRING16), int32 CYCLE_NUMBER(N_PROF), |S1 DIRECTION(

In [18]:
#list information on the dataset
print(f"GLOBAL ATTRIBUTES: {list(dataset.ncattrs())}")
print(f"DIMENSIONS: {list(dataset.dimensions.keys())}")
print(f"VARIABLES: {list(dataset.variables.keys())}")

GLOBAL ATTRIBUTES: ['title', 'institution', 'source', 'history', 'references', 'comment', 'user_manual_version', 'Conventions', 'featureType']
DIMENSIONS: ['DATE_TIME', 'STRING256', 'STRING64', 'STRING32', 'STRING16', 'STRING8', 'STRING4', 'STRING2', 'N_PROF', 'N_PARAM', 'N_LEVELS', 'N_CALIB', 'N_HISTORY']
VARIABLES: ['DATA_TYPE', 'FORMAT_VERSION', 'HANDBOOK_VERSION', 'REFERENCE_DATE_TIME', 'DATE_CREATION', 'DATE_UPDATE', 'PLATFORM_NUMBER', 'PROJECT_NAME', 'PI_NAME', 'STATION_PARAMETERS', 'CYCLE_NUMBER', 'DIRECTION', 'DATA_CENTRE', 'DC_REFERENCE', 'DATA_STATE_INDICATOR', 'DATA_MODE', 'PLATFORM_TYPE', 'FLOAT_SERIAL_NO', 'FIRMWARE_VERSION', 'WMO_INST_TYPE', 'JULD', 'JULD_QC', 'JULD_LOCATION', 'LATITUDE', 'LONGITUDE', 'POSITION_QC', 'POSITIONING_SYSTEM', 'VERTICAL_SAMPLING_SCHEME', 'CONFIG_MISSION_NUMBER', 'PROFILE_PRES_QC', 'PROFILE_TEMP_QC', 'PRES', 'PRES_QC', 'PRES_ADJUSTED', 'PRES_ADJUSTED_QC', 'PRES_ADJUSTED_ERROR', 'TEMP', 'TEMP_QC', 'TEMP_ADJUSTED', 'TEMP_ADJUSTED_QC', 'TEMP_ADJUST

In [22]:
#for the list of variables, 
# list the dimension(s) in which they occur, their shape and associated units 
for var in list(dataset.variables):
    if var in dataset.variables:
        variable = dataset[var]
        print(var.upper())

        print(f"dimensions: {variable.dimensions}")
        print(f"shape: {variable.shape}")
        try:
            print(f"units: {variable.units}")
        except:
            pass

DATA_TYPE
dimensions: ('STRING16',)
shape: (16,)
FORMAT_VERSION
dimensions: ('STRING4',)
shape: (4,)
HANDBOOK_VERSION
dimensions: ('STRING4',)
shape: (4,)
REFERENCE_DATE_TIME
dimensions: ('DATE_TIME',)
shape: (14,)
DATE_CREATION
dimensions: ('DATE_TIME',)
shape: (14,)
DATE_UPDATE
dimensions: ('DATE_TIME',)
shape: (14,)
PLATFORM_NUMBER
dimensions: ('N_PROF', 'STRING8')
shape: (1, 8)
PROJECT_NAME
dimensions: ('N_PROF', 'STRING64')
shape: (1, 64)
PI_NAME
dimensions: ('N_PROF', 'STRING64')
shape: (1, 64)
STATION_PARAMETERS
dimensions: ('N_PROF', 'N_PARAM', 'STRING16')
shape: (1, 2, 16)
CYCLE_NUMBER
dimensions: ('N_PROF',)
shape: (1,)
DIRECTION
dimensions: ('N_PROF',)
shape: (1,)
DATA_CENTRE
dimensions: ('N_PROF', 'STRING2')
shape: (1, 2)
DC_REFERENCE
dimensions: ('N_PROF', 'STRING32')
shape: (1, 32)
DATA_STATE_INDICATOR
dimensions: ('N_PROF', 'STRING4')
shape: (1, 4)
DATA_MODE
dimensions: ('N_PROF',)
shape: (1,)
PLATFORM_TYPE
dimensions: ('N_PROF', 'STRING32')
shape: (1, 32)
FLOAT_SERIAL_N

**Exploration of the HF_75c7_5b60_95d8 dataset**

In [23]:
#Set dataset ID
argoERDDAP.dataset_id = "HF_75c7_5b60_95d8"

In [24]:
#get data (as dataframe)
argoERDDAP_HF_75c7_5b60_95d8_df = argoERDDAP.to_pandas()
argoERDDAP_HF_75c7_5b60_95d8_df

HTTPError: Error {
    code=404;
    message="Not Found: Currently unknown datasetID=HF_75c7_5b60_95d8";
}


**Exploration of the ariane_trajectories_qualitative dataset**

In [29]:
#Set dataset ID
argoERDDAP.dataset_id = "ariane_trajectories_qualitative"

In [30]:
argoERDDAP_ariane_trajectories_qualitative_df = argoERDDAP.to_pandas()
argoERDDAP_ariane_trajectories_qualitative_df

Unnamed: 0,init_x (No dimension),init_y (No dimension),init_z (No dimension),init_t (See global attributes...),init_age (seconds),init_transp (m3/s),final_x (No dimension),final_y (No dimension),final_z (No dimension),final_t (See global attributes...),final_age (seconds),final_transp (m3/s)
0,182.1642,272.3743,1.5,0.5,0.0,1.0,448.000000,296.000000,1.5,36504.500000,1.314144e+08,1.0
1,181.8065,272.3147,1.5,0.5,0.0,1.0,614.986243,719.000000,1.5,29549.785838,1.063774e+08,1.0
2,179.1697,274.1630,1.5,0.5,0.0,1.0,1.000000,201.564711,1.5,12502.490471,4.500717e+07,1.0
3,180.7601,274.8874,1.5,0.5,0.0,1.0,351.000000,273.000000,1.5,36504.500000,1.314144e+08,1.0
4,181.1447,275.1676,1.5,0.5,0.0,1.0,362.000000,364.000000,1.5,36504.500000,1.314144e+08,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...
1889836,269.9379,219.1329,1.5,36504.5,0.0,1.0,268.089619,219.427038,1.5,36528.500000,8.640000e+04,1.0
1889837,270.5438,217.3731,1.5,36504.5,0.0,1.0,270.343088,217.531382,1.5,36528.500000,8.640000e+04,1.0
1889838,270.3280,218.7179,1.5,36504.5,0.0,1.0,269.270101,219.120328,1.5,36528.500000,8.640000e+04,1.0
1889839,272.3119,219.0997,1.5,36504.5,0.0,1.0,270.394682,219.668109,1.5,36528.500000,8.640000e+04,1.0


**Exploration of the SDC_MED_CLIM_TS_V2_s_pre_post_emt dataset**

In [31]:
#Set dataset ID
argoERDDAP.dataset_id = "SDC_MED_CLIM_TS_V2_s_pre_post_emt"

In [32]:
#get data (as dataframe)
SDC_MED_CLIM_TS_V2_s_pre_post_emt_df = argoERDDAP.to_pandas()
SDC_MED_CLIM_TS_V2_s_pre_post_emt_df

HTTPError: Error {
    code=404;
    message="Not Found: Currently unknown datasetID=SDC_MED_CLIM_TS_V2_s_pre_post_emt";
}
