# Finding datasets of interest

In [1]:
import datetime
import pandas as pd
from erddapy import ERDDAP
from tqdm.notebook import tqdm

We can use ERDDAP's built-in search function to find datasets that match a keyword.

In [2]:
# Connect to the OSMC ERDDAP server
e = ERDDAP(server="https://osmc.noaa.gov/erddap")

# Build the URL of a free-text search for "buoys" that returns CSV,
# then load the search results into a DataFrame
url = e.get_search_url(response="csv", search_for="buoys")
df = pd.read_csv(url)

# Display only the columns needed to judge whether a dataset is relevant
df.loc[:, ["Dataset ID", "Title", "Summary"]]

Unnamed: 0,Dataset ID,Title,Summary
0,pmelTaoDyAirt,"TAO/TRITON, RAMA, and PIRATA Buoys, Daily, 197...",This dataset has daily Air Temperature data fr...
1,pmelTaoDySst,"TAO/TRITON, RAMA, and PIRATA Buoys, Daily, 197...",This dataset has daily Sea Surface Temperature...
2,pmelTaoDyT,"TAO/TRITON, RAMA, and PIRATA Buoys, Daily, 197...",This dataset has daily Temperature data from t...
3,pmelTaoDyW,"TAO/TRITON, RAMA, and PIRATA Buoys, Daily, 197...",This dataset has daily Wind data from the\nTAO...
4,pmelTaoDyS,"TAO/TRITON, RAMA, and PIRATA Buoys, Daily, 198...",This dataset has daily Salinity data from the\...
5,pmelTaoDyAdcp,"TAO/TRITON, RAMA, and PIRATA Buoys, Daily, 198...",This dataset has daily Acoustic Doppler Curren...
6,pmelTaoDyTau,"TAO/TRITON, RAMA, and PIRATA Buoys, Daily, 198...",This dataset has daily Wind Stress data from t...
7,pmelTaoDySss,"TAO/TRITON, RAMA, and PIRATA Buoys, Daily, 199...",This dataset has daily Sea Surface Salinity da...
8,pmelTaoDyRain,"TAO/TRITON, RAMA, and PIRATA Buoys, Daily, 199...",This dataset has daily Precipitation data from...
9,drifter_6hour_qc,Global Drifter Program - 6 Hour Interpolated Q...,Global Drifter Program 6-hourly drifting buoy ...


In [3]:
# Connect to the Voice of the Ocean (VOTO) ERDDAP server
e = ERDDAP(server="https://erddap.observations.voiceoftheocean.org/erddap")

# Run a free-text search for "cable" and read the CSV response
url = e.get_search_url(response="csv", search_for="cable")
df = pd.read_csv(url)

# List the IDs of every matching dataset
df.loc[:, "Dataset ID"]

0         nrt_SEA067_M26
1         nrt_SEA067_M27
2         nrt_SEA067_M29
3         nrt_SEA066_M45
4         nrt_SEA067_M30
5         nrt_SEA067_M32
6         nrt_SEA066_M41
7         nrt_SEA066_M42
8         nrt_SEA066_M43
9         nrt_SEA076_M13
10         nrt_SEA079_M9
11        nrt_SEA078_M11
12        nrt_SEA067_M37
13        nrt_SEA067_M39
14    delayed_SEA067_M26
15    delayed_SEA067_M27
16    delayed_SEA067_M29
17    delayed_SEA067_M30
18    delayed_SEA067_M32
19    delayed_SEA066_M43
20    delayed_SEA066_M41
21    delayed_SEA066_M42
22    delayed_SEA076_M13
23     delayed_SEA079_M9
24    delayed_SEA078_M11
25    delayed_SEA067_M37
26    delayed_SEA067_M39
27       adcp_SEA067_M26
28       adcp_SEA067_M27
29       adcp_SEA067_M29
30        adcp_SEA079_M9
31       adcp_SEA076_M13
32       adcp_SEA067_M30
33       adcp_SEA067_M32
34       adcp_SEA067_M37
35       adcp_SEA067_M39
36       adcp_SEA066_M41
37       adcp_SEA066_M42
38       adcp_SEA066_M43
39       adcp_SEA078_M11


### Search with allDatasets

For a more sophisticated search, we can use ERDDAP's `allDatasets` dataset to examine the primary metadata of all glider missions. We will use only the `nrt` datasets to speed up the process of examining the metadata.

In [4]:
e = ERDDAP(
    server="https://erddap.observations.voiceoftheocean.org/erddap",
    protocol="tabledap",
)

# Fetch the dataset list: "allDatasets" is requested as CSV and the two
# time-range columns are parsed as datetimes.
e.response = "csv"
e.dataset_id = "allDatasets"
df_all = e.to_pandas(parse_dates=["minTime (UTC)", "maxTime (UTC)"])

# Subset of useful columns to keep
metadata_columns = [
    "institution",
    "dataStructure",
    "cdm_data_type",
    "minLongitude (degrees_east)",
    "maxLongitude (degrees_east)",
    "minLatitude (degrees_north)",
    "maxLatitude (degrees_north)",
    "minAltitude (m)",
    "maxAltitude (m)",
    "minTime (UTC)",
    "maxTime (UTC)",
    "infoUrl",
]

# Make the datasetID the index for easier reading, drop the allDatasets row
# itself and keep only the selected columns. Nothing is mutated in place, so
# re-running this cell always starts from the freshly fetched table.
df_metadata = (
    df_all
    .set_index("datasetID")
    .drop(index="allDatasets")
    .loc[:, metadata_columns]
)

# Keep only the nrt datasets. The explicit .copy() makes df_datasets an
# independent frame, so columns can be added to it afterwards without pandas
# raising SettingWithCopyWarning.
is_nrt = df_metadata.index.str.startswith("nrt")
df_datasets = df_metadata[is_nrt].copy()
print(f"Analysing {len(df_datasets)} nrt datasets")

Analysing 154 nrt datasets


**Side note:** All VOTO datasets are represented twice on the ERDDAP server: the `nrt_` version contains data communicated in near real time by the glider, while the `delayed_` version is the full-resolution dataset downloaded from the glider after recovery. The `delayed_` version is typically ~500 times larger than the `nrt_` version.

In [5]:
# Show the filtered metadata table: one row per nrt dataset, indexed by datasetID
df_datasets

Unnamed: 0_level_0,institution,dataStructure,cdm_data_type,minLongitude (degrees_east),maxLongitude (degrees_east),minLatitude (degrees_north),maxLatitude (degrees_north),minAltitude (m),maxAltitude (m),minTime (UTC),maxTime (UTC),infoUrl
datasetID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
nrt_SEA068_M27,Voice of the Ocean Foundation,table,TimeSeries,19.901883,19.982300,58.199850,58.265983,0.102737,-160.836667,2022-07-27 17:09:41+00:00,2022-07-31 03:51:42+00:00,https://cfconventions.org/cf-conventions/v1.6....
nrt_SEA076_M8,Voice of the Ocean Foundation,table,TimeSeries,15.736967,16.323750,55.532217,55.746117,0.072139,-75.772686,2022-10-05 15:04:10+00:00,2022-10-19 20:25:55+00:00,https://cfconventions.org/cf-conventions/v1.6....
nrt_SEA076_M9,Voice of the Ocean Foundation,table,TimeSeries,15.739817,16.296533,55.544550,55.661333,0.054402,-70.144741,2022-10-20 15:11:45+00:00,2022-10-31 13:05:46+00:00,https://cfconventions.org/cf-conventions/v1.6....
nrt_SEA076_M13,Voice of the Ocean Foundation,table,TimeSeries,19.400867,20.098333,58.250300,58.517300,0.266205,-164.838439,2023-01-13 02:09:24+00:00,2023-02-14 10:34:35+00:00,https://cfconventions.org/cf-conventions/v1.6....
nrt_SEA076_M16,Voice of the Ocean Foundation,table,TimeSeries,19.786233,19.967400,57.992133,58.514050,0.156828,-190.680438,2023-04-11 12:59:54+00:00,2023-05-16 11:16:28+00:00,https://cfconventions.org/cf-conventions/v1.6....
...,...,...,...,...,...,...,...,...,...,...,...,...
nrt_SEA061_M62,Voice of the Ocean Foundation,table,TimeSeries,10.796117,11.466467,57.613517,58.199350,-0.011295,-192.699238,2022-07-19 12:05:47+00:00,2022-08-12 07:13:14+00:00,https://cfconventions.org/cf-conventions/v1.6....
nrt_SEA061_M63,Voice of the Ocean Foundation,table,TimeSeries,10.849650,11.525550,57.582883,58.112233,0.154361,-185.037862,2022-08-15 08:05:08+00:00,2022-09-07 00:18:26+00:00,https://cfconventions.org/cf-conventions/v1.6....
nrt_SEA078_M11,Voice of the Ocean Foundation,table,TimeSeries,19.510350,20.092867,58.146167,58.514700,0.036061,-173.116164,2023-03-16 10:19:34+00:00,2023-04-11 11:56:43+00:00,https://cfconventions.org/cf-conventions/v1.6....
nrt_SEA078_M12,Voice of the Ocean Foundation,table,TimeSeries,19.829033,20.360400,57.249250,58.138117,0.159316,-235.173925,2023-05-16 07:43:20+00:00,2023-06-20 09:21:55+00:00,https://cfconventions.org/cf-conventions/v1.6....


### Find datasets from deployments that lasted longer than 30 days

In [6]:
# Minimum span between first and last timestamp, in days
min_days = 30

# Store the difference between each dataset's last and first timestamp
df_datasets["endurance"] = df_datasets["maxTime (UTC)"] - df_datasets["minTime (UTC)"]

# Select the endurance of every dataset that exceeds the threshold
exceeds_min_days = df_datasets["endurance"] > datetime.timedelta(days=min_days)
df_datasets.loc[exceeds_min_days, "endurance"]

datasetID
nrt_SEA076_M13   32 days 08:25:11
nrt_SEA076_M16   34 days 22:16:34
nrt_SEA076_M17   37 days 17:14:37
nrt_SEA045_M41   34 days 01:28:01
nrt_SEA045_M43   32 days 13:01:05
nrt_SEA045_M44   31 days 22:47:09
nrt_SEA079_M9    30 days 01:34:14
nrt_SEA079_M11   34 days 15:25:50
nrt_SEA079_M12   38 days 00:11:17
nrt_SEA044_M32   34 days 00:20:23
nrt_SEA044_M34   32 days 17:56:00
nrt_SEA044_M35   31 days 18:45:38
nrt_SEA077_M22   34 days 22:07:17
nrt_SEA078_M12   35 days 01:38:35
Name: endurance, dtype: timedelta64[ns]

### Which glider missions extended to a depth greater than 150 m in the seas northeast of Gotland?

In [7]:
# Thresholds for the search. Note the sign of the altitude: the threshold for
# "deeper than 150 m" is expressed as an altitude of -150 m.
min_altitude = -150
min_easting = 19  # compared against 'maxLongitude (degrees_east)'
min_northing = 58  # compared against 'maxLatitude (degrees_north)'

# Combine the three conditions with the element-wise boolean AND operator.
# Each comparison is parenthesised because & binds tighter than < and >.
# The result is converted to a plain boolean array.
mask = (
    (df_datasets["maxAltitude (m)"] < min_altitude)
    & (df_datasets["maxLongitude (degrees_east)"] > min_easting)
    & (df_datasets["maxLatitude (degrees_north)"] > min_northing)
).to_numpy()

# IDs of the datasets that satisfy all three conditions
df_datasets[mask].index.values

array(['nrt_SEA068_M27', 'nrt_SEA076_M13', 'nrt_SEA076_M16',
       'nrt_SEA069_M9', 'nrt_SEA079_M9', 'nrt_SEA079_M12',
       'nrt_SEA067_M26', 'nrt_SEA067_M27', 'nrt_SEA067_M29',
       'nrt_SEA067_M30', 'nrt_SEA067_M32', 'nrt_SEA067_M37',
       'nrt_SEA067_M39', 'nrt_SEA066_M41', 'nrt_SEA066_M42',
       'nrt_SEA066_M43', 'nrt_SEA077_M21', 'nrt_SEA077_M22',
       'nrt_SEA077_M24', 'nrt_SEA078_M11', 'nrt_SEA078_M12'], dtype=object)

---------------------------
### References

VOTO ERDDAP https://erddap.observations.voiceoftheocean.org/erddap/index.html 

More info on using ERDDAP's inbuilt search https://ioos.github.io/erddapy/01b-tabledap-output.html