# Finding datasets of interest

In [1]:
import datetime
import pandas as pd
import numpy as np
from erddapy import ERDDAP
from tqdm.notebook import tqdm

We can use ERDDAP's built-in search function to find datasets that match a keyword.

### Find drifter datasets from OSMC

In [2]:
# Connect to the OSMC ERDDAP server and build the URL for a keyword search on "drifter"
e = ERDDAP(server="https://osmc.noaa.gov/erddap", protocol="tabledap")
url = e.get_search_url(response="csv", search_for="drifter")

# The search response is requested as CSV, so pandas can read it straight from the URL
summary_columns = ["Dataset ID", "Title"]
df = pd.read_csv(url)
df.loc[:, summary_columns]

Unnamed: 0,Dataset ID,Title
0,drifter_6hour_qc,Global Drifter Program - 6 Hour Interpolated Q...
1,drifter_hourly_qc,Global Drifter Program - Hourly Interpolated Q...


### Find Argo floats with chlorophyll and CDOM data

In [3]:
# Run the keyword search against the Ifremer ERDDAP server, using two search terms.
# NOTE(review): no protocol is passed to ERDDAP() in this cell — confirm that is intended.
e = ERDDAP(server="https://erddap.ifremer.fr/erddap")
url = e.get_search_url(response="csv", search_for="chlorophyll CDOM")

# Read the CSV search response and show only the identifying columns
summary_columns = ["Dataset ID", "Title"]
df = pd.read_csv(url)
df.loc[:, summary_columns]

Unnamed: 0,Dataset ID,Title
0,OceanGlidersGDACTrajectories,OceanGliders GDAC trajectories
1,ArgoFloats-synthetic-BGC,Argo float synthetic vertical profiles : BGC data


### Search with allDatasets

For a more sophisticated search, we can use the `allDatasets` dataset from ERDDAP to examine the primary metadata of all datasets. 

### Get metadata for IOOS glider datasets

In [4]:
e = ERDDAP(server="https://gliders.ioos.us/erddap", protocol="tabledap")

# Columns retained in the final metadata table
useful_columns = [
    'institution',
    'minLongitude (degrees_east)',
    'maxLongitude (degrees_east)',
    'minLatitude (degrees_north)',
    'maxLatitude (degrees_north)',
    'minAltitude (m)',
    'maxAltitude (m)',
    'minTime',
    'maxTime',
]

# Fetch the dataset list, index it by datasetID for easier reading, and
# drop the row that describes allDatasets itself
e.dataset_id = "allDatasets"
df_datasets = (
    e.to_pandas()
    .set_index("datasetID")
    .drop("allDatasets")
    .assign(
        # The last character of each time string is removed before parsing
        # (presumably the trailing "Z" UTC designator — verify against the server output)
        minTime=lambda meta: pd.to_datetime(meta['minTime (UTC)'].str[:-1]),
        maxTime=lambda meta: pd.to_datetime(meta['maxTime (UTC)'].str[:-1]),
    )
)

# Keep a subset of useful columns
df_datasets = df_datasets[useful_columns]

print(f"Found {len(df_datasets)} datasets")

Found 1647 datasets


In [5]:
# Display the metadata table assembled in the previous cell
df_datasets

Unnamed: 0_level_0,institution,minLongitude (degrees_east),maxLongitude (degrees_east),minLatitude (degrees_north),maxLatitude (degrees_north),minAltitude (m),maxAltitude (m),minTime,maxTime
datasetID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
amelia-20180501T0000,Virginia Institute of Marine Science - William...,-75.079579,-74.445788,37.404436,37.539598,-1.040000e+00,-351.1800,2018-05-01 14:58:48,2018-05-14 12:29:12
amelia-20200825T1929,Virginia Institute of Marine Science - William...,-75.360315,-74.443807,36.334915,37.353705,2.511368e-02,-323.9919,2020-08-25 19:59:18,2020-09-25 14:32:48
amelia-20201015T1436,Virginia Institute of Marine Science - William...,-74.947858,-74.405013,37.042386,37.662572,-2.014898e-02,-344.7628,2020-10-15 14:44:15,2020-10-22 06:36:43
amlr01-20181216T0641-delayed,NOAA SWFSC Antarctic Ecosystem Research Division,-61.756371,-56.996472,-63.465319,-62.203002,-1.315100e-14,-1003.8600,2018-12-16 16:05:20,2019-03-08 09:37:33
amlr01-20191206T0452-delayed,NOAA SWFSC Antarctic Ecosystem Research Division,-61.667188,-57.433161,-63.555934,-62.186826,-3.265242e-15,-962.1450,2019-12-06 14:45:17,2020-03-12 14:37:18
...,...,...,...,...,...,...,...,...,...
UW646-20210816T0000,Oregon State University,-125.395030,-124.601065,43.850373,44.653477,4.059241e-01,-996.5469,2021-08-16 18:57:36,2021-08-24 23:59:59
UW646-20211112T0000,Oregon State University,-128.973918,-124.363708,40.820313,41.177692,5.003096e-01,-1005.2760,2021-11-12 18:53:18,2022-06-16 14:33:31
UW646-20220907T0000,Oregon State University,-129.101990,-124.354081,40.805190,41.240848,6.568508e-01,-994.1479,2022-07-29 04:21:11,2023-01-26 13:31:17
UW685-20230125T0000,Oregon State University,-129.214460,-124.350906,40.834967,41.117200,3.087025e-01,-993.3872,2023-01-26 20:10:27,2023-08-10 04:58:13


### Which gliders have uploaded data in the last 6 hours?

In [6]:
# Select datasets whose newest record (maxTime) is less than six hours older than "now"
max_age = datetime.timedelta(hours=6)
time_since_update = np.datetime64('now') - df_datasets["maxTime"]
df_datasets.loc[time_since_update < max_age]

Unnamed: 0_level_0,institution,minLongitude (degrees_east),maxLongitude (degrees_east),minLatitude (degrees_north),maxLatitude (degrees_north),minAltitude (m),maxAltitude (m),minTime,maxTime
datasetID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
bill-20230802T1140,University of Connecticut,-73.718571,-73.499541,40.887233,40.998007,0.138916,-22.88993,2023-08-02 12:30:05,2023-08-10 09:43:21
ce_311-20230616T1819,OOI Coastal Endurance,-125.99751,-124.094224,44.368149,44.68872,-0.029757,-196.9691,2023-06-16 18:35:00,2023-08-10 10:17:01
ng296-20230726T172700,Naval Oceanographic Office,-78.631205,-76.255053,29.876715,31.219815,-0.009893,-975.2805,2023-07-26 18:08:01,2023-08-10 07:06:40
ng601-20230803T155100,Naval Oceanographic Office,-85.422891,-84.300814,27.422158,27.73805,-0.069463,-797.9359,2023-08-03 15:57:19,2023-08-10 08:09:37
sbu02-20230803T1544,Stony Brook University,-73.345835,-72.66113,40.010509,40.465548,-0.059538,-57.36876,2023-08-03 15:47:21,2023-08-10 08:14:19
SG664-20230719T1323,National Oceanic and Atmospheric Administratio...,-69.618307,-69.147036,15.785064,18.338595,0.44274,-904.9946,2023-07-19 13:29:34,2023-08-10 08:55:59
SG665-20230711T1314,National Oceanic and Atmospheric Administratio...,-67.29314,-66.914778,16.209975,17.863989,0.236631,-907.0547,2023-07-11 13:18:58,2023-08-10 08:29:11
SG666-20230711T1331,National Oceanic and Atmospheric Administratio...,-67.054038,-66.200716,16.232829,17.864093,0.35887,-904.0105,2023-07-11 13:35:27,2023-08-10 07:52:29
SG668-20230802T1445,National Oceanic and Atmospheric Administratio...,-76.080444,-74.971027,24.649643,24.937351,0.068813,-904.0661,2023-08-02 14:49:04,2023-08-10 08:33:12
SG684-20230711T1347,National Oceanic and Atmospheric Administratio...,-67.052246,-64.920233,15.823341,17.864077,0.325985,-903.6865,2023-07-11 13:52:01,2023-08-10 06:45:29


### Find datasets from deployments that lasted longer than 90 days

In [7]:
min_days = 90

# Deployment length is the span between the first and last timestamp of each dataset;
# it is stored as a new "endurance" column on df_datasets
df_datasets["endurance"] = df_datasets["maxTime"] - df_datasets["minTime"]

# Show the endurance of every deployment longer than the threshold
is_long_deployment = df_datasets["endurance"] > datetime.timedelta(days=min_days)
df_datasets.loc[is_long_deployment, "endurance"]

datasetID
amlr01-20191206T0452-delayed       96 days 23:52:01
amlr02-20191206T1236-delayed       96 days 15:33:31
amlr03-20191206T0529-delayed       96 days 22:19:47
bios_anna-20180112T0455            91 days 21:55:52
bios_anna-20180112T0455-delayed    93 days 16:11:03
                                         ...       
UW157-20190916T0000               184 days 22:31:43
UW157-20200917T0000               143 days 01:43:53
UW646-20211112T0000               215 days 19:40:13
UW646-20220907T0000               181 days 09:10:06
UW685-20230125T0000               195 days 08:47:46
Name: endurance, Length: 409, dtype: timedelta64[ns]

### Which glider missions extended to a depth greater than 150 m in seas north of 70° N?

In [8]:
min_northing = 70
min_altitude = -150  # note the sign! altitude is negative below the surface

# Build one boolean condition per criterion, then require both
went_deep = df_datasets['maxAltitude (m)'].lt(min_altitude)
went_north = df_datasets['maxLatitude (degrees_north)'].gt(min_northing)
mask = (went_deep & went_north).values
df_datasets[mask]

Unnamed: 0_level_0,institution,minLongitude (degrees_east),maxLongitude (degrees_east),minLatitude (degrees_north),maxLatitude (degrees_north),minAltitude (m),maxAltitude (m),minTime,maxTime,endurance
datasetID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
cp_389-20161011T2040-delayed,OOI Coastal & Global Scale Nodes (CGSN),-71.175203,-47.003674,39.331109,84.001959,0.119085,-987.8069,2016-10-11 20:54:26,2017-01-08 18:09:42,88 days 21:15:16
unit_595-20180804T0000-delayed,"University of Alaska Fairbanks, College of Fis...",-168.901621,-155.397927,67.835807,71.753266,-0.072915,-188.1303,2018-08-03 22:28:49,2018-09-24 20:56:07,51 days 22:27:18


---------------------------
### References

IOOS ERDDAP https://gliders.ioos.us/erddap

More info on using ERDDAP's inbuilt search https://ioos.github.io/erddapy/01b-tabledap-output.html