# Finding missions of interest

In [1]:
import datetime
import pandas as pd
from erddapy import ERDDAP
import utils
from tqdm.notebook import tqdm

We can use ERDDAP's built in search function to find datasets that match a keyword

In [2]:
e = ERDDAP(
    server="https://erddap.observations.voiceoftheocean.org/erddap"
)
url = e.get_search_url(search_for="cable", response="csv")

df = pd.read_csv(url)
df["Dataset ID"]

0         nrt_SEA067_M26
1         nrt_SEA067_M27
2         nrt_SEA067_M29
3         nrt_SEA066_M45
4         nrt_SEA067_M30
5         nrt_SEA067_M32
6         nrt_SEA066_M41
7         nrt_SEA066_M42
8         nrt_SEA066_M43
9         nrt_SEA076_M13
10         nrt_SEA079_M9
11        nrt_SEA078_M11
12        nrt_SEA067_M37
13        nrt_SEA067_M39
14    delayed_SEA067_M26
15    delayed_SEA067_M27
16    delayed_SEA067_M29
17    delayed_SEA067_M30
18    delayed_SEA067_M32
19    delayed_SEA066_M41
20    delayed_SEA066_M42
21    delayed_SEA066_M43
22     delayed_SEA079_M9
23    delayed_SEA076_M13
24    delayed_SEA078_M11
25    delayed_SEA067_M37
26    delayed_SEA067_M39
27       adcp_SEA067_M26
28       adcp_SEA067_M27
29       adcp_SEA067_M29
30        adcp_SEA079_M9
31       adcp_SEA076_M13
32       adcp_SEA067_M30
33       adcp_SEA067_M32
34       adcp_SEA067_M37
35       adcp_SEA067_M39
36       adcp_SEA066_M41
37       adcp_SEA066_M42
38       adcp_SEA066_M43
39       adcp_SEA078_M11


### Search with allDatasets

For a more sophisticated search, we can use the `allDatasets` dataset from ERDDAP to examine the primary metadata of all glider missions. We will use only nrt datasets to speed up the process of examining metadata

In [3]:
e = ERDDAP(
    server="https://erddap.observations.voiceoftheocean.org/erddap",
    protocol="tabledap",
)

# Fetch dataset list
e.response = "csv"
e.dataset_id = "allDatasets"
df_datasets = e.to_pandas(parse_dates=['minTime (UTC)', 'maxTime (UTC)'])

# drop the allDatasets row and make the datasetID the index for easier reading
df_datasets.set_index("datasetID", inplace=True)
df_datasets.drop("allDatasets", inplace=True)

# Keep a susbset of useful columns
df_datasets = df_datasets[[
 'institution',
 'dataStructure',
 'cdm_data_type',
 'minLongitude (degrees_east)',
 'maxLongitude (degrees_east)',
 'minLatitude (degrees_north)',
 'maxLatitude (degrees_north)',
 'minAltitude (m)',
 'maxAltitude (m)',
 'minTime (UTC)',
 'maxTime (UTC)',
 'infoUrl',
]]

df_datasets = df_datasets[df_datasets.index.str[:3] == "nrt"]
print(f"Analysing {len(df_datasets)} nrt datasets")

Analysing 141 nrt datasets


**side note** All VOTO datasets are represented twice on the ERDDAP: the `nrt_` version contains data communicated in near real time by the glider. `delayed_` is the dataset at full resolution downloaded from the glider after recovery. This is typically ~ 500 times larger than `nrt_`

In [4]:
df_datasets

Unnamed: 0_level_0,institution,dataStructure,cdm_data_type,minLongitude (degrees_east),maxLongitude (degrees_east),minLatitude (degrees_north),maxLatitude (degrees_north),minAltitude (m),maxAltitude (m),minTime (UTC),maxTime (UTC),infoUrl
datasetID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
nrt_SEA068_M27,Voice of the Ocean Foundation,table,TimeSeries,19.901883,19.982300,58.199850,58.265983,0.102737,-160.836667,2022-07-27 17:09:41+00:00,2022-07-31 03:51:42+00:00,https://cfconventions.org/cf-conventions/v1.6....
nrt_SEA076_M8,Voice of the Ocean Foundation,table,TimeSeries,15.736967,16.323750,55.532217,55.746117,0.072139,-75.772686,2022-10-05 15:04:10+00:00,2022-10-19 20:25:55+00:00,https://cfconventions.org/cf-conventions/v1.6....
nrt_SEA076_M9,Voice of the Ocean Foundation,table,TimeSeries,15.739817,16.296533,55.544550,55.661333,0.054402,-70.144741,2022-10-20 15:11:45+00:00,2022-10-31 13:05:46+00:00,https://cfconventions.org/cf-conventions/v1.6....
nrt_SEA076_M13,Voice of the Ocean Foundation,table,TimeSeries,19.400867,20.098333,58.250300,58.517300,0.266205,-164.838439,2023-01-13 02:09:24+00:00,2023-02-14 10:34:35+00:00,https://cfconventions.org/cf-conventions/v1.6....
nrt_SEA076_M16,Voice of the Ocean Foundation,table,TimeSeries,19.786233,19.967400,57.992133,58.514050,0.156828,-190.680438,2023-04-11 12:59:54+00:00,2023-05-16 11:16:28+00:00,https://cfconventions.org/cf-conventions/v1.6....
...,...,...,...,...,...,...,...,...,...,...,...,...
nrt_SEA061_M60,Voice of the Ocean Foundation,table,TimeSeries,10.927150,11.464567,57.619400,58.127500,0.091150,-151.033965,2022-06-15 08:38:43+00:00,2022-07-01 07:57:22+00:00,https://cfconventions.org/cf-conventions/v1.6....
nrt_SEA061_M62,Voice of the Ocean Foundation,table,TimeSeries,10.796117,11.466467,57.613517,58.199350,-0.011295,-192.699238,2022-07-19 12:05:47+00:00,2022-08-12 07:13:14+00:00,https://cfconventions.org/cf-conventions/v1.6....
nrt_SEA061_M63,Voice of the Ocean Foundation,table,TimeSeries,10.849650,11.525550,57.582883,58.112233,0.154361,-185.037862,2022-08-15 08:05:08+00:00,2022-09-07 00:18:26+00:00,https://cfconventions.org/cf-conventions/v1.6....
nrt_SEA078_M11,Voice of the Ocean Foundation,table,TimeSeries,19.510350,20.092867,58.146167,58.514700,0.036061,-173.116164,2023-03-16 10:19:34+00:00,2023-04-11 11:56:43+00:00,https://cfconventions.org/cf-conventions/v1.6....


### Find datasets from deployments that lasted longer than 30 days

In [5]:
df_datasets["endurance"] = df_datasets['maxTime (UTC)'] - df_datasets['minTime (UTC)']
min_days = 30
df_datasets[df_datasets["endurance"] > datetime.timedelta(days=min_days)].endurance

datasetID
nrt_SEA076_M13   32 days 08:25:11
nrt_SEA076_M16   34 days 22:16:34
nrt_SEA045_M41   34 days 01:28:01
nrt_SEA045_M43   32 days 13:01:05
nrt_SEA045_M44   31 days 22:47:09
nrt_SEA079_M9    30 days 01:34:14
nrt_SEA079_M11   34 days 15:25:50
nrt_SEA044_M32   34 days 00:20:23
nrt_SEA044_M34   32 days 17:56:00
nrt_SEA044_M35   31 days 18:45:38
Name: endurance, dtype: timedelta64[ns]

### Which glider missions extended to a depth of greater than 150 m in the seas northeast of Gotland?

In [6]:
min_altitude = -150 # note the sign!
min_easting = 19
min_northing = 58
mask = (df_datasets['maxAltitude (m)'] < min_altitude).values \
* (df_datasets['maxLongitude (degrees_east)'] > min_easting).values \
* (df_datasets['maxLatitude (degrees_north)'] > min_northing).values
df_datasets[mask].index.values

array(['nrt_SEA068_M27', 'nrt_SEA076_M13', 'nrt_SEA076_M16',
       'nrt_SEA069_M9', 'nrt_SEA079_M9', 'nrt_SEA067_M26',
       'nrt_SEA067_M27', 'nrt_SEA067_M29', 'nrt_SEA067_M30',
       'nrt_SEA067_M32', 'nrt_SEA067_M37', 'nrt_SEA067_M39',
       'nrt_SEA066_M41', 'nrt_SEA066_M42', 'nrt_SEA066_M43',
       'nrt_SEA077_M21', 'nrt_SEA077_M22', 'nrt_SEA078_M11'], dtype=object)

---------------------

# Deeper metadata

To access metadata at the sensor level, we need to download the metadata for each dataset. This is achieved by taking a small slice of the dataset and parsing the resultant netCDF. Look at the function `get_meta` in `utils.py` for details.

In [7]:
ds_meta = {}
for dataset_id in tqdm(df_datasets.index):
    ds_meta[dataset_id] = utils.get_meta(dataset_id)

  0%|          | 0/141 [00:00<?, ?it/s]

Here's the extra metadata we're pulling

In [8]:
ds_meta[dataset_id]

{'AD2CP': {'calibration_date': '2022-08-02',
  'factory_calibrated': 'Yes',
  'long_name': 'Nortek Glider1000 AD2CP',
  'make': 'Nortek',
  'make_model': 'Nortek AD2CP',
  'model': 'AD2CP',
  'serial': '104646'},
 'Conventions': 'CF-1.10, COARDS, ACDD-1.3',
 'Easternmost_Easting': 20.354016666666666,
 'Northernmost_Northing': 57.59006666666667,
 'Southernmost_Northing': 57.249249999999996,
 'Westernmost_Easting': 20.052666666666667,
 'acknowledgement': 'This study used data collected and made freely available by Voice of the Ocean Foundation (https://voiceoftheocean.org) accessed from https://erddap.observations.voiceoftheocean.org/erddap/index.html',
 'basin': 'Eastern Gotland Basin',
 'cdm_data_type': 'TimeSeries',
 'cdm_timeseries_variables': 'profile_index',
 'close': <function Dataset.close>,
 'cmptypes': {},
 'comment': 'deployment and recovery in Gotland',
 'contributor_name': 'Callum Rollo, Louise Biddle, Olle Petersson, Aleksandra Mazur, Marcus Melin, Gunnar Johnsson, Andrew B

### Expanding the table

Let's add this more detailed metadata to our metadtata DataFrame so we have more scope for filtering

In [9]:
meta_dictionaries = []
for dataset_id, meta in ds_meta.items():
    glider_dict = dict(df_datasets.loc[dataset_id]) | meta
    meta_dictionaries.append(glider_dict)


In [10]:
df_datasets = pd.DataFrame(meta_dictionaries)

In [11]:
df_datasets.index = df_datasets.dataset_id

In [12]:
df_datasets

Unnamed: 0_level_0,institution,dataStructure,cdm_data_type,minLongitude (degrees_east),maxLongitude (degrees_east),minLatitude (degrees_north),maxLatitude (degrees_north),minAltitude (m),maxAltitude (m),minTime (UTC),...,vltypes,wmo_id,AD2CP,nitrate,testOutOfDate,optics_nanoflu,altimeter,irradiance,turbulence,methane
dataset_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
nrt_SEA068_M27,Voice of the Ocean Foundation,table,TimeSeries,19.901883,19.982300,58.199850,58.265983,0.102737,-160.836667,2022-07-27 17:09:41+00:00,...,{},6801602,,,,,,,,
nrt_SEA076_M8,Voice of the Ocean Foundation,table,TimeSeries,15.736967,16.323750,55.532217,55.746117,0.072139,-75.772686,2022-10-05 15:04:10+00:00,...,{},6801665,"{'calibration_date': '2022-03-16', 'factory_ca...","{'calibration_date': '2019-05-20', 'factory_ca...",,,,,,
nrt_SEA076_M9,Voice of the Ocean Foundation,table,TimeSeries,15.739817,16.296533,55.544550,55.661333,0.054402,-70.144741,2022-10-20 15:11:45+00:00,...,{},6801665,"{'calibration_date': '2022-03-16', 'factory_ca...","{'calibration_date': '2019-05-20', 'factory_ca...",,,,,,
nrt_SEA076_M13,Voice of the Ocean Foundation,table,TimeSeries,19.400867,20.098333,58.250300,58.517300,0.266205,-164.838439,2023-01-13 02:09:24+00:00,...,{},6801665,"{'calibration_date': '2022-03-16', 'factory_ca...",,,,,,,
nrt_SEA076_M16,Voice of the Ocean Foundation,table,TimeSeries,19.786233,19.967400,57.992133,58.514050,0.156828,-190.680438,2023-04-11 12:59:54+00:00,...,{},6801665,"{'calibration_date': '2022-03-16', 'factory_ca...",,now-26days,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
nrt_SEA061_M60,Voice of the Ocean Foundation,table,TimeSeries,10.927150,11.464567,57.619400,58.127500,0.091150,-151.033965,2022-06-15 08:38:43+00:00,...,{},6801706,,,,,"{'make': 'UNKNOWN', 'model': 'UNKNOWN', 'seria...",,,
nrt_SEA061_M62,Voice of the Ocean Foundation,table,TimeSeries,10.796117,11.466467,57.613517,58.199350,-0.011295,-192.699238,2022-07-19 12:05:47+00:00,...,{},6801706,,,,,"{'make': 'UNKNOWN', 'model': 'UNKNOWN', 'seria...",,,
nrt_SEA061_M63,Voice of the Ocean Foundation,table,TimeSeries,10.849650,11.525550,57.582883,58.112233,0.154361,-185.037862,2022-08-15 08:05:08+00:00,...,{},6801706,,,,,"{'make': 'UNKNOWN', 'model': 'UNKNOWN', 'seria...",,,
nrt_SEA078_M11,Voice of the Ocean Foundation,table,TimeSeries,19.510350,20.092867,58.146167,58.514700,0.036061,-173.116164,2023-03-16 10:19:34+00:00,...,{},8901000,"{'calibration_date': '2022-08-02', 'factory_ca...",,now-96days,,,,,


Let's have a look at some of this more detailed metadat that we can now run queries against

### Which datasets were collected in Bornholm?

In [13]:
bornholm_missions = []
for dataset_id, meta in ds_meta.items():
    if "bornholm" in meta["basin"].lower():
        bornholm_missions.append(dataset_id)
print(f"Missions in Bornholm:\n{bornholm_missions}")

Missions in Bornholm:
['nrt_SEA076_M8', 'nrt_SEA076_M9', 'nrt_SEA069_M11', 'nrt_SEA055_M16', 'nrt_SEA055_M18', 'nrt_SEA055_M19', 'nrt_SEA055_M20', 'nrt_SEA055_M21', 'nrt_SEA055_M24', 'nrt_SEA055_M28', 'nrt_SEA055_M31', 'nrt_SEA055_M37', 'nrt_SEA055_M43', 'nrt_SEA045_M48', 'nrt_SEA045_M54', 'nrt_SEA045_M56', 'nrt_SEA045_M60', 'nrt_SEA045_M62', 'nrt_SEA045_M64', 'nrt_SEA045_M65', 'nrt_SEA045_M67', 'nrt_SEA045_M69', 'nrt_SEA045_M71', 'nrt_SEA045_M73', 'nrt_SEA045_M74', 'nrt_SEA045_M75', 'nrt_SEA063_M17', 'nrt_SEA063_M18', 'nrt_SEA063_M19', 'nrt_SEA063_M20', 'nrt_SEA063_M21', 'nrt_SEA063_M22', 'nrt_SEA063_M33', 'nrt_SEA063_M35', 'nrt_SEA063_M37', 'nrt_SEA063_M38', 'nrt_SEA063_M39', 'nrt_SEA063_M40', 'nrt_SEA063_M55', 'nrt_SEA063_M60', 'nrt_SEA044_M40', 'nrt_SEA044_M48', 'nrt_SEA067_M41', 'nrt_SEA067_M42', 'nrt_SEA066_M10', 'nrt_SEA066_M12', 'nrt_SEA066_M14', 'nrt_SEA070_M13', 'nrt_SEA070_M14', 'nrt_SEA070_M15', 'nrt_SEA077_M11', 'nrt_SEA077_M12', 'nrt_SEA077_M13', 'nrt_SEA077_M15', 'nrt_SE

### Which missions had an JFE oxygen optode and a Nortek  AD2CP?

In [14]:
rinko_nortek_missions = []
for dataset_id, meta in ds_meta.items():
    if "AD2CP" not in meta.keys():
        continue
    if "JFE" in meta["oxygen"]["make_model"] and "Nortek" in meta["AD2CP"]["make_model"]:
        rinko_nortek_missions.append(dataset_id)
print(f"Missions with JFE oxygen optode and a Nortek  AD2CP:\n{rinko_nortek_missions}")



Missions with JFE oxygen optode and a Nortek  AD2CP:
['nrt_SEA076_M13', 'nrt_SEA076_M16', 'nrt_SEA057_M75', 'nrt_SEA045_M33', 'nrt_SEA045_M36', 'nrt_SEA045_M37', 'nrt_SEA045_M41', 'nrt_SEA045_M42', 'nrt_SEA045_M43', 'nrt_SEA045_M44', 'nrt_SEA045_M45', 'nrt_SEA045_M48', 'nrt_SEA045_M50', 'nrt_SEA045_M51', 'nrt_SEA045_M52', 'nrt_SEA045_M54', 'nrt_SEA045_M56', 'nrt_SEA045_M60', 'nrt_SEA045_M62', 'nrt_SEA045_M64', 'nrt_SEA045_M65', 'nrt_SEA045_M67', 'nrt_SEA045_M69', 'nrt_SEA045_M71', 'nrt_SEA045_M73', 'nrt_SEA045_M74', 'nrt_SEA045_M75', 'nrt_SEA063_M17', 'nrt_SEA063_M18', 'nrt_SEA063_M19', 'nrt_SEA063_M20', 'nrt_SEA063_M21', 'nrt_SEA063_M22', 'nrt_SEA063_M24', 'nrt_SEA063_M33', 'nrt_SEA063_M35', 'nrt_SEA063_M37', 'nrt_SEA063_M38', 'nrt_SEA063_M39', 'nrt_SEA063_M40', 'nrt_SEA063_M55', 'nrt_SEA063_M60', 'nrt_SEA079_M9', 'nrt_SEA079_M11', 'nrt_SEA067_M41', 'nrt_SEA067_M42', 'nrt_SEA077_M21', 'nrt_SEA077_M22', 'nrt_SEA056_M40', 'nrt_SEA056_M42', 'nrt_SEA056_M54', 'nrt_SEA056_M55', 'nrt_SEA056

### Which datasets were collected as part of the SAMBA project during 2022?

In [15]:
start = df_datasets["maxTime (UTC)"] > pd.Timestamp("2022-01-01").tz_localize('utc') 
end = df_datasets["minTime (UTC)"] < pd.Timestamp("2023-01-01").tz_localize('utc') 
project = df_datasets["project"] == "SAMBA"
print(f"SAMBA 2022 missions:")
print(df_datasets[mask].index)

SAMBA 2022 missions:
Index(['nrt_SEA068_M27', 'nrt_SEA076_M13', 'nrt_SEA076_M16', 'nrt_SEA069_M9',
       'nrt_SEA079_M9', 'nrt_SEA067_M26', 'nrt_SEA067_M27', 'nrt_SEA067_M29',
       'nrt_SEA067_M30', 'nrt_SEA067_M32', 'nrt_SEA067_M37', 'nrt_SEA067_M39',
       'nrt_SEA066_M41', 'nrt_SEA066_M42', 'nrt_SEA066_M43', 'nrt_SEA077_M21',
       'nrt_SEA077_M22', 'nrt_SEA078_M11'],
      dtype='object', name='dataset_id')


### Which datasets have oxygen data from > 80 m depth from optode serial number 205992 

In [16]:
deep_205992_misions = []
for dataset_id, meta in ds_meta.items():
    if  meta["oxygen"]["serial"] == "205592" and meta["geospatial_vertical_max"] > 0:
        deep_205992_misions.append(dataset_id)
print(f"Missions with optode number 205992 going to > 80 m:\n{deep_205992_misions}")

Missions with optode number 205992 going to > 80 m:
['nrt_SEA066_M10', 'nrt_SEA066_M12', 'nrt_SEA066_M14', 'nrt_SEA066_M16', 'nrt_SEA066_M41', 'nrt_SEA066_M42']


---------------------------
### References

VOTO ERDDAP https://erddap.observations.voiceoftheocean.org/erddap/index.html 

More info on using ERDDAP's inbuilt search https://ioos.github.io/erddapy/01b-tabledap-output.html