# Finding missions of interest

In [1]:
import datetime
import pandas as pd
from erddapy import ERDDAP
import utils
from tqdm.notebook import tqdm

We can use ERDDAP's built in search function to find datasets that match a keyword

In [2]:
e = ERDDAP(
    server="https://erddap.observations.voiceoftheocean.org/erddap"
)
url = e.get_search_url(search_for="gotland", response="csv")

df = pd.read_csv(url)
df["Dataset ID"]

0      delayed_SEA069_M9
1          nrt_SEA069_M9
2         nrt_SEA068_M27
3     delayed_SEA068_M27
4         nrt_SEA055_M23
5     delayed_SEA055_M23
6     delayed_SEA067_M27
7     delayed_SEA067_M29
8     delayed_SEA067_M30
9     delayed_SEA066_M41
10    delayed_SEA066_M42
11    delayed_SEA066_M43
12    delayed_SEA067_M32
13    delayed_SEA067_M26
14        nrt_SEA067_M29
15        nrt_SEA067_M30
16        nrt_SEA066_M43
17        nrt_SEA067_M26
18        nrt_SEA067_M27
19        nrt_SEA067_M32
20        nrt_SEA066_M41
21        nrt_SEA066_M42
22        nrt_SEA066_M45
23    delayed_SEA067_M37
24        nrt_SEA067_M37
25        nrt_SEA067_M39
Name: Dataset ID, dtype: object

### Search with allDatasets

For a more sophisticated search, we can use the `allDatasets` dataset from ERDDAP to examine the primary metadata of all glider missions. We will use only nrt datasets to speed up the process of examining metadata

In [3]:
e = ERDDAP(
    server="https://erddap.observations.voiceoftheocean.org/erddap",
    protocol="tabledap",
)

# Fetch dataset list
e.response = "csv"
e.dataset_id = "allDatasets"
df_datasets = e.to_pandas(parse_dates=['minTime (UTC)', 'maxTime (UTC)'])

# drop the allDatasets row and make the datasetID the index for easier reading
df_datasets.set_index("datasetID", inplace=True)
df_datasets.drop("allDatasets", inplace=True)

# Keep a susbset of useful columns
df_datasets = df_datasets[[
 'institution',
 'dataStructure',
 'cdm_data_type',
 'minLongitude (degrees_east)',
 'maxLongitude (degrees_east)',
 'minLatitude (degrees_north)',
 'maxLatitude (degrees_north)',
 'minAltitude (m)',
 'maxAltitude (m)',
 'minTime (UTC)',
 'maxTime (UTC)',
 'infoUrl',
]]

df_datasets = df_datasets[df_datasets.index.str[:3] == "nrt"]
print(f"Analysing {len(df_datasets)} nrt datasets")

Analysing 116 nrt datasets


**side note** All VOTO datasets are represented twice on the ERDDAP: the `nrt_` version contains data communicated in near real time by the glider. `delayed_` is the dataset at full resolution downloaded from the glider after recovery. This is typically ~ 500 times larger than `nrt_`

In [4]:
df_datasets

Unnamed: 0_level_0,institution,dataStructure,cdm_data_type,minLongitude (degrees_east),maxLongitude (degrees_east),minLatitude (degrees_north),maxLatitude (degrees_north),minAltitude (m),maxAltitude (m),minTime (UTC),maxTime (UTC),infoUrl
datasetID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
nrt_SEA068_M27,Voice of the Ocean Foundation,table,TimeSeries,19.901883,19.982300,58.199850,58.265983,0.102737,-160.836667,2022-07-27 17:09:41+00:00,2022-07-31 03:51:42+00:00,https://cfconventions.org/cf-conventions/v1.6....
nrt_SEA076_M8,Voice of the Ocean Foundation,table,TimeSeries,15.736967,16.323750,55.532217,55.746117,0.072139,-75.772686,2022-10-05 15:04:10+00:00,2022-10-19 20:25:55+00:00,https://cfconventions.org/cf-conventions/v1.6....
nrt_SEA076_M9,Voice of the Ocean Foundation,table,TimeSeries,15.739817,16.296533,55.544550,55.661333,0.054402,-70.144741,2022-10-20 15:11:45+00:00,2022-10-31 13:05:46+00:00,https://cfconventions.org/cf-conventions/v1.6....
nrt_SEA069_M9,Voice of the Ocean Foundation,table,TimeSeries,19.909683,19.964233,58.192767,58.268500,0.158712,-162.851979,2022-07-27 16:37:58+00:00,2022-07-31 03:36:24+00:00,https://cfconventions.org/cf-conventions/v1.6....
nrt_SEA069_M11,Voice of the Ocean Foundation,table,TimeSeries,15.990183,16.376900,55.255733,55.581050,0.247636,-81.446143,2022-09-23 15:06:17+00:00,2022-10-12 15:05:13+00:00,https://cfconventions.org/cf-conventions/v1.6....
...,...,...,...,...,...,...,...,...,...,...,...,...
nrt_SEA061_M56,Voice of the Ocean Foundation,table,TimeSeries,10.953917,11.451883,57.622883,58.120500,0.010007,-134.529687,2022-04-15 10:04:36+00:00,2022-05-04 10:22:12+00:00,https://cfconventions.org/cf-conventions/v1.6....
nrt_SEA061_M57,Voice of the Ocean Foundation,table,TimeSeries,10.839783,11.457350,57.619817,58.357500,0.071136,-154.469223,2022-05-05 09:10:20+00:00,2022-05-23 07:27:12+00:00,https://cfconventions.org/cf-conventions/v1.6....
nrt_SEA061_M60,Voice of the Ocean Foundation,table,TimeSeries,10.927150,11.464567,57.619400,58.127500,0.091150,-151.033965,2022-06-15 08:38:43+00:00,2022-07-01 07:57:22+00:00,https://cfconventions.org/cf-conventions/v1.6....
nrt_SEA061_M62,Voice of the Ocean Foundation,table,TimeSeries,10.796117,11.466467,57.613517,58.199350,-0.011295,-192.699238,2022-07-19 12:05:47+00:00,2022-08-12 07:13:14+00:00,https://cfconventions.org/cf-conventions/v1.6....


### Find datasets from deployments that lasted longer than 30 days

In [5]:
df_datasets["endurance"] = df_datasets['maxTime (UTC)'] - df_datasets['minTime (UTC)']
min_days = 30
df_datasets[df_datasets["endurance"] > datetime.timedelta(days=min_days)].endurance

datasetID
nrt_SEA045_M41   34 days 01:28:01
nrt_SEA045_M44   31 days 22:47:09
nrt_SEA044_M32   34 days 00:20:23
nrt_SEA044_M34   32 days 17:56:00
nrt_SEA044_M35   31 days 18:45:38
Name: endurance, dtype: timedelta64[ns]

### Which glider missions extended to a depth of greater than 150 m in the seas northeast of Gotland?

In [6]:
min_altitude = -150 # note the sign!
min_easting = 19
min_northing = 58
mask = (df_datasets['maxAltitude (m)'] < min_altitude).values \
* (df_datasets['maxLongitude (degrees_east)'] > min_easting).values \
* (df_datasets['maxLatitude (degrees_north)'] > min_northing).values
df_datasets[mask].index.values

array(['nrt_SEA068_M27', 'nrt_SEA069_M9', 'nrt_SEA067_M26',
       'nrt_SEA067_M27', 'nrt_SEA067_M29', 'nrt_SEA067_M30',
       'nrt_SEA067_M32', 'nrt_SEA067_M37', 'nrt_SEA067_M39',
       'nrt_SEA066_M41', 'nrt_SEA066_M42', 'nrt_SEA066_M43'], dtype=object)

---------------------

# Deeper metadata

To access metadata at the sensor level, we need to download the metadata for each dataset. This is achieved by taking a small slice of the dataset and parsing the resultant netCDF. Look at the function `get_meta` in `utils.py` for details.

In [7]:
ds_meta = {}
for dataset_id in tqdm(df_datasets.index):
    ds_meta[dataset_id] = utils.get_meta(dataset_id)

  0%|          | 0/116 [00:00<?, ?it/s]

Here's the extra metadata we're pulling

In [8]:
ds_meta[dataset_id]

{'acknowledgement': 'This study used data collected and made freely available by Voice of the Ocean Foundation (https://voiceoftheocean.org) accessed from https://erddap.observations.voiceoftheocean.org/erddap/index.html',
 'altimeter': {'make': 'UNKNOWN', 'model': 'UNKNOWN', 'serial': '48'},
 'basin': 'Skagerrak, Kattegat',
 'cdm_data_type': 'TimeSeries',
 'cdm_timeseries_variables': 'profile_index',
 'comment': 'deployment and recovery in Kattegat/Skagerrak',
 'contributor_name': 'Callum Rollo, Louise Biddle, Olle Petersson, Aleksandra Mazur, Marcus Melin',
 'contributor_role': 'Data Processor, PI, Head of Operations, Glider Technician, Glider Technician',
 'Conventions': 'CF-1.6, COARDS, ACDD-1.3',
 'creator_email': 'callum.rollo@voiceoftheocean.org',
 'creator_name': 'Callum Rollo',
 'creator_type': 'person',
 'creator_url': 'https://observations.voiceoftheocean.org',
 'ctd': {'calibration_date': '2021-02-25',
  'factory_calibrated': 'Yes',
  'long_name': 'RBR legato CTD',
  'make'

### Expanding the table

Let's add this more detailed metadata to our metadtata DataFrame so we have more scope for filtering

In [9]:
for dataset_id, meta in ds_meta.items():
    for key, val in meta.items():
        if key not in list(df_datasets):
            df_datasets[key] = None
        # Some of the metadata is stored in dicts or lists, which pandas does not like, so we cast to string
        try:
            df_datasets.loc[dataset_id, key] = val
        except:
            df_datasets.loc[dataset_id, key] = str(val)

Let's have a look at some of this more detailed metadat that we can now run queries against

### Which datasets were collected in Bornholm?

In [10]:
bornholm_missions = []
for dataset_id, meta in ds_meta.items():
    if "bornholm" in meta["basin"].lower():
        bornholm_missions.append(dataset_id)
print(f"Missions in Bornholm:\n{bornholm_missions}")

Missions in Bornholm:
['nrt_SEA076_M8', 'nrt_SEA076_M9', 'nrt_SEA069_M11', 'nrt_SEA055_M16', 'nrt_SEA055_M18', 'nrt_SEA055_M19', 'nrt_SEA055_M20', 'nrt_SEA055_M21', 'nrt_SEA055_M24', 'nrt_SEA055_M28', 'nrt_SEA055_M31', 'nrt_SEA055_M37', 'nrt_SEA055_M43', 'nrt_SEA045_M48', 'nrt_SEA045_M54', 'nrt_SEA045_M56', 'nrt_SEA045_M60', 'nrt_SEA045_M62', 'nrt_SEA045_M64', 'nrt_SEA045_M65', 'nrt_SEA045_M67', 'nrt_SEA045_M69', 'nrt_SEA045_M71', 'nrt_SEA045_M73', 'nrt_SEA063_M17', 'nrt_SEA063_M18', 'nrt_SEA063_M19', 'nrt_SEA063_M20', 'nrt_SEA063_M21', 'nrt_SEA063_M22', 'nrt_SEA063_M33', 'nrt_SEA063_M35', 'nrt_SEA063_M37', 'nrt_SEA063_M38', 'nrt_SEA063_M39', 'nrt_SEA063_M40', 'nrt_SEA044_M40', 'nrt_SEA044_M48', 'nrt_SEA066_M10', 'nrt_SEA066_M12', 'nrt_SEA066_M14', 'nrt_SEA070_M13', 'nrt_SEA070_M14', 'nrt_SEA070_M15', 'nrt_SEA077_M11', 'nrt_SEA077_M12', 'nrt_SEA077_M13', 'nrt_SEA077_M15', 'nrt_SEA077_M17', 'nrt_SEA077_M18', 'nrt_SEA056_M40', 'nrt_SEA056_M54', 'nrt_SEA056_M55', 'nrt_SEA056_M56', 'nrt_SE

### Which missions had an JFE oxygen optode and a Nortek  AD2CP?

In [11]:
rinko_nortek_missions = []
for dataset_id, meta in ds_meta.items():
    if "AD2CP" not in meta.keys():
        continue
    if "JFE" in meta["oxygen"]["make_model"] and "Nortek" in meta["AD2CP"]["make_model"]:
        rinko_nortek_missions.append(dataset_id)
print(f"Missions with JFE oxygen optode and a Nortek  AD2CP:\n{rinko_nortek_missions}")



Missions with JFE oxygen optode and a Nortek  AD2CP:
['nrt_SEA045_M33', 'nrt_SEA045_M36', 'nrt_SEA045_M37', 'nrt_SEA045_M41', 'nrt_SEA045_M44', 'nrt_SEA045_M45', 'nrt_SEA045_M48', 'nrt_SEA045_M50', 'nrt_SEA045_M51', 'nrt_SEA045_M52', 'nrt_SEA045_M54', 'nrt_SEA045_M56', 'nrt_SEA045_M60', 'nrt_SEA045_M62', 'nrt_SEA045_M64', 'nrt_SEA045_M65', 'nrt_SEA045_M67', 'nrt_SEA045_M69', 'nrt_SEA045_M71', 'nrt_SEA045_M73', 'nrt_SEA063_M17', 'nrt_SEA063_M18', 'nrt_SEA063_M19', 'nrt_SEA063_M20', 'nrt_SEA063_M21', 'nrt_SEA063_M22', 'nrt_SEA063_M24', 'nrt_SEA063_M33', 'nrt_SEA063_M35', 'nrt_SEA063_M37', 'nrt_SEA063_M38', 'nrt_SEA063_M39', 'nrt_SEA063_M40', 'nrt_SEA056_M40', 'nrt_SEA056_M42', 'nrt_SEA056_M54', 'nrt_SEA056_M55', 'nrt_SEA056_M56', 'nrt_SEA056_M57']


### Which datasets were collected as part of the SAMBA project during 2022?

In [12]:
start = df_datasets["maxTime (UTC)"] > pd.Timestamp("2022-01-01").tz_localize('utc') 
end = df_datasets["minTime (UTC)"] < pd.Timestamp("2023-01-01").tz_localize('utc') 
project = df_datasets["project"] == "SAMBA"
print(f"SAMBA 2022 missions:")
print(df_datasets[mask].index)

SAMBA 2022 missions:
Index(['nrt_SEA068_M27', 'nrt_SEA069_M9', 'nrt_SEA067_M26', 'nrt_SEA067_M27',
       'nrt_SEA067_M29', 'nrt_SEA067_M30', 'nrt_SEA067_M32', 'nrt_SEA067_M37',
       'nrt_SEA067_M39', 'nrt_SEA066_M41', 'nrt_SEA066_M42', 'nrt_SEA066_M43'],
      dtype='object', name='datasetID')


### Which datasets have oxygen data from > 80 m depth from optode serial number 205992 

In [13]:
deep_205992_misions = []
for dataset_id, meta in ds_meta.items():
    if  meta["oxygen"]["serial"] == "205592" and meta["geospatial_vertical_max"] > 0:
        deep_205992_misions.append(dataset_id)
print(f"Missions with optode number 205992 going to > 80 m:\n{deep_205992_misions}")

Missions with optode number 205992 going to > 80 m:
['nrt_SEA066_M10', 'nrt_SEA066_M12', 'nrt_SEA066_M14', 'nrt_SEA066_M16', 'nrt_SEA066_M41', 'nrt_SEA066_M42']


---------------------------
### References

VOTO ERDDAP https://erddap.observations.voiceoftheocean.org/erddap/index.html 

More info on using ERDDAP's inbuilt search https://ioos.github.io/erddapy/01b-tabledap-output.html