# Marine EOV Broker



In [1]:
# ! pip install --upgrade --force-reinstall git+https://github.com/vliz-be-opsci/marine-eov-broker

In [2]:
from marine_eov_broker import MarineRiBroker
import logging
import matplotlib.pyplot as plt

# Root logger at INFO for the start-up phase; use logging.DEBUG here for more detail.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

# List the output formats and the EOV codes exposed by the broker module, one per line.
print(MarineRiBroker.ERDDAP_OUTPUT_FORMATS, MarineRiBroker.EOV_LIST, sep="\n")

['csv', 'geoJson', 'json', 'nc', 'ncCF', 'odvTxt']
['EV_OXY', 'EV_SEATEMP', 'EV_SALIN', 'EV_CURR', 'EV_CHLA', 'EV_CO2', 'EV_NUTS']


## Start the broker

Start-up takes some time (performance still needs improvement), because the broker will:
* load vocabularies upon startup
* load ERDDAP dataset metadata from all ERDDAP servers


**Question:**
Do we want to work with all datasets on the ERDDAP servers, or do we want to build a fixed list of them?

In [3]:
%%time
try:
    # Limit the module-level EOV list to the three EOVs used in this notebook
    # before the broker is created (the start-up log shows one vocabulary
    # query per entry).
    MarineRiBroker.EOV_LIST = ['EV_OXY', 'EV_SEATEMP', 'EV_SALIN']
    # Single ERDDAP server, restricted to the 'lyumkis' dataset.
    broker = MarineRiBroker.MarineBroker(erddap_servers={"https://erddap.eurobis.org//erddap":['lyumkis']})
except Exception as error:
    # Keep a reference for post-mortem inspection: Python unbinds `error`
    # as soon as the except clause ends.
    x = error
    # Re-raise instead of only printing the message: otherwise `broker` stays
    # undefined and every later cell fails with an unrelated NameError, and the
    # traceback of the real failure is lost.
    raise

INFO:root:Querying vocabulary server for EOV : EV_OXY
INFO:root:Querying vocabulary server for EOV : EV_SEATEMP
INFO:root:Querying vocabulary server for EOV : EV_SALIN
INFO:root:Response: {'head': {'vars': ['dt', 'P01notation', 'prefLabel', 'R03', 'P09', 'P02']}, 'results': {'bindings': [{'dt': {'type': 'uri', 'value': 'http://vocab.nerc.ac.uk/collection/P01/current/DOXMZZXX/'}, 'P01notation': {'type': 'literal', 'value': 'SDN:P01::DOXMZZXX'}, 'prefLabel': {'type': 'literal', 'xml:lang': 'en', 'value': 'Concentration of oxygen {O2 CAS 7782-44-7} per unit mass of the water body [dissolved plus reactive particulate phase]'}, 'R03': {'type': 'literal', 'value': 'SDN:R03::DOXY'}, 'P09': {'type': 'literal', 'value': 'SDN:P09::DOX2'}, 'P02': {'type': 'literal', 'value': 'SDN:P02::DOXY'}}, {'dt': {'type': 'uri', 'value': 'http://vocab.nerc.ac.uk/collection/P01/current/DOXYAAOP/'}, 'P01notation': {'type': 'literal', 'value': 'SDN:P01::DOXYAAOP'}, 'prefLabel': {'type': 'literal', 'xml:lang': 'e

CPU times: user 44.5 ms, sys: 8.54 ms, total: 53 ms
Wall time: 8.17 s


## Create a request to the broker
The user must provide the EOVs, the min/max date, latitude and longitude, and the desired output format.

When creating a query, the broker :
* first looks at every dataset to see whether it matches any EOV requested by the user
* then checks whether the datasets match the time range and bounding box requested by the user

In [20]:
# broker.vocabularies['EV_OXY']

In [5]:
# Time window of the request, as ISO date strings.
start_date, end_date = "1900-01-01", "2022-01-02"
# Bounding box in degrees: longitude 0 to 90, latitude 0 to 90.
# NOTE(review): this box was labelled "North-east Atlantic Ocean", which it does
# not obviously match — confirm the intended region.
min_lon, min_lat = 0, 0
max_lon, max_lat = 90, 90

# Raise verbosity to DEBUG so the broker's per-dataset matching steps are logged
# when the request below is submitted.
logger.setLevel(logging.DEBUG)

In [6]:
%%time
# Ask the broker for the three EOVs inside the time window and bounding box
# defined above, with "nc" as the requested output format.
response = broker.submit_request(
    ["EV_SALIN", "EV_OXY", "EV_SEATEMP"],
    start_date, end_date,
    min_lon, min_lat, max_lon, max_lat,
    "nc",
)

DEBUG:marine_eov_broker.MarineRiBroker:Looking for eovs in lyumkis took 0.0005030632019042969 seconds with result : ['Temp']
DEBUG:marine_eov_broker.ErddapMarineRI:Will check spatiotemporal constraints from query https://erddap.eurobis.org//erddap/tabledap/lyumkis.csv?time&time%3E=1900-01-01&time%3C=2022-01-02&latitude%3E=0.0&latitude%3C=90.0&longitude%3E=0.0&longitude%3C=90.0&orderByLimit(%22time/6months,1%22)
DEBUG:marine_eov_broker.MarineRiBroker:Will add variables found ['Temp'] in response for dataset lyumkis
DEBUG:marine_eov_broker.MarineRiBroker:Creating DataFrame with dataset lyumkis


CPU times: user 31.5 ms, sys: 0 ns, total: 31.5 ms
Wall time: 415 ms


In [27]:
# Per-dataset summary table for this request: one row per dataset ID, with the
# query URL, the dataset's global attributes and one column per requested EOV.
response.queries

Unnamed: 0,query_url,AccConstrDescription,AccConstrDisplay,AccConstrEN,AccessConstraint,AccessConstraints,Acronym,BrackishFlag,CDate,cdm_data_type,...,VersionDay,VersionMonth,VersionName,VersionYear,VlizCoreFlag,Westernmost_Easting,query_object,EV_OXY,EV_SEATEMP,EV_SALIN
lyumkis,https://erddap.eurobis.org//erddap/tabledap/ly...,"This license lets others distribute, remix, tw...",This dataset is licensed under a Creative Comm...,Attribution (CC BY),Attribution (CC BY),,,0,2010-11-03,Other,...,3,11,1,2010,1,29.7,<marine_eov_broker.MarineRiBroker.ErddapReques...,,Temp,


In [28]:
# %%time
# response = broker.submit_request(["EV_SALIN", "EV_OXY", "EV_SEATEMP", "EV_CO2", "EV_CHLA"], 
#                                  start_date,
#                                  end_date,
#                                  min_lon,
#                                  min_lat,
#                                  max_lon,
#                                  max_lat,
#                                  "nc"
#                                  )

## Query Response

In [31]:
# Fetch the response entry for the 'lyumkis' dataset; the following cells
# inspect its attributes (metadata, found_eovs, data_url).
xx = response.get_dataset('lyumkis')

In [41]:
# ERDDAP metadata table of the dataset: one row per attribute, with the
# variable it belongs to, its data type and its value.
xx.metadata

Unnamed: 0,Row Type,Variable Name,Attribute Name,Data Type,Value
0,attribute,NC_GLOBAL,AccConstrDescription,String,"This license lets others distribute, remix, tw..."
1,attribute,NC_GLOBAL,AccConstrDisplay,String,This dataset is licensed under a Creative Comm...
2,attribute,NC_GLOBAL,AccConstrEN,String,Attribution (CC BY)
3,attribute,NC_GLOBAL,AccessConstraint,String,Attribution (CC BY)
4,attribute,NC_GLOBAL,AccessConstraints,String,
...,...,...,...,...,...
171,attribute,WaterAbund,long_name,String,SDN:P01::SDBIOL01
172,attribute,WaterAbund,sdn_parameter_urn,String,SDN:P01::SDBIOL01
173,attribute,WaterAbund,units,String,
174,attribute,WaterAbund,units_uri,String,


In [42]:
# Mapping of each matched EOV code to the dataset variable name(s) found for it.
xx.found_eovs

{'EV_SEATEMP': ['Temp']}

In [43]:
# Base tabledap URL of the dataset (no output-format suffix, no query string).
xx.data_url

'https://erddap.eurobis.org//erddap/tabledap/lyumkis'

You can do a simple search on the ERDDAP metadata to find whatever text you're looking for. 

In [55]:
# Keep only the metadata rows whose 'Value' starts with the P01 vocabulary
# prefix; na=False treats missing values as non-matching. The callable indexer
# receives the frame itself, so the frame expression is written only once.
xx.metadata[lambda meta: meta['Value'].str.startswith('SDN:P01', na=False)]

Unnamed: 0,Row Type,Variable Name,Attribute Name,Data Type,Value
164,attribute,Temp,long_name,String,SDN:P01::TEMPPR01
165,attribute,Temp,sdn_parameter_urn,String,SDN:P01::TEMPPR01
171,attribute,WaterAbund,long_name,String,SDN:P01::SDBIOL01
172,attribute,WaterAbund,sdn_parameter_urn,String,SDN:P01::SDBIOL01


## Results

The interesting part!
The broker returns a BrokerResponse object. It has a **queries** attribute, which is a pandas DataFrame.

The DataFrame contains the global attributes, the query URL and the ErddapRequest object for each dataset found for the user request.

In [10]:
# One row per matching dataset; the EV_* columns hold the variable found for
# each requested EOV (empty when none was found).
response.queries

Unnamed: 0,query_url,AccConstrDescription,AccConstrDisplay,AccConstrEN,AccessConstraint,AccessConstraints,Acronym,BrackishFlag,CDate,cdm_data_type,...,VersionDay,VersionMonth,VersionName,VersionYear,VlizCoreFlag,Westernmost_Easting,query_object,EV_OXY,EV_SEATEMP,EV_SALIN
lyumkis,https://erddap.eurobis.org//erddap/tabledap/ly...,"This license lets others distribute, remix, tw...",This dataset is licensed under a Creative Comm...,Attribution (CC BY),Attribution (CC BY),,,0,2010-11-03,Other,...,3,11,1,2010,1,29.7,<marine_eov_broker.MarineRiBroker.ErddapReques...,,Temp,


**Or just the list of dataset IDs**

In [11]:
# IDs of the datasets matched by the request.
response.get_datasets_list()

['lyumkis']

### Access a dataset with its dataset ID

In [12]:
# Take the first matched dataset ID; raises IndexError if the request matched nothing.
dataset_id = response.get_datasets_list()[0]
print(dataset_id)

lyumkis


### Get the variables found for each requested EOV in the dataset

In [13]:
# Hard-coded dataset ID; in this run it is the same value the previous cell
# obtained from get_datasets_list()[0].
dataset_id = 'lyumkis'
# Maps every requested EOV to the variable found for it ('' when none was found).
response.get_dataset_EOVs_list(dataset_id)

{'EV_SALIN': '', 'EV_OXY': '', 'EV_SEATEMP': 'Temp'}

### Get the query URL for the dataset ID

In [14]:
# Full ERDDAP request URL for this dataset: format suffix, selected variables
# and the time/latitude/longitude constraints.
response.get_dataset_query_url(dataset_id)

'https://erddap.eurobis.org//erddap/tabledap/lyumkis.nc?time%2Clatitude%2Clongitude%2CTemp&time%3E=1900-01-01&time%3C=2022-01-02&latitude%3E=0.0&latitude%3C=90.0&longitude%3E=0.0&longitude%3C=90.0'

In [15]:
# Show the type of the response object (BrokerResponse).
response.__class__

marine_eov_broker.MarineRiBroker.BrokerResponse

### Execute a query & get the result as a Pandas DataFrame...

In [16]:
# Runs the dataset query against the ERDDAP server (an HTTP GET is logged) and
# loads the result into a pandas DataFrame.
df = response.dataset_to_pandas_dataframe(dataset_id)
# Bare last expression so the notebook renders the (truncated) table.
df

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): erddap.eurobis.org:443
DEBUG:urllib3.connectionpool:https://erddap.eurobis.org:443 "GET //erddap/tabledap/lyumkis.nc?time%2Clatitude%2Clongitude%2CTemp&time%3E=1900-01-01&time%3C=2022-01-02&latitude%3E=0.0&latitude%3C=90.0&longitude%3E=0.0&longitude%3C=90.0 HTTP/1.1" 200 None


Unnamed: 0_level_0,time,latitude,longitude,Temp
row,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,2005-01-07 05:35:00,45.720001,31.690001,0
1,2005-01-07 05:35:00,45.720001,31.690001,0
2,2005-01-07 05:35:00,45.720001,31.690001,0
3,2005-01-07 05:35:00,45.720001,31.690001,0
4,2005-01-07 05:35:00,45.720001,31.690001,0
...,...,...,...,...
26352,1992-09-12 02:28:00,42.880001,40.400002,22
26353,1992-09-12 02:28:00,42.880001,40.400002,22
26354,1992-09-12 02:28:00,42.880001,40.400002,22
26355,1992-09-12 02:28:00,42.880001,40.400002,22


### ... or an Xarray dataset

In [17]:
# Same dataset, returned as an xarray dataset instead of a DataFrame.
ds = response.dataset_to_xarray(dataset_id)
# Bare last expression so the notebook renders the dataset summary.
ds

### Retrieve only a specific EOV

In [18]:
# ds = response.dataset_to_pandas_dataframe(dataset_id, "EV_OXY")
# # ds

### Download a dataset as a NetCDF file

In [19]:
# Download the dataset in "nc" format; the call returned True in this run.
# NOTE(review): the destination path is not visible here — confirm where the file is written.
response.dataset_to_file_download(dataset_id, "nc")

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): erddap.eurobis.org:443
DEBUG:urllib3.connectionpool:https://erddap.eurobis.org:443 "GET //erddap/tabledap/lyumkis.nc?time%2Clatitude%2Clongitude%2CTemp&time%3E=1900-01-01&time%3C=2022-01-02&latitude%3E=0.0&latitude%3C=90.0&longitude%3E=0.0&longitude%3C=90.0 HTTP/1.1" 200 None


True