In [1]:
import pandas as pd
import requests

from accessibility import check_endpoint
from summarize import * #json_keys, data_frame

### Argo metadata & data API

#### ARGO metadata API

API points to: "ARGO Floats metadata"

API documentation (swagger): https://fleetmonitoring.euro-argo.eu/swagger-ui.html

In [44]:
# Endpoint under analysis (picked from the Swagger docs): the collection of all floats.
argo_metadata_host = "https://fleetmonitoring.euro-argo.eu"
argo_metadata_url = f"{argo_metadata_host}/floats"

In [52]:
# Probe the endpoint once and report the verdict returned by check_endpoint.
endpoint_is_accessible = check_endpoint(argo_metadata_url)
accessibility_message = (
    "The endpoint is machine-accessible."
    if endpoint_is_accessible
    else "The endpoint is not machine-accessible."
)
print(accessibility_message)

Checking endpoint: https://fleetmonitoring.euro-argo.eu/floats
Endpoint is online: 200
Content type suggests machine-readable data: application/json
The endpoint is machine-accessible.


In [25]:
# Download the complete float listing. raise_for_status() stops the notebook on an
# HTTP error instead of letting an error payload be summarised as if it were data;
# the timeout keeps an unresponsive server from hanging the kernel indefinitely.
floats_http_response = requests.get(argo_metadata_url, timeout=120)
floats_http_response.raise_for_status()
endpoint_response = floats_http_response.json()

In [26]:
# Summarise the root endpoint: how many float objects the API returned.
float_count = len(endpoint_response)
print(
    "The root url of this API returns a list of JSON objects, each representing a ARGO float. \n"
    f"The number of floats: {float_count} \n"
    "Each float is described through the listed properties (also visualized further along)."
)

The root url of this API returns a list of JSON objects, each representing a ARGO float. 
The number of floats: 19045 
Each float is described through the listed properties (also visualized further along).


In [27]:
summary = json_keys(endpoint_response)

# Walk the summary once: echo every property and collect the matching table row.
summary_rows = []
for prop_name, prop_info in summary.items():
    type_names = ', '.join(prop_info['types'])
    print(f"Key: {prop_name}, Count: {prop_info['count']}, Types: {type_names}, Example value: '{prop_info['example']}'")
    summary_rows.append(
        {
            "Property": prop_name,
            "Count": prop_info["count"],
            "Types": type_names,
            "Example": prop_info["example"],
        }
    )

# Turn the collected rows into a DataFrame, persist it as CSV and display it.
df_summary = pd.DataFrame(summary_rows)
df_summary.to_csv("properties/ARGO_JSONAPI_metadata_floats.csv", index=False)
df_summary

Key: wmo, Count: 19045, Types: str, Example value: '7902194'
Key: serialInst, Count: 19045, Types: NoneType, str, Example value: 'AI2632-23EU018'
Key: serialIMEI, Count: 19045, Types: NoneType, Example value: 'None'
Key: platform, Count: 19045, Types: dict, Example value: '{'code': '7902194', 'name': 'ARVOR-DO Profiling Float', 'description': 'Argo POLAND', 'comment': None, 'activity': None, 'type': 'ARVOR'}'
Key: platform.code, Count: 19045, Types: str, Example value: '7902194'
Key: platform.name, Count: 19045, Types: str, Example value: 'ARVOR-DO Profiling Float'
Key: platform.description, Count: 19045, Types: str, Example value: 'Argo POLAND'
Key: platform.comment, Count: 19045, Types: str, NoneType, Example value: 'None'
Key: platform.activity, Count: 19045, Types: str, NoneType, Example value: 'None'
Key: platform.type, Count: 19045, Types: NoneType, str, Example value: 'ARVOR'
Key: sensors, Count: 19045, Types: list, Example value: '[]'
Key: parameters, Count: 19045, Types: list,

Unnamed: 0,Property,Count,Types,Example
0,wmo,19045,str,7902194
1,serialInst,19045,"NoneType, str",AI2632-23EU018
2,serialIMEI,19045,NoneType,
3,platform,19045,dict,"{'code': '7902194', 'name': 'ARVOR-DO Profilin..."
4,platform.code,19045,str,7902194
...,...,...,...,...
1116,configurations.missionCycles.9992,1,list,[9992]
1117,configurations.missionCycles.9995,1,list,[9995]
1118,configurations.missionCycles.9990,1,list,[9990]
1119,configurations.missionCycles.9988,1,list,[9988]


In [29]:
# Collect the WMO codes into a plain list so they can be sliced further down.
# NOTE(review): the summary printout above only reads 'count', 'types' and 'example'
# from each entry; confirm that json_keys also stores the observed values under a
# key named after the property ('wmo'), otherwise this lookup raises KeyError.
wmo_entry = summary['wmo']
wmo_codes = [*wmo_entry['wmo']]

In [30]:
# Detail endpoints listed in the API docs (the second one is analysed here):
#   https://fleetmonitoring.euro-argo.eu/floats/basic/{wmo}
#   https://fleetmonitoring.euro-argo.eu/floats/{wmo}

# Fail early with a clear message: without at least one code the loop body never
# runs and the export below would hit an undefined (or stale) df_summ.
if not wmo_codes:
    raise ValueError("wmo_codes is empty; cannot analyse a float detail endpoint.")

# Only the first float is inspected, assuming the properties are the same for different
# floats (highly likely, since that is also the case for the properties in the root URL).
for wmo in wmo_codes[0:1]:
    detail_url = f"https://fleetmonitoring.euro-argo.eu/floats/{wmo}"
    print(detail_url)

    # Keep the payload under its own name so the float list held in
    # endpoint_response is not overwritten by a single float's detail.
    # raise_for_status() surfaces HTTP errors; the timeout prevents an indefinite hang.
    detail_http_response = requests.get(detail_url, timeout=120)
    detail_http_response.raise_for_status()
    float_detail = detail_http_response.json()

    summ = json_keys(float_detail)

    for key, info in summ.items():
        print(f"Key: {key}, Count: {info['count']}, Types: {', '.join(info['types'])}, Example value: '{info['example']}'")

    # Convert the summary dictionary to a pandas DataFrame
    df_summ = pd.DataFrame([
        {
            "Property": key,
            "Count": info["count"],
            "Types": ', '.join(info["types"]),
            "Example": info["example"]
        }
        for key, info in summ.items()
    ])

# Persist and display the summary of the analysed float.
df_summ.to_csv("properties/ARGO_JSONAPI_metadata_floats_2.csv", index=False)
df_summ

https://fleetmonitoring.euro-argo.eu/floats/6900716
Key: wmo, Count: 1, Types: str, Example value: '6900716'
Key: serialInst, Count: 1, Types: str, Example value: 'OIN-08-S3-45'
Key: serialIMEI, Count: 1, Types: NoneType, Example value: 'None'
Key: platform, Count: 1, Types: dict, Example value: '{'code': '6900716', 'name': 'PROVOR Profiling Float', 'description': 'GOODHOPE', 'comment': None, 'activity': None, 'type': 'PROVOR'}'
Key: platform.code, Count: 1, Types: str, Example value: '6900716'
Key: platform.name, Count: 1, Types: str, Example value: 'PROVOR Profiling Float'
Key: platform.description, Count: 1, Types: str, Example value: 'GOODHOPE'
Key: platform.comment, Count: 1, Types: NoneType, Example value: 'None'
Key: platform.activity, Count: 1, Types: NoneType, Example value: 'None'
Key: platform.type, Count: 1, Types: str, Example value: 'PROVOR'
Key: sensors, Count: 1, Types: list, Example value: '[{'dimLevel': 1, 'id': 'CTD_PRES', 'maker': 'SBE', 'model': 'SBE', 'serial': 'n

Unnamed: 0,Property,Count,Types,Example
0,wmo,1,str,6900716
1,serialInst,1,str,OIN-08-S3-45
2,serialIMEI,1,NoneType,
3,platform,1,dict,"{'code': '6900716', 'name': 'PROVOR Profiling ..."
4,platform.code,1,str,6900716
...,...,...,...,...
356,firstCycle.lastStation.direction,1,str,A
357,firstCycle.lastStation.date,1,str,2016-02-01T03:15:00.000+00:00
358,firstCycle.lastStation.profiles,1,list,[]
359,firstCycle.lastStation.firstProfile,1,NoneType,


Analysis of both endpoints gives a view of the properties associated with ARGO floats, which is visualized as follows:
![image.png](images/ARGO_JSONAPI_Float.drawio.png)

#### ARGO data API

API points to: "ARGO Floats data"

API documentation (swagger): https://dataselection.euro-argo.eu/swagger-ui.html#/

##### Basins

In [2]:
# Endpoint under analysis (picked from the Swagger docs): the basins tree.
argo_data_host = "https://dataselection.euro-argo.eu"
argo_data_url = f"{argo_data_host}/api/basins-tree"

In [3]:
# Download the basins tree. raise_for_status() stops the notebook on an HTTP error
# instead of letting an error payload be summarised as if it were data; the timeout
# keeps an unresponsive server from hanging the kernel indefinitely.
basins_http_response = requests.get(argo_data_url, timeout=120)
basins_http_response.raise_for_status()
endpoint_response = basins_http_response.json()

In [4]:
summary = json_keys(endpoint_response)

# Emit one report line per property found in the response.
for prop_name, prop_info in summary.items():
    occurrence_count = prop_info['count']
    type_names = ', '.join(prop_info['types'])
    example_value = prop_info['example']
    print(f"Key: {prop_name}, Count: {occurrence_count}, Types: {type_names}, Example value: '{example_value}'")

Key: id, Count: 11, Types: int, Example value: '23617'
Key: name, Count: 11, Types: str, Example value: 'ARCTIC OCEAN'
Key: subBasins, Count: 11, Types: list, Example value: '[{'id': 24027, 'name': 'KARA SEA'}, {'id': 24029, 'name': 'BARENTS SEA'}, {'id': 24026, 'name': 'LAPTEV SEA'}, {'id': 24020, 'name': 'WHITE SEA'}, {'id': 24021, 'name': 'ICELAND SEA'}, {'id': 24024, 'name': 'NORWEGIAN SEA'}, {'id': 24025, 'name': 'EAST SIBERIAN SEA'}, {'id': 24028, 'name': 'NORTHWESTERN PASSAGES'}, {'id': 24019, 'name': 'DAVIS STRAIT'}, {'id': 24030, 'name': 'BAFFIN BAY'}, {'id': 24032, 'name': 'LINCOLN SEA'}, {'id': 24018, 'name': 'HUDSON BAY'}, {'id': 24023, 'name': 'BEAUFORT SEA'}, {'id': 36279, 'name': 'GREENLAND SEA'}, {'id': 24017, 'name': 'HUDSON STRAIT'}, {'id': 24022, 'name': 'CHUKCHI SEA'}, {'id': 26579, 'name': 'FRAM STRAIT'}]'
Key: subBasins.id, Count: 236, Types: int, Example value: '26579'
Key: subBasins.name, Count: 236, Types: str, Example value: 'FRAM STRAIT'


In [5]:
# Flatten the per-property summary into one record per property and persist it as CSV.
property_rows = []
for prop_name, prop_info in summary.items():
    property_rows.append(
        {
            "Property": prop_name,
            "Count": prop_info["count"],
            "Types": ', '.join(prop_info["types"]),
            "Example": prop_info["example"],
        }
    )

df_summ = pd.DataFrame(property_rows)
df_summ.to_csv("properties/ARGO_JSONAPI_metadata_basins.csv", index=False)

This endpoint returns a list of ocean basins and their sub-basins.  
This can be visualized as:  
![image.png](images/ARGO_JSONAPI_oceanbasins.drawio.png)

##### Cycle by cycle-id

In [36]:
# Endpoint under analysis (picked from the Swagger docs): look up one cycle by its id.
argo_data_host = "https://dataselection.euro-argo.eu"
example_cycle_id = 760094
argo_data_url = f"{argo_data_host}/api/find-by-id/{example_cycle_id}"

In [37]:
# Download the cycle record. raise_for_status() stops the notebook on an HTTP error
# instead of letting an error payload be summarised as if it were data; the timeout
# keeps an unresponsive server from hanging the kernel indefinitely.
cycle_http_response = requests.get(argo_data_url, timeout=120)
cycle_http_response.raise_for_status()
endpoint_response = cycle_http_response.json()

In [38]:
summary = json_keys(endpoint_response)

# Report every property of the response together with its count, types and an example.
for field, details in summary.items():
    joined_types = ', '.join(details['types'])
    print(f"Key: {field}, Count: {details['count']}, Types: {joined_types}, Example value: '{details['example']}'")

Key: id, Count: 1, Types: int, Example value: '760094'
Key: cvNumber, Count: 1, Types: int, Example value: '1'
Key: startDate, Count: 1, Types: str, Example value: '2008-03-23T02:00:54.000+0000'
Key: endDate, Count: 1, Types: str, Example value: '2008-03-23T02:00:54.000+0000'
Key: coordinate, Count: 1, Types: dict, Example value: '{'lat': -28.184, 'lon': -22.997}'
Key: coordinate.lat, Count: 1, Types: float, Example value: '-28.184'
Key: coordinate.lon, Count: 1, Types: float, Example value: '-22.997'
Key: globalGeoShapeField, Count: 1, Types: dict, Example value: '{'x': -22.997, 'y': -28.184, 'coordinates': [-22.997, -28.184], 'type': 'Point'}'
Key: globalGeoShapeField.x, Count: 1, Types: float, Example value: '-22.997'
Key: globalGeoShapeField.y, Count: 1, Types: float, Example value: '-28.184'
Key: globalGeoShapeField.coordinates, Count: 1, Types: list, Example value: '[-22.997, -28.184]'
Key: globalGeoShapeField.type, Count: 1, Types: str, Example value: 'Point'
Key: platformCode, 

In [39]:
def _cycle_summary_row(prop_name, prop_info):
    """Build the table row describing one summarised property."""
    return {
        "Property": prop_name,
        "Count": prop_info["count"],
        "Types": ', '.join(prop_info["types"]),
        "Example": prop_info["example"],
    }


# One row per summarised property, written out as CSV.
df_summ = pd.DataFrame(
    [_cycle_summary_row(name, details) for name, details in summary.items()]
)
df_summ.to_csv("properties/ARGO_JSONAPI_metadata_cycle.csv", index=False)

The endpoint returns metadata associated with the given cycle.  
This can be visualized as follows: 
![image.png](images/ARGO_JSONAPI_cycle.drawio.png)

##### Cycle by platformID and cvNumber

In [40]:
# Endpoint under analysis (picked from the Swagger docs): look up one cycle by
# platform id and cvNumber.
argo_data_host = "https://dataselection.euro-argo.eu"
example_platform_id = 3900675
example_cv_number = 1
argo_data_url = (
    f"{argo_data_host}/api/find-by-platformid/{example_platform_id}"
    f"/cvnumber/{example_cv_number}"
)

In [41]:
# Download the cycle record. raise_for_status() stops the notebook on an HTTP error
# instead of letting an error payload be summarised as if it were data; the timeout
# keeps an unresponsive server from hanging the kernel indefinitely.
platform_cycle_http_response = requests.get(argo_data_url, timeout=120)
platform_cycle_http_response.raise_for_status()
endpoint_response = platform_cycle_http_response.json()

In [42]:
summary = json_keys(endpoint_response)

# Print the count, observed types and an example value for each property.
for property_key, property_stats in summary.items():
    seen_count = property_stats['count']
    seen_types = ', '.join(property_stats['types'])
    sample = property_stats['example']
    print(f"Key: {property_key}, Count: {seen_count}, Types: {seen_types}, Example value: '{sample}'")

Key: id, Count: 1, Types: int, Example value: '760094'
Key: cvNumber, Count: 1, Types: int, Example value: '1'
Key: startDate, Count: 1, Types: str, Example value: '2008-03-23T02:00:54.000+0000'
Key: endDate, Count: 1, Types: str, Example value: '2008-03-23T02:00:54.000+0000'
Key: coordinate, Count: 1, Types: dict, Example value: '{'lat': -28.184, 'lon': -22.997}'
Key: coordinate.lat, Count: 1, Types: float, Example value: '-28.184'
Key: coordinate.lon, Count: 1, Types: float, Example value: '-22.997'
Key: globalGeoShapeField, Count: 1, Types: dict, Example value: '{'x': -22.997, 'y': -28.184, 'coordinates': [-22.997, -28.184], 'type': 'Point'}'
Key: globalGeoShapeField.x, Count: 1, Types: float, Example value: '-22.997'
Key: globalGeoShapeField.y, Count: 1, Types: float, Example value: '-28.184'
Key: globalGeoShapeField.coordinates, Count: 1, Types: list, Example value: '[-22.997, -28.184]'
Key: globalGeoShapeField.type, Count: 1, Types: str, Example value: 'Point'
Key: platformCode, 

This endpoint returns the metadata associated with a cycle that is identified by the given platform code and cvNumber.  
This metadata consists of the same properties as the metadata returned by the previous endpoint (https://dataselection.euro-argo.eu/api/find-by-id/{cycleID}),  
and it is visualized by the same diagram as above.

The API documentation lists no other endpoints that allow GET requests.

### Analysis results:

- both endpoints (data API and metadata API) are JSON APIs
- data is findable and machine-accessible
- knowledge of the API structure/model is required in order to understand and use the data,  
but this information is available in the Swagger API documentation

- some of the Swagger API documentation is not clear to non-domain experts  
  - e.g. some API paths require knowledge of the WMO codes, platform codes, ...
  - the semantics are not clear

- Data returned by APIs:
  - 19045 Floats:
    - described by 39 properties (many more when taking nested properties into account (see diagram for overview))

  - 11 Basins:
    - described by 3 properties
    - 236 sub-basins

  - 900+ Cycles:
    - described by 30 properties (51 taking nested properties into account)
    - no exact number on total number of cycles

- Semantic ambiguity:
  - use of string/number codes as identifiers
  - limited/no use of persistent identifiers in data
  - limited/no use of external standard terms
  - the meaning of some properties is ambiguous or unclear