In [2]:
import pandas as pd
import requests
from collections import defaultdict
import json

### Argo metadata & data API

#### Notes/findings:
- both json API, no RDF ~ not directly machine accessible, requires getting to know the data model
- Swagger APi documentation avaiable 
  - the semantics are not clear
  - requires knowledge on the wmo-codes, platform-codes, ... 
- limited/no use of standard terms in output of '/floats' path  
  
- data granularity is unclear

#### ARGO metadata API

API points to: "ARGO Floats metadata"

API documentation (swagger): https://fleetmonitoring.euro-argo.eu/swagger-ui.html

In [24]:
# Analysed endpoint (selected from API docs)
argo_metadata_url = "https://fleetmonitoring.euro-argo.eu/floats"

In [4]:
#Helper function
def summarize_json_keys(data, summary=None, parent_key=""):
    """
    Recursively summarizes all keys in a JSON object.

    :param data: The JSON data (as a Python dictionary or list).
    :param summary: A dictionary to store the summary of keys (default: None).
    :param parent_key: A string representing the parent key (for nested keys).
    :return: A dictionary summarizing the occurrence and types of all keys.
    """
    if summary is None:
        summary = defaultdict(lambda: {"count": 0, "types": set(), "example": "", "wmo": set()})

    if isinstance(data, dict):
        for key, value in data.items():
            # Create a full key path for nested keys
            full_key = f"{parent_key}.{key}" if parent_key else key
            summary[full_key]["count"] += 1
            summary[full_key]["types"].add(type(value).__name__)
            summary[full_key]["example"] = value
            if key == "wmo":
                summary[full_key]["wmo"].add(value)
            summarize_json_keys(value, summary, full_key)

    elif isinstance(data, list):
        for item in data:
            summarize_json_keys(item, summary, parent_key)

    return summary

In [25]:
endpoint_response = requests.get(argo_metadata_url).json()

In [6]:
# number of objects offered by json API
print(f"The root url of this API returns a list of JSON objects, each representing a ARGO float. \nThe number of floats: {len(endpoint_response)} \nEach float is described through the listed properties (also visualized further along).")

The root url of this API returns a list of JSON objects, each representing a ARGO float. 
The number of floats: 19032 
Each float is described through the listed properties (also visualized further along).


In [26]:
summary = summarize_json_keys(endpoint_response)

for key, info in summary.items():
    print(f"Key: {key}, Count: {info['count']}, Types: {', '.join(info['types'])}, Example value: '{info['example']}'")

# Convert the summary dictionary to a pandas DataFrame
df_summary = pd.DataFrame([
    {
        "Property": key,
        "Count": info["count"],
        "Types": ', '.join(info["types"]),
        "Example": info["example"]
    }
    for key, info in summary.items()
])

df_summary

Key: wmo, Count: 19032, Types: str, Example value: '7902194'
Key: serialInst, Count: 19032, Types: str, NoneType, Example value: 'AI2632-23EU018'
Key: serialIMEI, Count: 19032, Types: NoneType, Example value: 'None'
Key: platform, Count: 19032, Types: dict, Example value: '{'code': '7902194', 'name': 'ARVOR-DO Profiling Float', 'description': 'Argo POLAND', 'comment': None, 'activity': None, 'type': 'ARVOR'}'
Key: platform.code, Count: 19032, Types: str, Example value: '7902194'
Key: platform.name, Count: 19032, Types: str, Example value: 'ARVOR-DO Profiling Float'
Key: platform.description, Count: 19032, Types: str, Example value: 'Argo POLAND'
Key: platform.comment, Count: 19032, Types: str, NoneType, Example value: 'None'
Key: platform.activity, Count: 19032, Types: str, NoneType, Example value: 'None'
Key: platform.type, Count: 19032, Types: str, NoneType, Example value: 'ARVOR'
Key: sensors, Count: 19032, Types: list, Example value: '[]'
Key: parameters, Count: 19032, Types: list,

Unnamed: 0,Property,Count,Types,Example
0,wmo,19032,str,7902194
1,serialInst,19032,"str, NoneType",AI2632-23EU018
2,serialIMEI,19032,NoneType,
3,platform,19032,dict,"{'code': '7902194', 'name': 'ARVOR-DO Profilin..."
4,platform.code,19032,str,7902194
...,...,...,...,...
1116,configurations.missionCycles.9992,1,list,[9992]
1117,configurations.missionCycles.9995,1,list,[9995]
1118,configurations.missionCycles.9990,1,list,[9990]
1119,configurations.missionCycles.9988,1,list,[9988]


In [10]:
#get list of wmo codes:
wmo_codes = list(summary['wmo']['wmo'])

In [27]:
# Analysed endpoint (selected from API docs)
#argo_metadata_basic_url = "https://fleetmonitoring.euro-argo.eu/floats/basic/{wmo}"
#argo_metadata_detail_url = "https://fleetmonitoring.euro-argo.eu/floats/{wmo}"

for wmo in wmo_codes[0:1]: #assuming properties the same for different floats (higly likely since also the case for properties in root url of api)
    detail_url = f"https://fleetmonitoring.euro-argo.eu/floats/{wmo}"
    print(detail_url)
    endpoint_response = requests.get(detail_url).json()

    summ = summarize_json_keys(endpoint_response)

    for key, info in summ.items():
        print(f"Key: {key}, Count: {info['count']}, Types: {', '.join(info['types'])}, Example value: '{info['example']}'")
    
    # Convert the summary dictionary to a pandas DataFrame
    df_summ= pd.DataFrame([
        {
            "Property": key,
            "Count": info["count"],
            "Types": ', '.join(info["types"]),
            "Example": info["example"]
        }
        for key, info in summ.items()
    ])

df_summ

https://fleetmonitoring.euro-argo.eu/floats/3900675
Key: wmo, Count: 1, Types: str, Example value: '3900675'
Key: serialInst, Count: 1, Types: str, Example value: 'SL824'
Key: serialIMEI, Count: 1, Types: NoneType, Example value: 'None'
Key: platform, Count: 1, Types: dict, Example value: '{'code': '3900675', 'name': 'SOLO Profiling Float SBE_SL824', 'description': 'Argo WHOI', 'comment': None, 'activity': None, 'type': 'SOLO_W'}'
Key: platform.code, Count: 1, Types: str, Example value: '3900675'
Key: platform.name, Count: 1, Types: str, Example value: 'SOLO Profiling Float SBE_SL824'
Key: platform.description, Count: 1, Types: str, Example value: 'Argo WHOI'
Key: platform.comment, Count: 1, Types: NoneType, Example value: 'None'
Key: platform.activity, Count: 1, Types: NoneType, Example value: 'None'
Key: platform.type, Count: 1, Types: str, Example value: 'SOLO_W'
Key: sensors, Count: 1, Types: list, Example value: '[{'dimLevel': 1, 'id': 'CTD_TEMP', 'maker': 'SBE', 'model': 'SBE41CP

Unnamed: 0,Property,Count,Types,Example
0,wmo,1,str,3900675
1,serialInst,1,str,SL824
2,serialIMEI,1,NoneType,
3,platform,1,dict,"{'code': '3900675', 'name': 'SOLO Profiling Fl..."
4,platform.code,1,str,3900675
...,...,...,...,...
350,firstCycle.lastStation.direction,1,str,A
351,firstCycle.lastStation.date,1,str,2011-05-16T12:47:43.000+00:00
352,firstCycle.lastStation.profiles,1,list,[]
353,firstCycle.lastStation.lastProfile,1,NoneType,


Analyses of both endpoints gives a view on the properties associated with ARGO floats and is visualized as follows:
![image.png](images/ARGO_JSONAPI_Float.drawio.png)

#### ARGO data API

API points to: "ARGO Floats data"

API documentation (swagger): https://dataselection.euro-argo.eu/swagger-ui.html#/

##### Basins

In [32]:
# Analysed endpoint (selected from API docs)
argo_data_url = "https://dataselection.euro-argo.eu/api/basins-tree"

In [33]:
endpoint_response = requests.get(argo_data_url).json()

In [22]:
summary = summarize_json_keys(endpoint_response)

for key, info in summary.items():
    print(f"Key: {key}, Count: {info['count']}, Types: {', '.join(info['types'])}, Example value: '{info['example']}'")

Key: id, Count: 11, Types: int, Example value: '23617'
Key: name, Count: 11, Types: str, Example value: 'ARCTIC OCEAN'
Key: subBasins, Count: 11, Types: list, Example value: '[{'id': 24027, 'name': 'KARA SEA'}, {'id': 24029, 'name': 'BARENTS SEA'}, {'id': 24026, 'name': 'LAPTEV SEA'}, {'id': 24020, 'name': 'WHITE SEA'}, {'id': 24021, 'name': 'ICELAND SEA'}, {'id': 24024, 'name': 'NORWEGIAN SEA'}, {'id': 24025, 'name': 'EAST SIBERIAN SEA'}, {'id': 24028, 'name': 'NORTHWESTERN PASSAGES'}, {'id': 24019, 'name': 'DAVIS STRAIT'}, {'id': 24030, 'name': 'BAFFIN BAY'}, {'id': 24032, 'name': 'LINCOLN SEA'}, {'id': 24018, 'name': 'HUDSON BAY'}, {'id': 24023, 'name': 'BEAUFORT SEA'}, {'id': 36279, 'name': 'GREENLAND SEA'}, {'id': 24017, 'name': 'HUDSON STRAIT'}, {'id': 24022, 'name': 'CHUKCHI SEA'}, {'id': 26579, 'name': 'FRAM STRAIT'}]'
Key: subBasins.id, Count: 236, Types: int, Example value: '26579'
Key: subBasins.name, Count: 236, Types: str, Example value: 'FRAM STRAIT'


This endpoint returns a list of ocean basins and their sub-basins,  
this can be visualized as:  
![image.png](images/ARGO_JSONAPI_oceanbasins.drawio.png)

##### Cycle by cycle-id

In [28]:
# Analysed endpoint (selected from API docs)
argo_data_url = "https://dataselection.euro-argo.eu/api/find-by-id/760094"

In [29]:
endpoint_response = requests.get(argo_data_url).json()

In [30]:
summary = summarize_json_keys(endpoint_response)

for key, info in summary.items():
    print(f"Key: {key}, Count: {info['count']}, Types: {', '.join(info['types'])}, Example value: '{info['example']}'")

Key: id, Count: 1, Types: int, Example value: '760094'
Key: cvNumber, Count: 1, Types: int, Example value: '1'
Key: startDate, Count: 1, Types: str, Example value: '2008-03-23T02:00:54.000+0000'
Key: endDate, Count: 1, Types: str, Example value: '2008-03-23T02:00:54.000+0000'
Key: coordinate, Count: 1, Types: dict, Example value: '{'lat': -28.184, 'lon': -22.997}'
Key: coordinate.lat, Count: 1, Types: float, Example value: '-28.184'
Key: coordinate.lon, Count: 1, Types: float, Example value: '-22.997'
Key: globalGeoShapeField, Count: 1, Types: dict, Example value: '{'x': -22.997, 'y': -28.184, 'coordinates': [-22.997, -28.184], 'type': 'Point'}'
Key: globalGeoShapeField.x, Count: 1, Types: float, Example value: '-22.997'
Key: globalGeoShapeField.y, Count: 1, Types: float, Example value: '-28.184'
Key: globalGeoShapeField.coordinates, Count: 1, Types: list, Example value: '[-22.997, -28.184]'
Key: globalGeoShapeField.type, Count: 1, Types: str, Example value: 'Point'
Key: platformCode, 

The endpoint returns metadata associated with the given cycle
this can be visualized as follows: 
![image.png](images/ARGO_JSONAPI_cycle.drawio.png)

##### Cycle by platformID and cvNumber

In [36]:
# Analysed endpoint (selected from API docs)
argo_data_url = "https://dataselection.euro-argo.eu/api/find-by-platformid/3900675/cvnumber/1"

In [37]:
endpoint_response = requests.get(argo_data_url).json()

In [38]:
summary = summarize_json_keys(endpoint_response)

for key, info in summary.items():
    print(f"Key: {key}, Count: {info['count']}, Types: {', '.join(info['types'])}, Example value: '{info['example']}'")

Key: id, Count: 1, Types: int, Example value: '760094'
Key: cvNumber, Count: 1, Types: int, Example value: '1'
Key: startDate, Count: 1, Types: str, Example value: '2008-03-23T02:00:54.000+0000'
Key: endDate, Count: 1, Types: str, Example value: '2008-03-23T02:00:54.000+0000'
Key: coordinate, Count: 1, Types: dict, Example value: '{'lat': -28.184, 'lon': -22.997}'
Key: coordinate.lat, Count: 1, Types: float, Example value: '-28.184'
Key: coordinate.lon, Count: 1, Types: float, Example value: '-22.997'
Key: globalGeoShapeField, Count: 1, Types: dict, Example value: '{'x': -22.997, 'y': -28.184, 'coordinates': [-22.997, -28.184], 'type': 'Point'}'
Key: globalGeoShapeField.x, Count: 1, Types: float, Example value: '-22.997'
Key: globalGeoShapeField.y, Count: 1, Types: float, Example value: '-28.184'
Key: globalGeoShapeField.coordinates, Count: 1, Types: list, Example value: '[-22.997, -28.184]'
Key: globalGeoShapeField.type, Count: 1, Types: str, Example value: 'Point'
Key: platformCode, 

Endpoint returns the metadata associated with a cycle that is defined by the given platformcode and cvNumber  
This metadata consists of the same properties as the metadata returned by the previous endpoint (https://dataselection.euro-argo.eu/api/find-by-id/{cycleID})  
And si visualized by the same diagram as above

No other endpoints, that allow Get requests, in the API documentation