In [None]:
# Upper bound on the deployment numbers probed per glider; it is the default
# of the ``max_depl_num`` argument of ``find_latest_deployment_url``.
max_depl_num = 99

# Platform identifiers "ifm01" ... "ifm15" (two-digit, zero-padded).
glider_names = ["ifm{:02d}".format(number) for number in range(1, 16)]

In [None]:
# Sanity check: show the generated list of glider names.
print(glider_names)

In [None]:
import requests
import re
import pandas as pd

In [None]:
def find_latest_deployment_url(glider_name, max_depl_num=max_depl_num, timeout=30):
    """Return the diveinfo URL of the highest consecutive deployment of a glider.

    Deployments are probed in ascending order (``depl01``, ``depl02``, ...)
    and the scan stops at the first deployment whose ``diveinfo.txt`` cannot
    be fetched successfully.

    Parameters
    ----------
    glider_name : str
        Glider identifier as it appears in the URL, e.g. ``"ifm03"``.
    max_depl_num : int, optional
        Highest deployment number to probe (inclusive).
    timeout : float, optional
        Per-request timeout in seconds, so that an unresponsive server cannot
        block the notebook indefinitely.

    Returns
    -------
    str or None
        URL of the last deployment that answered with a success status, or
        ``None`` if not even the first deployment could be fetched.

    Raises
    ------
    requests.RequestException
        If a request fails at the transport level (connection error, timeout).
    """
    url_scheme = "https://gliderweb.geomar.de/html/{glider_name}/depl{deployment:02d}/diveinfo.txt"
    last_good_url = None
    for deployment in range(1, max_depl_num + 1):
        url = url_scheme.format(
            glider_name=glider_name,
            deployment=deployment,
        )
        response = requests.get(url, timeout=timeout)
        # Stop on *any* unsuccessful status, not only 404: a 403/500/503
        # response is no evidence that the deployment exists, and treating it
        # as "good" would let a failing server push the scan up to
        # max_depl_num and return a bogus URL.
        if not response.ok:
            break
        last_good_url = url
    return last_good_url

In [None]:
# Look up, for every glider, the URL of its most recent deployment's
# diveinfo.txt (None when no deployment was found).
latest_deployment_urls = dict(
    (name, find_latest_deployment_url(name)) for name in glider_names
)

In [None]:
# Inspect the URL found for each glider (None where no deployment was found).
latest_deployment_urls

In [None]:
# Download the raw diveinfo.txt bytes for every glider.
# find_latest_deployment_url returns None when a glider has no deployment;
# requests.get(None) would raise and abort the whole notebook, so such gliders
# get empty content instead. Empty content yields no position downstream and
# the glider is removed by the dropna() before the CSV export.
dive_infos = {
    glider_name: (
        # The timeout keeps a stalled server from hanging the cell forever.
        requests.get(url, timeout=30).content if url is not None else b""
    )
    for glider_name, url in latest_deployment_urls.items()
}

In [None]:
def find_latest_position(dive_info):
    """Extract the position line from the raw content of a diveinfo file.

    The text is searched for the first line consisting of ``Position:``
    (surrounding whitespace, including a trailing carriage return from CRLF
    line endings, is ignored) and the line following it is returned.

    NOTE(review): the *first* marker in the file is used; this is the latest
    position only if the file lists the newest dive first -- confirm against
    the actual file layout.

    Parameters
    ----------
    dive_info : bytes
        Raw file content, expected to be UTF-8 encoded text.

    Returns
    -------
    str
        The whitespace-stripped line after the first ``Position:`` marker, or
        ``""`` if there is no marker or no line follows it.
    """
    # Undecodable bytes are replaced rather than raising: only the ASCII
    # marker and the position line matter here.
    lines = dive_info.decode("utf8", errors="replace").split("\n")

    # The last line is excluded as a marker candidate because nothing follows
    # it (indexing past it would raise IndexError).
    for index, line in enumerate(lines[:-1]):
        if line.strip() == "Position:":
            return lines[index + 1].strip()
    return ""

In [None]:
# Position format: "<deg>^o <min>'<N|S> <deg>^o <min>'<E|W>",
# e.g. "54^o 19.50'N 10^o 7.20'E". Compiled once instead of on every call.
_POS_PATTERN = re.compile(
    r"(?P<latdeg>\d+)\^o\s*(?P<latmin>\d+(?:\.\d+)?)'(?P<latdir>[NS])"
    r"\s*"
    r"(?P<londeg>\d+)\^o\s*(?P<lonmin>\d+(?:\.\d+)?)'(?P<londir>[WE])"
)


def parse_pos_string(pos_string):
    """Convert a degrees / decimal-minutes position string to decimal degrees.

    Parameters
    ----------
    pos_string : str
        Position such as ``"54^o 19.50'N 10^o 7.20'E"``. The pattern is
        matched at the start of the string; trailing text is ignored.

    Returns
    -------
    dict
        ``{"latitude": float, "longitude": float}`` in signed decimal degrees
        (south and west negative), or both values ``None`` if the string does
        not match the expected format.
    """
    m = _POS_PATTERN.match(pos_string)
    if m is None:
        return {"latitude": None, "longitude": None}

    # The hemisphere sign must apply to degrees AND minutes: 10 deg 30' S is
    # -10.5, not -10 + 0.5.
    lat_sign = 1 if m.group("latdir") == "N" else -1
    lon_sign = 1 if m.group("londir") == "E" else -1
    lat = lat_sign * (int(m.group("latdeg")) + float(m.group("latmin")) / 60)
    lon = lon_sign * (int(m.group("londeg")) + float(m.group("lonmin")) / 60)
    return {"latitude": lat, "longitude": lon}

In [None]:
# For every glider, pull the position line out of its dive info and convert
# it to a {"latitude": ..., "longitude": ...} dict.
positions = dict(
    (name, parse_pos_string(find_latest_position(raw_info)))
    for name, raw_info in dive_infos.items()
)

In [None]:
# Build a table with one row per glider: orient="index" turns the dict keys
# (glider names) into the row index.
# NOTE(review): this rebinds `positions` from a dict to a DataFrame, so this
# cell presumably cannot be re-run on its own without first re-running the
# cell that builds the dict -- consider a separate name.
positions = pd.DataFrame.from_dict(positions, orient="index")

In [None]:
# Expose the row index (glider names) as an explicit "platform" column.
positions["platform"] = positions.index

In [None]:
# Inspect the full table, including gliders without a parsed position.
positions

In [None]:
# Export only the gliders with a known position: rows containing missing
# values are dropped before writing.
# NOTE(review): the index is presumably written as well (pandas default), so
# the glider name would appear both as the unnamed first column and as
# "platform" -- confirm this is what consumers of the CSV expect.
# NOTE(review): assumes the "data/" directory already exists -- confirm.
positions.dropna().to_csv("data/gliders.csv")