# Read SO284 Drift Buoy positions

In [None]:
import pandas as pd
import datetime

In [None]:
def month_iterator(start_year, start_month, end_year, end_month):
    """Yield ``(year, month)`` tuples from the start month to the end month.

    Both end points are inclusive. Nothing is yielded when the start lies
    after the end. After month 12 the sequence continues with month 1 of the
    following year.
    """
    current = (start_year, start_month)
    last = (end_year, end_month)
    while current <= last:
        yield current
        this_year, this_month = current
        # divmod gives a carry of 1 exactly when the month is 12, so adding
        # one to the remainder rolls December over to January.
        carry, remainder = divmod(this_month, 12)
        current = (this_year + carry, remainder + 1)
        

# Sanity check: identical start and end must yield exactly that one month.
mi = list(month_iterator(2001, 1, 2001, 1))
assert mi == [(2001, 1)]


# Sanity check: January 2001 through January 2002 inclusive is 13 months.
mi = list(month_iterator(2001, 1, 2002, 1))
assert len(mi) == 13
assert mi[0] == (2001, 1)
assert mi[-1] == (2002, 1)

In [None]:
def url_iterator(start_year=None, start_month=None, end_year=None, end_month=None):
    """Yield one ``.pos`` file URL per month in the requested range.

    The months come from ``month_iterator`` called with the same four
    arguments; each URL ends in the zero-padded ``YYYYMM`` of its month.
    """
    months = month_iterator(start_year, start_month, end_year, end_month)
    yield from (
        "https://data.geomar.de/realtime/data/sbd/300034013902340/"
        f"300034013902340_{yr:04d}{mo:02d}.pos"
        for yr, mo in months
    )

In [None]:
def read_db_file(file_name):
    """Read one buoy position file into a time-indexed DataFrame.

    The first two lines of the input are skipped and the rest is parsed as a
    whitespace-delimited table whose first remaining line holds the column
    names. The date/time component columns (``%YYYY``, ``MM``, ``DD``, ``hh``,
    ``mm``, ``ss``) are combined into a ``Time`` index. Those columns, along
    with ``BattVolt``, ``MOMSN`` and ``ParsingDateTime``, are dropped.

    Parameters
    ----------
    file_name
        Anything ``pandas.read_csv`` accepts as its first argument.

    Returns
    -------
    pandas.DataFrame or None
        The parsed table indexed by ``Time``, or ``None`` when reading or
        parsing fails. A warning describing the failure is issued in that
        case so that skipped files do not go unnoticed.
    """
    import warnings

    try:
        df = pd.read_csv(
            file_name,
            skiprows=2,
            # Equivalent to the deprecated ``delim_whitespace=True``.
            sep=r"\s+",
        )

        df["Time"] = [
            datetime.datetime(*tup)
            for tup in zip(df["%YYYY"], df["MM"], df["DD"], df["hh"], df["mm"], df["ss"])
        ]

        df = df.drop(columns=[
            '%YYYY', 'MM', 'DD', 'hh', 'mm', 'ss',
            'BattVolt', 'MOMSN', 'ParsingDateTime',
        ])

        df = df.set_index("Time")
    except Exception as exc:
        # Best effort: an unreadable file is skipped rather than aborting the
        # caller. ``Exception`` (not a bare ``except``) lets KeyboardInterrupt
        # and SystemExit propagate.
        warnings.warn(f"Could not read {file_name!r}: {exc!r}", stacklevel=2)
        return None

    return df

In [None]:
urls = url_iterator(start_year=2021, start_month=7, end_year=2021, end_month=8)

In [None]:
dfs = filter(lambda df: df is not None, map(read_db_file, urls))

In [None]:
df = pd.concat(dfs)

In [None]:
df

In [None]:
df = df.resample("1H").mean()
df

In [None]:
df.to_csv("tmp_buoy_history.csv")

In [None]:
df.plot.scatter(x="Longitude", y="Latitude")