# Download buoy data

We'll download all realtime buoy data from <ftp://ftp.pmel.noaa.gov/high_resolution/realtime/cdf/> (you need to be logged in to see this directory).

For now, we'll get:
- temperature `t[0-9]*hr.cdf*`
- currents `cur*hr.cdf*`
- winds `w[0-9]*hr.cdf*`

In [None]:
# parameters
# Base name (without extension) of the merged output files; the notebook
# appends ".nc" for the netCDF file and ".csv" for the CSV file.
buoy_file_name = "tmp_buoy_data"  # ....nc / ....csv
# Directory the raw files are downloaded into. It is deleted and recreated
# by the download cell.
raw_data_dir = "raw_buoy_data"
# CSV file that receives the distinct (lat, lon) buoy positions.
buoy_positions_file = "tmp_buoy_positions.csv"

## Tech preamble

In [None]:
import shlex
from functools import reduce
from pathlib import Path

import hvplot.pandas, hvplot.xarray
import numpy as np
import pandas as pd
import xarray as xr

## Download all data

In [None]:
# Recursive wget command that fetches the temperature, current and wind
# files into `raw_data_dir`.
# "$TAO_FTP_USER" / "$TAO_FTP_PASS" are kept as shell variables: they are
# meant to be expanded by bash when the command runs, not by Python.
# The target directory is shell-quoted so that spaces, quotes or `$` in
# `raw_data_dir` cannot break (or inject into) the command line.
wget_call = (
    'wget -e robots=off -nv -r -c -np -nH --cut-dirs=3'
    ' --user="$TAO_FTP_USER" --password="$TAO_FTP_PASS"'
    ' --accept "t[0-9]*hr.cdf*,cur*hr.cdf*,w[0-9]*hr.cdf*"'
    f' -P {shlex.quote(str(raw_data_dir))}'
    ' "ftp://ftp.pmel.noaa.gov/high_resolution/realtime/cdf/"'
)
# Each variant is handed to `bash -c` as one single argument, so the whole
# command is quoted once more. `shlex.quote` wraps it in single quotes and
# escapes any single quote inside, which plain '...' wrapping does not.
# Variant that first sources the credentials from the file `.ftp_cred`:
wget_call_w_cred_sourcing = shlex.quote(f"source .ftp_cred && {wget_call}")
# Variant that relies on the credentials already being in the environment:
wget_call = shlex.quote(wget_call)

In [None]:
!rm -rfv {raw_data_dir}
!mkdir -p {raw_data_dir}
# try download with credentials from the env vars:
!bash -c {wget_call}
# if this fails, sourcing the credentials from a file may work:
!bash -c {wget_call_w_cred_sourcing}
!gunzip -v {raw_data_dir}/*.gz

## Find all data files and load separately

In [None]:
# All uncompressed files matching "*hr.cdf" in the download directory, in
# sorted order (`sorted` already returns a list).
data_files = sorted(Path(raw_data_dir).glob("*hr.cdf"))
data_files

In [None]:
# Number of data files found.
len(data_files)

In [None]:
# Open every file as its own Dataset, keyed by file name.
# `chunks={}` selects the Dask backend, because without it merging seems slow?
data_sets = {
    data_file.name: xr.open_dataset(data_file, chunks={})
    for data_file in data_files
}

## Merge into one xarray Dataset

In [None]:
# Fold the per-file datasets into one, merging them pairwise from left to right.
ds = reduce(
    lambda merged, nxt: xr.merge([merged, nxt]),
    data_sets.values(),
)

In [None]:
# Evaluate the lazily merged (Dask-backed) dataset and load it into memory.
ds = ds.compute()

In [None]:
# Show the merged dataset.
display(ds)

In [None]:
# Write the merged dataset to "<buoy_file_name>.nc".
ds.to_netcdf(f"{buoy_file_name}.nc")

In [None]:
# Print only the header (dimensions, variables, attributes) of the file just written.
!ncdump -h {buoy_file_name}.nc

## Cast to Pandas dataframes and save as one CSV file

In [None]:
# One flat dataframe per input file (index levels turned into ordinary
# columns), then stack them all row-wise under a fresh integer index.
dfs = [
    file_ds.to_dataframe().reset_index()
    for file_ds in data_sets.values()
]
df = pd.concat(objs=dfs, ignore_index=True)

In [None]:
# Show the concatenated dataframe.
display(df)

In [None]:
# Write the dataframe to "<buoy_file_name>.csv" without the integer row index.
df.to_csv(f"{buoy_file_name}.csv", index=False)

In [None]:
# Peek at the first ten lines of the CSV file.
!head -n10 {buoy_file_name}.csv

## Extract buoy positions and save to file

In [None]:
# Distinct (lat, lon) pairs present in the data, ordered by descending
# latitude and then descending longitude, with a clean 0..n-1 index.
buoy_positions = (
    df.groupby(["lat", "lon"])
    .size()                # one row per (lat, lon) pair
    .reset_index()         # lat / lon back to columns; the counts land in column 0
    .drop(columns=0)       # the counts themselves are not needed
    .sort_values(["lat", "lon"], ascending=False)
    .reset_index(drop=True)
)
buoy_positions

In [None]:
# Save the buoy positions without the row index.
buoy_positions.to_csv(buoy_positions_file, index=False)

## Sanity check: plot time series

In [None]:
# Read the CSV file back in as a round-trip check.
# CSV stores timestamps as text, so "time" is parsed back into datetimes;
# otherwise the time-series plots would get a string axis instead of a
# proper time axis.
df = pd.read_csv(f"{buoy_file_name}.csv", parse_dates=["time"])
# Index by location so that a single (lat, lon, depth) can be selected with .loc.
df = df.set_index(["lat", "lon", "depth"])
display(df)

In [None]:
# Use the (lat, lon, depth) of the first row as the sample location to plot.
lat, lon, depth = df.index[0]

In [None]:
# Overlay the "U_320" and "V_321" time series at the sample location,
# taken from the dataframe that was read back from CSV.
location_df = df.loc[(lat, lon, depth)]
u_line = location_df.hvplot.line("time", "U_320")
v_line = location_df.hvplot.line("time", "V_321")
u_line * v_line

In [None]:
# Read the netCDF file back in as a round-trip check and show it.
ds = xr.open_dataset(f"{buoy_file_name}.nc")
display(ds)

In [None]:
# Same overlay as for the CSV data, now taken from the re-opened netCDF
# dataset at the same sample location.
location_ds = ds.sel(lat=lat, lon=lon, depth=depth, drop=True)
u_line = location_ds["U_320"].hvplot.line()
v_line = location_ds["V_321"].hvplot.line()
u_line * v_line

---

In [None]:
!echo "Finished: $(date -Ins) UTC"

---
See https://github.com/willirath/nia-prediction-low-latitudes for details.