# Download PIRATA current data

In [None]:
# parameters
# Base name (no extension) of the output files written further down:
# "<pirata_file_name>.nc" for the merged dataset and "<pirata_file_name>.csv" for the table.
pirata_file_name = "tmp_pirata_currents"  # ....nc / ....csv

## Tech preamble

In [None]:
import xarray as xr
import pandas as pd
import numpy as np
import hvplot.pandas, hvplot.xarray
from pathlib import Path
from functools import reduce

## Download all data

In [None]:
# Start from an empty download directory.
!rm -rfv "pirata_data/"
!mkdir -p "pirata_data/"
# Recursively fetch every file matching "cur*cdf*" from the PMEL FTP server into pirata_data/.
# This call uses TAO_FTP_USER / TAO_FTP_PASS as they are in the environment of the kernel.
!bash -c 'wget -e robots=off -nv -r -c -np -nH --cut-dirs=3 --user="$TAO_FTP_USER" --password="$TAO_FTP_PASS" --accept "cur*cdf*" -P "pirata_data" "ftp://ftp.pmel.noaa.gov/high_resolution/realtime/cdf/"'
# Same download again, but only after sourcing .ftp_cred (presumably the file that defines
# TAO_FTP_USER / TAO_FTP_PASS); if the file is missing, `&&` skips the wget.
# NOTE(review): both calls target the same URL -- confirm that running both is intended
# (environment credentials vs. credential file) and not a leftover.
!bash -c 'source .ftp_cred && wget -e robots=off -nv -r -c -np -nH --cut-dirs=3 --user="$TAO_FTP_USER" --password="$TAO_FTP_PASS" --accept "cur*cdf*" -P "pirata_data" "ftp://ftp.pmel.noaa.gov/high_resolution/realtime/cdf/"'
# Decompress the downloaded *.gz files in place.
!gunzip -v pirata_data/*.gz

## Find all data files and load separately

In [None]:
# Collect the current files in the download directory in a stable, sorted order.
data_dir = Path("pirata_data/")
data_files = sorted(data_dir.glob("cur*cdf"))
data_files

In [None]:
# Number of files matched by the glob above (quick check that something was found).
len(data_files)

In [None]:
# Open every file lazily, keyed by its file name.
# chunks={} selects the Dask backend, because without it merging seems slow.
data_sets = {}
for data_file in data_files:
    data_sets[data_file.name] = xr.open_dataset(data_file, chunks={})

## Merge into one xarray Dataset

In [None]:
# Merge all per-file datasets in one call. xr.merge accepts an iterable of datasets,
# so the inputs are aligned once instead of re-merging a growing intermediate
# result for every additional file (as the pairwise reduce did).
ds = xr.merge(list(data_sets.values()))

In [None]:
# Evaluate the lazily merged (Dask-backed) dataset and keep the loaded result.
ds = ds.compute()

In [None]:
# Show the merged dataset for a visual check of dimensions and variables.
display(ds)

In [None]:
# Write the merged dataset to "<pirata_file_name>.nc".
ds.to_netcdf(f"{pirata_file_name}.nc")

In [None]:
# Print only the header (-h) of the file that was just written.
!ncdump -h {pirata_file_name}.nc

## Cast to Pandas dataframes and save as one CSV file

In [None]:
# Convert each per-file dataset to a flat table (coordinates become ordinary
# columns via reset_index), then stack all tables into one frame.
dfs = [
    file_ds.to_dataframe().reset_index()
    for file_ds in data_sets.values()
]
df = pd.concat(dfs, ignore_index=True)

In [None]:
# Show the concatenated table for a visual check.
display(df)

In [None]:
# Write the table to "<pirata_file_name>.csv"; the row index is not written.
df.to_csv(f"{pirata_file_name}.csv", index=False)

In [None]:
!head -n10 pirata_currents.csv

## Sanity check: plot time series

In [None]:
# Read the CSV back to check that the export round-trips.
# parse_dates restores "time" as datetime64; without it the column is read as plain
# strings and the time-series plots would not get a proper time axis.
df = pd.read_csv(f"{pirata_file_name}.csv", parse_dates=["time"])
display(df)

In [None]:
# Index the table by (lat, lon, depth) so one time series can be looked up by key.
# With the default RangeIndex from read_csv, df.index[0] is a plain integer and the
# tuple unpacking below raises a TypeError.
index_cols = ["lat", "lon", "depth"]
if list(df.index.names) != index_cols:  # keeps the cell re-runnable
    df = df.set_index(index_cols)
# Use the first (lat, lon, depth) combination in the table as the sample to plot.
lat, lon, depth = df.index[0]

In [None]:
# Select the rows for the chosen (lat, lon, depth) key once, then overlay the
# U_320 and V_321 lines against time.
selected_rows = df.loc[(lat, lon, depth)]
selected_rows.hvplot.line("time", "U_320") * selected_rows.hvplot.line("time", "V_321")

In [None]:
# Re-open the NetCDF file written earlier (rebinding ds) and show it.
ds = xr.open_dataset(f"{pirata_file_name}.nc")
display(ds)

In [None]:
# Select the same (lat, lon, depth) point from the NetCDF data once, dropping the
# now-scalar coordinates, then overlay the U_320 and V_321 lines against time.
selected_point = ds.sel(lat=lat, lon=lon, depth=depth, drop=True)
selected_point.hvplot.line(x="time", y="U_320") * selected_point.hvplot.line(x="time", y="V_321")