In [1]:
import pandas as pd
import datetime
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
import utils
import xarray as xr


# Quinsy

In [2]:
sites_files = list(Path('/home/callum/Documents/hack/olamur_process/data_from_cruise/quinsy/LogFiles').glob('*Manual_Fix*.txt'))
quinsy = pd.DataFrame()
for fn in sites_files:
    quinsy_add = pd.read_csv(fn, parse_dates=[['Date', 'Time']])
    quinsy = pd.concat((quinsy, quinsy_add))
quinsy = quinsy[quinsy['A-Frame_Extended Longitude']<13]
quinsy.to_csv('data_cleaned/quinsy.csv', index=False)

# YUCO

In [3]:
yuco_files = list(Path('data_from_cruise/yuco/exports/YUCO-00410025').glob('*/*.csv'))

yuco = pd.DataFrame()
for fn in yuco_files:
    yuco_add = pd.read_csv(fn)
    yuco = pd.concat((yuco, yuco_add))
yuco.dropna(how='all', axis=1, inplace=True)
yuco = yuco[yuco['INX Latitude (�)'] > 50]
yuco = yuco[yuco['INX Latitude (�)'] < 55]

yuco = yuco[yuco['INX Longitude (�)'] > 10]
yuco = yuco[yuco['AUV Status'] =='MISSION']
# check the two lats, check good/bad gps
# oxygen is weird, spikes, steps
# salinity correction for oxgyen
# check pressure sensors
# altitude
# velocities are weird
yuco['datetime'] = pd.to_datetime(yuco['Timestamp (s) UTC+0'],  unit='s')
yuco = yuco.sort_values('datetime')
yuco.index = np.arange(len(yuco))

In [4]:
yuco.to_csv('data_cleaned/yuco.csv')

In [6]:
ds_yuco = utils.ds_from_df(yuco)
ds_yuco.attrs["instrument name"] = "Seaber YUCO AUV" 
ds_yuco.attrs["instrument serial"] = "YUCO-00410025"
ds_yuco.attrs["instrument calibration date"] = "2021-03-12"
date_str = ds_yuco.attrs["date_created"]
iso_str = date_str.split(".")[0].replace("-", "").replace(" ", "T").replace(":", "") + "Z"
fn = f"Baltic-sea_YUCO_OLAMUR-WP4_{iso_str}"
ds_yuco.attrs["dataset_id"] = fn
ds_yuco.attrs["title"] = fn
ds_yuco.attrs["summary"] = "AUV data collected during the Offshore Low-trophic Aquaculture in Multi-Use Scenario Realisation (OLAMUR) project 2023"
ds_yuco.to_netcdf(f"data_for_erddap/yuco/{fn}.nc")

# CTD

In [7]:
ctd_files = list(Path('data_from_cruise/ctd/').glob('*.TOB'))


In [8]:
def clean_csv(fn):
    with open(fn,encoding = "ISO-8859-1") as fp:
        for i, line in enumerate(fp):
            if 'Datasets' in line:
                variables_line = line
            if '[ Volt]' in line:
                units_line = line
            if '          1 ' in line:
                skips = i
                break
    variables = variables_line.split()[2:]
    units = units_line.replace(' ', '')[2:-2].split('][')
    var_names = ['sample [number]'] + [f'{var} [{unit}]' for var, unit in zip(variables, units)]
    df = pd.read_csv(fn, skiprows=skips,  encoding = "ISO-8859-1", names = var_names, parse_dates = [['IntD [Date]', 'IntT [Time]']], delim_whitespace=True)
    df = df.rename({'IntD [Date]_IntT [Time]': "datetime"}, axis=1)
    
    if not 'DO [μmol/L]' in list(df):
        df['DO [μmol/L]'] = df['DO_mg [mg/l]'] * 31.252
    cast_name = fn.name.split('.')[-2]
    cast_loc = cast_name.split('_')[-2]
    loc_df = quinsy[quinsy['[Mainline] Name']==cast_loc]
    df["latitude"] = loc_df['A-Frame_Extended Latitude'].mean()
    df["longitude"] = loc_df['A-Frame_Extended Longitude'].mean()
    fn_out = Path(f'data_cleaned/ctd/csv/{cast_name}.csv')
    df.to_csv(fn_out, index=False)
    df["cast_name"] = cast_name

    return df
df_ctd = pd.DataFrame()

for fn in ctd_files:
    df_add = clean_csv(fn)
    df_ctd = pd.concat((df_ctd, df_add))


In [9]:
for fn in ctd_files:
    df = clean_csv(fn)
    cast_name = fn.name.split('.')[-2][7:]
    if np.isnan(df.longitude).all():
        print("location not found, skipping:", cast_name)
        continue
    ds_ctd = utils.ds_from_df(df)
    ds_ctd.attrs["instrument name"] = "Sea & Sun Technology" 
    ds_ctd.attrs["instrument serial"] = "CTM1794"
    ds_ctd.attrs["instrument calibration date"] = "2021-07-28"
    date_str = ds_ctd.attrs["date_created"]
    iso_str = date_str.split(".")[0].replace("-", "").replace(" ", "T").replace(":", "") + "Z"
    fn = f"Baltic-sea_{cast_name}_OLAMUR-WP4_{iso_str}"
    ds_ctd.attrs["dataset_id"] = fn
    ds_ctd.attrs["title"] = fn
    ds_ctd.attrs["summary"] = "CTD data collected during the Offshore Low-trophic Aquaculture in Multi-Use Scenario Realisation (OLAMUR) project 2023"
    ds_ctd.to_netcdf(f"data_for_erddap/ctd/{fn}.nc")
    

location not found, skipping: 20230912_CTD_CAL_5
location not found, skipping: 20230912_CTD_CAL_3
location not found, skipping: 20230914_CTD_CAL_8
location not found, skipping: 20230912_CTD_CAL_1
location not found, skipping: 20230912_CTD_CAL_2
location not found, skipping: 20230914_CTD_CAL_7
location not found, skipping: 20230912_CTD_CAL_4
location not found, skipping: 20230914_CTD_CAL_6


In [11]:
df_ctd = df_ctd.sort_values("datetime")
df_ctd.index = np.arange(len(df_ctd))
df_ctd["cast_number"] = 0
for i, cast_name in enumerate(df_ctd.cast_name.unique()):
    df_ctd.loc[df_ctd.cast_name == cast_name, "cast_number"] = i

ds_ctd = utils.ds_from_df(df_ctd)

date_str = ds_ctd.attrs["date_created"]
iso_str = date_str.split(".")[0].replace("-", "").replace(" ", "T").replace(":", "") + "Z"
fn = f"Baltic-sea_all_ctd_OLAMUR-WP4_{iso_str}"
ds_ctd.attrs["dataset_id"] = fn
ds_ctd.attrs["title"] = fn
ds_ctd.attrs["summary"] = "CTD data collected during the Offshore Low-trophic Aquaculture in Multi-Use Scenario Realisation (OLAMUR) project"

#ds_ctd.to_netcdf(f"data_for_erddap/ctd/{fn}.nc")

In [12]:
df_clean = ds_ctd.to_pandas()
df_clean["datetime"] = df_clean.index

In [13]:
ds = xr.Dataset()
casts = df_ctd.cast_number.unique()
box_depth = 1
pressure_bins = np.arange(0.5, 31.5, box_depth)
ds['cast_number'] = ('cast_number', casts, {"name": "cast_number"})
ds['pressure_bin'] = ('pressure_bin',pressure_bins, {"name": "pressure_bin"})
vars = list(ds_ctd)
vars.append("datetime")

for name in vars:
    if "cast" in name:
        continue
    if name == "datetime":
        values = np.empty((len(pressure_bins), len(casts)), dtype=datetime.datetime)
    else:
        values = np.empty((len(pressure_bins), len(casts)))
    pressure = df_clean['pressure']
    for i, cast_num in enumerate(casts):
        df_cast = df_clean[df_clean.cast_number == cast_num]
        for j, pressure_centre in enumerate(pressure_bins):
            max_pressure = pressure_centre + 0.5 * box_depth
            min_pressure = pressure_centre - 0.5 * box_depth
            min_box = df_cast[df_cast.pressure >= min_pressure]
            box = min_box[min_box.pressure < max_pressure]
            if box[name].dtype =='<M8[ns]':
                try:
                    values[j, i] = box[name].values[0]
                except:
                    values[j, i] = None
            else:
                values[j, i] = np.nanmean(box[name])
    ds[name] = (('pressure_bin', 'cast_number'), values, utils.attrs_dict[name])
ds.attrs = utils.attrs
ds_ctd_gridded = ds
#ds_ctd_gridded.to_netcdf("data_cleaned/ctd/nc/ctd_gridded.nc")

  values[j, i] = np.nanmean(box[name])


KeyError: 'TEMP'