In [1]:
import pandas as pd
import datetime
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
import utils
import xarray as xr


# Quinsy

In [2]:
# Gather every Quinsy "Manual_Fix" log export and merge them into one table.
# The directory is given relative to the notebook's working directory, like the
# other data_from_cruise/ paths in this notebook, instead of an absolute path
# into one user's home directory.
quinsy_log_dir = Path('data_from_cruise/quinsy/LogFiles')
# glob() order depends on the filesystem; sort so the merged rows are reproducible.
sites_files = sorted(quinsy_log_dir.glob('*Manual_Fix*.txt'))
if not sites_files:
    raise FileNotFoundError(f"no '*Manual_Fix*.txt' files found in {quinsy_log_dir}")

# Read each file once and concatenate in a single call: calling pd.concat inside
# the loop re-copies every previously read row on each iteration.
# NOTE(review): the nested-list form of parse_dates (which merges 'Date' and 'Time'
# into one 'Date_Time' column) is deprecated in recent pandas releases - confirm
# against the pandas version in use.
quinsy = pd.concat([pd.read_csv(fn, parse_dates=[['Date', 'Time']]) for fn in sites_files])

# Keep only rows whose A-frame longitude is below 13
# (presumably degrees east, dropping fixes outside the study area - TODO confirm).
quinsy = quinsy[quinsy['A-Frame_Extended Longitude'] < 13]

# Make sure the output directory exists so the cell also runs on a fresh checkout.
quinsy_csv = Path('data_cleaned/quinsy.csv')
quinsy_csv.parent.mkdir(parents=True, exist_ok=True)
quinsy.to_csv(quinsy_csv, index=False)

# YUCO

In [3]:
# Every CSV exported for this vehicle, one sub-directory level below the export root.
# Sorted because glob() order depends on the filesystem.
yuco_files = sorted(Path('data_from_cruise/yuco/exports/YUCO-00410025').glob('*/*.csv'))
if not yuco_files:
    raise FileNotFoundError("no YUCO export CSV files found under data_from_cruise/yuco/exports/YUCO-00410025")

# Read each export once and concatenate in a single call; concatenating inside
# the loop re-copies the whole accumulated table for every file.
yuco_frames = []
for fn in yuco_files:
    yuco_add = pd.read_csv(fn)
    # The headers contain a replacement character (presumably where a degree sign
    # was lost in the export - TODO confirm); spell it out as 'deg'.
    yuco_add.columns = yuco_add.columns.str.replace('�', 'deg')
    yuco_frames.append(yuco_add)
yuco = pd.concat(yuco_frames)

# Drop columns that are empty in every row.
yuco = yuco.dropna(how='all', axis=1)

# Keep rows inside the latitude window 50-55 and longitude above 10 that were
# logged while the vehicle status was 'MISSION'.
yuco_lat = yuco['INX Latitude (deg)']
yuco_lon = yuco['INX Longitude (deg)']
yuco = yuco[
    (yuco_lat > 50)
    & (yuco_lat < 55)
    & (yuco_lon > 10)
    & (yuco['AUV Status'] == 'MISSION')
]

# TODO:
# - check the two lats, check good/bad gps
# - oxygen is weird: spikes, steps
# - salinity correction for oxygen
# - check pressure sensors
# - altitude
# - velocities are weird

# Convert the epoch-seconds column to timestamps, order chronologically and
# renumber the rows 0..n-1. assign() builds a new frame rather than writing a
# column into the filtered one.
yuco = (
    yuco
    .assign(datetime=pd.to_datetime(yuco['Timestamp (s) UTC+0'], unit='s'))
    .sort_values('datetime')
    .reset_index(drop=True)
)

In [4]:
# Persist the cleaned YUCO table. The row index is written as well (no index=False here).
yuco.to_csv(path_or_buf='data_cleaned/yuco.csv')

406556 rows × 42 columns

In [5]:
# Display the cleaned YUCO table (the notebook renders a truncated preview of it).
yuco

Unnamed: 0,Timestamp (s) UTC+0,Time since startup (s),AUV Status,Step Number,At surface (Y/N),INX Latitude (deg),INX Longitude (deg),Corrected Latitude (deg),Corrected Longitude (deg),GPS Coordinates Accepted (Y/N),...,Legato3 Turbidity 01 (NTU),Legato3 Chlorophyll (?),Internal Temperature (degC),External Temperature (degC),Battery Charge (%),Legato3 Cond. Cell Temp. (degC),Legato3 Temperature (degC),Legato3 ODO Temperature (degC),Legato3 Turbidity (?),datetime
0,1.694525e+09,437.920,MISSION,0.0,Y,54.977700,12.784680,,,Y,...,,,,19.97,95.0,,,,,2023-09-12 13:30:31.416000000
1,1.694525e+09,437.971,MISSION,1.0,Y,54.977700,12.784680,,,Y,...,,0.0,,19.97,95.0,23.779,19.2308,19.3089,0.0,2023-09-12 13:30:31.466000128
2,1.694525e+09,438.010,MISSION,1.0,Y,54.977700,12.784680,,,Y,...,,0.0,28.48,19.97,95.0,23.779,19.2308,19.3089,0.0,2023-09-12 13:30:31.506000128
3,1.694525e+09,438.070,MISSION,1.0,Y,54.977700,12.784680,,,Y,...,,0.0,,19.97,95.0,23.779,19.2289,19.3089,0.0,2023-09-12 13:30:31.565999872
4,1.694525e+09,438.120,MISSION,1.0,Y,54.977700,12.784680,,,Y,...,,,,19.97,95.0,,,,,2023-09-12 13:30:31.616000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
540399,1.717064e+09,2769.822,MISSION,22.0,N,54.979233,12.785708,54.979233,12.785708,N,...,0.0,0.0,,,,,,,,2024-05-30 10:20:29.318000128
540400,1.717064e+09,2769.873,MISSION,22.0,N,54.979233,12.785708,54.979233,12.785708,N,...,0.0,0.0,,,,,,,,2024-05-30 10:20:29.368999936
540401,1.717064e+09,2769.922,MISSION,22.0,N,54.979234,12.785708,54.979330,12.785191,N,...,,,,,,,,,,2024-05-30 10:20:29.417999872
540402,1.717064e+09,2769.974,MISSION,22.0,N,54.979235,12.785708,54.979330,12.785191,N,...,0.0,0.0,,,,,,,,2024-05-30 10:20:29.469000192


In [6]:
# Turn the cleaned YUCO table into a dataset with the project helper and attach
# the instrument metadata.
ds_yuco = utils.ds_from_df(yuco)
ds_yuco.attrs.update({
    "instrument name": "Seaber YUCO AUV",
    "instrument serial": "YUCO-00410025",
    "instrument calibration date": "2021-03-12",
})

# Compact the creation stamp for use in the file name: keep the text before the
# first ".", delete "-" and ":", turn the space into "T" and append "Z".
date_str = ds_yuco.attrs["date_created"]
stamp_table = str.maketrans({"-": None, ":": None, " ": "T"})
iso_str = date_str.partition(".")[0].translate(stamp_table) + "Z"

# The same identifier serves as dataset id, title and output file stem.
# NOTE(review): the summary says 2023, but the preview of `yuco` in this notebook
# also shows 2024 timestamps - confirm the intended wording.
fn = f"Baltic-sea_YUCO_OLAMUR-WP4_{iso_str}"
ds_yuco.attrs.update({
    "dataset_id": fn,
    "title": fn,
    "summary": "AUV data collected during the Offshore Low-trophic Aquaculture in Multi-Use Scenario Realisation (OLAMUR) project 2023",
})
ds_yuco.to_netcdf(f"data_for_erddap/yuco/{fn}.nc")

# CTD

In [7]:
# Raw CTD cast files (*.TOB). Sorted because glob() order depends on the
# filesystem and the casts are processed (and logged) in this order below.
ctd_files = sorted(Path('data_from_cruise/ctd/').glob('*.TOB'))


In [8]:
# Station log: each line is "<CTD id> - <position> - <comment>".
positions = pd.read_csv(
    'data_from_cruise/ctd/Positions.txt',
    sep=' - ',
    names=['CTD_id', 'location', 'comment'],
    engine='python',
)

# A position reads like "55 08.923'N 13 25.789'". Splitting on whitespace gives
# four pieces: latitude degrees, latitude minutes, longitude degrees, longitude minutes.
loc_table = positions['location'].str.split(expand=True)

# Decimal degrees = whole degrees + minutes / 60. The latitude minutes carry a
# two-character suffix ('N), the longitude minutes a one-character suffix (').
lat_minutes = loc_table[1].str.slice(stop=-2).astype(float)
positions['lat'] = loc_table[0].astype(int) + lat_minutes / 60
lon_minutes = loc_table[3].str.slice(stop=-1).astype(float)
positions['lon'] = loc_table[2].astype(int) + lon_minutes / 60

positions

Unnamed: 0,CTD_id,location,comment,lat,lon
0,I6040950_7,55 08.923'N 13 25.789',Just CTD,55.148717,13.429817
1,I6040950_10,55 08.914'N 13 25.814',Samples & YUCO,55.148567,13.430233
2,I6040950_11,55 08.353'N 12 57.537',SB deployment,55.139217,12.95895
3,I6040950_12,55 07.815'N 12 52.641',Just CTD,55.13025,12.87735


In [9]:
def clean_csv(fn):
    """Parse one CTD ``.TOB`` file, attach a position and write it out as CSV.

    The header is scanned for the line naming the variables (contains
    ``Datasets``), the line giving their units (contains ``[ Volt]``) and the
    first data row (sample number 1), which fixes how many lines to skip.

    Parameters
    ----------
    fn : pathlib.Path or str
        Path of the ``.TOB`` file. The cast name is the second-to-last
        dot-separated part of the file name.

    Returns
    -------
    pandas.DataFrame
        One row per sample with ``"<variable> [<unit>]"`` columns, a combined
        ``datetime`` column, ``latitude``/``longitude`` (NaN when no matching
        position is found) and ``cast_name``.

    Raises
    ------
    ValueError
        If the variables line, the units line or the first data row cannot be
        found in the file.

    Notes
    -----
    Reads the notebook-level tables ``positions`` (casts whose name contains
    ``I60``) and ``quinsy`` (all other casts) to look up the position.
    Side effect: writes ``data_cleaned/ctd/csv/<cast_name>.csv`` (without the
    ``cast_name`` column, which is added afterwards).
    """
    fn = Path(fn)  # also accept plain string paths

    variables_line = units_line = skips = None
    with open(fn, encoding="ISO-8859-1") as fp:
        for i, line in enumerate(fp):
            if 'Datasets' in line:
                variables_line = line
            if '[ Volt]' in line:
                units_line = line
            if '          1 ' in line:
                # Right-aligned sample number 1: first data row, the header ends here.
                skips = i
                break
    if variables_line is None or units_line is None or skips is None:
        # Previously this surfaced as an UnboundLocalError further down.
        raise ValueError(
            f"{fn}: could not locate the variables line, the units line and the first data row"
        )

    # The first two whitespace-separated tokens of the variables line are not variable names.
    variables = variables_line.split()[2:]
    # With spaces removed, strip the two leading and two trailing characters and
    # split the remaining "unit][unit][..." run into individual units.
    units = units_line.replace(' ', '')[2:-2].split('][')
    var_names = ['sample [number]'] + [f'{var} [{unit}]' for var, unit in zip(variables, units)]

    # sep=r'\s+' is the documented equivalent of the deprecated delim_whitespace=True.
    # NOTE(review): the nested-list form of parse_dates is deprecated in recent
    # pandas releases as well - confirm against the pandas version in use.
    df = pd.read_csv(
        fn,
        skiprows=skips,
        encoding="ISO-8859-1",
        names=var_names,
        parse_dates=[['IntD [Date]', 'IntT [Time]']],
        sep=r'\s+',
    )
    df = df.rename({'IntD [Date]_IntT [Time]': "datetime"}, axis=1)

    # Derive dissolved oxygen in umol/L from the mg/l column when it is not present.
    # NOTE(review): the file is decoded as ISO-8859-1; confirm that the "μ" in the
    # literal below is the same code point as the one in the decoded header.
    if 'DO [μmol/L]' not in df.columns:
        df['DO [μmol/L]'] = df['DO_mg [mg/l]'] * 31.252

    cast_name = fn.name.split('.')[-2]
    if 'I60' in cast_name:
        # Position comes from the station log; the mean of an empty selection is NaN.
        cast_positions = positions[positions.CTD_id == cast_name]
        df["latitude"] = cast_positions.lat.mean()
        df["longitude"] = cast_positions.lon.mean()
    else:
        # Position comes from the Quinsy fixes whose mainline name equals the
        # second-to-last "_"-separated part of the cast name. This is only
        # computed on this branch, so the I60 branch cannot fail on it.
        cast_loc = cast_name.split('_')[-2]
        loc_df = quinsy[quinsy['[Mainline] Name'] == cast_loc]
        df["latitude"] = loc_df['A-Frame_Extended Latitude'].mean()
        df["longitude"] = loc_df['A-Frame_Extended Longitude'].mean()

    fn_out = Path(f'data_cleaned/ctd/csv/{cast_name}.csv')
    # Create the output directory on demand so a fresh checkout does not fail here.
    fn_out.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(fn_out, index=False)
    # Added after the CSV is written, so it only appears in the returned frame.
    df["cast_name"] = cast_name

    return df
# Stack every cast into one table. Each file is parsed once and the frames are
# concatenated in a single call; concatenating inside the loop re-copies the
# accumulated table for every cast.
ctd_frames = [clean_csv(fn) for fn in ctd_files]
# pd.concat rejects an empty list, so fall back to an empty frame when there are no casts.
df_ctd = pd.concat(ctd_frames) if ctd_frames else pd.DataFrame()


In [10]:
# Write one NetCDF file per CTD cast; casts without a position are reported and skipped.
stamp_table = str.maketrans({"-": None, ":": None, " ": "T"})
for fn in ctd_files:
    df = clean_csv(fn)
    # Second-to-last dot-separated part of the file name, minus its first seven characters.
    cast_name = fn.name.split('.')[-2][7:]
    if np.all(np.isnan(df["longitude"])):
        print("location not found, skipping:", cast_name)
        continue

    ds_ctd = utils.ds_from_df(df)
    ds_ctd.attrs.update({
        "instrument name": "Sea & Sun Technology",
        "instrument serial": "CTM1794",
        "instrument calibration date": "2021-07-28",
    })

    # Compact the creation stamp: keep the text before the first ".", delete "-"
    # and ":", turn the space into "T" and append "Z".
    date_str = ds_ctd.attrs["date_created"]
    iso_str = date_str.partition(".")[0].translate(stamp_table) + "Z"

    # The same identifier serves as dataset id, title and output file stem.
    dataset_id = f"Baltic-sea_{cast_name}_OLAMUR-WP4_{iso_str}"
    ds_ctd.attrs.update({
        "dataset_id": dataset_id,
        "title": dataset_id,
        "summary": "CTD data collected during the Offshore Low-trophic Aquaculture in Multi-Use Scenario Realisation (OLAMUR) project 2023-2024",
    })
    ds_ctd.to_netcdf(f"data_for_erddap/ctd/{dataset_id}.nc")
    

location not found, skipping: 20230912_CTD_CAL_5
location not found, skipping: 20230912_CTD_CAL_3
location not found, skipping: 20230914_CTD_CAL_8
location not found, skipping: 20230912_CTD_CAL_1
location not found, skipping: 20230912_CTD_CAL_2
location not found, skipping: 20230914_CTD_CAL_7
location not found, skipping: 20230912_CTD_CAL_4
location not found, skipping: 20230914_CTD_CAL_6


# Gridded CTD data

"""
ds = xr.Dataset()
casts = df_ctd.cast_number.unique()
box_depth = 1
pressure_bins = np.arange(0.5, 31.5, box_depth)
ds['cast_number'] = ('cast_number', casts, {"name": "cast_number"})
ds['pressure_bin'] = ('pressure_bin',pressure_bins, {"name": "pressure_bin"})
vars = list(ds_ctd)
vars.append("datetime")

for name in vars:
    if "cast" in name:
        continue
    if name == "datetime":
        values = np.empty((len(pressure_bins), len(casts)), dtype=datetime.datetime)
    else:
        values = np.empty((len(pressure_bins), len(casts)))
    pressure = df_clean['pressure']
    for i, cast_num in enumerate(casts):
        df_cast = df_clean[df_clean.cast_number == cast_num]
        for j, pressure_centre in enumerate(pressure_bins):
            max_pressure = pressure_centre + 0.5 * box_depth
            min_pressure = pressure_centre - 0.5 * box_depth
            min_box = df_cast[df_cast.pressure >= min_pressure]
            box = min_box[min_box.pressure < max_pressure]
            if box[name].dtype =='<M8[ns]':
                try:
                    values[j, i] = box[name].values[0]
                except:
                    values[j, i] = None
            else:
                values[j, i] = np.nanmean(box[name])
    ds[name] = (('pressure_bin', 'cast_number'), values, utils.attrs_dict[name])
ds.attrs = utils.attrs
ds_ctd_gridded = ds
#ds_ctd_gridded.to_netcdf("data_cleaned/ctd/nc/ctd_gridded.nc")
"""