In [4]:

from pathlib import Path
import itertools as it
import datetime as dt
from dateutil import parser as dtparser
import pandas as pd
import fsspec
import xarray as xr
import matplotlib.pyplot as plt
import hvplot.xarray
import csv
import echopype as ep
import os
import warnings
from echopype import open_raw
import dask
from dask.distributed import Client
from tqdm import tqdm
import requests

warnings.simplefilter("ignore", category=DeprecationWarning)
fs = fsspec.filesystem('https')

In [5]:
from importlib.metadata import version
version('echopype')

'0.8.1'


Making Data folder in current working directory to store files. Please note this folder won't be pushed on github as it's included in .gitignore.
From now on the code will use 'current_directory' variable for accessing the files, and 'data_folder' for accessing data files.


In [6]:

# Get the current working directory
current_directory = os.getcwd()

# Define the relative path to the "data" folder
data_folder = os.path.join(current_directory, 'data')

# If the "data" folder doesn't exist, create it
if not os.path.exists(data_folder):
    os.makedirs(data_folder)

In [7]:
ooi_raw_url = (
    "https://rawdata.oceanobservatories.org/files/"
    "CE04OSPS/PC01B/ZPLSCB102_10.33.10.143/"
)

In [8]:
from datetime import datetime
def in_range(raw_file: str, start: dt.datetime, end: dt.datetime) -> bool:
    """Check if file url is in datetime range"""
    print(raw_file)
    file_name = Path(raw_file).name
    print('File Name')
    print(file_name)
    if 'OOI-' in file_name :
        file_datetime = dtparser.parse(file_name, fuzzy=True)
    #else :
    #    format_string = "ZPLSCB102-D%Y%m%d-T%H%M%S.raw"
    #    file_datetime = datetime.strptime(file_name, format_string)
        print(file_datetime)
        return file_datetime >= start and file_datetime <= end
    return False

In [9]:
start_datetime = dt.datetime(2017, 8, 21, 0, 0)
end_datetime = dt.datetime(2017, 8, 22, 0, 0)


In [10]:
desired_day_urls = []
for year in range(start_datetime.year, end_datetime.year + 1):
    for month in range(start_datetime.month, end_datetime.month + 1):
        for day  in range(start_datetime.day, end_datetime.day + 1):
            if len(str(day)) == 2 and len(str(month)) ==2 :
                desired_day_urls.append(ooi_raw_url+ str(year) + '/' + str(month) + '/' + str(day))
            elif len(str(day)) == 2 and len(str(month)) ==1 :
                desired_day_urls.append(ooi_raw_url+ str(year) + '/0' + str(month) + '/' + str(day))
            elif len(str(day)) == 1 and len(str(month)) ==2 :
                desired_day_urls.append(ooi_raw_url+ str(year) + '/' + str(month) + '/0' + str(day))
            elif len(str(day)) == 1 and len(str(month)) ==1 :
                desired_day_urls.append(ooi_raw_url+ str(year) + '/0' + str(month) + '/0' + str(day))

In [11]:
#all_raw_file_urls = it.chain.from_iterable([fs.glob(f"{day_url}/*.raw") for day_url in desired_day_urls])
all_raw_file_urls = it.chain.from_iterable([fs.glob(f"{day_url}/*.raw") for day_url in desired_day_urls if  requests.get(day_url).status_code ==200 ])

In [12]:
desired_raw_file_urls = list(filter(
    lambda raw_file: in_range(
        raw_file,
        start_datetime-dt.timedelta(hours=0),  # 3 hour buffer to select files
        end_datetime+dt.timedelta(hours=0)
    ),
    all_raw_file_urls
))
print(f"There are {len(desired_raw_file_urls)} raw files within the specified datetime range.")

https://rawdata.oceanobservatories.org/files/CE04OSPS/PC01B/ZPLSCB102_10.33.10.143/2017/08/21/OOI-D20170821-T000000.raw
File Name
OOI-D20170821-T000000.raw
2017-08-21 00:00:00
https://rawdata.oceanobservatories.org/files/CE04OSPS/PC01B/ZPLSCB102_10.33.10.143/2017/08/21/OOI-D20170821-T013914.raw
File Name
OOI-D20170821-T013914.raw
2017-08-21 01:39:14
https://rawdata.oceanobservatories.org/files/CE04OSPS/PC01B/ZPLSCB102_10.33.10.143/2017/08/21/OOI-D20170821-T031816.raw
File Name
OOI-D20170821-T031816.raw
2017-08-21 03:18:16
https://rawdata.oceanobservatories.org/files/CE04OSPS/PC01B/ZPLSCB102_10.33.10.143/2017/08/21/OOI-D20170821-T045717.raw
File Name
OOI-D20170821-T045717.raw
2017-08-21 04:57:17
https://rawdata.oceanobservatories.org/files/CE04OSPS/PC01B/ZPLSCB102_10.33.10.143/2017/08/21/OOI-D20170821-T063618.raw
File Name
OOI-D20170821-T063618.raw
2017-08-21 06:36:18
https://rawdata.oceanobservatories.org/files/CE04OSPS/PC01B/ZPLSCB102_10.33.10.143/2017/08/21/OOI-D20170821-T081522.raw


In [13]:
len(desired_raw_file_urls)

16

In [14]:

# Creating new CSV file for storing raw files urls

csv_file_path =  os.path.join(data_folder, 'raw_files.csv')

# Create an empty CSV file
with open(csv_file_path, 'w', newline='') as csvfile:
    # Create a CSV writer object
    csv_writer = csv.writer(csvfile)

    # Write an empty row to the CSV file (optional)
    csv_writer.writerow([])

print(f"Empty CSV file created at: {csv_file_path}")

Empty CSV file created at: /Users/ishankvasania/Documents/Projects/Technologies/Python/ooi-workflow/data/raw_files.csv


In [15]:
pd.DataFrame(desired_raw_file_urls).to_csv(csv_file_path)

In [16]:
desired_raw_file_urls = pd.read_csv(csv_file_path)
desired_raw_file_urls.drop(['Unnamed: 0'], axis =1 , inplace=True)
desired_raw_file_urls = list(desired_raw_file_urls['0'])
desired_raw_file_urls

['https://rawdata.oceanobservatories.org/files/CE04OSPS/PC01B/ZPLSCB102_10.33.10.143/2017/08/21/OOI-D20170821-T000000.raw',
 'https://rawdata.oceanobservatories.org/files/CE04OSPS/PC01B/ZPLSCB102_10.33.10.143/2017/08/21/OOI-D20170821-T013914.raw',
 'https://rawdata.oceanobservatories.org/files/CE04OSPS/PC01B/ZPLSCB102_10.33.10.143/2017/08/21/OOI-D20170821-T031816.raw',
 'https://rawdata.oceanobservatories.org/files/CE04OSPS/PC01B/ZPLSCB102_10.33.10.143/2017/08/21/OOI-D20170821-T045717.raw',
 'https://rawdata.oceanobservatories.org/files/CE04OSPS/PC01B/ZPLSCB102_10.33.10.143/2017/08/21/OOI-D20170821-T063618.raw',
 'https://rawdata.oceanobservatories.org/files/CE04OSPS/PC01B/ZPLSCB102_10.33.10.143/2017/08/21/OOI-D20170821-T081522.raw',
 'https://rawdata.oceanobservatories.org/files/CE04OSPS/PC01B/ZPLSCB102_10.33.10.143/2017/08/21/OOI-D20170821-T095435.raw',
 'https://rawdata.oceanobservatories.org/files/CE04OSPS/PC01B/ZPLSCB102_10.33.10.143/2017/08/21/OOI-D20170821-T113343.raw',
 'https:

In [18]:
%%time

client = Client()

def process_raw_file(raw_file_url, output_dpath):
    try:
        print(f"Processing: {raw_file_url}")
        ed = ep.open_raw(raw_file=raw_file_url, sonar_model='ek60', use_swap=True)
        ed.to_zarr(save_path=data_folder, overwrite=True)
        print(f"Completed processing: {raw_file_url}")
    except Exception as e:
        print(f"Error processing {raw_file_url}: {e}")


# Create Dask delayed objects for processing each raw file
delayed_processing = [dask.delayed(process_raw_file)(raw_file_url, data_folder) for raw_file_url in tqdm(desired_raw_file_urls)]

# Trigger Dask computations
dask.compute(*delayed_processing)

Perhaps you already have a cluster running?
Hosting the HTTP server on port 59701 instead
100%|██████████| 16/16 [00:00<00:00, 10618.49it/s]
  encoded_data, _, _ = coding.times.encode_cf_datetime(
  encoded_data, _, _ = coding.times.encode_cf_datetime(
  encoded_data, _, _ = coding.times.encode_cf_datetime(
  encoded_data, _, _ = coding.times.encode_cf_datetime(
  encoded_data, _, _ = coding.times.encode_cf_datetime(
  encoded_data, _, _ = coding.times.encode_cf_datetime(
  encoded_data, _, _ = coding.times.encode_cf_datetime(
  encoded_data, _, _ = coding.times.encode_cf_datetime(
  encoded_data, _, _ = coding.times.encode_cf_datetime(
  encoded_data, _, _ = coding.times.encode_cf_datetime(
  encoded_data, _, _ = coding.times.encode_cf_datetime(
  encoded_data, _, _ = coding.times.encode_cf_datetime(
  encoded_data, _, _ = coding.times.encode_cf_datetime(
  encoded_data, _, _ = coding.times.encode_cf_datetime(
  encoded_data, _, _ = coding.times.encode_cf_datetime(
  encoded_data, _, 

CPU times: user 4min 25s, sys: 50 s, total: 5min 15s
Wall time: 25min 58s


(None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None)

In [19]:
client.close()


Processing: https://rawdata.oceanobservatories.org/files/CE04OSPS/PC01B/ZPLSCB102_10.33.10.143/2017/08/21/OOI-D20170821-T145147.raw
Error processing https://rawdata.oceanobservatories.org/files/CE04OSPS/PC01B/ZPLSCB102_10.33.10.143/2017/08/21/OOI-D20170821-T145147.raw: 
Processing: https://rawdata.oceanobservatories.org/files/CE04OSPS/PC01B/ZPLSCB102_10.33.10.143/2017/08/21/OOI-D20170821-T031816.raw
Processing: https://rawdata.oceanobservatories.org/files/CE04OSPS/PC01B/ZPLSCB102_10.33.10.143/2017/08/21/OOI-D20170821-T063618.raw
Completed processing: https://rawdata.oceanobservatories.org/files/CE04OSPS/PC01B/ZPLSCB102_10.33.10.143/2017/08/21/OOI-D20170821-T063618.raw
Completed processing: https://rawdata.oceanobservatories.org/files/CE04OSPS/PC01B/ZPLSCB102_10.33.10.143/2017/08/21/OOI-D20170821-T031816.raw
Processing: https://rawdata.oceanobservatories.org/files/CE04OSPS/PC01B/ZPLSCB102_10.33.10.143/2017/08/22/OOI-D20170822-T000000.raw
Processing: https://rawdata.oceanobservatories.or

In [25]:
# output_dpath = Path(data_folder)
from pathlib import Path
data_folder_path = Path(data_folder)  # Convert the string to a Path object

print(data_folder_path)
ed_list = []
for converted_file in sorted(data_folder_path.glob("*.zarr")):
    print(converted_file)
    ed_list.append(ep.open_converted(converted_file))

/Users/ishankvasania/Documents/Projects/Technologies/Python/ooi-workflow/data
/Users/ishankvasania/Documents/Projects/Technologies/Python/ooi-workflow/data/OOI-D20170821-T000000.zarr
/Users/ishankvasania/Documents/Projects/Technologies/Python/ooi-workflow/data/OOI-D20170821-T013914.zarr
/Users/ishankvasania/Documents/Projects/Technologies/Python/ooi-workflow/data/OOI-D20170821-T031816.zarr
/Users/ishankvasania/Documents/Projects/Technologies/Python/ooi-workflow/data/OOI-D20170821-T045717.zarr
/Users/ishankvasania/Documents/Projects/Technologies/Python/ooi-workflow/data/OOI-D20170821-T063618.zarr
/Users/ishankvasania/Documents/Projects/Technologies/Python/ooi-workflow/data/OOI-D20170821-T095435.zarr
/Users/ishankvasania/Documents/Projects/Technologies/Python/ooi-workflow/data/OOI-D20170821-T180952.zarr
/Users/ishankvasania/Documents/Projects/Technologies/Python/ooi-workflow/data/OOI-D20170821-T194853.zarr
/Users/ishankvasania/Documents/Projects/Technologies/Python/ooi-workflow/data/OOI-

In [26]:
ed = ep.combine_echodata(ed_list)

In [27]:
ds_Sv = ep.calibrate.compute_Sv(ed).compute()

In [28]:
ds_Sv