### Reading in SeaBASS (.sb) Datafiles

##### Ceridwyn Hunter - 2025/08/04
_____________________________________________________

In [None]:
# Imports

import re
import math
import pandas as pd
import xarray as xr
import holoviews as hv
import geopandas as gpd
from io import StringIO
from pathlib import Path
from holoviews import opts
from datetime import datetime
from holoviews.element.tiles import CartoLight

hv.extension('bokeh')


In [None]:
# ─── 1) DEFINE SB_PARSER ───────────────────────────────────────────────────────

def parse_sb(file_path):
    """Read a SeaBASS (.sb) file and return ``(metadata, df)``.

    The text before ``/end_header`` is scanned for ``/key=value`` lines; the
    text after it is read as a delimited table whose column names come from
    the ``/fields`` header. The column separator follows the ``/delimiter``
    header (``comma``, ``space`` or ``tab``) and falls back to a comma when
    that header is absent or has another value.

    Parameters
    ----------
    file_path : str or pathlib.Path
        Location of the SeaBASS file.

    Returns
    -------
    metadata : dict
        The selected header entries as strings (any ``[...]`` part removed),
        plus ``fields_list`` (list of column names), ``single_spectrum``
        (bool) and ``start_datetime`` / ``end_datetime`` (``datetime``).
    df : pandas.DataFrame
        The columns ``date``, ``time``, ``lat``, ``lon`` and ``datetime``.

    Raises
    ------
    ValueError
        If the file contains no ``/end_header`` marker.
    KeyError
        If the ``/fields`` header is missing, or a start/end date or time
        header that is needed is missing.
    """
    text = Path(file_path).read_text()
    header, marker, body = text.partition('/end_header')
    if not marker:
        raise ValueError(f"{file_path}: no '/end_header' marker found")

    # 1a) extract header entries
    wanted_keys = {
        'station', 'data_file_name',
        'start_date', 'end_date',
        'start_time', 'end_time',
        'fields',
        'north_latitude', 'south_latitude',
        'west_longitude', 'east_longitude',
    }
    # pandas separators for the values a /delimiter header can declare
    separators = {'comma': ',', 'tab': '\t', 'space': r'\s+'}
    delimiter = 'comma'
    metadata = {}
    for line in header.splitlines():
        if not line.startswith('/') or '=' not in line:
            continue
        key, val = line[1:].split('=', 1)
        # drop bracketed annotations such as "[GMT]" or "[DEG]" from the value
        key, val = key.strip(), re.sub(r'\[.*?\]', '', val).strip()
        if key in wanted_keys:
            metadata[key] = val
        elif key == 'delimiter':
            # only used to pick the separator; not stored in metadata
            delimiter = val.lower()

    # 1b) parse the /fields list & store it
    if 'fields' not in metadata:
        raise KeyError(f"{file_path}: header has no '/fields' entry")
    fields_list = [f.strip() for f in metadata['fields'].split(',')]
    metadata['fields_list'] = fields_list

    # 1c) read the body into a DataFrame
    df = pd.read_csv(
        StringIO(body.strip()),
        sep=separators.get(delimiter, ','),
        names=fields_list,
        comment='/'
    )

    # 1d) detect single-spectrum files & fill lat/lon if missing
    is_spectrum = ('lat' not in df.columns or 'lon' not in df.columns)
    metadata['single_spectrum'] = is_spectrum
    if is_spectrum:
        # fall back to the header bounds (north/west first), then to 0
        lat0 = float(metadata.get('north_latitude', metadata.get('south_latitude', 0)))
        lon0 = float(metadata.get('west_longitude',  metadata.get('east_longitude',  0)))
        df['lat'], df['lon'] = lat0, lon0

    # 1e) ensure date/time exist
    if 'date' not in df.columns:
        df['date'] = metadata['start_date']
    if 'time' not in df.columns:
        df['time'] = metadata['start_time']

    # 1f) build datetime column
    df['datetime'] = pd.to_datetime(df['date'].astype(str) + ' ' + df['time'])

    # 1g) parse header start/end datetimes
    def _pd(d, t):
        # header values already had their "[...]" part removed in step 1a
        dfmt = '%Y-%m-%d' if '-' in d else '%Y%m%d'
        return datetime.strptime(f"{d} {t}", f"{dfmt} %H:%M:%S")
    metadata['start_datetime'] = _pd(metadata['start_date'], metadata['start_time'])
    metadata['end_datetime']   = _pd(metadata['end_date'],   metadata['end_time'])

    # 1h) keep only the five columns you care about
    df = df[['date', 'time', 'lat', 'lon', 'datetime']]

    return metadata, df


In [None]:
# ─── 2.a) COLLECT & PREPARE ALL FILES ──────────────────────────────────────────

# Directory tree that is searched recursively for .sb files
root = Path("/home/jovyan/shared-public/pace-hackweek/SeePACE/Hackweek_PACE-PAX_09_22-28")
metadata_list = []
search_types = ['Rrs', 'AOP'] #  Add additional search parameters if desired ***********************

# Sort the matches so each file keeps the same position in metadata_list from
# run to run (later cells look entries up by position).
for sb in sorted(root.rglob('*.sb')):
    meta, df = parse_sb(sb)

    # collapse spectral files to exactly one row
    if meta['single_spectrum']:
        df = df.head(1)

    meta['data'] = df
    # detect which of your search_types appear in the original fields
    # (substring match against each field name)
    meta['Data_Type'] = [
        t for t in search_types
        if any(t in f for f in meta['fields_list'])
    ]
    metadata_list.append(meta)


In [None]:
# ─── 2.b) CONFIRM PATH/FILES EXIST ──────────────────────────────────────────

# i. Report whether the root path is present and whether it is a directory
print("Exists (T/F) ", root.exists())
print("Is directory (T/F) ", root.is_dir())

# ii. Either warn about a bad path, or print the names of up to ten entries
if not (root.exists() and root.is_dir()):
    print("Path not found—check your spelling or mount points.")
else:
    # zip against range(10) stops after the tenth entry
    for i, p in zip(range(10), root.iterdir()):
        print("-", p.name)


In [None]:
# ─── 3) BUILD SUMMARY DATAFRAME ─────────────────────────────────────────────

# (summary column label, key in each metadata dict), in display order
summary_fields = (
    ('Station',        'station'),
    ('Data_File_Name', 'data_file_name'),
    ('Start_Datetime', 'start_datetime'),
    ('End_Datetime',   'end_datetime'),
    ('Data_Type',      'Data_Type'),
)

# One summary row per parsed file
summary_df = pd.DataFrame([
    {label: m[key] for label, key in summary_fields}
    for m in metadata_list
])

display(summary_df)


In [None]:
# ─── 4) BUILD & CONCATENATE XR DATASETS ─────────────────────────────────────

ds_list, file_names = [], []

for m in metadata_list:
    # Renumber the rows 0..n-1 and expose that numbering as a 'record' column
    df = m['data'].reset_index(drop=True).rename_axis('record').reset_index()

    # lat / lon / datetime all run along the 'record' dimension
    ds = xr.Dataset(
        {col: ('record', df[col]) for col in ('lat', 'lon', 'datetime')},
        coords={'record': df['record']},
    )

    # add a length-1 'file' dimension labelled with the file name
    ds = ds.expand_dims(file=[m['data_file_name']])

    ds_list.append(ds)
    file_names.append(m['data_file_name'])

# Stack the per-file datasets along 'file'
ds_combined = xr.concat(ds_list, dim='file', coords='minimal', compat='override')

print(ds_combined)

In [None]:
# Preview the parsed table of one file. The position is checked first so that
# a collection with fewer files reports that instead of raising IndexError.
preview_index = 30
if preview_index < len(metadata_list):
    display(metadata_list[preview_index]['data'])
else:
    print(f"Only {len(metadata_list)} files were parsed; nothing at index {preview_index}.")

_____________________________________________________
### Initial Plot of SeaBASS Datafiles


In [None]:

import holoviews as hv
from holoviews.element.tiles import CartoLight
from bokeh.models import HoverTool
import math

# Initialize HoloViews for Bokeh
hv.extension('bokeh')

# Helper: convert lon/lat → Web Mercator
def lonlat_to_mercator(lon, lat):
    """Project one lon/lat pair in degrees to Web Mercator ``(x, y)``.

    Both outputs are scaled by the sphere radius 6378137.0. ``lat`` must stay
    strictly between -90 and 90, otherwise the logarithm is not defined.
    """
    radius = 6378137.0
    # angle whose tangent feeds the logarithm: (90 + lat) / 2, in radians
    half_angle = (90 + lat) * math.pi/360.0
    easting = lon * (math.pi/180.0) * radius
    northing = math.log(math.tan(half_angle)) * radius
    return easting, northing

# Gather every layer: an optional path plus one points layer per file
elements = []

for m in metadata_list:
    name = m['data_file_name']

    # Work on a copy and attach the columns the hover tooltip reads
    df = m['data'].copy().assign(
        File=name,
        Start_str=m['start_datetime'].strftime("%Y-%m-%d %H:%M:%S"),
        End_str=m['end_datetime'].strftime("%Y-%m-%d %H:%M:%S"),
        Data_Type_str=", ".join(m['Data_Type']),
    )

    # Lon/lat → Web Mercator, one (x, y) pair per row
    df['merc_x'], df['merc_y'] = zip(*map(lonlat_to_mercator, df['lon'], df['lat']))

    # A file with more than one row also gets a connecting line
    if len(df) > 1:
        coords = df[['merc_x','merc_y']].values
        line = hv.Path(coords, kdims=['x','y']).opts(color='red', line_width=2)
        elements.append(line)

    # Every file contributes a points layer with its own hover tool
    tooltip_rows = [
        ('File', '@File'),
        ('Start','@Start_str'),
        ('End',  '@End_str'),
        ('Type', '@Data_Type_str'),
    ]
    points = hv.Points(
        df,
        kdims=['merc_x','merc_y'],
        vdims=['File','Start_str','End_str','Data_Type_str'],
    ).opts(
        size=6,
        color='blue',
        tools=[HoverTool(tooltips=tooltip_rows), 'wheel_zoom', 'pan'],
        active_tools=['wheel_zoom'],
    )
    elements.append(points)

# Basemap: no axes, no tools of its own
tiles = CartoLight().opts(
    width=900, height=700,
    xaxis=None, yaxis=None,
    tools=[], active_tools=[],
)

# Draw all layers on top of the basemap; the final expression is the cell output
overlay = tiles * hv.Overlay(elements)
overlay.opts(title="Trajectories & Points of .sb Files")



In [None]:

import holoviews as hv
from holoviews.element.tiles import CartoLight
from holoviews import opts
from bokeh.models import HoverTool
import math
import pandas as pd
import textwrap

# Initialize HoloViews for Bokeh
hv.extension('bokeh')

# Helper: convert lon/lat → Web Mercator
def lonlat_to_mercator(lon, lat):
    """Project one lon/lat pair in degrees to Web Mercator ``(x, y)``.

    Both outputs are scaled by the sphere radius 6378137.0. ``lat`` must stay
    strictly between -90 and 90, otherwise the logarithm is not defined.
    """
    radius = 6378137.0
    # angle whose tangent feeds the logarithm: (90 + lat) / 2, in radians
    half_angle = (90 + lat) * math.pi/360.0
    easting = lon * (math.pi/180.0) * radius
    northing = math.log(math.tan(half_angle)) * radius
    return easting, northing

# 1) Per-file trajectory lines, plus the annotated rows of every file
elements = []
all_points = []
for m in metadata_list:
    name = m['data_file_name']

    # Copy the file's table and add the columns used for grouping and hover
    df = m['data'].copy().assign(
        File=name,
        Start_str=m['start_datetime'].strftime('%Y-%m-%d %H:%M:%S'),
        End_str=m['end_datetime'].strftime('%Y-%m-%d %H:%M:%S'),
        Data_Type_str=', '.join(m['Data_Type']),
    )

    # Lon/lat → Web Mercator, one (x, y) pair per row
    df['merc_x'], df['merc_y'] = zip(*map(lonlat_to_mercator, df['lon'], df['lat']))

    # Only files with more than one row get a trajectory line
    if len(df) > 1:
        coords = df[['merc_x','merc_y']].values
        elements.append(
            hv.Path(coords, kdims=['x','y']).opts(color='red', line_width=2)
        )

    all_points.append(df)

# 2) Stack the rows of all files into one table (grouped by location further down)
full_df = pd.concat(all_points, ignore_index=True)

# Combine unique filenames into a comma-separated string and wrap lines

def wrap_files(vals):
    """Return the distinct entries of *vals* as one HTML-ready string.

    The entries are de-duplicated, sorted, joined with ", ", wrapped to lines
    of at most 100 characters, and the lines are joined with ``<br>``.
    """
    listing = ", ".join(sorted(set(vals)))
    wrapped_lines = textwrap.wrap(listing, width=100)
    return '<br>'.join(wrapped_lines)

# Group and combine hover fields: one row per distinct (merc_x, merc_y), with
# the distinct values of each field joined by <br>
grouped = full_df.groupby(['merc_x','merc_y'], as_index=False).agg({
    'File':          wrap_files,
    'Start_str':     lambda vals: '<br>'.join(sorted(set(vals))),
    'End_str':       lambda vals: '<br>'.join(sorted(set(vals))),
    'Data_Type_str': lambda vals: '<br>'.join(sorted(set(vals)))
})

# 3) Single Points layer with combined hover data
elements.append(
    hv.Points(
        grouped,
        kdims=['merc_x','merc_y'],
        vdims=['File','Start_str','End_str','Data_Type_str']
    )
)

# 4) Unified HoverTool snapping to data.
# Every field shown here can contain the <br> separators inserted above, so
# each one carries {safe}; without it the markup is escaped and shown as text.
hover = HoverTool(
    tooltips="""
    <div style=\"max-width:600px;\">  <!-- constrain width -->
      <strong>File:</strong><br>@File{safe}<br>
      <strong>Start:</strong> @Start_str{safe}<br>
      <strong>End:</strong> @End_str{safe}<br>
      <strong>Type:</strong> @Data_Type_str{safe}
    </div>
    """,
    point_policy='snap_to_data'
)

# 5) Basemap
tiles = CartoLight().opts(
    width=900, height=700,
    xaxis=None, yaxis=None,
    tools=[], active_tools=[]
)

# 6) Overlay trajectories + combined points
overlay = tiles * hv.Overlay(elements)

# 7) Style Points only and hide path legend
overlay = overlay.opts(
    opts.Points(
        size=6,
        color='blue',
        tools=[hover, 'wheel_zoom', 'pan'],
        active_tools=['wheel_zoom']
    ),
    opts.Path(show_legend=False)
)

# 8) Add title and render
overlay = overlay.opts(title='Trajectories & Combined Points of .sb Files')

overlay



In [None]:

import holoviews as hv
import panel as pn
from holoviews.element.tiles import CartoLight
from bokeh.models import HoverTool
import math, pandas as pd, textwrap

# Initialize HoloViews and Panel for Bokeh
hv.extension('bokeh')  
pn.extension()

# Helper: convert lon/lat → Web Mercator
def lonlat_to_mercator(lon, lat):
    """Project one lon/lat pair in degrees to Web Mercator ``(x, y)``.

    Both outputs are scaled by the sphere radius 6378137.0. ``lat`` must stay
    strictly between -90 and 90, otherwise the logarithm is not defined.
    """
    radius = 6378137.0
    # angle whose tangent feeds the logarithm: (90 + lat) / 2, in radians
    half_angle = (90 + lat) * math.pi/360.0
    easting = lon * (math.pi/180.0) * radius
    northing = math.log(math.tan(half_angle)) * radius
    return easting, northing

# --- assemble full DataFrame of points ---
all_pts = []
for m in metadata_list:
    # Copy the file's rows; every row takes the file's start datetime, which
    # replaces the per-row 'datetime' column
    df = m['data'].copy().assign(
        File=m['data_file_name'],
        datetime=m['start_datetime'],
        Data_Type_str=', '.join(m['Data_Type']),
    )
    # Lon/lat → Web Mercator, one (x, y) pair per row
    df['merc_x'], df['merc_y'] = zip(*map(lonlat_to_mercator, df['lon'], df['lat']))
    all_pts.append(df)

# One table holding the rows of every file
full_df = pd.concat(all_pts, ignore_index=True)

# Wrap-and-merge filenames for hover
def wrap_files(vals):
    """Return the distinct entries of *vals* as one HTML-ready string.

    The entries are de-duplicated, sorted, joined with ", ", wrapped to lines
    of at most 30 characters, and the lines are joined with ``<br>``.
    """
    listing = ", ".join(sorted(set(vals)))
    wrapped_lines = textwrap.wrap(listing, width=30)
    return '<br>'.join(wrapped_lines)

# Base tiles
# compute global Mercator extent and add padding for fixed view
x_min, x_max = full_df['merc_x'].min(), full_df['merc_x'].max()
y_min, y_max = full_df['merc_y'].min(), full_df['merc_y'].max()
# add 5% padding on each side
pad_x = (x_max - x_min) * 0.05
pad_y = (y_max - y_min) * 0.05

base_tiles = CartoLight().opts(
    width=800, height=600,
    xaxis=None, yaxis=None,
    tools=[], active_tools=[],
    xlim=(x_min - pad_x, x_max + pad_x),  # padded horizontal range
    ylim=(y_min - pad_y, y_max + pad_y)   # padded vertical range
)

# HoverTool
# File and Data_Type_str can both contain <br> separators (they are merged per
# location in make_plot), so both carry {safe}; without it the markup is
# escaped and shown as text. {%F} prints the datetime as a date.
hover = HoverTool(
    tooltips="""
    <div style='max-width:200px;'>
      <strong>Time:</strong> @datetime{%F}<br>
      <strong>File:</strong> @File{safe}<br>
      <strong>Type:</strong> @Data_Type_str{safe}
    </div>
    """,
    formatters={'@datetime':'datetime'},
    point_policy='snap_to_data'
)

# Plotting function
def make_plot(selected_date=None, show_all=False):
    """Return the base tiles overlaid with points for one day or for all days.

    selected_date : value accepted by ``pd.to_datetime``; only rows of
        ``full_df`` whose 'datetime' falls on that calendar date are drawn.
        Not used when ``show_all`` is true.
    show_all : when true, every row of ``full_df`` is drawn.
    """
    if show_all:
        visible = full_df
    else:
        # show only that day's points
        wanted_day = pd.to_datetime(selected_date).date()
        on_that_day = full_df['datetime'].dt.date == wanted_day
        visible = full_df[on_that_day]

    # Collapse rows that share a location into a single row for the hover
    aggregations = {
        'datetime':       'max',
        'File':           wrap_files,
        'Data_Type_str':  lambda v: '<br>'.join(sorted(set(v))),
    }
    merged = visible.groupby(['merc_x','merc_y'], as_index=False).agg(aggregations)

    point_layer = hv.Points(
        merged,
        kdims=['merc_x','merc_y'],
        vdims=['datetime','File','Data_Type_str'],
    ).opts(
        size=8,
        color='blue',
        tools=[hover, 'wheel_zoom', 'pan'],
        active_tools=['wheel_zoom'],
    )
    return base_tiles * point_layer

# Widgets: a day slider spanning the data's date range, plus a 'Show All' checkbox
dates = full_df['datetime'].dt.date
first_day, last_day = dates.min(), dates.max()
slider = pn.widgets.DateSlider(
    name='Day', start=first_day, end=last_day, value=first_day, step=1, width=800
)
toggle = pn.widgets.Checkbox(name='Show All Days', value=False)


# Callback: redraw the map whenever either widget value changes
@pn.depends(day=slider.param.value, show_all=toggle.param.value)
def update(day, show_all):
    """Build the plot for the current slider day and checkbox state."""
    return make_plot(selected_date=day, show_all=show_all)


# Layout: widgets in one row, map underneath
controls = pn.Row(slider, toggle)
dashboard = pn.Column(controls, update)

dashboard



In [None]:

import holoviews as hv
import panel as pn
from holoviews.element.tiles import CartoLight
from bokeh.models import HoverTool
import math, pandas as pd, textwrap

# Initialize HoloViews and Panel for Bokeh
hv.extension('bokeh')  
pn.extension()

# Helper: convert lon/lat → Web Mercator
def lonlat_to_mercator(lon, lat):
    """Project one lon/lat pair in degrees to Web Mercator ``(x, y)``.

    Both outputs are scaled by the sphere radius 6378137.0. ``lat`` must stay
    strictly between -90 and 90, otherwise the logarithm is not defined.
    """
    radius = 6378137.0
    # angle whose tangent feeds the logarithm: (90 + lat) / 2, in radians
    half_angle = (90 + lat) * math.pi/360.0
    easting = lon * (math.pi/180.0) * radius
    northing = math.log(math.tan(half_angle)) * radius
    return easting, northing

# --- assemble full DataFrame of points ---
all_pts = []
for m in metadata_list:
    # Copy the file's rows; every row takes the file's start datetime, which
    # replaces the per-row 'datetime' column
    df = m['data'].copy().assign(
        File=m['data_file_name'],
        datetime=m['start_datetime'],
        Data_Type_str=', '.join(m['Data_Type']),
    )
    # Lon/lat → Web Mercator, one (x, y) pair per row
    df['merc_x'], df['merc_y'] = zip(*map(lonlat_to_mercator, df['lon'], df['lat']))
    all_pts.append(df)

# One table holding the rows of every file
full_df = pd.concat(all_pts, ignore_index=True)

# Wrap-and-merge filenames for hover
def wrap_files(vals):
    """Return the distinct entries of *vals* as one HTML-ready string.

    The entries are de-duplicated, sorted, joined with ", ", wrapped to lines
    of at most 30 characters, and the lines are joined with ``<br>``.
    """
    listing = ", ".join(sorted(set(vals)))
    wrapped_lines = textwrap.wrap(listing, width=30)
    return '<br>'.join(wrapped_lines)

# Base tiles
# compute global Mercator extent and add padding for fixed view
x_min, x_max = full_df['merc_x'].min(), full_df['merc_x'].max()
y_min, y_max = full_df['merc_y'].min(), full_df['merc_y'].max()
# add 5% padding on each side
pad_x = (x_max - x_min) * 0.05
pad_y = (y_max - y_min) * 0.05

base_tiles = CartoLight().opts(
    width=800, height=600,
    xaxis=None, yaxis=None,
    tools=[], active_tools=[],
    xlim=(x_min - pad_x, x_max + pad_x),  # padded horizontal range
    ylim=(y_min - pad_y, y_max + pad_y)   # padded vertical range
)

# HoverTool
# File and Data_Type_str can both contain <br> separators (they are merged per
# location in make_plot), so both carry {safe}; without it the markup is
# escaped and shown as text. {%F} prints the datetime as a date.
hover = HoverTool(
    tooltips="""
    <div style='max-width:200px;'>
      <strong>Time:</strong> @datetime{%F}<br>
      <strong>File:</strong> @File{safe}<br>
      <strong>Type:</strong> @Data_Type_str{safe}
    </div>
    """,
    formatters={'@datetime':'datetime'},
    point_policy='snap_to_data'
)

# Plotting function (with transect lines)
def make_plot(selected_date=None, show_all=False):
    """Return base tiles, per-file transect lines and merged points.

    selected_date : value accepted by ``pd.to_datetime``; only rows of
        ``full_df`` whose 'datetime' falls on that calendar date are drawn.
        Not used when ``show_all`` is true.
    show_all : when true, every row of ``full_df`` is drawn.

    Returns ``base_tiles * lines * points`` when at least one file in the
    selection has more than one row, otherwise ``base_tiles * points``.
    """
    # filter points
    if show_all:
        df = full_df.copy()
    else:
        df = full_df[full_df['datetime'].dt.date == pd.to_datetime(selected_date).date()]

    # Group and merge point duplicates (one row per distinct location)
    grouped = df.groupby(['merc_x','merc_y'], as_index=False).agg({
        'datetime':       'max',
        'File':           wrap_files,
        'Data_Type_str':  lambda v: '<br>'.join(sorted(set(v)))
    })
    pts = hv.Points(
        grouped,
        kdims=['merc_x','merc_y'],
        vdims=['datetime','File','Data_Type_str']
    ).opts(
        size=8, color='blue', tools=[hover, 'wheel_zoom', 'pan'], active_tools=['wheel_zoom']
    )

    # Build transect lines per file
    lines = []
    for _, sub in df.groupby('File'):
        if len(sub) > 1:
            # full_df stores a single 'datetime' value per file, so the sort
            # key ties on every row. A stable sort keeps the rows in their
            # existing order; the default quicksort gives no such guarantee
            # and could scramble the line's vertices.
            sub_sorted = sub.sort_values('datetime', kind='mergesort')
            coords = list(zip(sub_sorted['merc_x'], sub_sorted['merc_y']))
            lines.append(
                hv.Path([coords], kdims=['x','y']).opts(color='red', line_width=2)
            )

    # Combine all lines (list form, as in the other map cells of this file)
    if lines:
        return base_tiles * hv.Overlay(lines) * pts
    return base_tiles * pts

# Widgets: a day slider spanning the data's date range, plus a 'Show All' checkbox
dates = full_df['datetime'].dt.date
first_day, last_day = dates.min(), dates.max()
slider = pn.widgets.DateSlider(
    name='Day', start=first_day, end=last_day, value=first_day, step=1, width=800
)
toggle = pn.widgets.Checkbox(name='Show All Days', value=False)


# Callback: redraw the map whenever either widget value changes
@pn.depends(day=slider.param.value, show_all=toggle.param.value)
def update(day, show_all):
    """Build the plot for the current slider day and checkbox state."""
    return make_plot(selected_date=day, show_all=show_all)


# Layout: widgets in one row, map underneath
controls = pn.Row(slider, toggle)
dashboard = pn.Column(controls, update)

dashboard



_______________________________________________________
## Next Step

### Plot the same map, but have the hover tooltip show a plot of the in-situ spectra for the point under the cursor


#### Where several spectra share an identical location, use a single plot and draw all of those spectra on it