In [35]:
from io import StringIO
from pathlib import Path
import re
import math
import textwrap
from datetime import datetime
import earthaccess
import xarray as xr
import numpy as np
import pandas as pd
import panel as pn
from collections import defaultdict
from tqdm.notebook import tqdm

import functions as fc

import holoviews as hv
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import geoviews as gv
from geoviews import Feature
from bokeh.models import HoverTool
from holoviews import opts
hv.extension('bokeh')
gv.extension('bokeh')
pn.extension()

In [36]:
# Analysis window as (start, end) date strings; reused by the granule search
# and by the daily true-colour image loop.
tspan = ("2024-09-22", "2024-09-28")
bbox = (-125., 32., -116., 38.)  # minlon, minlat, maxlon, maxlat
# Coordinate reference system passed as `crs=` to the geoviews elements built below.
proj = ccrs.PlateCarree()

In [3]:
# Search for granules of the PACE_OCI_L2_AOP collection that match the time
# span and bounding box defined above.
results = earthaccess.search_data(
    short_name="PACE_OCI_L2_AOP",
    temporal=tspan,
    bounding_box=bbox,
    # cloud_cover=clouds,  # NOTE(review): disabled filter; `clouds` is not defined in the visible cells
)
# Open the matched granules. `paths` is zipped with `results` further down, so
# it is presumably one entry per result, in the same order -- verify.
paths = earthaccess.open(results)

QUEUEING TASKS | :   0%|          | 0/16 [00:00<?, ?it/s]

PROCESSING TASKS | :   0%|          | 0/16 [00:00<?, ?it/s]

COLLECTING RESULTS | :   0%|          | 0/16 [00:00<?, ?it/s]

In [14]:
# Collect one record per granule: start time, granule name, footprint ring
# and the opened file object.
granules_data = []
# zip() has no length, so tqdm needs an explicit total to draw a real progress bar.
n_granules = min(len(results), len(paths))
for grmeta, path in tqdm(zip(results, paths), total=n_granules, desc='Processing Granules'):
    gr_name = grmeta['umm']['GranuleUR']
    gr_time = grmeta['umm']['TemporalExtent']['RangeDateTime']['BeginningDateTime']
    polygons = grmeta['umm']['SpatialExtent']['HorizontalSpatialDomain']['Geometry']['GPolygons']
    # Only the first GPolygon is used; its boundary is stored as (lon, lat) pairs.
    polygon_coord = [(pt['Longitude'], pt['Latitude']) for pt in polygons[0]['Boundary']['Points']]
    granules_data.append({'time': gr_time, 'granule': gr_name, 'geometry': polygon_coord, 'path': path})

Processing Granules: 0it [00:00, ?it/s]

In [5]:
def get_OCI_PACE_truecolor(time, size=(400, 800), bbox=(-180, -90, 180, 90)):
    """Download the OCI_PACE_True_Color layer from the NASA GIBS WMS endpoint.

    Parameters
    ----------
    time : str
        Day to request, in the format YYYY-MM-DD.
    size : tuple
        Requested image size as (height, width) in pixels.
    bbox : tuple
        Bounding box as (minlon, minlat, maxlon, maxlat).

    Returns
    -------
    x : numpy.ndarray
        Evenly spaced values from minlon to maxlon, one per image column.
    y : numpy.ndarray
        Evenly spaced values from minlat to maxlat, one per image row.
    img : numpy.ndarray
        The downloaded image with its rows reversed so that row 0 pairs with
        y[0] (presumably the server returns the northernmost row first).
    """
    # Imported locally because scikit-image is only needed by this function.
    from skimage import io

    height, width = size
    minlon, minlat, maxlon, maxlat = bbox

    #  Construct Geographic projection URL.
    gibs_url = 'https://gibs.earthdata.nasa.gov/wms/epsg4326/best/wms.cgi?version=1.3.0&service=WMS&request=GetMap&format=image/png&STYLE=default'
    # The bbox is sent as minlat,minlon,maxlat,maxlon. The bounds are passed
    # through unrounded so the requested extent is exactly the extent that the
    # returned x/y coordinates describe (truncating to int would make them
    # disagree for fractional bounding boxes).
    proj4326 = f'{gibs_url}&bbox={minlat},{minlon},{maxlat},{maxlon}&CRS=EPSG:4326&HEIGHT={height}&WIDTH={width}&TIME={time}&layers=OCI_PACE_True_Color'

    # Request image.
    img = io.imread(proj4326)
    x = np.linspace(minlon, maxlon, img.shape[1])
    y = np.linspace(minlat, maxlat, img.shape[0])
    # Reverse the row order so rows run in the same direction as y.
    img = img[::-1, :]

    return x, y, img

In [6]:
# ─── 1) DEFINE SB_PARSER ───────────────────────────────────────────────────────

def parse_sb(file_path):
    """Parse a ``.sb`` text file into header metadata and a position/time table.

    The file is split at the ``/end_header`` marker. Header lines of the form
    ``/key=value`` are read (bracketed annotations such as ``[GMT]`` are
    stripped from the value) and a fixed set of keys is kept. The body is read
    as delimited text whose column names come from the ``/fields`` header and
    whose separator comes from the ``/delimiter`` header (``comma``, ``space``
    or ``tab``; comma when the header is absent or unrecognised).

    Parameters
    ----------
    file_path : str or pathlib.Path
        Path of the file to read.

    Returns
    -------
    metadata : dict
        The kept header values (strings) plus ``fields_list`` (list of column
        names), ``single_spectrum`` (True when the body has no ``lat`` or
        ``lon`` column), ``start_datetime`` and ``end_datetime``
        (``datetime`` objects built from the header date/time entries).
    df : pandas.DataFrame
        Columns ``date``, ``time``, ``lat``, ``lon`` and ``datetime``. Missing
        lat/lon or date/time columns are filled from the header.

    Raises
    ------
    ValueError
        If the file has no ``/end_header`` marker.
    KeyError
        If the header has no ``/fields`` entry (or lacks the start/end
        date/time entries needed below).
    """
    text = Path(file_path).read_text()
    if '/end_header' not in text:
        raise ValueError(f"{file_path}: no '/end_header' marker found")
    header, body = text.split('/end_header', 1)

    kept_keys = {
        'station', 'data_file_name',
        'start_date', 'end_date',
        'start_time', 'end_time',
        'fields',
        'north_latitude', 'south_latitude',
        'west_longitude', 'east_longitude',
    }
    # Map of /delimiter header values to the separator handed to pandas.
    separators = {'comma': ',', 'space': r'\s+', 'tab': '\t'}

    # 1a) extract header entries
    metadata = {}
    delimiter = 'comma'
    for line in header.splitlines():
        if not line.startswith('/') or '=' not in line:
            continue
        key, val = line[1:].split('=', 1)
        key, val = key.strip(), re.sub(r'\[.*?\]', '', val).strip()
        if key in kept_keys:
            metadata[key] = val
        elif key == 'delimiter':
            # Used only to read the body; not stored in the returned metadata.
            delimiter = val.lower()

    # 1b) parse the /fields list & store it
    if 'fields' not in metadata:
        raise KeyError(f"{file_path}: header has no '/fields' entry")
    fields_list = [f.strip() for f in metadata['fields'].split(',')]
    metadata['fields_list'] = fields_list

    # 1c) read the body into a DataFrame
    df = pd.read_csv(
        StringIO(body.strip()),
        sep=separators.get(delimiter, ','),
        names=fields_list,
        comment='/'
    )

    # 1d) detect single-spectrum files & fill lat/lon if missing
    is_spectrum = ('lat' not in df.columns or 'lon' not in df.columns)
    metadata['single_spectrum'] = is_spectrum
    if is_spectrum:
        # Fall back from north to south latitude (west to east longitude), then 0.
        lat0 = float(metadata.get('north_latitude', metadata.get('south_latitude', 0)))
        lon0 = float(metadata.get('west_longitude',  metadata.get('east_longitude',  0)))
        df['lat'], df['lon'] = lat0, lon0

    # 1e) ensure date/time exist
    if 'date' not in df.columns:
        df['date'] = metadata['start_date']
    if 'time' not in df.columns:
        df['time'] = metadata['start_time']

    # 1f) build datetime column
    df['datetime'] = pd.to_datetime(df['date'].astype(str) + ' ' + df['time'])

    # 1g) parse header start/end datetimes
    def _pd(d, t):
        t_clean = re.sub(r'\[.*?\]', '', t)
        # Header dates are accepted either as YYYY-MM-DD or as YYYYMMDD.
        dfmt = '%Y-%m-%d' if '-' in d else '%Y%m%d'
        return datetime.strptime(f"{d} {t_clean}", f"{dfmt} %H:%M:%S")
    metadata['start_datetime'] = _pd(metadata['start_date'], metadata['start_time'])
    metadata['end_datetime']   = _pd(metadata['end_date'],   metadata['end_time'])

    # 1h) keep only the five columns you care about
    df = df[['date', 'time', 'lat', 'lon', 'datetime']]

    return metadata, df


In [37]:
# Build one true-colour image per date in the analysis window.
# fc.get_dates is a project helper; presumably it returns datetime-like objects
# spaced 24 hours apart between the two bounds -- verify against functions.py.
dates = fc.get_dates(tspan[0], tspan[1], 24)
imgs = {}  # 'YYYY-MM-DD' -> gv.RGB element
for date in tqdm(dates, desc="Fetching true color from NASA WorldView"):
    daystr = date.strftime('%Y-%m-%d')
    # NOTE: x and y remain in the notebook namespace after this loop and are
    # read again by make_plot (x.size / y.size) to size the figure.
    x, y, img = get_OCI_PACE_truecolor(daystr, size=(600, 900), bbox=bbox)
    # imgs[daystr] = hv.RGB((x, y, img))
    imgs[daystr] = gv.RGB((x, y, img), crs=proj)

Fetching true color from NASA WorldView:   0%|          | 0/7 [00:00<?, ?it/s]

In [8]:
# ─── 2.a) COLLECT & PREPARE ALL FILES ──────────────────────────────────────────

root = Path("/home/jovyan/shared-public/pace-hackweek/SeePACE/Hackweek_PACE-PAX_09_22-28")
search_types = ['Rrs', 'AOP']  # substrings looked for in each file's field names; extend as needed
metadata_list = []

# Walk every .sb file below root and keep its parsed header together with its data.
for sb in root.rglob('*.sb'):
    meta, df = parse_sb(sb)

    # Files flagged as single-spectrum are reduced to their first row.
    df = df.head(1) if meta['single_spectrum'] else df
    meta['data'] = df

    # Record which search types occur as a substring of any original field name.
    meta['Data_Type'] = [
        wanted for wanted in search_types
        if any(wanted in field for field in meta['fields_list'])
    ]
    metadata_list.append(meta)

# Summary column name -> metadata key it is filled from.
summary_columns = {
    'Station':        'station',
    'Data_File_Name': 'data_file_name',
    'Start_Datetime': 'start_datetime',
    'End_Datetime':   'end_datetime',
    'Data_Type':      'Data_Type',
}
summary_df = pd.DataFrame([
    {column: m[key] for column, key in summary_columns.items()}
    for m in metadata_list
])

display(summary_df)

Unnamed: 0,Station,Data_File_Name,Start_Datetime,End_Datetime,Data_Type
0,30,PACE-PAX_Shearwater_2024_GER_St_30.sb,2024-09-22 19:25:46,2024-09-22 19:25:46,[Rrs]
1,31,PACE-PAX_Shearwater_2024_GER_St_31.sb,2024-09-22 20:34:41,2024-09-22 20:34:41,[Rrs]
2,32,PACE-PAX_Shearwater_2024_GER_St_32.sb,2024-09-23 18:05:43,2024-09-23 18:05:43,[Rrs]
3,33,PACE-PAX_Shearwater_2024_GER_St_33.sb,2024-09-23 20:08:33,2024-09-23 20:08:33,[Rrs]
4,34,PACE-PAX_Shearwater_2024_GER_St_34.sb,2024-09-25 19:36:20,2024-09-25 19:36:20,[Rrs]
...,...,...,...,...,...
67,36,PVST_POL_PACE-PAX_Shearwater_above_water_radio...,2024-09-25 21:56:00,2024-09-25 21:58:00,[Rrs]
68,37,PVST_POL_PACE-PAX_Shearwater_above_water_radio...,2024-09-25 23:07:00,2024-09-25 23:09:00,[Rrs]
69,38,PVST_POL_PACE-PAX_Shearwater_above_water_radio...,2024-09-26 17:06:00,2024-09-26 17:08:00,[Rrs]
70,39,PVST_POL_PACE-PAX_Shearwater_above_water_radio...,2024-09-26 19:37:00,2024-09-26 19:39:00,[Rrs]


In [9]:
# ─── 4) BUILD & CONCATENATE XR DATASETS ─────────────────────────────────────

# One xarray Dataset is built per file and collected here before stacking.
ds_list    = []
file_names = []  # filled in the loop below; not read by any visible cell

for m in metadata_list:
    # Add a 0..n-1 'record' column that becomes the per-file row coordinate.
    df = m['data'].reset_index(drop=True).reset_index().rename(columns={'index':'record'})
    ds = xr.Dataset(
        {
            'lat':      ('record', df['lat']),
            'lon':      ('record', df['lon']),
            'datetime': ('record', df['datetime'])
        },
        coords={'record': df['record']}
    )
    # promote to a 2-D Dataset along new 'file' dim
    ds = ds.expand_dims(file=[m['data_file_name']])
    ds_list.append(ds)
    file_names.append(m['data_file_name'])

# Stack all per-file datasets along 'file'. Files differ in record count; the
# printed result below shows shorter files padded with NaN / NaT along 'record'.
ds_combined = xr.concat(
    ds_list,
    dim='file',
    coords='minimal',
    compat='override'
)

print(ds_combined)

<xarray.Dataset> Size: 18kB
Dimensions:   (file: 72, record: 10)
Coordinates:
  * record    (record) int64 80B 0 1 2 3 4 5 6 7 8 9
  * file      (file) object 576B 'PACE-PAX_Shearwater_2024_GER_St_30.sb' ... ...
Data variables:
    lat       (file, record) float64 6kB 33.68 nan nan nan ... nan nan nan nan
    lon       (file, record) float64 6kB -119.6 nan nan nan ... nan nan nan nan
    datetime  (file, record) datetime64[ns] 6kB 2024-09-22T19:25:46 NaT ... NaT


In [12]:
# --- stack the rows of every file into one table of in-situ points ---
all_pts = []
for m in metadata_list:
    # assign() works on a copy, so the frame stored in metadata_list is untouched.
    # Every row of a file gets the file's header start time as its 'datetime'.
    df = m['data'].assign(
        File=m['data_file_name'],
        datetime=m['start_datetime'],
        Data_Type_str=', '.join(m['Data_Type']),
    )
    all_pts.append(df)
full_df = pd.concat(all_pts, ignore_index=True)

In [52]:
# Debug aid: print the footprint ring (longitudes, then latitudes) of every
# granule whose start time falls on 2024-09-22.
# The filter runs on granules_data directly, so this cell does not depend on
# granules_by_date, which is only built in a later cell.
for g in granules_data:
    if g['time'][:10] != '2024-09-22':
        continue
    geom = g['geometry']
    xs, ys = zip(*geom)
    print(list(xs), list(ys))

[-94.19838, -125.59848, -118.16396, -92.49064, -94.19838] [44.21503, 38.4507, 21.14498, 26.36114, 44.21503]
[-118.81709, -150.15126, -142.72395, -117.06376, -118.81709] [44.17486, 38.42171, 21.11386, 26.32861, 44.17486]


In [66]:
# Bucket the granule records by the day part of their start-time string.
granules_by_date = defaultdict(list)
for g in granules_data:
    date = g['time'][:10]  # leading 'YYYY-MM-DD' of the timestamp string
    granules_by_date[date].append(g)

# Every day that has a true-colour image or at least one granule, in order.
available_days = sorted(set(imgs) | set(granules_by_date))

# Slider used to pick the day shown in the viewer.
day_slider = pn.widgets.DiscreteSlider(options=available_days, name="Date")

def wrap_files(vals):
    """Merge names into one hover-friendly string wrapped at 30 characters.

    Parameters
    ----------
    vals : iterable
        Names to show. May be a list or a pandas Series (as passed by
        ``groupby(...).agg``).

    Returns
    -------
    str
        For a single value, that value as text; for several, the sorted unique
        values joined with ", ". Line breaks inserted by the wrapping are
        written as ``<br>``. An empty input gives an empty string.
    """
    # Materialise to a list so access is positional: a Series handed over by
    # groupby/agg keeps its original index labels, and vals[0] would then be a
    # label lookup that fails for any group not containing label 0.
    items = list(vals)
    if not items:
        return ''
    if len(items) > 1:
        joined = ", ".join(sorted({str(v) for v in items}))
    else:
        joined = str(items[0])
    return textwrap.fill(joined, width=30).replace("\n", '<br>')

def make_granule_polygon(granules):
    """Build a gv.Polygons element holding the footprint of each granule record.

    Each record must provide 'geometry' (a sequence of (lon, lat) pairs),
    'granule' (name) and 'time'. Name and time are attached as value
    dimensions so the granule hover tool can show them.
    """
    def _as_record(granule):
        # Split the (lon, lat) pairs into separate coordinate lists.
        lons, lats = zip(*granule['geometry'])
        return {
            'xs': list(lons),
            'ys': list(lats),
            'granule': wrap_files([granule['granule']]),
            'time': granule['time'],
        }

    footprints = gv.Polygons(
        [_as_record(granule) for granule in granules],
        kdims=['xs', 'ys'],
        vdims=['granule', 'time'],
        crs=proj,
    )
    return footprints.opts(
        fill_alpha=0.3,
        fill_color='pink',
        line_color='red',
        tools=[granule_hover]
    )

def make_insitu_points(df):
    """Build a gv.Points element with one point per distinct (lon, lat) pair.

    Rows sharing a position are merged: the latest 'datetime' is kept, file
    names are combined with wrap_files, and the distinct 'Data_Type_str'
    values are sorted and joined with '<br>'.
    """
    def _join_types(values):
        return '<br>'.join(sorted(set(values)))

    per_column = {
        'datetime':      'max',
        'File':          wrap_files,
        'Data_Type_str': _join_types,
    }
    grouped = df.groupby(['lon', 'lat'], as_index=False).agg(per_column)

    points = gv.Points(
        grouped,
        kdims=['lon', 'lat'],
        vdims=['datetime', 'File', 'Data_Type_str'],
        crs=proj,
    )
    return points.opts(
        size=8,
        color='blue',
        tools=[hover, 'wheel_zoom', 'pan'],
        active_tools=['wheel_zoom'],
    )

def make_transects_lines(df):
    """Build one red path per file that contributes more than one point.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns 'File', 'datetime', 'lon' and 'lat'.

    Returns
    -------
    gv.Overlay of gv.Path elements, or an empty gv.Path when no file has more
    than one row.
    """
    lines = []
    for _, sub in df.groupby('File'):
        if len(sub) > 1:
            # Order points by time with a stable sort: rows with equal
            # 'datetime' (every row of a file, when the column holds the file
            # start time) must keep their original order, which the default
            # quicksort does not guarantee.
            sub_sorted = sub.sort_values('datetime', kind='mergesort')
            coords = list(zip(sub_sorted['lon'], sub_sorted['lat']))
            line = gv.Path([coords], kdims=['lon', 'lat'], crs=proj).opts(color='red', line_width=2)
            lines.append(line)

    if lines:
        return gv.Overlay(lines)
    # Return an empty element if no line exists
    return gv.Path([])

# Hover tool for granule footprints. The granule name is produced by wrap_files
# and may contain '<br>' line breaks, so it is inserted with {safe} to be
# rendered as HTML instead of being shown escaped.
granule_hover = HoverTool(
    tooltips="""
    <div style='max-width:300px;'>
      <strong>Granule:</strong> @granule{safe}<br>
      <strong>Time:</strong> @time
    </div>
    """,
    point_policy='follow_mouse'
)

# Hover tool for in-situ points. File and Data_Type_str both carry '<br>'
# separators (see make_insitu_points), hence {safe} on both fields.
hover = HoverTool(
    tooltips="""
    <div style='max-width:200px;'>
      <strong>Time:</strong> @datetime{%F}<br>
      <strong>File:</strong> @File{safe}<br>
      <strong>Type:</strong> @Data_Type_str{safe}
    </div>
    """,
    formatters={'@datetime':'datetime'},
    point_policy='snap_to_data'
)

def make_plot(selected_day=None, show_all=None):
    """Return the plot for one day of the viewer.

    Parameters
    ----------
    selected_day : str
        Day key ('YYYY-MM-DD') used to look up the image in `imgs` and, unless
        `show_all` is set, to select granules and in-situ points.
    show_all : bool
        When truthy, granules and points from every day are used.

    Returns
    -------
    A gv.Text placeholder when there is no image for the day (or the polygon
    element evaluates as empty), otherwise the day's true-colour image limited
    to `bbox` and titled with the number of in-situ rows.

    Notes
    -----
    * Figure width/height come from the notebook-level `x` and `y` arrays left
      behind by the image-fetch loop.
    * The polygon, point and transect layers are built but, as in the
      original, not yet multiplied onto the image.
      TODO: re-enable `base * polygons * points * transects` once verified.
    * The former coastline layer was dropped: it referenced `gf`, which is
      never imported in this notebook, and its result was never displayed.
    """
    base = imgs.get(selected_day)
    if base is None:
        return gv.Text(0, 0, f"No image for {selected_day}").opts(width=600, height=500)

    if show_all:
        # Flatten the per-day lists in one pass.
        granules = [g for day_granules in granules_by_date.values() for g in day_granules]
        df = full_df.copy()
    else:
        granules = granules_by_date.get(selected_day, [])
        wanted_date = pd.to_datetime(selected_day).date()
        df = full_df[full_df['datetime'].dt.date == wanted_date]

    polygons = make_granule_polygon(granules)
    points = make_insitu_points(df)
    transects = make_transects_lines(df)

    if not polygons:
        return gv.Text(0, 0, "No granules available").opts(width=600, height=500)

    n_points = len(df)
    # elements = polygons + points + transects
    # overlay = base * hv.Overlay(elements)
    # overlay = base * polygons #* points * transects
    overlay = base
    return overlay.opts(
        ylim=(bbox[1], bbox[3]),
        xlim=(bbox[0], bbox[2]),
        width=x.size,
        height=y.size,
        xlabel='Longitude',
        ylabel='Latitude',
        framewise=False,
        title=f"Granules on {selected_day} / {n_points} in-situ points"
    )

# Checkbox passed to make_plot as `show_all`; when ticked, make_plot uses
# granules and in-situ points from every day instead of only the selected one.
toggle = pn.widgets.Checkbox(
    name='Show All Days',
    value=False
)

# Bind make_plot to the two widgets so it is called with their current values.
dynamic_map = pn.bind(make_plot, selected_day=day_slider, show_all=toggle)

# Layout: title, a row with the two controls, then the bound plot.
app = pn.Column(
    "# Daily Granules and In-Situ Viewer",
    pn.Row(day_slider, toggle),
    dynamic_map
)

# Mark the layout as servable (for `panel serve`); as the last expression it is
# also what the notebook cell displays.
app.servable()