In [30]:
from io import StringIO
from shapely.geometry import Point, shape
from pathlib import Path
import re
import math
import textwrap
from datetime import datetime
import earthaccess
import xarray as xr
import numpy as np
import pandas as pd
import panel as pn
from collections import defaultdict
from tqdm.notebook import tqdm
import cartopy.crs as ccrs

import functions as fc

import holoviews as hv
from bokeh.models import HoverTool, MercatorTickFormatter
from holoviews import opts, streams
from holoviews.element.tiles import EsriImagery, OSM, CartoLight
hv.extension('bokeh')
pn.extension()

In [2]:
# Search window (start, end) handed to earthaccess as `temporal`.
tspan = ("2024-09-22", "2024-09-28")
# Area of interest; other cells unpack it as (minlon, minlat, maxlon, maxlat).
bbox = (-125., 32., -116., 38.)
# Image / plot size in pixels, ordered (height, width).
areasize = (600, 900)
height, width = areasize
# NOTE(review): `proj` is not referenced by any cell shown here.
proj = ccrs.PlateCarree()

In [3]:
# Search for PACE_OCI_L2_AOP granules that match the time window and bounding box.
results = earthaccess.search_data(
    short_name="PACE_OCI_L2_AOP",
    temporal=tspan,
    bounding_box=bbox,
    # cloud_cover=clouds,
)
# NOTE(review): `paths` is not used by the later cells shown here; only the
# metadata in `results` is read.
paths = earthaccess.open(results)

QUEUEING TASKS | :   0%|          | 0/16 [00:00<?, ?it/s]

PROCESSING TASKS | :   0%|          | 0/16 [00:00<?, ?it/s]

COLLECTING RESULTS | :   0%|          | 0/16 [00:00<?, ?it/s]

In [4]:
def lonlat_to_mercator(lon, lat):
    """Project one longitude/latitude pair (degrees) onto a spherical Mercator plane.

    The sphere radius is 6378137.0, so the result is in the same units as that
    radius. Only scalars are supported because the `math` module is used.

    Returns
    -------
    tuple
        ``(x, y)`` Mercator coordinates.
    """
    sphere_radius = 6378137.0
    merc_x = lon * (math.pi / 180.0) * sphere_radius
    # Half of the angle measured from the south pole, in radians.
    half_angle = (90 + lat) * math.pi / 360.0
    merc_y = math.log(math.tan(half_angle)) * sphere_radius
    return merc_x, merc_y

def bbox_to_mercator(bbox):
    """Project the two corners of a lon/lat bounding box with `lonlat_to_mercator`.

    Parameters
    ----------
    bbox : sequence of four numbers
        ``(minlon, minlat, maxlon, maxlat)`` in degrees.

    Returns
    -------
    tuple
        ``(min_x, min_y, max_x, max_y)`` in Mercator coordinates.
    """
    west, south, east, north = bbox
    lower_left = lonlat_to_mercator(west, south)
    upper_right = lonlat_to_mercator(east, north)
    return (*lower_left, *upper_right)
    
def get_OCI_PACE_truecolor(time, size=(400, 800), bbox=(-180, -90, 180, 90)):
    """Download the ``OCI_PACE_True_Color`` layer for one day from the NASA GIBS WMS endpoint.

    Parameters
    ----------
    time : str
        Day to request, formatted ``YYYY-MM-DD``.
    size : tuple
        ``(height, width)`` of the requested image in pixels.
    bbox : tuple
        ``(minlon, minlat, maxlon, maxlat)`` in degrees.

    Returns
    -------
    x : numpy.ndarray
        Evenly spaced Mercator x coordinates, one per image column.
    y : numpy.ndarray
        Evenly spaced Mercator y coordinates, one per image row.
    img : numpy.ndarray
        The image with its rows reversed, so that row 0 pairs with the
        smallest y value.
    """
    # Imported lazily: scikit-image is only needed by this function.
    from skimage import io

    height, width = size
    minlon, minlat, maxlon, maxlat = bbox

    # Construct the geographic-projection (EPSG:4326) request URL.
    gibs_url = 'https://gibs.earthdata.nasa.gov/wms/epsg4326/best/wms.cgi?version=1.3.0&service=WMS&request=GetMap&format=image/png&STYLE=default'
    # The box is sent in lat,lon order and WITHOUT integer truncation, so the
    # requested extent matches the extent used for the x/y coordinates below.
    request_url = (
        f'{gibs_url}&bbox={minlat},{minlon},{maxlat},{maxlon}'
        f'&CRS=EPSG:4326&HEIGHT={height}&WIDTH={width}&TIME={time}&layers=OCI_PACE_True_Color'
    )

    # Request image.
    img = io.imread(request_url)

    # NOTE(review): the image is requested on a regular lat/lon grid but its rows
    # are spread linearly in Mercator y here; that looks like an approximation
    # that grows with the latitude span of the box - confirm it is acceptable.
    minmerc_x, minmerc_y, maxmerc_x, maxmerc_y = bbox_to_mercator(bbox)
    x = np.linspace(minmerc_x, maxmerc_x, img.shape[1])
    y = np.linspace(minmerc_y, maxmerc_y, img.shape[0])
    img = img[::-1, :]

    return x, y, img

In [5]:
# Build one record per search result: start time, position in `results`,
# granule name and footprint outline projected to Mercator coordinates.
granules_data = []
for i, grmeta in enumerate(tqdm(results, desc='Processing Granules')):
    gr_name = grmeta['umm']['GranuleUR']
    gr_time = grmeta['umm']['TemporalExtent']['RangeDateTime']['BeginningDateTime']
    polygons = grmeta['umm']['SpatialExtent']['HorizontalSpatialDomain']['Geometry']['GPolygons']
    # Only the first polygon's boundary is used; further GPolygons entries are ignored.
    polygon_coord = [(lonlat_to_mercator(pt['Longitude'], pt['Latitude'])) for pt in polygons[0]['Boundary']['Points']]
    granules_data.append({'time': gr_time, 'granule_index': i, 'granule': gr_name, 'geometry': polygon_coord})

Processing Granules:   0%|          | 0/16 [00:00<?, ?it/s]

In [6]:
# ─── 1) DEFINE SB_PARSER ───────────────────────────────────────────────────────

def parse_sb(file_path):
    """Parse a ``.sb`` text file into selected header metadata and a trimmed table.

    The file is split at the ``/end_header`` marker. Header lines of the form
    ``/key=value`` are read (bracketed suffixes such as ``[GMT]`` are removed
    from values), and the body is read as delimited text using the column
    names from the ``/fields`` header. The body delimiter follows the
    ``/delimiter`` header (``comma``, ``space`` or ``tab``) and falls back to a
    comma when that header is absent or unrecognised.

    Parameters
    ----------
    file_path : str or pathlib.Path
        Location of the file to read.

    Returns
    -------
    metadata : dict
        Selected header entries plus ``fields_list``, ``single_spectrum``,
        ``start_datetime`` and ``end_datetime``.
    df : pandas.DataFrame
        Columns ``date``, ``time``, ``lat``, ``lon``, ``datetime``.

    Raises
    ------
    ValueError
        If the file has no ``/end_header`` marker.
    KeyError
        If the header has no ``/fields`` entry (or lacks the start/end
        date and time entries that are needed later).
    """
    wanted_keys = {
        'station', 'data_file_name',
        'start_date', 'end_date',
        'start_time', 'end_time',
        'fields',
        'north_latitude', 'south_latitude',
        'west_longitude', 'east_longitude',
    }
    separators = {'comma': ',', 'tab': '\t', 'space': r'\s+'}

    text = Path(file_path).read_text()
    if '/end_header' not in text:
        raise ValueError(f"{file_path}: no '/end_header' marker found")
    header, body = text.split('/end_header', 1)

    # 1a) extract header entries
    metadata = {}
    delimiter_name = 'comma'
    for line in header.splitlines():
        if not line.startswith('/') or '=' not in line:
            continue
        key, val = line[1:].split('=', 1)
        key, val = key.strip(), re.sub(r'\[.*?\]', '', val).strip()
        if key in wanted_keys:
            metadata[key] = val
        elif key == 'delimiter':
            # Kept local on purpose so the returned metadata keys are unchanged.
            delimiter_name = val.lower()

    # 1b) parse the /fields list & store it
    if 'fields' not in metadata:
        raise KeyError(f"{file_path}: header has no '/fields' entry")
    fields_list = [f.strip() for f in metadata['fields'].split(',')]
    metadata['fields_list'] = fields_list

    # 1c) read the body into a DataFrame
    df = pd.read_csv(
        StringIO(body.strip()),
        sep=separators.get(delimiter_name, ','),
        names=fields_list,
        comment='/'
    )

    # 1d) detect single-spectrum files & fill lat/lon from the header if missing
    is_spectrum = ('lat' not in df.columns or 'lon' not in df.columns)
    metadata['single_spectrum'] = is_spectrum
    if is_spectrum:
        lat0 = float(metadata.get('north_latitude', metadata.get('south_latitude', 0)))
        lon0 = float(metadata.get('west_longitude', metadata.get('east_longitude', 0)))
        df['lat'], df['lon'] = lat0, lon0

    # 1e) ensure date/time exist
    if 'date' not in df.columns:
        df['date'] = metadata['start_date']
    if 'time' not in df.columns:
        df['time'] = metadata['start_time']

    # 1f) build datetime column (both parts coerced to text before joining)
    df['datetime'] = pd.to_datetime(df['date'].astype(str) + ' ' + df['time'].astype(str))

    # 1g) parse header start/end datetimes
    def _header_datetime(d, t):
        t_clean = re.sub(r'\[.*?\]', '', t)
        dfmt = '%Y-%m-%d' if '-' in d else '%Y%m%d'
        return datetime.strptime(f"{d} {t_clean}", f"{dfmt} %H:%M:%S")

    metadata['start_datetime'] = _header_datetime(metadata['start_date'], metadata['start_time'])
    metadata['end_datetime'] = _header_datetime(metadata['end_date'], metadata['end_time'])

    # 1h) keep only the five columns used downstream
    df = df[['date', 'time', 'lat', 'lon', 'datetime']]

    return metadata, df


In [8]:
# Fetch one true-colour image per date and keep it as an hv.RGB keyed by 'YYYY-MM-DD'.
# NOTE(review): `fc.get_dates` comes from the local `functions` module; presumably it
# returns datetimes between tspan[0] and tspan[1] at a 24-hour step - confirm.
dates = fc.get_dates(tspan[0], tspan[1], 24)
imgs = {}
for date in tqdm(dates, desc="Fetching true color from NASA WorldView"):
    daystr = date.strftime('%Y-%m-%d')
    # x, y and img remain bound at module level after the loop finishes.
    x, y, img = get_OCI_PACE_truecolor(daystr, size=areasize, bbox=bbox)
    imgs[daystr] = hv.RGB((x, y, img))

Fetching true color from NASA WorldView:   0%|          | 0/7 [00:00<?, ?it/s]

In [9]:
# ─── 2.a) COLLECT & PREPARE ALL FILES ──────────────────────────────────────────

# NOTE(review): absolute, environment-specific path; the cell finds nothing elsewhere.
root = Path("/home/jovyan/shared-public/pace-hackweek/SeePACE/Hackweek_PACE-PAX_Rrs")
metadata_list = []
search_types = ['Rrs', 'AOP']  # substrings looked for in each file's field names; extend if desired

# Parse every .sb file found under `root` (searched recursively).
for sb in root.rglob('*.sb'):
    meta, df = parse_sb(sb)

    # collapse spectral files to exactly one row
    if meta['single_spectrum']:
        df = df.head(1)

    meta['data'] = df
    # keep each search type that occurs as a substring of at least one field name
    meta['Data_Type'] = [
        t for t in search_types
        if any(t in f for f in meta['fields_list'])
    ]
    metadata_list.append(meta)

# One summary row per parsed file.
summary_df = pd.DataFrame([{
    'Station':        m['station'],
    'Data_File_Name': m['data_file_name'],
    'Start_Datetime': m['start_datetime'],
    'End_Datetime':   m['end_datetime'],
    'Data_Type':      m['Data_Type']
} for m in metadata_list])

display(summary_df)

Unnamed: 0,Station,Data_File_Name,Start_Datetime,End_Datetime,Data_Type
0,30,PACE-PAX_Shearwater_2024_GER_St_30.sb,2024-09-22 19:25:46,2024-09-22 19:25:46,[Rrs]
1,31,PACE-PAX_Shearwater_2024_GER_St_31.sb,2024-09-22 20:34:41,2024-09-22 20:34:41,[Rrs]
2,32,PACE-PAX_Shearwater_2024_GER_St_32.sb,2024-09-23 18:05:43,2024-09-23 18:05:43,[Rrs]
3,33,PACE-PAX_Shearwater_2024_GER_St_33.sb,2024-09-23 20:08:33,2024-09-23 20:08:33,[Rrs]
4,34,PACE-PAX_Shearwater_2024_GER_St_34.sb,2024-09-25 19:36:20,2024-09-25 19:36:20,[Rrs]
...,...,...,...,...,...
274,,PVST_SBCR_04_20240920_194506_C-OPS_Rrs_Lu0_Es_...,2024-09-20 19:45:01,2024-09-20 19:45:01,[Rrs]
275,,PVST_SBCR_04_20240920_195005_C-OPS_Rrs_Lu0_Es_...,2024-09-20 19:50:02,2024-09-20 19:50:02,[Rrs]
276,,PVST_SBCR_04_20240920_202300_C-OPS_Rrs_Lu0_Es_...,2024-09-20 20:22:53,2024-09-20 20:22:53,[Rrs]
277,,PVST_SBCR_04_20240920_202844_C-OPS_Rrs_Lu0_Es_...,2024-09-20 20:28:43,2024-09-20 20:28:43,[Rrs]


In [10]:
# ─── 4) BUILD & CONCATENATE XR DATASETS ─────────────────────────────────────

ds_list    = []
# NOTE(review): `file_names` is filled here but not read by the cells shown.
file_names = []

for m in metadata_list:
    # Give every row an integer 'record' number starting at 0.
    df = m['data'].reset_index(drop=True).reset_index().rename(columns={'index':'record'})
    ds = xr.Dataset(
        {
            'lat':      ('record', df['lat']),
            'lon':      ('record', df['lon']),
            'datetime': ('record', df['datetime'])
        },
        coords={'record': df['record']}
    )
    # promote to a 2-D Dataset along new 'file' dim
    ds = ds.expand_dims(file=[m['data_file_name']])
    ds_list.append(ds)
    file_names.append(m['data_file_name'])

# Stack the per-file datasets along 'file'. The printed result shows NaN/NaT
# where a file has fewer records than the longest one.
ds_combined = xr.concat(
    ds_list,
    dim='file',
    coords='minimal',
    compat='override'
)

print(ds_combined)

# --- assemble full DataFrame of points ---
# One row per in-situ record, tagged with its file name, data types and
# Mercator coordinates.
all_pts = []
for m in metadata_list:
    df = m['data'].copy()
    df['File']          = m['data_file_name']
    # NOTE(review): this replaces every row's own datetime with the file's header
    # start time, so all rows of a file share one timestamp - confirm intended.
    df['datetime']      = m['start_datetime']
    df['Data_Type_str'] = ', '.join(m['Data_Type'])
    # NOTE(review): unpacking zip(*[...]) into two targets fails for an empty frame.
    df['merc_x'], df['merc_y'] = zip(*[
        lonlat_to_mercator(lon, lat) for lon, lat in zip(df['lon'], df['lat'])
    ])
    all_pts.append(df)
full_df = pd.concat(all_pts, ignore_index=True)

<xarray.Dataset> Size: 76kB
Dimensions:   (file: 279, record: 11)
Coordinates:
  * record    (record) int64 88B 0 1 2 3 4 5 6 7 8 9 10
  * file      (file) object 2kB 'PACE-PAX_Shearwater_2024_GER_St_30.sb' ... '...
Data variables:
    lat       (file, record) float64 25kB 33.68 nan nan nan ... nan nan nan nan
    lon       (file, record) float64 25kB -119.6 nan nan nan ... nan nan nan nan
    datetime  (file, record) datetime64[ns] 25kB 2024-09-22T19:25:46 NaT ... NaT


In [16]:
# Group granule records by the calendar day taken from the first ten
# characters of their start-time string.
granules_by_date = defaultdict(list)
for g in granules_data:
    date = g['time'][:10]  # 'YYYY-MM-DD'
    granules_by_date[date].append(g)

# Sort the available days
# (union of days that have an image and days that have granules, so a day
# listed here is not guaranteed to have an entry in `imgs`).
available_days = sorted(set(imgs.keys()) | set(granules_by_date.keys()))

# Plot extent: the search bounding box projected to Mercator.
minmerc_x, minmerc_y, maxmerc_x, maxmerc_y = bbox_to_mercator(bbox)
x_range = (minmerc_x, maxmerc_x)
y_range = (minmerc_y, maxmerc_y)

In [47]:
# Wrap-and-merge filenames for hover
def wrap_files(vals):
    """Merge names into one de-duplicated, sorted, line-wrapped HTML string.

    Parameters
    ----------
    vals : iterable
        Names to merge (a list or a pandas Series, for example). Every item
        is converted with ``str``.

    Returns
    -------
    str
        The unique names joined by ``", "``, wrapped at 30 characters with
        ``<br>`` as the line break. An empty input gives ``""``.

    A single item is returned as the name itself; previously that case produced
    the ``str()`` of the whole container (``"['name']"`` for a list, or the
    full Series repr when called from a pandas aggregation).
    """
    names = sorted({str(v) for v in vals})
    return textwrap.fill(", ".join(names), width=30).replace("\n", '<br>')

# Function to generate a polygon from selected time
def make_granule_polygon(granules):
    """Turn granule records into a single styled ``hv.Polygons`` element.

    Each record supplies its ``'geometry'`` as the outline, plus its wrapped
    ``'granule'`` name and its ``'time'`` as value dimensions for the
    `granule_hover` tool.
    """
    records = [
        {
            ('x', 'y'): gran['geometry'],
            'granule': wrap_files([gran['granule']]),
            'time': gran['time'],
        }
        for gran in granules
    ]
    footprints = hv.Polygons(records, vdims=['granule', 'time'])
    return footprints.opts(
        fill_alpha=0.3,
        fill_color='pink',
        line_color='red',
        tools=[granule_hover],
    )

def make_insitu_points(df):
    """Build an ``hv.Points`` element with one marker per unique Mercator location.

    Rows sharing ``merc_x``/``merc_y`` are merged: the latest ``datetime`` is
    kept, file names go through `wrap_files`, and the distinct data-type
    strings are sorted and joined with ``<br>``.
    """
    def _join_types(values):
        return '<br>'.join(sorted(set(values)))

    aggregations = {
        'datetime':      'max',
        'File':          wrap_files,
        'Data_Type_str': _join_types,
    }
    per_location = df.groupby(['merc_x', 'merc_y'], as_index=False).agg(aggregations)

    markers = hv.Points(
        per_location,
        kdims=['merc_x', 'merc_y'],
        vdims=['datetime', 'File', 'Data_Type_str'],
    )
    return markers.opts(
        size=8, color='blue', tools=[hover, 'wheel_zoom', 'pan'], active_tools=['wheel_zoom']
    )

def make_transects_lines(df):
    """Draw one red path per file that contributes more than one row.

    Rows of each file are ordered by ``datetime`` and connected through their
    Mercator coordinates. Returns an ``hv.Overlay`` of the paths, or an empty
    ``hv.Path`` when no file has more than one row.
    """
    segments = []
    for _, rows in df.groupby('File'):
        if len(rows) <= 1:
            continue
        ordered = rows.sort_values('datetime')
        vertices = list(zip(ordered['merc_x'], ordered['merc_y']))
        path = hv.Path([vertices], kdims=['x', 'y']).opts(color='red', line_width=2)
        segments.append(path)

    if not segments:
        # Nothing to connect: hand back an empty element.
        return hv.Path([])
    return hv.Overlay(segments)

# Hover tool for granule footprints. `{safe}` is required on @granule because
# the value produced by wrap_files can contain <br> line breaks, which would
# otherwise be shown as literal text.
granule_hover = HoverTool(
    tooltips="""
    <div style='max-width:300px;'>
      <strong>Granule:</strong> @granule{safe}<br>
      <strong>Time:</strong> @time
    </div>
    """,
    point_policy='follow_mouse'
)

# Hover tool for in-situ points. Both @File and @Data_Type_str carry <br> line
# breaks (the data types are joined with '<br>' in make_insitu_points), so
# both need `{safe}` to be rendered as HTML instead of literal text.
hover = HoverTool(
    tooltips="""
    <div style='max-width:200px;'>
      <strong>Time:</strong> @datetime{%F}<br>
      <strong>File:</strong> @File{safe}<br>
      <strong>Type:</strong> @Data_Type_str{safe}
    </div>
    """,
    formatters={'@datetime':'datetime'},
    point_policy='snap_to_data'
)

def find_granules_for_point(point, granules):
    """List the granules whose footprint contains a point.

    Parameters
    ----------
    point : tuple
        ``(x, y)`` in the same coordinates as the granule geometries.
    granules : iterable of dict
        Records with ``'geometry'`` (a sequence of ``(x, y)`` vertices),
        ``'granule_index'`` and ``'granule'``.

    Returns
    -------
    dict
        ``{'granule_index': [...], 'granule': [...]}`` for the matching
        granules, in input order; both lists are empty when nothing matches.
    """
    # Local import: the file-level import only brings in Point and shape.
    from shapely.geometry import Polygon

    pt = Point(point)
    # The geometry is a bare vertex list, not a GeoJSON-like mapping, so it is
    # built with Polygon(...) rather than shape(...).
    matches = [g for g in granules if Polygon(g['geometry']).contains(pt)]
    return {
        'granule_index': [g['granule_index'] for g in matches],
        'granule': [g['granule'] for g in matches],
    }

def make_plot(selected_day=None, show_alldays=False, show_worldview=False, show_granules=False, show_esri=False, opacity=0.5):
    """Compose the map for one day (or all days) from the notebook-level data.

    Reads the module-level ``imgs``, ``granules_by_date``, ``full_df``,
    ``width``, ``height``, ``x_range`` and ``y_range``.

    Parameters
    ----------
    selected_day : str
        Day key, ``'YYYY-MM-DD'``.
    show_alldays : bool
        Use the granules and in-situ rows of every day instead of only
        ``selected_day``.
    show_worldview : bool
        Overlay the day's true-colour image with alpha ``opacity``.
    show_granules : bool
        Overlay the granule footprints.
    show_esri : bool
        Use Esri imagery tiles as the base map instead of CartoLight.
    opacity : float
        Alpha applied to the true-colour image.

    Returns
    -------
    tuple
        ``(plot, points, granules)``. The same three-item shape is returned
        on every path; when no image exists for ``selected_day`` the plot is a
        text placeholder, ``points`` is ``None`` and ``granules`` is ``[]``.
    """
    truecolor = imgs.get(selected_day)
    if truecolor is None:
        placeholder = hv.Text(0, 0, f"No image for {selected_day}").opts(width=600, height=500)
        return placeholder, None, []

    if show_alldays:
        granules = [g for day_granules in granules_by_date.values() for g in day_granules]
        df = full_df.copy()
    else:
        granules = granules_by_date.get(selected_day, [])
        df = full_df[full_df['datetime'].dt.date == pd.to_datetime(selected_day).date()]

    points = make_insitu_points(df)
    transects = make_transects_lines(df)
    n_points = len(df)

    # Base tile layer: only the selected source is instantiated.
    tile_source = EsriImagery() if show_esri else CartoLight()
    overlay = tile_source.opts(
        width=width, height=height,
        xaxis='bottom', yaxis='left',
        xformatter=MercatorTickFormatter(),
        yformatter=MercatorTickFormatter(),
        xlim=x_range, ylim=y_range,
        xlabel='Longitude',
        ylabel='Latitude',
    )

    if show_worldview:
        overlay = overlay * truecolor.opts(alpha=opacity)
    if show_granules:
        overlay = overlay * hv.Overlay([make_granule_polygon(granules)])
    overlay = overlay * hv.Overlay([points, transects])

    # The frame size comes from the configured width/height rather than from
    # arrays left over by the image-download loop.
    plot = overlay.opts(
        ylim=y_range,
        xlim=x_range,
        width=width,
        height=height,
        xlabel='Longitude',
        ylabel='Latitude',
        framewise=False,
        title=f"Granules on {selected_day} / {n_points} in-situ points"
    )
    return plot, points, granules

# Metadata display pane
# Both panes start out showing an empty DataFrame.
insitu_pane = pn.pane.DataFrame(
    pd.DataFrame(), name='Selected In-situ'
)
granule_pane = pn.pane.DataFrame(
    pd.DataFrame(), name='Available Granules'
)

# Tap callback
# Module-level holders for the tap results, initialised to None.
selected_metadata = None
avail_granules = None

def tap_callback(x, y):
    """Show the in-situ rows nearest to a tapped location and the granules covering it.

    Parameters
    ----------
    x, y : float or None
        Tapped position in the plot's (Mercator) coordinates. Nothing happens
        while either value is still ``None``.

    Side effects
    ------------
    Updates the module-level ``selected_metadata`` and ``avail_granules`` and
    replaces the objects shown in ``insitu_pane`` and ``granule_pane``.
    """
    global selected_metadata, avail_granules

    # The stream is created with x=None, y=None; skip until a real tap arrives.
    if x is None or y is None:
        return

    # Squared distance in Mercator space; every row tied for the minimum is kept.
    dist = (full_df['merc_x'] - x) ** 2 + (full_df['merc_y'] - y) ** 2
    nearest_df = full_df[dist == dist.min()]

    sel_df = nearest_df[['File', 'datetime', 'Data_Type_str']].rename(
        columns={'datetime': 'Datetime', 'Data_Type_str': 'Type'}
    ).reset_index(drop=True)
    gr_df = pd.DataFrame(find_granules_for_point((x, y), current_granules))

    selected_metadata = sel_df
    insitu_pane.object = sel_df

    avail_granules = gr_df
    granule_pane.object = gr_df

# Tap stream with no initial position; tap_callback is subscribed to it.
tap = hv.streams.Tap(x=None, y=None)
tap.add_subscriber(tap_callback)

# Define toggles
alldays_toggle = pn.widgets.Checkbox(
    name='Show All Days',
    value=False
)
granules_toggle = pn.widgets.Checkbox(
    name='Show Granules',
    value=False
)
truecolor_toggle = pn.widgets.Checkbox(
    name='Show WorldView',
    value=False
)
toggle_esri = pn.widgets.Checkbox(name='Use Esri Imagery', value=False)

# Sliders
day_slider = pn.widgets.DiscreteSlider(name="Date", options=available_days)
alpha_slider = pn.widgets.FloatSlider(name='Overlay Opacity', start=0.0, end=1.0, step=0.05, value=1.0)

# Granules belonging to the plot currently shown; empty until the first render.
current_granules = []
# Bind to panel
@pn.depends(
    selected_day=day_slider.param.value,
    show_alldays=alldays_toggle.param.value,
    show_worldview=truecolor_toggle.param.value,
    show_granules=granules_toggle.param.value,
    show_esri=toggle_esri.param.value,
    opacity=alpha_slider.param.value,
)
def update(selected_day, show_alldays, show_worldview, show_granules, show_esri, opacity):
    """Re-render the map from the current widget values.

    Stores the granules of the new plot in the module-level
    ``current_granules``, points the ``tap`` stream at the new points element
    and returns the plot.
    """
    global current_granules
    rendered = make_plot(
        selected_day, show_alldays, show_worldview, show_granules, show_esri, opacity
    )
    plot, tap_source, current_granules = rendered
    tap.source = tap_source
    return plot

# Layout
# Title, widget rows, the reactive map returned by `update`, then the two
# metadata tables.
app = pn.Column(
    "# Daily Granules and In-Situ Viewer",
    pn.Row(day_slider, alpha_slider),
    pn.Row(alldays_toggle, truecolor_toggle, granules_toggle, toggle_esri),
    update,
    pn.Spacer(height=20),
    pn.Row(insitu_pane),
    pn.Row(granule_pane),
)

app.servable()