In [1]:
from io import StringIO
import builtins
from shapely.geometry import Point, Polygon
from pathlib import Path
import re
import math
import textwrap
from datetime import datetime, timedelta
import earthaccess
import xarray as xr
import numpy as np
import pandas as pd
import panel as pn
from collections import defaultdict
from tqdm.notebook import tqdm
import cartopy.crs as ccrs

import SeaPACE_functions as fc

import holoviews as hv
from bokeh.models import HoverTool, MercatorTickFormatter
from holoviews import opts, streams
from holoviews.element.tiles import EsriImagery, OSM, CartoLight
hv.extension('bokeh')
pn.extension()

In [2]:
# Search window as (start, end) date strings; used for the granule search and
# for building the list of days to fetch imagery for.
tspan = ("2024-09-22", "2024-09-28")
# Area of interest as (minlon, minlat, maxlon, maxlat) in degrees.
bbox = (-125., 32., -116., 38.)
# Map / image size in pixels, ordered (height, width).
areasize = (600, 900)
height, width = areasize
# NOTE(review): `proj` is not referenced by any other cell visible here.
proj = ccrs.PlateCarree()

In [3]:
# Query granule metadata for the PACE_OCI_L2_AOP collection, limited to the
# configured time span and bounding box. Later cells iterate the results for
# their 'umm' metadata and index them by position (search_results[i]).
search_results = earthaccess.search_data(
    short_name="PACE_OCI_L2_AOP",
    temporal=tspan,
    bounding_box=bbox,
)

In [4]:
def lonlat_to_mercator(lon, lat):
    """Convert a longitude/latitude pair in degrees to spherical Mercator x/y.

    The sphere radius is 6378137.0, so the result is in the same units
    (metres). Returns an ``(x, y)`` tuple.
    """
    radius = 6378137.0
    easting = lon * (math.pi/180.0) * radius
    # Half of the co-latitude angle, expressed in radians.
    half_angle = (90 + lat) * math.pi/360.0
    northing = math.log(math.tan(half_angle)) * radius
    return easting, northing

def mercator_to_lonlat(merc_x, merc_y):
    """Convert spherical Mercator x/y (sphere radius 6378137.0) back to degrees.

    Returns a ``(longitude, latitude)`` tuple in degrees.
    """
    radius = 6378137.0
    longitude = merc_x / (math.pi * radius) * 180.0
    # Inverse of log(tan(pi/4 + lat/2)): recover the latitude in radians first.
    latitude_rad = math.atan(math.exp(merc_y / radius)) * 2 - math.pi / 2
    return longitude, math.degrees(latitude_rad)

def bbox_to_mercator(bbox):
    """Project a ``(minlon, minlat, maxlon, maxlat)`` box to Mercator coordinates.

    Returns ``(min_x, min_y, max_x, max_y)`` obtained by projecting the
    lower-left and upper-right corners with ``lonlat_to_mercator``.
    """
    west, south, east, north = bbox
    left, bottom = lonlat_to_mercator(west, south)
    right, top = lonlat_to_mercator(east, north)
    return left, bottom, right, top
    
def get_OCI_PACE_truecolor(time, size=(400, 800), bbox=(-180, -90, 180, 90)):
    """Download the OCI_PACE_True_Color layer for one day from the NASA GIBS WMS.

    Parameters
    ----------
    time : str
        Day to request, in the format ``YYYY-MM-DD``.
    size : tuple
        Requested image size as ``(height, width)`` in pixels.
    bbox : tuple
        Bounding box as ``(minlon, minlat, maxlon, maxlat)`` in degrees.

    Returns
    -------
    x, y : numpy.ndarray
        Evenly spaced Mercator coordinates spanning the projected bounding
        box, one value per image column / row.
    img : numpy.ndarray
        The downloaded image with its rows reversed (flipped vertically).
    """
    # Imported here so scikit-image is only required when imagery is fetched.
    from skimage import io

    height, width = size
    minlon, minlat, maxlon, maxlat = bbox
    #  Construct Geographic projection URL.
    gibs_url = 'https://gibs.earthdata.nasa.gov/wms/epsg4326/best/wms.cgi?version=1.3.0&service=WMS&request=GetMap&format=image/png&STYLE=default'
    # The bbox is sent as lat,lon pairs (axis order used with WMS 1.3.0 and
    # EPSG:4326). The edges are passed unrounded: truncating them to integers
    # would request a different area than the one the returned x/y axes
    # describe whenever the bounding box has fractional degrees.
    proj4326 = f'{gibs_url}&bbox={minlat},{minlon},{maxlat},{maxlon}&CRS=EPSG:4326&HEIGHT={height}&WIDTH={width}&TIME={time}&layers=OCI_PACE_True_Color'

    # Request image.
    img = io.imread(proj4326)

    # NOTE(review): the image is requested in EPSG:4326 but its rows are laid
    # out on a linearly spaced Mercator y axis, so interior rows are presumably
    # slightly displaced in latitude - confirm whether this matters here.
    minmerc_x, minmerc_y, maxmerc_x, maxmerc_y = bbox_to_mercator(bbox)
    x = np.linspace(minmerc_x, maxmerc_x, img.shape[1])
    y = np.linspace(minmerc_y, maxmerc_y, img.shape[0])
    # Reverse the row order so the rows follow the ascending y coordinates.
    img = img[::-1, :]

    return x, y, img

# ─── 1) DEFINE SB_PARSER ───────────────────────────────────────────────────────
def parse_sb(file_path):
    """Parse a SeaBASS ``.sb`` file into selected header metadata and a slim table.

    Parameters
    ----------
    file_path : str or pathlib.Path
        Path of the file to read.

    Returns
    -------
    metadata : dict
        The selected header entries (with any ``[...]`` suffix removed) plus
        ``fields_list``, ``single_spectrum``, ``start_datetime`` and
        ``end_datetime``.
    df : pandas.DataFrame
        One row per data line with the columns ``date``, ``time``, ``lat``,
        ``lon`` and ``datetime``.

    Raises
    ------
    ValueError
        If the text does not contain ``/end_header``.
    KeyError
        If a required header entry (``fields``, start/end date or time) is missing.
    """
    # Header entries that are kept; everything else in the header is ignored.
    wanted_keys = {
        'station', 'data_file_name',
        'start_date', 'end_date',
        'start_time', 'end_time',
        'fields',
        'north_latitude', 'south_latitude',
        'west_longitude', 'east_longitude',
    }

    text = Path(file_path).read_text()
    header, body = text.split('/end_header', 1)

    # 1a) extract header entries of the form "/key=value"
    metadata = {}
    for line in header.splitlines():
        if not line.startswith('/') or '=' not in line:
            continue
        key, val = line[1:].split('=', 1)
        # Drop bracketed suffixes such as "[GMT]" or "[DEG]" from the value.
        key, val = key.strip(), re.sub(r'\[.*?\]', '', val).strip()
        if key in wanted_keys:
            metadata[key] = val

    # 1b) parse the /fields list & store it
    fields_list = [f.strip() for f in metadata['fields'].split(',')]
    metadata['fields_list'] = fields_list

    # 1c) read the body into a DataFrame
    df = pd.read_csv(
        StringIO(body.strip()),
        sep=',',
        names=fields_list,
        comment='/'
    )

    # 1d) detect single-spectrum files & fill lat/lon from the header if missing
    is_spectrum = ('lat' not in df.columns or 'lon' not in df.columns)
    metadata['single_spectrum'] = is_spectrum
    if is_spectrum:
        lat0 = float(metadata.get('north_latitude', metadata.get('south_latitude', 0)))
        lon0 = float(metadata.get('west_longitude', metadata.get('east_longitude', 0)))
        df['lat'], df['lon'] = lat0, lon0

    # 1e) ensure date/time exist, falling back to the header start values
    if 'date' not in df.columns:
        df['date'] = metadata['start_date']
    if 'time' not in df.columns:
        df['time'] = metadata['start_time']

    # 1f) build datetime column
    df['datetime'] = pd.to_datetime(
        df['date'].astype(str) + ' ' + df['time'].astype(str)
    )

    # 1g) parse header start/end datetimes (dates may be YYYY-MM-DD or YYYYMMDD)
    def _pd(d, t):
        t_clean = re.sub(r'\[.*?\]', '', t)
        dfmt = '%Y-%m-%d' if '-' in d else '%Y%m%d'
        return datetime.strptime(f"{d} {t_clean}", f"{dfmt} %H:%M:%S")
    metadata['start_datetime'] = _pd(metadata['start_date'], metadata['start_time'])
    metadata['end_datetime'] = _pd(metadata['end_date'], metadata['end_time'])

    # 1h) keep only the five position/time columns; copy so callers can add
    #     columns to the result without touching the full parsed table.
    df = df[['date', 'time', 'lat', 'lon', 'datetime']].copy()

    return metadata, df

def read_sb(filename_sb):
    """Read a SeaBASS ``.sb`` file and attach header position/time to every row.

    The header is parsed into a dict, the data section is read with the
    ``/fields`` names as column labels, ``get_sb_datetime`` is applied to the
    table, and the columns ``profile_lat`` (from ``north_latitude``),
    ``profile_lon`` (from ``east_longitude``) and ``profile_time`` (from
    ``start_date`` + ``start_time``) are added.

    Parameters
    ----------
    filename_sb : str or path-like
        File to read.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    ValueError
        If the file has no ``/end_header`` line.
    KeyError
        If a required header entry is missing.
    """
    # 1) Load all lines
    # NOTE(review): builtins.open is used explicitly, presumably because `open`
    # may be shadowed in the notebook namespace - confirm.
    with builtins.open(filename_sb, "r") as f:
        lines = [l.rstrip("\n") for l in f]

    # 2) Find where the header ends (tolerating surrounding whitespace)
    endh = next(
        (i for i, line in enumerate(lines) if line.strip() == "/end_header"),
        None,
    )
    if endh is None:
        raise ValueError(f"No '/end_header' line found in {filename_sb}")

    # 3) Parse header into a dict, but only lines with "/" **and** "="
    headers = {}
    for line in lines[:endh]:
        if not line.startswith("/") or "=" not in line:
            continue
        key, val = line[1:].split("=", 1)  # strip "/" then split
        headers[key.strip()] = val.strip()

    # 4) Read the CSV portion into a DataFrame; field names are stripped so
    #    "a, b" in the header does not produce a column called " b".
    field_names = [name.strip() for name in headers["fields"].split(",")]
    df = pd.read_csv(
        filename_sb,
        skiprows=endh + 1,
        names=field_names,
        na_values=headers.get("missing", "")
    )

    # 5) Build the datetime index in place (no-op with a printed notice when
    #    the table has no recognised date/time columns)
    get_sb_datetime(df)

    # 6) Extract & clean metadata from headers
    #    Strip off any "[...]" before converting to float
    lat = float(headers["north_latitude"].split("[", 1)[0])
    lon = float(headers["east_longitude"].split("[", 1)[0])

    #    Strip "[GMT]" (or any other bracketed suffix) from the time field
    time_str = headers["start_time"].split("[", 1)[0]
    dt0 = pd.to_datetime(headers["start_date"] + " " + time_str)

    # 7) Attach them as new columns on every row
    df["profile_lat"] = lat
    df["profile_lon"] = lon
    df["profile_time"] = dt0

    return df

def get_sb_datetime(df):
    """Add a ``datetime`` column built from the date/time columns and index by it.

    The frame is modified in place. Column combinations are tried in a fixed
    order; when none applies a notice is printed and the frame is left
    untouched.
    """
    present = set(df.columns)

    def has(*names):
        return present.issuperset(names)

    if has("year", "month", "day", "hour", "minute", "second"):
        df["datetime"] = pd.to_datetime(
            df[["year", "month", "day", "hour", "minute", "second"]])
    elif has("year", "month", "day", "time"):
        ymd = (df["year"].astype(str)
               + df["month"].astype(str).str.zfill(2)
               + df["day"].astype(str).str.zfill(2))
        df["datetime"] = pd.to_datetime(ymd + ' ' + df["time"])
    elif has("date", "time"):
        df["datetime"] = pd.to_datetime(df["date"].astype(str) + ' ' + df["time"])
    elif has("year", "month", "day"):
        df["datetime"] = pd.to_datetime(df[["year", "month", "day"]])
    elif has("date", "hour", "minute", "second"):
        clock = (df["hour"].astype(str).str.zfill(2)
                 + ':' + df["minute"].astype(str).str.zfill(2) + ':'
                 + df["second"].astype(str).str.zfill(2))
        df["datetime"] = pd.to_datetime(df["date"].astype(str) + ' ' + clock)
    else:
        print("Unrecognized date/time format in DataFrame columns."
              "\nMay be a profile, but doublecheck.")
        return

    # Use the freshly built timestamps as the row index.
    df.set_index("datetime", inplace=True)
    
def clean_sb_column(col):
    """Turn an ``Rrs<wavelength>`` column name into its wavelength.

    ``Rrs<number>`` becomes the number (an ``int`` when it has no fractional
    part, otherwise a ``float``); ``Rrs<number>_unc`` becomes the string
    ``"<float>_unc"``; any other name is returned unchanged.
    """
    parsed = re.match(r'^Rrs(\d+(?:\.\d+)?)(_unc)?$', col)
    if parsed is None:
        return col

    wavelength = float(parsed.group(1))
    if parsed.group(2):
        # Uncertainty columns stay strings so they never count as wavelengths.
        return f"{wavelength}_unc"
    if wavelength.is_integer():
        return int(wavelength)
    return wavelength

In [5]:
# Collect one record per search result: begin time, position in
# `search_results`, granule name, and footprint in Mercator coordinates.
granules_data = []
for i, grmeta in enumerate(tqdm(search_results, desc='Processing Granules')):
    gr_name = grmeta['umm']['GranuleUR']
    gr_time = grmeta['umm']['TemporalExtent']['RangeDateTime']['BeginningDateTime']
    polygons = grmeta['umm']['SpatialExtent']['HorizontalSpatialDomain']['Geometry']['GPolygons']
    # Only the first polygon of each granule is used as its footprint.
    polygon_coord = [(lonlat_to_mercator(pt['Longitude'], pt['Latitude'])) for pt in polygons[0]['Boundary']['Points']]
    # 'granule_index' is the position in search_results, used later to reopen the granule.
    granules_data.append({'time': gr_time, 'granule_index': i, 'granule': gr_name, 'geometry': polygon_coord})

Processing Granules:   0%|          | 0/16 [00:00<?, ?it/s]

In [6]:
# Build the list of days to fetch imagery for.
# NOTE(review): fc.get_dates presumably returns datetime-like objects spaced
# 24 hours apart between the two dates - confirm against SeaPACE_functions.
dates = fc.get_dates(tspan[0], tspan[1], 24)
# Maps 'YYYY-MM-DD' -> hv.RGB true-color image for that day.
imgs = {}
for date in tqdm(dates, desc="Fetching true color from NASA WorldView"):
    daystr = date.strftime('%Y-%m-%d')
    # x and y stay bound at notebook level after the loop; make_plot reads
    # x.size and y.size for the final plot dimensions.
    x, y, img = get_OCI_PACE_truecolor(daystr, size=areasize, bbox=bbox)
    imgs[daystr] = hv.RGB((x, y, img))

Fetching true color from NASA WorldView:   0%|          | 0/7 [00:00<?, ?it/s]

In [7]:
# ─── 2.a) COLLECT & PREPARE ALL FILES ──────────────────────────────────────────

# Directory that is searched recursively for SeaBASS (*.sb) files.
# NOTE(review): absolute, environment-specific path - adjust when running elsewhere.
root = Path("/home/jovyan/shared-public/pace-hackweek/SeePACE/Hackweek_PACE-PAX_Rrs")
# One metadata dict per file; each also carries its table under 'data'.
metadata_list = []
search_types = ['Rrs', 'AOP'] #  Add additional search parameters if desired

for sb in root.rglob('*.sb'):
    meta, df = parse_sb(sb)

    # collapse spectral files to exactly one row
    # (parse_sb fills every row of such files with the same header lat/lon)
    if meta['single_spectrum']:
        df = df.head(1)

    meta['data'] = df
    # detect which of your search_types appear in the original fields
    # (substring match against each field name)
    meta['Data_Type'] = [
        t for t in search_types
        if any(t in f for f in meta['fields_list'])
    ]
    metadata_list.append(meta)

# One summary row per file for display.
summary_df = pd.DataFrame([{
    'Station':        m['station'],
    'Data_File_Name': m['data_file_name'],
    'Start_Datetime': m['start_datetime'],
    'End_Datetime':   m['end_datetime'],
    'Data_Type':      m['Data_Type']
} for m in metadata_list])

display(summary_df)

Unnamed: 0,Station,Data_File_Name,Start_Datetime,End_Datetime,Data_Type
0,30,PACE-PAX_Shearwater_2024_GER_St_30.sb,2024-09-22 19:25:46,2024-09-22 19:25:46,[Rrs]
1,31,PACE-PAX_Shearwater_2024_GER_St_31.sb,2024-09-22 20:34:41,2024-09-22 20:34:41,[Rrs]
2,32,PACE-PAX_Shearwater_2024_GER_St_32.sb,2024-09-23 18:05:43,2024-09-23 18:05:43,[Rrs]
3,33,PACE-PAX_Shearwater_2024_GER_St_33.sb,2024-09-23 20:08:33,2024-09-23 20:08:33,[Rrs]
4,34,PACE-PAX_Shearwater_2024_GER_St_34.sb,2024-09-25 19:36:20,2024-09-25 19:36:20,[Rrs]
...,...,...,...,...,...
274,,PVST_SBCR_04_20240920_194506_C-OPS_Rrs_Lu0_Es_...,2024-09-20 19:45:01,2024-09-20 19:45:01,[Rrs]
275,,PVST_SBCR_04_20240920_195005_C-OPS_Rrs_Lu0_Es_...,2024-09-20 19:50:02,2024-09-20 19:50:02,[Rrs]
276,,PVST_SBCR_04_20240920_202300_C-OPS_Rrs_Lu0_Es_...,2024-09-20 20:22:53,2024-09-20 20:22:53,[Rrs]
277,,PVST_SBCR_04_20240920_202844_C-OPS_Rrs_Lu0_Es_...,2024-09-20 20:28:43,2024-09-20 20:28:43,[Rrs]


In [8]:
# ─── 4) assemble full DataFrame of points ─────────────────────────────────────
# Build one table of in-situ points across all files, with file name,
# data-type label and Mercator coordinates on every row.
all_pts = []
for m in metadata_list:
    df = m['data'].copy()
    df['File']          = m['data_file_name']
    # Every row gets the file's header start time; this replaces the per-row
    # 'datetime' that parse_sb computed.
    df['datetime']      = m['start_datetime']
    df['Data_Type_str'] = ', '.join(m['Data_Type'])
    # NOTE(review): the unpacking below raises for a table with zero rows.
    df['merc_x'], df['merc_y'] = zip(*[
        lonlat_to_mercator(lon, lat) for lon, lat in zip(df['lon'], df['lat'])
    ])
    all_pts.append(df)
full_df = pd.concat(all_pts, ignore_index=True)

# Group granule records by the date part of their begin-time string.
granules_by_date = defaultdict(list)
for g in granules_data:
    date = g['time'][:10]  # 'YYYY-MM-DD'
    granules_by_date[date].append(g)

# Sort the available days
# (union of days with imagery and days with granules, so a day may lack either)
available_days = sorted(set(imgs.keys()) | set(granules_by_date.keys()))

# Map extent in Mercator coordinates, shared by the plotting functions.
minmerc_x, minmerc_y, maxmerc_x, maxmerc_y = bbox_to_mercator(bbox)
x_range = (minmerc_x, maxmerc_x)
y_range = (minmerc_y, maxmerc_y)

In [14]:
# Wrap-and-merge filenames for hover
def wrap_files(vals):
    """Merge names into one string for a hover tooltip.

    The unique names are sorted, joined with ``", "``, wrapped at 30
    characters, and the line breaks are replaced by ``<br>``.

    Parameters
    ----------
    vals : iterable
        Names to merge (a list or a pandas Series, for example).

    Returns
    -------
    str
    """
    # A single name goes through the same path as several names. Previously a
    # one-element input was rendered with str() on the container itself, which
    # put the list/Series repr into the tooltip instead of the name.
    names = sorted({str(v) for v in vals})
    joined = ", ".join(names)
    return textwrap.fill(joined, width=30).replace("\n", '<br>')

# Function to generate a polygon from selected time
def make_granule_polygon(granules):
    """Build the hoverable footprint layer for a list of granule records.

    Each record supplies its ``geometry`` as the polygon outline and its
    ``granule`` name and ``time`` as hover values.
    """
    footprints = [
        {
            ('x', 'y'): g['geometry'],
            'granule': wrap_files([g['granule']]),
            'time': g['time']
        }
        for g in granules
    ]
    layer = hv.Polygons(footprints, vdims=['granule', 'time'])
    return layer.opts(
        fill_alpha=0.3,
        fill_color='pink',
        line_color='red',
        tools=[granule_hover]
    )

def make_insitu_points(df):
    """Build the hoverable in-situ point layer, one point per unique location.

    Rows sharing the same ``merc_x``/``merc_y`` are merged: the latest
    ``datetime`` is kept, file names are merged with ``wrap_files`` and the
    unique data-type labels are joined with ``<br>``.

    Parameters
    ----------
    df : pandas.DataFrame
        In-situ rows with ``merc_x``, ``merc_y``, ``datetime``, ``File`` and
        ``Data_Type_str`` columns. An empty frame (with or without those
        columns) yields an empty layer.

    Returns
    -------
    hv.Points
    """
    if df.empty:
        # Grouping a column-less empty frame raises KeyError, so use a typed
        # empty table with the expected columns instead.
        grouped = pd.DataFrame({
            'merc_x': pd.Series(dtype='float64'),
            'merc_y': pd.Series(dtype='float64'),
            'datetime': pd.Series(dtype='datetime64[ns]'),
            'File': pd.Series(dtype='object'),
            'Data_Type_str': pd.Series(dtype='object'),
        })
    else:
        grouped = df.groupby(['merc_x', 'merc_y'], as_index=False).agg({
            'datetime':       'max',
            'File':           wrap_files,
            'Data_Type_str':  lambda v: '<br>'.join(sorted(set(v)))
        })
    points = hv.Points(
        grouped,
        kdims=['merc_x', 'merc_y'],
        vdims=['datetime', 'File', 'Data_Type_str']
    ).opts(
        size=8, color='blue', tools=[hover, 'wheel_zoom', 'pan'], active_tools=['wheel_zoom']
    )
    return points

def make_transects_lines(df):
    """Build one red line per file that contributes more than one point.

    Parameters
    ----------
    df : pandas.DataFrame
        In-situ rows with ``File``, ``datetime``, ``merc_x`` and ``merc_y``
        columns. An empty frame (with or without those columns) is accepted.

    Returns
    -------
    hv.Overlay of hv.Path elements, or an empty hv.Path when no file has
    more than one point.
    """
    lines = []
    # An empty frame may carry no columns at all, in which case groupby('File')
    # would raise KeyError.
    if not df.empty:
        for fname, sub in df.groupby('File'):
            if len(sub) > 1:
                # Sort by datetime with a stable sort: rows that share a
                # timestamp keep their original (file) order, so the line
                # does not jump between tied points.
                sub_sorted = sub.sort_values('datetime', kind='mergesort')
                coords = list(zip(sub_sorted['merc_x'], sub_sorted['merc_y']))
                line = hv.Path([coords], kdims=['x', 'y']).opts(color='red', line_width=2)
                lines.append(line)

    # Combine all lines, or return an empty element if no line exists
    if lines:
        return hv.Overlay(lines)
    return hv.Path([])

def make_plot(selected_day=None, show_alldays=False, show_worldview=False, show_granules=False, show_esri=False, opacity=0.5):
    """Compose the map for one day (or all days) and return it with its data.

    Parameters
    ----------
    selected_day : str
        Day key (``'YYYY-MM-DD'``) into ``imgs`` and ``granules_by_date``.
    show_alldays : bool
        Show granules and in-situ points of every day instead of one day.
    show_worldview : bool
        Overlay the true-color image of ``selected_day``.
    show_granules : bool
        Overlay the granule footprints.
    show_esri : bool
        Use Esri imagery as base map instead of CartoLight.
    opacity : float
        Alpha applied to the true-color overlay.

    Returns
    -------
    tuple
        ``(plot, points, df, granules)``: the composed plot, the in-situ
        point layer, the in-situ rows shown, and the granule records shown.
        The same four-element shape is returned when no image exists for
        ``selected_day`` (placeholder text, empty layer, empty table, empty
        list), so callers can always unpack the result.
    """
    truecolor = imgs.get(selected_day)
    if truecolor is None:
        placeholder = hv.Text(0, 0, f"No image for {selected_day}").opts(width=600, height=500)
        no_points = hv.Points([], kdims=['merc_x', 'merc_y'])
        return placeholder, no_points, full_df.iloc[0:0], []

    if show_alldays:
        granules = sum(granules_by_date.values(), [])
        df = full_df.copy()
    else:
        granules = granules_by_date.get(selected_day, [])
        # An empty selection keeps its columns, so the layer builders and the
        # tap callback can still address them.
        df = full_df[full_df['datetime'].dt.date == pd.to_datetime(selected_day).date()]

    polygons = make_granule_polygon(granules)
    points = make_insitu_points(df)
    transects = make_transects_lines(df)

    # Base map: true-color world imagery or the light Carto tiles.
    base_tiles = EsriImagery if show_esri else CartoLight
    overlay = base_tiles().opts(
        width=width, height=height,
        xaxis='bottom', yaxis='left',
        xformatter=MercatorTickFormatter(),
        yformatter=MercatorTickFormatter(),
        xlim=x_range, ylim=y_range,
        xlabel='Longitude',
        ylabel='Latitude',
    )

    n_points = len(df)

    if show_worldview:
        overlay = overlay * truecolor.opts(alpha=opacity)
    if show_granules:
        overlay = overlay * hv.Overlay([polygons])
    overlay = overlay * hv.Overlay([points, transects])

    # Size comes from the configured map dimensions rather than from loop
    # variables left over by the image download cell.
    return overlay.opts(
        ylim=y_range,
        xlim=x_range,
        width=width,
        height=height,
        xlabel='Longitude',
        ylabel='Latitude',
        framewise=False,
        title=f"Granules on {selected_day} / {n_points} in-situ points"
    ), points, df, granules

def find_granules_for_point(x, y, pt_dt, granules, window_minutes=None):
    """List the granules that cover a point in both space and time.

    Parameters
    ----------
    x, y : float
        Point location in the same (Mercator) coordinates as the granule
        ``geometry`` outlines.
    pt_dt : datetime-like or sequence of datetime-like
        In-situ time(s); naive values are treated as UTC.
    granules : list of dict
        Granule records with ``geometry``, ``time``, ``granule`` and
        ``granule_index`` entries.
    window_minutes : int, optional
        Half-width of the time window. Defaults to the notebook-level
        ``window`` value set by ``map_update``.

    Returns
    -------
    pandas.DataFrame
        Columns ``granule_index``, ``datetime`` and ``granule``; one row per
        granule whose footprint contains the point and whose begin time lies
        within the window of at least one in-situ time. The columns are
        present even when nothing matches.
    """
    columns = ['granule_index', 'datetime', 'granule']
    if window_minutes is None:
        window_minutes = window

    # The time window does not depend on the granule, so build it once.
    times = pd.to_datetime(pd.Series(pt_dt))
    if times.dt.tz is None:
        times = times.dt.tz_localize('UTC')
    else:
        times = times.dt.tz_convert('UTC')
    half_width = pd.Timedelta(minutes=window_minutes)
    dt_beg = times - half_width
    dt_end = times + half_width

    pt = Point(x, y)
    rows = []
    for g in granules:
        grdt = pd.to_datetime(g['time'])
        if grdt.tzinfo is None:
            grdt = grdt.tz_localize('UTC')
        in_window = ((grdt >= dt_beg) & (grdt <= dt_end)).any()
        if in_window and Polygon(g['geometry']).contains(pt):
            rows.append({
                'granule_index': g['granule_index'],
                'datetime': grdt,
                'granule': g['granule'],
            })
    return pd.DataFrame(rows, columns=columns)

def tap_callback(x, y):
    """Handle a map tap: select the nearest in-situ rows and matching granules.

    Updates the notebook-level ``selected_metadata`` and ``avail_granules``
    and refreshes the two table panes.
    """
    global selected_metadata, avail_granules

    # Squared distance (Mercator units) from the tap to every shown point.
    candidates = current_insitu.copy()
    candidates['dist'] = ((candidates['merc_x'] - x)**2 + (candidates['merc_y'] - y)**2)

    # Keep every row that ties for the smallest distance.
    closest = candidates[candidates['dist'] == candidates['dist'].min()]

    picked = (
        closest[['File', 'datetime', 'Data_Type_str']]
        .rename(columns={'datetime': 'Datetime', 'Data_Type_str': 'Type'})
        .reset_index(drop=True)
    )
    # Record the tap location alongside the selected rows.
    picked['x'] = x
    picked['y'] = y
    selected_metadata = picked
    insitu_pane.object = picked

    matches = find_granules_for_point(x, y, picked['Datetime'], current_granules)
    avail_granules = matches
    granule_pane.object = matches

def make_spectral_plot(matchup_df):
    """Plot satellite vs. in-situ spectra for the first row of a match-up table.

    Parameters
    ----------
    matchup_df : pandas.DataFrame
        Match-up table with satellite columns named ``sat_rrs<wavelength>``
        and in-situ columns whose labels are numeric wavelengths. Only the
        first row is plotted.

    Returns
    -------
    hv.Overlay
        Satellite spectrum and the in-situ spectrum interpolated to the
        satellite wavelengths.
    """
    first = matchup_df.iloc[0]

    # Satellite data (column labels may be a mix of strings and numbers)
    sat_cols = [
        c for c in matchup_df.columns
        if isinstance(c, str) and c.startswith('sat_rrs')
    ]
    sat_wl = np.array([float(c.replace('sat_rrs', '')) for c in sat_cols])
    sat_val = first[sat_cols].astype(float).values

    # In-situ data (numeric column names)
    insitu_cols = []
    for c in matchup_df.columns:
        try:
            float(c)
        except (TypeError, ValueError):
            continue
        insitu_cols.append(c)

    insitu_wl = np.array([float(c) for c in insitu_cols])
    insitu_val = first[insitu_cols].astype(float).values

    # Drop missing in-situ values (a NaN would spread to the neighbouring
    # interpolated bands), then sort by wavelength for interpolation.
    valid = ~np.isnan(insitu_val)
    insitu_wl, insitu_val = insitu_wl[valid], insitu_val[valid]
    sort_idx = np.argsort(insitu_wl)
    insitu_wl_sorted = insitu_wl[sort_idx]
    insitu_val_sorted = insitu_val[sort_idx]

    # Interpolate in-situ at satellite wavelengths
    if insitu_wl_sorted.size:
        insitu_interp = np.interp(sat_wl, insitu_wl_sorted, insitu_val_sorted)
    else:
        insitu_interp = np.full(sat_wl.shape, np.nan)

    kdim, vdim = 'Wavelength (nm)', 'Reflectance'

    # Legend entries come from the element labels; markers are drawn with
    # Scatter elements that share the label of their Curve.
    sat_label = 'Satellite'
    sat_curve = hv.Curve((sat_wl, sat_val), kdim, vdim, label=sat_label).opts(
        color='blue', line_width=2, tools=['hover']
    )
    sat_marks = hv.Scatter((sat_wl, sat_val), kdim, vdim, label=sat_label).opts(
        marker='square', color='blue', size=6
    )

    insitu_label = 'In-situ interp @ sat bands'
    insitu_curve = hv.Curve((sat_wl, insitu_interp), kdim, vdim, label=insitu_label).opts(
        color='orange',
        line_dash='dashed',
        line_width=2,
        tools=['hover'],
    )
    insitu_marks = hv.Scatter((sat_wl, insitu_interp), kdim, vdim, label=insitu_label).opts(
        marker='x', color='orange', size=6
    )

    return (sat_curve * sat_marks * insitu_curve * insitu_marks).opts(
        title='First Row: In-situ vs. Satellite (Interpolated Only)',
        legend_position='right',
        width=800,
        height=400
    )

def get_sat_insitu_matchup(granules, insitudf):
    """Match the satellite data of the available granules to the selected in-situ files.

    NOTE(review): ``granules`` and ``insitudf`` are accepted but not read.
    The function works from the notebook-level ``avail_granules``,
    ``selected_metadata``, ``merc_x``/``merc_y``, ``root`` and ``window``;
    confirm whether the parameters were meant to replace them.

    Returns
    -------
    The result of ``fc.match_data`` for the satellite and in-situ tables.
    """
    list4open = [search_results[i] for i in avail_granules['granule_index']]
    files = earthaccess.open(list4open)
    with xr.open_dataset(files[0], group="sensor_band_parameters") as ds_bands:
        rrs_wavelengths = ds_bands["wavelength_3d"].values

    longitude, latitude = mercator_to_lonlat(merc_x, merc_y)
    # Loop through files and process
    sat_rows = []
    for file in files:
        granule_date = pd.to_datetime(
            file.granule["umm"]["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
        )
        print(f"Running Granule: {granule_date}")
        sat_rows.append(fc.get_fivebyfive(file, latitude, longitude, rrs_wavelengths))
    df_sat = pd.DataFrame(sat_rows)

    insitu_frames = []
    for sb_file in selected_metadata['File']:
        for sb in root.rglob(sb_file):
            df = read_sb(sb)
            # Long format: one row per wavelength. Normalise the column name
            # whatever its capitalisation before pivoting to one row.
            wavelength_cols = [
                c for c in df.columns
                if isinstance(c, str) and c.lower() == 'wavelength'
            ]
            if wavelength_cols:
                df = df.rename(columns={c: 'wavelength' for c in wavelength_cols})
                df_wide = (
                    df
                    .pivot(index=["profile_time", "profile_lat", "profile_lon"],
                           columns="wavelength",
                           values="Rrs")
                    .reset_index()
                    .rename(columns={
                        "profile_time": "datetime",
                        "profile_lat": "lat",
                        "profile_lon": "lon"
                    })
                )
                df_wide["date"] = df_wide["datetime"].dt.strftime("%Y%m%d")
                df_wide["time"] = df_wide["datetime"].dt.strftime("%H:%M:%S")
            else:
                # Wide format: Rrs<wavelength> columns become numeric labels.
                df_wide = df.reset_index()
                df_wide.columns = [clean_sb_column(col) for col in df_wide.columns]
            wls = sorted(c for c in df_wide.columns if isinstance(c, (int, float)))
            # Collected per file actually found, so a name without a match on
            # disk cannot reuse the table of a previous file.
            insitu_frames.append(df_wide[["datetime", "date", "time", "lat", "lon"] + wls])

    # Concatenate once with a fresh index so row labels are unique.
    if insitu_frames:
        df_insitu = pd.concat(insitu_frames, ignore_index=True)
    else:
        df_insitu = pd.DataFrame()

    return fc.match_data(
        df_sat,
        df_insitu,
        cv_max            = 0.60,
        senz_max          = 60.0,
        min_percent_valid = 55.0,
        max_time_diff     = window,
        std_max           = 1.5,
    )

# Hover tools setup
# The granule name and the data-type label contain '<br>' markup (inserted by
# wrap_files and make_insitu_points), so those fields are rendered with the
# {safe} format instead of being escaped and shown as literal text.
granule_hover = HoverTool(
    tooltips="""
    <div style='max-width:300px;'>
      <strong>Granule:</strong> @granule{safe}<br>
      <strong>Time:</strong> @time
    </div>
    """,
    point_policy='follow_mouse'
)

hover = HoverTool(
    tooltips="""
    <div style='max-width:200px;'>
      <strong>Time:</strong> @datetime{%F}<br>
      <strong>File:</strong> @File{safe}<br>
      <strong>Type:</strong> @Data_Type_str{safe}
    </div>
    """,
    formatters={'@datetime':'datetime'},
    point_policy='snap_to_data'
)

# Metadata display panes: tap_callback writes the selected in-situ rows and
# the matching granules into these two tables.
insitu_pane = pn.pane.DataFrame(
    pd.DataFrame(), name='Selected In-situ'
)
granule_pane = pn.pane.DataFrame(
    pd.DataFrame(), name='Available Granules'
)

# Tap callback state: both stay None until the first tap on the map.
selected_metadata = None
avail_granules = None

# The stream's source is assigned in map_update each time the plot is rebuilt.
tap = hv.streams.Tap(x=None, y=None)
tap.add_subscriber(tap_callback)

# Define toggles (all off by default); their values are passed to map_update.
alldays_toggle = pn.widgets.Checkbox(
    name='Show All Days',
    value=False
)
granules_toggle = pn.widgets.Checkbox(
    name='Show Granules',
    value=False
)
truecolor_toggle = pn.widgets.Checkbox(
    name='Show WorldView',
    value=False
)
esri_toggle = pn.widgets.Checkbox(
    name='Use Esri Imagery',
    value=False
)

# Sliders: day selection, true-color overlay opacity, and match-up time window.
day_slider = pn.widgets.DiscreteSlider(name="Date", options=available_days)
alpha_slider = pn.widgets.FloatSlider(name='Overlay Opacity', start=0.0, end=1.0, step=0.05, value=1.0)
matchup_window_slider = pn.widgets.IntSlider(name='Match-up Window (mins)', start=0, end=240, step=60, value=240)

# Granules currently shown on the map; replaced by map_update.
# NOTE(review): current_insitu and window are only created inside map_update.
current_granules = []
# Bind to panel
@pn.depends(
    selected_day=day_slider.param.value, 
    show_alldays=alldays_toggle.param.value,
    show_worldview=truecolor_toggle.param.value,
    show_granules=granules_toggle.param.value,
    show_esri=esri_toggle.param.value,
    opacity=alpha_slider.param.value,
    matchup_window=matchup_window_slider.param.value,
)
def map_update(selected_day, show_alldays, show_worldview, show_granules, show_esri, opacity, matchup_window):
    """Rebuild the map from the widget values and refresh the shared state.

    Stores the shown granules, the shown in-situ rows and the match-up window
    in notebook-level variables, and points the tap stream at the new point
    layer.
    """
    global current_granules, current_insitu, window
    plot, pts, current_insitu, current_granules = make_plot(
        selected_day,
        show_alldays,
        show_worldview,
        show_granules,
        show_esri,
        opacity,
    )
    window = matchup_window
    # Taps are reported relative to the freshly built point layer.
    tap.source = pts
    return plot

def spectral_update():
    """Build the spectral comparison plot for the current map selection.

    Uses the notebook-level ``selected_metadata`` and ``avail_granules`` set
    by ``tap_callback``. Returns a text placeholder when nothing has been
    selected yet or when no match-up is found.
    """
    nothing_selected = (
        selected_metadata is None
        or avail_granules is None
        or len(avail_granules) == 0
    )
    if nothing_selected:
        return hv.Text(0, 0, "Select an in-situ point with available granules").opts(width=800, height=400)

    # make_spectral_plot needs the match-up table as its argument.
    matchup_df = get_sat_insitu_matchup(avail_granules, selected_metadata)
    if len(matchup_df) == 0:
        return hv.Text(0, 0, "No match-up found for the selection").opts(width=800, height=400)
    return make_spectral_plot(matchup_df)

# Layout: title, widget rows, the reactive map, then the two selection tables.
app = pn.Column(
    "# Daily Granules and In-Situ Viewer",
    pn.Row(day_slider, alpha_slider, matchup_window_slider),
    pn.Row(alldays_toggle, truecolor_toggle, granules_toggle, esri_toggle),
    map_update,
    pn.Spacer(height=20),
    pn.Row(insitu_pane),
    pn.Row(granule_pane),
    # The spectral plot row is currently disabled:
    # pn.Row(spectral_update) 
)

app.servable()

In [161]:
# Inspect the granules matched by the last map tap (set in tap_callback).
avail_granules

Unnamed: 0,granule_index,datetime,granule
0,8,2024-09-25 21:29:12+00:00,PACE_OCI_L2_AOP_PACE_OCI.20240925T212912.L2.OC...


In [26]:
# Open the granules matched by the last tap; 'granule_index' holds positions
# in search_results. The same two statements are repeated in the next cell.
list4open = [search_results[i] for i in avail_granules['granule_index']]
files = earthaccess.open(list4open)

QUEUEING TASKS | :   0%|          | 0/2 [00:00<?, ?it/s]

PROCESSING TASKS | :   0%|          | 0/2 [00:00<?, ?it/s]

COLLECTING RESULTS | :   0%|          | 0/2 [00:00<?, ?it/s]

In [10]:
# Hard-coded Mercator location of the point to extract satellite data for.
# NOTE(review): presumably copied from a tapped in-situ point - confirm.
merc_x, merc_y = (-1.332486e+07, 4.053864e+06)
list4open = [search_results[i] for i in avail_granules['granule_index']]
files = earthaccess.open(list4open)
# Read the wavelength list once, from the first opened granule.
with xr.open_dataset(files[0], group="sensor_band_parameters") as ds_bands:
    rrs_wavelengths = ds_bands["wavelength_3d"].values

longitude, latitude = mercator_to_lonlat(merc_x, merc_y)
# Loop through files and process
# (one row per granule, collected into df_sat)
sat_rows = []
for idx, file in enumerate(files):
    granule_date = pd.to_datetime(
        file.granule["umm"]["TemporalExtent"]["RangeDateTime"]["BeginningDateTime"]
    )
    print(f"Running Granule: {granule_date}")
    row = fc.get_fivebyfive(file, latitude, longitude, rrs_wavelengths)
    sat_rows.append(row)
df_sat = pd.DataFrame(sat_rows)

QUEUEING TASKS | :   0%|          | 0/2 [00:00<?, ?it/s]

PROCESSING TASKS | :   0%|          | 0/2 [00:00<?, ?it/s]

COLLECTING RESULTS | :   0%|          | 0/2 [00:00<?, ?it/s]

Running Granule: 2024-09-25 19:50:55+00:00
Running Granule: 2024-09-25 21:29:12+00:00


In [75]:
def test_match_data(
    df_sat,
    df_field,
    cv_max=0.15,
    senz_max=60.0,
    min_percent_valid=55.0,
    max_time_diff=180,
    std_max=1.5,
):
    """Create matchup dataframe based on selection criteria.

    Parameters
    ----------
    df_sat : pandas dataframe
        Satellite data from flat validation file.
    df_field : pandas dataframe
        Field data from flat validation file.
    cv_max : float, default 0.15
        Maximum coefficient of variation (stdev/mean) for sat data.
    senz_max : float, default 60.0
        Maximum sensor zenith for sat data.
    min_percent_valid : float, default 55.0
        Minimum percentage of valid satellite pixels.
    max_time_diff : int, default 180
        Maximum time difference (minutes) between sat and field matchup.
    std_max : float, default 1.5
        If multiple valid field matchups, select within std_max stdevs of mean.

    Returns
    -------
    pandas dataframe of matchups for product

    Notes
    -----
    This is hard-coded to match on Rrs for the demo. For other products, take
    out the cv parameter and make the row product column search more generic.
    """
    # Setup
    time_window = pd.Timedelta(minutes=max_time_diff)
    df_match_list = []

    #TODO: add dependency on AERONET or SeaBass file
    # 1) prepare your field table
    df_field_filtered = df_field.copy()

    # pull real datetimes out of the index
    df_field_filtered["field_datetime"] = df_field_filtered['datetime']
    # ensure tz-naive
    df_field_filtered["field_datetime"] = (
        pd.to_datetime(df_field_filtered["field_datetime"])
          .dt.tz_localize('UTC')
    )
    print(df_field_filtered["field_datetime"])

    # rename lat/lon
    df_field_filtered["field_latitude"]  = df_field_filtered["lat"]
    df_field_filtered["field_longitude"] = df_field_filtered["lon"]
    
    # Filter Field data based on Solar Zenith
    # df_field_filtered = df_field[df_field["field_solar_zenith"] <= senz_max]

    # Filter satellite data based on cv threshold
    df_sat_filtered = df_sat[df_sat["sat_cv"] <= cv_max]

    # Filter satellite data based on percent good pixels
    df_sat_filtered = df_sat_filtered[
        df_sat_filtered["sat_pixel_valid"] >= min_percent_valid * 25 / 100
    ]

    for _, sat_row in df_sat_filtered.iterrows():
        # Filter field data based on time difference and coordinates
        time_diff = abs(
            df_field_filtered["field_datetime"] - sat_row["sat_datetime"]
            )
        time_mask = time_diff <= time_window
        lat_mask = 0.2 >= abs(
            df_field_filtered["field_latitude"] - sat_row["sat_latitude"]
        )
        lon_mask = 0.2 >= abs(
            df_field_filtered["field_longitude"] - sat_row["sat_longitude"]
        )
        field_matches = df_field_filtered[time_mask & lat_mask & lon_mask]

        if field_matches.shape[0] > 5:
            # Filter by Standard Deviation for rrs columns
            rrs_cols = [
                col for col in field_matches.columns
                if col.startswith("field_rrs")
                and int(col.rsplit("_rrs")[1]) >= 400
                and int(col.rsplit("_rrs")[1]) <= 700
            ]
            if rrs_cols:
                mean_spectra = field_matches[rrs_cols].mean(axis=0)
                std_spectra = field_matches[rrs_cols].std(axis=0)
                within_std = (
                    abs(field_matches[rrs_cols] - mean_spectra) <= std_max * std_spectra
                )
                field_matches = field_matches[within_std.all(axis=1)]

        if not field_matches.empty:
            # Select the best match based on time delta
            time_diff = abs(
                field_matches["field_datetime"] - sat_row["sat_datetime"]
                )
            best_match = field_matches.loc[time_diff.idxmin()]
            df_match_list.append({**best_match.to_dict(), **sat_row.to_dict()})

    df_match = pd.DataFrame(df_match_list)
    return df_match

In [13]:
# Build one wide in-situ table (one row per file, one column per wavelength)
# for the files selected by the last map tap.
insitu_frames = []
for sb_file in selected_metadata['File']:
    for sb in root.rglob(sb_file):
        df = read_sb(sb)
        # Long format: one row per wavelength. Normalise the column name
        # whatever its capitalisation before pivoting to one row.
        wavelength_cols = [
            c for c in df.columns
            if isinstance(c, str) and c.lower() == 'wavelength'
        ]
        if wavelength_cols:
            df = df.rename(columns={c: 'wavelength' for c in wavelength_cols})
            df_wide = (
                df
                .pivot(index=["profile_time", "profile_lat", "profile_lon"],
                       columns="wavelength",
                       values="Rrs")
                .reset_index()
                .rename(columns={
                    "profile_time": "datetime",
                    "profile_lat": "lat",
                    "profile_lon": "lon"
                })
            )
            df_wide["date"] = df_wide["datetime"].dt.strftime("%Y%m%d")
            df_wide["time"] = df_wide["datetime"].dt.strftime("%H:%M:%S")
        else:
            # Wide format: Rrs<wavelength> columns become numeric labels.
            df_wide = df.reset_index()
            df_wide.columns = [clean_sb_column(col) for col in df_wide.columns]
        wls = sorted(c for c in df_wide.columns if isinstance(c, (int, float)))
        # Collected per file actually found, so a name without a match on disk
        # cannot reuse the table of a previous file.
        insitu_frames.append(df_wide[["datetime", "date", "time", "lat", "lon"] + wls])

# Concatenate once with a fresh index so row labels are unique.
df_insitu = pd.concat(insitu_frames, ignore_index=True) if insitu_frames else pd.DataFrame()

df_insitu

Unrecognized date/time format in DataFrame columns.
May be a profile, but doublecheck.
Unrecognized date/time format in DataFrame columns.
May be a profile, but doublecheck.


wavelength,datetime,date,time,lat,lon,350,351,352,353,354,...,1066,1067,1068,1069,1070,1071,1072,1073,1074,1075
0,2024-09-25 22:02:19,20240925,22:02:19,34.1872,-119.6993,0.004287,0.004337,0.004381,0.004409,0.004433,...,,,,,,,,,,
0,2024-09-25 21:56:00,20240925,21:56:00,34.1872,-119.6993,0.003192,0.003204,0.00325,0.003366,0.003443,...,-0.000113,0.000491,0.001089,0.000356,6.2e-05,0.000737,0.001104,0.001136,0.000963,0.000674


In [12]:
# Match the satellite rows to the in-situ data within the current window.
# NOTE(review): this passes df_wide (the table of the last file read in the
# in-situ cell), not the combined df_insitu - confirm this is intended.
matchups = fc.match_data(
    df_sat,
    df_wide,
    cv_max            = 0.60,
    senz_max          = 60.0,
    min_percent_valid = 55.0,
    max_time_diff     = window,
    std_max           = 1.5,
)
matchups

Unnamed: 0,datetime,date,time,lat,lon,350,351,352,353,354,...,sat_rrs706,sat_rrs707,sat_rrs708,sat_rrs709,sat_rrs711,sat_rrs712,sat_rrs713,sat_rrs714,sat_rrs717,sat_rrs719
0,2024-09-25 21:56:00,20240925,21:56:00,34.1872,-119.6993,0.003192,0.003204,0.00325,0.003366,0.003443,...,-0.0001,-8.9e-05,-7e-05,-5.9e-05,-5.1e-05,-4.1e-05,-3.6e-05,-4.5e-05,-8.9e-05,0.000177


In [65]:
# Inspect the per-granule satellite extraction table.
df_sat

Unnamed: 0,sat_datetime,sat_cv,sat_latitude,sat_longitude,sat_pixel_valid,sat_rrs346,sat_rrs348,sat_rrs351,sat_rrs353,sat_rrs356,...,sat_rrs706,sat_rrs707,sat_rrs708,sat_rrs709,sat_rrs711,sat_rrs712,sat_rrs713,sat_rrs714,sat_rrs717,sat_rrs719
0,2024-09-25 19:50:55+00:00,,34.184734,-119.708313,0,,,,,,...,,,,,,,,,,
1,2024-09-25 21:29:12+00:00,0.122689,34.184734,-119.711494,21,-0.001617,-0.001517,-0.001413,-0.001323,-0.000832,...,-0.0001,-8.9e-05,-7e-05,-5.9e-05,-5.1e-05,-4.1e-05,-3.6e-05,-4.5e-05,-8.9e-05,0.000177


In [38]:

# Scratch check of a column rename: 'Rrs<decimal>_unc' labels are reduced to
# the bare number; other labels are left alone.
df = pd.DataFrame(columns=['Rrs443.0_unc', 'Rrs490.0_unc', 'other_column'])
print(df.columns)

# Rename
unc_pattern = r'^Rrs(\d+\.\d+)_unc$'
df.columns = df.columns.str.replace(unc_pattern, r'\1', regex=True)

print(df.columns)

Index(['Rrs443.0_unc', 'Rrs490.0_unc', 'other_column'], dtype='object')
Index(['443.0', '490.0', 'other_column'], dtype='object')
