In [1]:
#SETUP: imports, Panel extension loading, and the matchup data load.

#Uncomment to install plotly into the kernel environment if it is missing.
#%pip install plotly

import os
#Print the working directory; the relative CSV path below is resolved against it.
print(os.getcwd())

import plotly.express as px
import pandas as pd
import panel as pn
#Load Panel together with its 'plotly' extension.
pn.extension('plotly')
import holoviews as hv
from holoviews import streams
import numpy as np


# Load the CSV file with matchup data into `df`
#file_path = "SeaPACE/matchups_all.csv"

df = pd.read_csv("matchups_all.csv")
#df.head()




/home/jovyan/SeaPACE


In [2]:
# Show basic info for the matchup data
#print(df.shape)
#print(df.columns)
#df.head()
#df.info()
#df.columns

In [3]:
#Explore shared wavelengths

# `df` is already loaded by the setup cell. Re-reading the same CSV here was
# redundant I/O, so this cell works from the existing frame.

# Satellite Rrs columns are named "sat_rrs<wavelength>"; SeaBASS Rrs columns are
# the ones whose name is all digits. Satellite columns with a non-numeric suffix
# are skipped so the int() conversion below cannot raise ValueError.
sat_cols = [
    col for col in df.columns
    if col.startswith("sat_rrs") and col[len("sat_rrs"):].isdigit()
]
seabass_cols = [col for col in df.columns if col.isdigit()]

# Extract wavelengths as numbers
sat_wavelengths = [int(col[len("sat_rrs"):]) for col in sat_cols]
seabass_wavelengths = [int(col) for col in seabass_cols]

# Find common wavelengths (present on both the satellite and the SeaBASS side)
common_wavelengths = sorted(set(sat_wavelengths).intersection(seabass_wavelengths))

#print(f"Common wavelengths: {common_wavelengths}")


In [4]:
#Take a look at the columns
#print(f"SeaBASS columns sample: {list(df.columns[:20])}")
#print(f"Satellite columns sample: {[c for c in df.columns if c.startswith('sat_rrs')][:20]}")


In [5]:
#Wavelengths available on both sides of the matchup.
#Both sets hold the wavelength as a string so they can be intersected directly.
seabass_wls = {name for name in df.columns if name.isdigit()}
satellite_wls = {
    name.replace("sat_rrs", "")
    for name in df.columns
    if name.startswith("sat_rrs")
}
#Convert to int only after intersecting, then sort ascending.
common_wls = sorted(map(int, seabass_wls & satellite_wls))
#print("Common wavelengths:", common_wls)


In [6]:
#Calculate Mean Square Error (MSE) between SeaBASS and satellite Rrs per wavelength
mse_list = []
for wl in common_wls:
    seabass_col = str(wl)
    satellite_col = f"sat_rrs{wl}"
    # Only rows where both measurements are present contribute to the MSE.
    paired = df[[seabass_col, satellite_col]].dropna()
    if paired.empty:
        # No valid matchups at this wavelength: skip it rather than record a NaN.
        continue
    mse = ((paired[seabass_col] - paired[satellite_col])**2).mean()
    mse_list.append({'wavelength': wl, 'mse': mse})
# Name the columns explicitly so the frame keeps its 'wavelength'/'mse' schema
# even when no wavelength has paired data; an empty record list would otherwise
# produce a frame with no columns at all.
mse_df = pd.DataFrame(mse_list, columns=['wavelength', 'mse'])


In [7]:
#Placeholder container for the per-wavelength scatterplot
pn.extension()

# Starts empty; the selection callback clears it and appends the current plot.
scatter_obj = pn.Column()  # empty placeholder for the scatterplot


In [8]:
#Create MSE Plot

hv.extension('bokeh')

# One point per wavelength; the 'tap' tool makes the points selectable by click.
scatter_mse = hv.Scatter(mse_df, kdims='wavelength', vdims='mse').opts(
    tools=['tap', 'hover'],
    size=10,
    color='blue',
    width=1000,
    height=500,
    title="MSE vs Wavelength",
    xlabel="Wavelength (nm)",
    ylabel="Mean Squared Error",
    fontsize={
        'xlabel': '16pt',
        'ylabel': '16pt',
        'xticks': '12pt',
        'yticks': '12pt',
        'title': '18pt'
    }
)

# NOTE: the Selection1D stream and its subscriber are intentionally not created
# in this cell. The "Selection callback" cell further down defines
# selection_callback and attaches it to a Selection1D stream on scatter_mse,
# after update_scatterplot exists. Wiring it here as well registered a second
# stream on the same plot, so every tap rebuilt the scatterplot twice.



In [9]:
#Create scatterplot

from scipy.stats import pearsonr

def update_scatterplot(index):
    """Build the SeaBASS-vs-satellite Rrs scatterplot for a selected wavelength.

    Parameters
    ----------
    index : sequence of int
        Positional indices into ``mse_df`` of the selected points. Only the
        first entry is used; an empty/None value means nothing is selected.

    Returns
    -------
    pn.pane.Markdown or pn.pane.HoloViews
        A Markdown message when nothing is selected, when the data columns for
        the wavelength are missing, or when fewer than two paired observations
        exist; otherwise a HoloViews pane overlaying the scatter, the fitted
        regression line and a text box with the fit/correlation statistics.
    """
    if not index:
        return pn.pane.Markdown("**No wavelength selected**")

    # Read the value from the column rather than from a row: a row Series mixes
    # the integer wavelength with the float MSE, which upcasts the wavelength to
    # float and made titles/messages read e.g. "412.0 nm".
    selected_wavelength = int(mse_df['wavelength'].iloc[index[0]])
    seabass_col = str(selected_wavelength)
    satellite_col = f"sat_rrs{selected_wavelength}"

    if seabass_col not in df.columns or satellite_col not in df.columns:
        return pn.pane.Markdown(f"**Data columns missing for wavelength {selected_wavelength} nm**")

    # Keep only rows where both the in situ and the satellite value are present.
    paired = df[[seabass_col, satellite_col]].dropna()
    if len(paired) < 2:
        # pearsonr rejects inputs shorter than 2 and a line cannot be fitted
        # through fewer than two points, so report instead of raising.
        return pn.pane.Markdown(
            f"**Not enough paired observations at {selected_wavelength} nm "
            "to fit a regression (need at least 2)**"
        )

    x = paired[seabass_col].values
    y = paired[satellite_col].values

    # Linear regression (degree-1 least-squares fit of satellite on SeaBASS)
    coef = np.polyfit(x, y, 1)
    slope, intercept = coef
    poly1d_fn = np.poly1d(coef)

    # Correlation stats
    r, p_value = pearsonr(x, y)
    r_squared = r**2

    # Scatter plot
    scatter = hv.Scatter(paired, kdims=seabass_col, vdims=satellite_col).opts(
        width=900,
        height=500,
        size=10,
        tools=['hover'],
        title=f"Satellite vs SeaBASS Rrs at {selected_wavelength} nm",
        xlabel=f"SeaBASS Rrs at {seabass_col} nm",
        ylabel=f"Satellite Rrs at {selected_wavelength} nm",
        fontsize={
            'xlabel': '16pt',
            'ylabel': '16pt',
            'xticks': '12pt',
            'yticks': '12pt',
            'title': '18pt'
        }
    )

    # Regression line, evaluated across the observed x range
    x_line = np.linspace(x.min(), x.max(), 100)
    y_line = poly1d_fn(x_line)
    regression_line = hv.Curve((x_line, y_line), kdims=seabass_col, vdims=satellite_col).opts(color='red', line_width=2)

    # Text annotation, anchored 5% in from the left and 10% down from the top
    # of the data range
    x_text = x.min() + 0.05 * (x.max() - x.min())
    y_text = y.max() - 0.1 * (y.max() - y.min())
    stats_text = (
        f"Slope = {slope:.3f}\n"
        f"Intercept = {intercept:.3f}\n"
        f"R² = {r_squared:.3f}\n"
        f"r = {r:.3f}\n"
        f"p = {p_value:.3g}"
    )
    text = hv.Text(x_text, y_text, stats_text).opts(
        text_font_size='12pt',
        text_color='black'
    )

    overlay = scatter * regression_line * text
    return pn.pane.HoloViews(overlay)




In [10]:
#Selection callback: swap the displayed scatterplot whenever the selection changes
def selection_callback(index):
    """Replace the contents of ``scatter_obj`` with the plot for ``index``.

    ``index`` is passed straight through to ``update_scatterplot``.
    """
    # Build the replacement first, so that if building it raises, the
    # container still shows whatever it held before.
    refreshed_view = update_scatterplot(index)
    scatter_obj.clear()
    scatter_obj.append(refreshed_view)


# Listen for point selections on the MSE plot and route them to the callback.
selection = streams.Selection1D(source=scatter_mse)
selection.add_subscriber(selection_callback)



In [11]:
# Instruction banner displayed above the MSE plot
message_box = pn.pane.HTML("""
<div style="font-size:18pt; font-weight:500; margin-bottom:6px;">
    📌 Click a data point below to see the in situ vs satellite Rrs scatterplot for that wavelength 
</div>
<div style="font-size:14pt; color:#444;">
    🔍 You can zoom in and move around the plot to better choose the wavelength  you want
</div>
""")

# Stack top to bottom: instructions, clickable MSE plot, scatterplot placeholder
layout = pn.Column(message_box, scatter_mse, scatter_obj)

# Mark the layout as servable (and display it as the cell result)
layout.servable()

