In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import requests
# used for checking if files exist
import os

In [2]:
csv_file = 'streamway_data.csv'
df_list = []

# Column mapping for renaming - only keeping field1 (streamway depth)
column_mapping = {
    'field1': 'streamway_depth_mm',
}

# Try to load existing data from CSV
if os.path.exists(csv_file):
    print("Loading existing data from CSV...")
    df_existing = pd.read_csv(csv_file, index_col=0, parse_dates=True)
    # Ensure existing data is timezone-naive
    if df_existing.index.tz is not None:
        df_existing.index = df_existing.index.tz_localize(None)
    
    latest_timestamp = df_existing.index.max()
    print(f"Latest data in CSV: {latest_timestamp}")
    
    # Start fetching from the latest timestamp in the CSV
    current_end = pd.to_datetime('now').tz_localize(None)
    end_date = latest_timestamp
    
    if current_end > end_date:
        print(f"Fetching new data from {current_end} back to {end_date}")
        fetch_new_data = True
    else:
        print("No new data to fetch")
        fetch_new_data = False
        df = df_existing
else:
    print("No existing CSV found")

Loading existing data from CSV...
Latest data in CSV: 2025-11-13 16:50:47
Fetching new data from 2025-11-14 02:24:10.369034 back to 2025-11-13 16:50:47


In [3]:
# the data should be on 10-minute intervals
df_sorted = df_existing.sort_index()
df_resampled = df_sorted.resample('10min').nearest()
df_resampled

Unnamed: 0_level_0,entry_id,streamway_depth_mm
created_at,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-05-17 01:50:00,1038,615.0
2024-05-17 02:00:00,1039,610.0
2024-05-17 02:10:00,1040,615.0
2024-05-17 02:20:00,1041,610.0
2024-05-17 02:30:00,1042,615.0
...,...,...
2025-11-13 16:10:00,102447,954.0
2025-11-13 16:20:00,102448,954.0
2025-11-13 16:30:00,102449,945.0
2025-11-13 16:40:00,102452,945.0


In [4]:
fig=go.Figure()
fig.add_trace(go.Scatter(x=df_resampled.index, y=df_resampled['streamway_depth_mm'], mode='lines+markers', name='Streamway Depth (mm)'))
fig.update_layout(title='Streamway Depth Over Time', xaxis_title='Time', yaxis_title='Depth (mm)')
fig.show()

In [5]:
# their are data gaps for the steamwaty data
# let's build two arrays one of the data gaps and another of the continuous data segments

#gaps where streamway depth is NaN
gaps = df_resampled[df_resampled['streamway_depth_mm'].isna()]

# continuous segments where streamway depth is not NaN
continuous_segments = df_resampled[df_resampled['streamway_depth_mm'].notna()]
continuous_segments

Unnamed: 0_level_0,entry_id,streamway_depth_mm
created_at,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-05-17 01:50:00,1038,615.0
2024-05-17 02:00:00,1039,610.0
2024-05-17 02:10:00,1040,615.0
2024-05-17 02:20:00,1041,610.0
2024-05-17 02:30:00,1042,615.0
...,...,...
2025-11-13 16:10:00,102447,954.0
2025-11-13 16:20:00,102448,954.0
2025-11-13 16:30:00,102449,945.0
2025-11-13 16:40:00,102452,945.0


In [6]:
# plot of streamway depth with gaps highlighted x axis segments
fig=go.Figure()
fig.add_trace(go.Scatter(x=df_resampled.index, y=df_resampled['streamway_depth_mm'], mode='lines+markers', name='Streamway Depth (mm)'))

# Highlight gap segments using gap_segments_df
for idx, row in gap_segments_df.iterrows():
    fig.add_vrect(x0=row['start'], x1=row['end'], fillcolor="red", opacity=0.3, layer="below", line_width=0)

fig.update_layout(title='Streamway Depth with Data Gaps Highlighted', xaxis_title='Time', yaxis_title='Depth (mm)')
fig.show()

NameError: name 'gap_segments_df' is not defined

In [None]:
# break gaps into an array of dataframes based on continuous NaN segments based on 10-minute intervals
gap_segments = []
if not gaps.empty:
    gap_start = gaps.index[0]
    gap_end = gaps.index[0]

    for current_time in gaps.index[1:]:
        if (current_time - gap_end) == pd.Timedelta(minutes=10):
            gap_end = current_time
        else:
            gap_segments.append((gap_start, gap_end))
            gap_start = current_time
            gap_end = current_time
    gap_segments.append((gap_start, gap_end))
    gap_segments_df = pd.DataFrame(gap_segments, columns=['start', 'end'])
    gap_segments_df['duration'] = gap_segments_df['end'] - gap_segments_df['start']
gap_segments_df

Unnamed: 0,start,end,duration
0,2024-09-20 17:40:00,2024-09-20 17:50:00,0 days 00:10:00
1,2024-09-20 18:30:00,2024-09-20 20:00:00,0 days 01:30:00
2,2024-11-24 02:10:00,2024-11-24 05:20:00,0 days 03:10:00
3,2024-12-05 08:40:00,2024-12-05 11:50:00,0 days 03:10:00
4,2024-12-07 11:40:00,2024-12-09 03:00:00,1 days 15:20:00
5,2025-02-20 00:10:00,2025-02-20 00:10:00,0 days 00:00:00
6,2025-02-25 09:10:00,2025-02-25 09:50:00,0 days 00:40:00
7,2025-03-08 22:40:00,2025-03-08 22:40:00,0 days 00:00:00
8,2025-05-03 16:10:00,2025-05-03 19:10:00,0 days 03:00:00
9,2025-05-05 10:40:00,2025-05-05 10:40:00,0 days 00:00:00


In [None]:
# break continuous segments into an array of dataframes based on continuous non-NaN segments based on 10-minute intervals
continuous_data_segments = []
if not continuous_segments.empty:
    data_start = continuous_segments.index[0]
    data_end = continuous_segments.index[0]

    for current_time in continuous_segments.index[1:]:
        if (current_time - data_end) == pd.Timedelta(minutes=10):
            data_end = current_time
        else:
            continuous_data_segments.append((data_start, data_end))
            data_start = current_time
            data_end = current_time
    continuous_data_segments.append((data_start, data_end))
    continuous_data_segments_df = pd.DataFrame(continuous_data_segments, columns=['start', 'end'])
    continuous_data_segments_df['duration'] = continuous_data_segments_df['end'] - continuous_data_segments_df['start']
continuous_data_segments_df

Unnamed: 0,start,end,duration
0,2024-05-17 01:50:00,2024-09-20 17:30:00,126 days 15:40:00
1,2024-09-20 18:00:00,2024-09-20 18:20:00,0 days 00:20:00
2,2024-09-20 20:10:00,2024-11-24 02:00:00,64 days 05:50:00
3,2024-11-24 05:30:00,2024-12-05 08:30:00,11 days 03:00:00
4,2024-12-05 12:00:00,2024-12-07 11:30:00,1 days 23:30:00
5,2024-12-09 03:10:00,2025-02-20 00:00:00,72 days 20:50:00
6,2025-02-20 00:20:00,2025-02-25 09:00:00,5 days 08:40:00
7,2025-02-25 10:00:00,2025-03-08 22:30:00,11 days 12:30:00
8,2025-03-08 22:50:00,2025-05-03 16:00:00,55 days 17:10:00
9,2025-05-03 19:20:00,2025-05-05 10:30:00,1 days 15:10:00


In [None]:
# looking at earch continuous data segment how large are the gaps before and after each segment
continuous_data_segments_df['gap_before'] = continuous_data_segments_df['start'].shift(1) - continuous_data_segments_df['end'].shift(1)
continuous_data_segments_df['gap_after'] = continuous_data_segments_df['start'].shift(-1) - continuous_data_segments_df['end']
continuous_data_segments_df

Unnamed: 0,start,end,duration,gap_before,gap_after
0,2024-05-17 01:50:00,2024-09-20 17:30:00,126 days 15:40:00,NaT,0 days 00:30:00
1,2024-09-20 18:00:00,2024-09-20 18:20:00,0 days 00:20:00,-127 days +08:20:00,0 days 01:50:00
2,2024-09-20 20:10:00,2024-11-24 02:00:00,64 days 05:50:00,-1 days +23:40:00,0 days 03:30:00
3,2024-11-24 05:30:00,2024-12-05 08:30:00,11 days 03:00:00,-65 days +18:10:00,0 days 03:30:00
4,2024-12-05 12:00:00,2024-12-07 11:30:00,1 days 23:30:00,-12 days +21:00:00,1 days 15:40:00
5,2024-12-09 03:10:00,2025-02-20 00:00:00,72 days 20:50:00,-2 days +00:30:00,0 days 00:20:00
6,2025-02-20 00:20:00,2025-02-25 09:00:00,5 days 08:40:00,-73 days +03:10:00,0 days 01:00:00
7,2025-02-25 10:00:00,2025-03-08 22:30:00,11 days 12:30:00,-6 days +15:20:00,0 days 00:20:00
8,2025-03-08 22:50:00,2025-05-03 16:00:00,55 days 17:10:00,-12 days +11:30:00,0 days 03:20:00
9,2025-05-03 19:20:00,2025-05-05 10:30:00,1 days 15:10:00,-56 days +06:50:00,0 days 00:20:00


In [None]:
# to test how various gap filling methods perform
# we'll artificially create gaps in continuous data segments
# and try various gap filling methods

# we'll create gaps of increasing size and see how well each method performs starting from 10 minutes up to 6 hours

results = []
for gap_size_minutes in [10, 30, 60, 120, 180, 240, 300, 360]:  # from 10 minutes to 6 hours
    gap_size = pd.Timedelta(minutes=gap_size_minutes)
    for index, row in continuous_data_segments_df.iterrows():
        segment_start = row['start']
        segment_end = row['end']
        segment_duration = row['duration']
        
        if segment_duration > gap_size * 2:
            # Create a gap in the middle of the segment
            gap_start = segment_start + (segment_duration - gap_size) / 2
            gap_end = gap_start + gap_size
            
            # Create a copy of the original data segment
            test_segment = df_resampled[segment_start:segment_end].copy()
            
            # Introduce NaN values to simulate the gap
            test_segment.loc[gap_start:gap_end, 'streamway_depth_mm'] = np.nan
            
            # Now apply various gap filling methods here and evaluate performance
            # (e.g., forward fill, backward fill, interpolation, etc.)
            
            # Example: Forward Fill
            filled_ffill = test_segment['streamway_depth_mm'].fillna(method='ffill')
            
            # Example: Backward Fill
            filled_bfill = test_segment['streamway_depth_mm'].fillna(method='bfill')
            
            # Example: Linear Interpolation
            filled_interp = test_segment['streamway_depth_mm'].interpolate(method='linear')
            
            # Evaluate and compare the filled results with the original data segment
            original_segment = df_resampled[segment_start:segment_end]['streamway_depth_mm']
            
            # Calculate errors or differences for each method
            error_ffill = np.abs(original_segment - filled_ffill).sum()
            error_bfill = np.abs(original_segment - filled_bfill).sum()
            error_interp = np.abs(original_segment - filled_interp).sum()

            # Calculate errors or differences for each interval 10, 20, 30, ... minutes
            error_intervals = {}
            for interval in range(10, gap_size_minutes + 10, 10):
                interval_timedelta = pd.Timedelta(minutes=interval)
                gap_interval_end = gap_start + interval_timedelta
                
                original_interval = original_segment[gap_start:gap_interval_end]
                ffill_interval = filled_ffill[gap_start:gap_interval_end]
                bfill_interval = filled_bfill[gap_start:gap_interval_end]
                interp_interval = filled_interp[gap_start:gap_interval_end]
                
                error_intervals[interval] = {
                    'error_ffill': np.abs(original_interval - ffill_interval).sum(),
                    'error_bfill': np.abs(original_interval - bfill_interval).sum(),
                    'error_interp': np.abs(original_interval - interp_interval).sum()
                }

            results.append({
                'gap_size_minutes': gap_size_minutes,
                'segment_start': segment_start,
                'segment_end': segment_end,
                'error_ffill': error_ffill,
                'error_bfill': error_bfill,
                'error_interp': error_interp
            })
            for interval, errors in error_intervals.items():
                results[-1][f'error_ffill_{interval}min'] = errors['error_ffill']
                results[-1][f'error_bfill_{interval}min'] = errors['error_bfill']
                results[-1][f'error_interp_{interval}min'] = errors['error_interp']



Series.fillna with 'method' is deprecated and will raise in a future version. Use obj.ffill() or obj.bfill() instead.


Series.fillna with 'method' is deprecated and will raise in a future version. Use obj.ffill() or obj.bfill() instead.


Series.fillna with 'method' is deprecated and will raise in a future version. Use obj.ffill() or obj.bfill() instead.


Series.fillna with 'method' is deprecated and will raise in a future version. Use obj.ffill() or obj.bfill() instead.


Series.fillna with 'method' is deprecated and will raise in a future version. Use obj.ffill() or obj.bfill() instead.


Series.fillna with 'method' is deprecated and will raise in a future version. Use obj.ffill() or obj.bfill() instead.


Series.fillna with 'method' is deprecated and will raise in a future version. Use obj.ffill() or obj.bfill() instead.


Series.fillna with 'method' is deprecated and will raise in a future version. Use obj.ffill() or obj.bfill() instead.


Series.fillna with 'method' is deprecat

In [None]:
# process results as needed
results_df = pd.DataFrame(results)
results_df

Unnamed: 0,gap_size_minutes,segment_start,segment_end,error_ffill,error_bfill,error_interp,error_ffill_10min,error_bfill_10min,error_interp_10min,error_ffill_20min,...,error_interp_330min,error_ffill_340min,error_bfill_340min,error_interp_340min,error_ffill_350min,error_bfill_350min,error_interp_350min,error_ffill_360min,error_bfill_360min,error_interp_360min
0,10,2024-05-17 01:50:00,2024-09-20 17:30:00,0.0,5.0,2.500000,0.0,5.0,2.500000,,...,,,,,,,,,,
1,10,2024-09-20 20:10:00,2024-11-24 02:00:00,10.0,0.0,5.000000,10.0,0.0,5.000000,,...,,,,,,,,,,
2,10,2024-11-24 05:30:00,2024-12-05 08:30:00,0.0,0.0,0.000000,0.0,0.0,0.000000,,...,,,,,,,,,,
3,10,2024-12-05 12:00:00,2024-12-07 11:30:00,15.0,15.0,0.000000,15.0,15.0,0.000000,,...,,,,,,,,,,
4,10,2024-12-09 03:10:00,2025-02-20 00:00:00,0.0,0.0,0.000000,0.0,0.0,0.000000,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
144,360,2025-08-19 01:50:00,2025-09-16 11:30:00,385.0,698.0,183.500000,5.0,53.0,5.763158,5.0,...,173.500000,322.0,693.0,176.210526,351.0,693.0,177.736842,385.0,698.0,183.500000
145,360,2025-09-16 14:40:00,2025-09-19 11:00:00,1224.0,2254.0,727.210526,30.0,158.0,22.578947,45.0,...,707.105263,1051.0,2239.0,714.684211,1130.0,2254.0,724.736842,1224.0,2254.0,727.210526
146,360,2025-09-19 12:00:00,2025-10-02 23:20:00,130.0,55.0,78.552632,5.0,5.0,4.868421,5.0,...,77.763158,120.0,55.0,78.157895,125.0,55.0,78.421053,130.0,55.0,78.552632
147,360,2025-10-02 23:40:00,2025-10-20 08:00:00,70.0,70.0,70.000000,5.0,5.0,5.000000,5.0,...,55.000000,60.0,60.0,60.000000,65.0,65.0,65.000000,70.0,70.0,70.000000


In [None]:
# generate summary statistics or visualizations to compare methods

for gap_size_minutes in [10, 30, 60, 120, 180, 240, 300, 360]:
    subset = results_df[results_df['gap_size_minutes'] == gap_size_minutes]
    mean_errors = subset[['error_ffill', 'error_bfill', 'error_interp']].mean()
    std_errors = subset[['error_ffill', 'error_bfill', 'error_interp']].std()
    max_errors = subset[['error_ffill', 'error_bfill', 'error_interp']].max()
    min_errors = subset[['error_ffill', 'error_bfill', 'error_interp']].min()
    print(f"Gap Size: {gap_size_minutes} mins")
    print("Mean Errors:")
    print(mean_errors)
    print("Standard Deviation of Errors:")
    print(std_errors)
    print("Maximum Errors:")
    print(max_errors)
    print("Minimum Errors:")
    print(min_errors)
    print()

Gap Size: 10 mins
Mean Errors:
error_ffill     4.45
error_bfill     2.95
error_interp    2.45
dtype: float64
Standard Deviation of Errors:
error_ffill     4.839258
error_bfill     4.084309
error_interp    2.887815
dtype: float64
Maximum Errors:
error_ffill     15.0
error_bfill     15.0
error_interp    10.0
dtype: float64
Minimum Errors:
error_ffill     0.0
error_bfill     0.0
error_interp    0.0
dtype: float64

Gap Size: 30 mins
Mean Errors:
error_ffill      9.157895
error_bfill     11.684211
error_interp     9.168421
dtype: float64
Standard Deviation of Errors:
error_ffill      9.639405
error_bfill     13.844584
error_interp     9.431628
dtype: float64
Maximum Errors:
error_ffill     30.0
error_bfill     60.0
error_interp    40.0
dtype: float64
Minimum Errors:
error_ffill     0.0
error_bfill     0.0
error_interp    0.0
dtype: float64

Gap Size: 60 mins
Mean Errors:
error_ffill     27.263158
error_bfill     29.421053
error_interp    18.616541
dtype: float64
Standard Deviation of Errors

In [None]:
# let's check the standard deviation of the streamway depth data overall
overall_std = df_resampled['streamway_depth_mm'].std()
overall_std

# largest diff in streamway depth over 10, 30, 60, 120, 180, 240, 300, 360 minute intervals
changes = {}
for interval in [10, 30, 60, 120, 180, 240, 300, 360]:
    shifted = df_resampled['streamway_depth_mm'].shift(interval)
    diffs = (df_resampled['streamway_depth_mm'] - shifted).abs()
    max_change = diffs.max()
    mean_change = diffs.mean()
    std_change = diffs.std()
    changes[interval] = {
        'max_change': max_change,
        'mean_change': mean_change,
        'std_change': std_change
    }
changes_df = pd.DataFrame(changes).T
changes_df

Unnamed: 0,max_change,mean_change,std_change
10,1210.0,19.945485,59.685679
30,1594.0,49.407688,126.445063
60,2017.0,77.487463,168.560082
120,2017.0,105.599165,199.706897
180,2022.0,122.092161,215.018587
240,2017.0,133.15812,224.5826
300,2007.0,138.13505,226.547519
360,1997.0,141.964852,228.352288


In [None]:
# how well do our gap filling methods perform relative to the changes_df
# our 10-minute gap fill should be within the mean change for 10 minutes changes_df

# ANSI color codes
GREEN = '\033[92m'
RED = '\033[91m'
RESET = '\033[0m'

for gap_size_minutes in [10, 30, 60, 120, 180, 240, 300, 360]:
    subset = results_df[results_df['gap_size_minutes'] == gap_size_minutes]
    mean_errors = subset[['error_ffill', 'error_bfill', 'error_interp']].mean()
    mean_change = changes_df.loc[gap_size_minutes, 'mean_change']
    print(f"Gap Size: {gap_size_minutes} mins")
    if mean_errors['error_ffill'] <= mean_change:
        print(f"{GREEN}  FFill Mean Error {mean_errors['error_ffill']:.2f} is within Mean Change {mean_change:.2f}{RESET}")
    else:
        print(f"{RED}  FFill Mean Error {mean_errors['error_ffill']:.2f} exceeds Mean Change {mean_change:.2f}{RESET}")
    if mean_errors['error_bfill'] <= mean_change:
        print(f"{GREEN}  BFill Mean Error {mean_errors['error_bfill']:.2f} is within Mean Change {mean_change:.2f}{RESET}")
    else:
        print(f"{RED}  BFill Mean Error {mean_errors['error_bfill']:.2f} exceeds Mean Change {mean_change:.2f}{RESET}")
    if mean_errors['error_interp'] <= mean_change:
        print(f"{GREEN}  Interp Mean Error {mean_errors['error_interp']:.2f} is within Mean Change {mean_change:.2f}{RESET}")
    else:
        print(f"{RED}  Interp Mean Error {mean_errors['error_interp']:.2f} exceeds Mean Change {mean_change:.2f}{RESET}")

Gap Size: 10 mins
[92m  FFill Mean Error 4.45 is within Mean Change 19.95[0m
[92m  BFill Mean Error 2.95 is within Mean Change 19.95[0m
[92m  Interp Mean Error 2.45 is within Mean Change 19.95[0m
Gap Size: 30 mins
[92m  FFill Mean Error 9.16 is within Mean Change 49.41[0m
[92m  BFill Mean Error 11.68 is within Mean Change 49.41[0m
[92m  Interp Mean Error 9.17 is within Mean Change 49.41[0m
Gap Size: 60 mins
[92m  FFill Mean Error 27.26 is within Mean Change 77.49[0m
[92m  BFill Mean Error 29.42 is within Mean Change 77.49[0m
[92m  Interp Mean Error 18.62 is within Mean Change 77.49[0m
Gap Size: 120 mins
[92m  FFill Mean Error 80.21 is within Mean Change 105.60[0m
[92m  BFill Mean Error 85.58 is within Mean Change 105.60[0m
[92m  Interp Mean Error 43.21 is within Mean Change 105.60[0m
Gap Size: 180 mins
[91m  FFill Mean Error 138.67 exceeds Mean Change 122.09[0m
[91m  BFill Mean Error 233.06 exceeds Mean Change 122.09[0m
[92m  Interp Mean Error 97.73 is with

In [None]:
# let's plot the changes_df and the mean errors for each gap filling method with std dev reference
fig = go.Figure()

# Mean Change bar with error bars showing std deviation
fig.add_trace(go.Bar(
    x=changes_df.index,
    y=changes_df['mean_change'],
    name='Mean Change',
    marker_color='blue',
    error_y=dict(
        type='data',
        array=changes_df['std_change'],
        visible=True,
        color='lightblue'
    )
))

# Gap filling method errors
fig.add_trace(go.Bar(
    x=results_df['gap_size_minutes'].unique(),
    y=results_df.groupby('gap_size_minutes')['error_ffill'].mean(),
    name='FFill Mean Error',
    marker_color='orange'
))
fig.add_trace(go.Bar(
    x=results_df['gap_size_minutes'].unique(),
    y=results_df.groupby('gap_size_minutes')['error_bfill'].mean(),
    name='BFill Mean Error',
    marker_color='green'
))
fig.add_trace(go.Bar(
    x=results_df['gap_size_minutes'].unique(),
    y=results_df.groupby('gap_size_minutes')['error_interp'].mean(),
    name='Interp Mean Error',
    marker_color='red'
))

# Add a line showing mean_change + std_change threshold
fig.add_trace(go.Scatter(
    x=changes_df.index,
    y=changes_df['mean_change'] + changes_df['std_change'],
    name='Mean + 1 Std',
    mode='lines+markers',
    line=dict(color='gray', dash='dash', width=2),
    marker=dict(size=8)
))

fig.update_layout(
    title='Gap Filling Method Mean Errors vs Mean Changes (with Std Dev)',
    xaxis_title='Gap Size (minutes)',
    yaxis_title='Value (mm)',
    barmode='group',
    hovermode='x unified'
)
fig.show()

In [None]:
# let's plot the errors for each interval for each gap filling method vs the mean change for that gap size
for gap_size_minutes in [10, 30, 60, 120, 180, 240, 300, 360]:
    subset = results_df[results_df['gap_size_minutes'] == gap_size_minutes]
    
    # Prepare data for mean errors and std deviations
    intervals = list(range(10, gap_size_minutes + 10, 10))
    ffill_means = [subset[f'error_ffill_{i}min'].mean() for i in intervals]
    ffill_stds = [subset[f'error_ffill_{i}min'].std() for i in intervals]
    bfill_means = [subset[f'error_bfill_{i}min'].mean() for i in intervals]
    bfill_stds = [subset[f'error_bfill_{i}min'].std() for i in intervals]
    interp_means = [subset[f'error_interp_{i}min'].mean() for i in intervals]
    interp_stds = [subset[f'error_interp_{i}min'].std() for i in intervals]
    
    # Create figure with error bars
    fig = go.Figure()
    
    # Add traces with error bars for each method
    fig.add_trace(go.Scatter(
        x=intervals, y=ffill_means,
        error_y=dict(type='data', array=ffill_stds, visible=True),
        mode='lines+markers',
        name='FFill Mean Error',
        line=dict(color='orange')
    ))
    fig.add_trace(go.Scatter(
        x=intervals, y=bfill_means,
        error_y=dict(type='data', array=bfill_stds, visible=True),
        mode='lines+markers',
        name='BFill Mean Error',
        line=dict(color='green')
    ))
    fig.add_trace(go.Scatter(
        x=intervals, y=interp_means,
        error_y=dict(type='data', array=interp_stds, visible=True),
        mode='lines+markers',
        name='Interp Mean Error',
        line=dict(color='red')
    ))
    
    # Add mean change line with standard deviation as shaded area
    mean_change = changes_df.loc[gap_size_minutes, 'mean_change']
    std_change = changes_df.loc[gap_size_minutes, 'std_change']
    fig.add_hline(y=mean_change, line_dash="dash", line_color="gray",
                  annotation_text="Mean Change", annotation_position="top left")
    fig.add_hrect(y0=mean_change - std_change, y1=mean_change + std_change, 
                  fillcolor="gray", opacity=0.2, line_width=0,
                  annotation_text="±1 Std", annotation_position="top right")
    
    fig.update_layout(
        title=f'Gap Filling Method Mean Errors for Gap Size {gap_size_minutes} mins (with Std Dev)',
        xaxis_title='Interval (minutes)', 
        yaxis_title='Mean Error (mm)',
        hovermode='x unified'
    )
    fig.show()