In [None]:
import pandas as pd
import struct
import datetime
from plotly.subplots import make_subplots
import numpy as np
import plotly.graph_objects as go
from dotenv import load_dotenv
import os
from scipy.signal import medfilt

In [None]:
# Selects every row of `packets` whose `uuid` column matches one fixed value
# and whose payload starts with one of two 4-byte headers (hex aa6400a1 or
# aa5c00f0). Each row is returned both as lowercase hex text and as the raw blob.
#
# String literals are single-quoted on purpose: standard SQL reads a
# double-quoted token as an identifier, and SQLite only falls back to treating
# it as a string as a legacy quirk that can be disabled at build time.
QUERY = """
select
    lower(hex(bytes)) as packets_hex,
    bytes as packet
from packets
where (lower(hex(bytes)) like 'aa6400a1%' or lower(hex(bytes)) like 'aa5c00f0%')
    and lower(hex(uuid)) = '610800058d6d82b8614a1c8cb0f8dcc6'
"""

In [None]:
# Read a local .env file (if there is one) before looking the variable up.
# load_dotenv() does not override variables that are already set in the
# process environment.
load_dotenv()

DATABASE_URL = os.getenv("DATABASE_URL")
if DATABASE_URL is None:
    # Fail with a clear message instead of an AttributeError on None.replace.
    raise RuntimeError(
        "DATABASE_URL is not set; define it in the environment or in a .env file"
    )

# Rewrite the "sqlite://" prefix to "sqlite:///../".
# NOTE(review): presumably this makes a path that is relative to the parent
# directory resolve from the notebook's own directory - confirm.
DATABASE_URL = DATABASE_URL.replace("sqlite://", "sqlite:///../")

# Load the result of QUERY into a DataFrame.
df = pd.read_sql(QUERY, DATABASE_URL)

In [None]:
# Bytes 11..14 of each packet are read as a little-endian unsigned 32-bit
# integer and interpreted as seconds since the Unix epoch.
df["datetime"] = pd.to_datetime(
    df["packet"].map(lambda payload: struct.unpack_from('<I', payload, 11)[0]),
    unit="s",
)
# Keep the rows in chronological order from here on.
df = df.sort_values(by="datetime")

# Calendar date and time of day, split out of the decoded timestamp.
df['date'] = df['datetime'].dt.date
df['time'] = df['datetime'].dt.time
# Byte 21 of each packet is stored as the "bpm" column.
df["bpm"] = df["packet"].map(lambda payload: payload[21])

In [None]:
# Bytes 31..34 of each packet are read as a little-endian unsigned 32-bit
# integer and stored unchanged in "sleep_stage".
df["sleep_stage"] = df["packet"].map(
    lambda payload: struct.unpack_from('<I', payload, 31)[0]
)

In [None]:
def plot_heart_rate(days, column):
    """Show one figure per window with heart rate and a second column overlaid.

    Args:
        days: Iterable of DataFrames. Each must have a 'datetime' column, a
            'bpm' column and the column named by ``column``. Each frame is
            expected to hold one noon-to-noon window, as the figure title says.
        column: Name of the column drawn against the secondary y-axis. It is
            also used for the trace name, the axis title and the figure title.

    Returns:
        None. Every non-empty frame is rendered with ``fig.show()``; empty
        frames are skipped.
    """
    for day_data in days:
        # Nothing to draw, and no first timestamp to build the title from.
        if day_data.empty:
            continue

        # Label the window by the day its noon start falls on. Shifting the
        # first sample back 12 hours gives that day even when the first sample
        # of the window was recorded after midnight.
        window_date = (day_data['datetime'].iloc[0] - pd.Timedelta(hours=12)).date()

        # Create a figure with secondary y-axes
        fig = make_subplots(specs=[[{"secondary_y": True}]])

        # Add heart rate trace to primary y-axis
        fig.add_trace(
            go.Scatter(x=day_data['datetime'], y=day_data['bpm'], mode='lines', name='Heart Rate (BPM)', line=dict(color='blue')),
            secondary_y=False,
        )

        # Add the requested column to the secondary y-axis
        fig.add_trace(
            go.Scatter(x=day_data['datetime'], y=day_data[column], mode='lines', name=column, line=dict(color='orange')),
            secondary_y=True,
        )

        # Update layout for titles and axes
        fig.update_layout(
            title=f"Heart Rate and {column} from {window_date} Noon to Next Day Noon",
            xaxis_title="Time",
            legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
        )

        # Update x-axis for time formatting
        fig.update_xaxes(tickformat='%H:%M')

        # Update y-axes titles
        fig.update_yaxes(title_text="Heart Rate (BPM)", secondary_y=False)
        fig.update_yaxes(title_text=column, secondary_y=True)

        # Show the plot
        fig.show()

In [None]:
def filter_noon_to_noon(df):
    """Split a frame into consecutive 24-hour windows that run noon to noon.

    Every row is assigned to the window ``[noon, next noon)`` that contains its
    'datetime' value, so no row with a valid timestamp is dropped. This
    includes samples recorded before noon on a day with no data the previous
    day; they belong to the window that started at noon on that previous day.

    Args:
        df: DataFrame with a datetime-typed 'datetime' column. Other columns
            are carried through unchanged; a 'date' column is not required.

    Returns:
        A list of DataFrames, one per non-empty window, ordered by window
        start. Each is an independent copy, and rows keep their original order
        within a window. An empty input yields an empty list.
    """
    if df.empty:
        return []

    half_day = pd.Timedelta(hours=12)
    # Shifting back 12 hours maps each noon-to-noon window onto one calendar
    # day, so flooring to the day and shifting forward again gives the noon at
    # which the row's window starts.
    window_start = (df['datetime'] - half_day).dt.floor('D') + half_day

    # Group by plain values (not a Series named 'datetime') so the key can
    # never be confused with the frame's own column.
    return [
        day_data.copy()
        for _, day_data in df.groupby(window_start.to_numpy(), sort=True)
    ]

In [None]:
def remove_spikes_row(row, window_size=3, threshold=3):
    """Replace outlying samples with their median-filtered value.

    A sample counts as a spike when it differs from the median-filtered signal
    by more than ``threshold`` times the standard deviation of the whole input.

    Args:
        row: 1-D numeric sequence (NumPy array or pandas Series).
        window_size: Kernel size passed to ``scipy.signal.medfilt``.
        threshold: Multiple of ``np.std(row)`` above which a deviation is
            treated as a spike.

    Returns:
        A copy of ``row`` in which only the spike positions are replaced; the
        input itself is not modified.
    """
    median_smoothed = medfilt(row, kernel_size=window_size)

    # One global cut-off for the whole signal, derived from its overall spread.
    spike_limit = threshold * np.std(row)
    spike_mask = np.abs(row - median_smoothed) > spike_limit

    result = row.copy()
    result[spike_mask] = median_smoothed[spike_mask]
    return result

In [None]:
# Run the decoded stage values through remove_spikes_row with its default
# window size and threshold, and store the result back in the same column.
# NOTE(review): the column is overwritten in place, so re-running this cell
# filters values that have already been filtered.
df["sleep_stage"] = remove_spikes_row(df["sleep_stage"])

In [None]:
# Bucket the raw stage value into four integer categories:
#   0: value < 500_000_000
#   1: 500_000_000 <= value < 900_000_000
#   2: 1_000_000_000 <= value < 1_500_000_000
#   3: value >= 1_500_000_000
# The column is rebuilt from scratch on every run, so no value from an earlier
# execution of this cell can survive. -1 marks rows that matched no bucket.
# NOTE(review): values in [900_000_000, 1_000_000_000) belong to no bucket.
# Which category they should get cannot be told from this notebook, so they
# are reported explicitly below instead of failing later in the integer cast.
df["stage_category"] = np.select(
    [
        df["sleep_stage"] < 500_000_000,
        (df["sleep_stage"] >= 500_000_000) & (df["sleep_stage"] < 900_000_000),
        (df["sleep_stage"] >= 1_000_000_000) & (df["sleep_stage"] < 1_500_000_000),
        df["sleep_stage"] >= 1_500_000_000,
    ],
    [0, 1, 2, 3],
    default=-1,
).astype("int64")

if (df["stage_category"] == -1).any():
    raise ValueError(
        f"{int((df['stage_category'] == -1).sum())} sleep_stage value(s) fall outside "
        "every category range (the ranges leave 900_000_000..999_999_999 unassigned)"
    )

In [None]:
# Split the full frame into noon-to-noon windows, then draw one figure per
# window with heart rate on the primary axis and the stage category on the
# secondary axis.
days = filter_noon_to_noon(df)
plot_heart_rate(days, "stage_category")

In [None]:
# Threshold in seconds (600 * 2 = 20 minutes). identify_sleep uses it twice:
# as the largest gap across which two runs are merged, and as the minimum
# length a period must have to be reported.
SLEEP_DURATION = 600 * 2

def identify_sleep(df):
    """Find the periods during which the stage category is 2.

    Consecutive rows with ``stage_category == 2`` form a run. Runs separated by
    less than ``SLEEP_DURATION`` seconds (end of one to start of the next) are
    merged into one period, and periods shorter than ``SLEEP_DURATION`` seconds
    are discarded.

    Args:
        df: DataFrame with a 'datetime' column and a 'stage_category' column.
            Rows are expected to be in chronological order.

    Returns:
        DataFrame with columns 'start', 'end' (timestamps of the first and last
        row of the period) and 'duration' (hours, rounded to two decimals), one
        row per period, in chronological order. It is empty when no period
        qualifies.
    """
    stage = df["stage_category"]
    in_target_stage = stage == 2

    # A run starts on a category-2 row whose previous row has another category
    # and ends on a category-2 row whose next row has another category. The
    # first and last rows compare against NaN and therefore count as boundaries.
    run_starts = df.loc[in_target_stage & (stage != stage.shift(1)), "datetime"].tolist()
    run_ends = df.loc[in_target_stage & (stage != stage.shift(-1)), "datetime"].tolist()

    # Single pass over the runs: extend the current period while the gap to the
    # next run is short, otherwise open a new one. Always comparing against the
    # end of the already-merged period lets chains of short gaps collapse into
    # one period without dropping rows from a frame that is being iterated.
    merged = []
    for run_start, run_end in zip(run_starts, run_ends):
        if merged and (run_start - merged[-1][1]).total_seconds() < SLEEP_DURATION:
            merged[-1][1] = run_end
        else:
            merged.append([run_start, run_end])

    records = [
        (period_start, period_end, round((period_end - period_start).total_seconds() / 3600, 2))
        for period_start, period_end in merged
        if (period_end - period_start).total_seconds() >= SLEEP_DURATION
    ]
    return pd.DataFrame(records, columns=["start", "end", "duration"])

In [None]:
sleep_df = identify_sleep(df)

# itertuples() yields one named tuple per detected period, with the columns
# available as attributes. Looping over the `.iloc` indexer itself only works
# through Python's legacy __getitem__ iteration fallback.
for row in sleep_df.itertuples(index=False):
    print(f"Start: {row.start}, end: {row.end}, duration: {row.duration}h")