<a href="https://colab.research.google.com/github/wongledongle/liftempo/blob/main/liftempovisual1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [77]:
!pip install scipy



In [78]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import find_peaks

# Notebook-wide pandas display settings: show at most 20 columns and use a
# display width of 120 characters.
pd.options.display.max_columns = 20
pd.options.display.width = 120

In [79]:
# Absolute location of the exported session samples (under /content).
CSV_PATH = "/content/liftempo_sessions 2.csv"

# Read every sample row into a single frame, then preview the first five rows.
df = pd.read_csv(filepath_or_buffer=CSV_PATH)
df.head(n=5)


Unnamed: 0,session_id,session_date,sample_index,timestamp,rotX,rotY,rotZ,accX,accY,accZ
0,AC3E7102-65EC-42C3-8782-F5C235F29E3A,2025-12-16T03:15:36.222Z,0,1765855000.0,0.130125,0.020638,-0.105609,-0.043743,0.102632,-0.034769
1,AC3E7102-65EC-42C3-8782-F5C235F29E3A,2025-12-16T03:15:36.222Z,1,1765855000.0,0.044633,-0.091622,0.002189,0.048786,-0.020863,-0.046658
2,AC3E7102-65EC-42C3-8782-F5C235F29E3A,2025-12-16T03:15:36.222Z,2,1765855000.0,-0.053888,-0.046635,0.062859,-0.019219,-0.035935,-0.031313
3,AC3E7102-65EC-42C3-8782-F5C235F29E3A,2025-12-16T03:15:36.222Z,3,1765855000.0,0.040757,-0.037635,0.02213,-0.017326,0.017241,-0.03066
4,AC3E7102-65EC-42C3-8782-F5C235F29E3A,2025-12-16T03:15:36.222Z,4,1765855000.0,0.062631,-0.080817,-0.011351,0.025298,0.016426,-0.027056


In [80]:
# Basic structure
print("Shape:", df.shape)
print("\nColumns:", df.columns.tolist())
print("\nInfo:")
# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in display() would render a stray "None" below it.
df.info()

# How many distinct sessions?
session_ids = df["session_id"].unique()
print("\nNumber of sessions:", len(session_ids))
print("Session IDs:", session_ids)

# Samples per session
# NOTE(review): max + 1 equals the row count only if sample_index starts at 0
# and has no gaps within a session — confirm against the recorder.
samples_per_session = df.groupby("session_id")["sample_index"].max() + 1
print("\nSamples per session:")
display(samples_per_session)


Shape: (465, 10)

Columns: ['session_id', 'session_date', 'sample_index', 'timestamp', 'rotX', 'rotY', 'rotZ', 'accX', 'accY', 'accZ']

Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 465 entries, 0 to 464
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   session_id    465 non-null    object 
 1   session_date  465 non-null    object 
 2   sample_index  465 non-null    int64  
 3   timestamp     465 non-null    float64
 4   rotX          465 non-null    float64
 5   rotY          465 non-null    float64
 6   rotZ          465 non-null    float64
 7   accX          465 non-null    float64
 8   accY          465 non-null    float64
 9   accZ          465 non-null    float64
dtypes: float64(7), int64(1), object(2)
memory usage: 36.5+ KB


None


Number of sessions: 1
Session IDs: ['AC3E7102-65EC-42C3-8782-F5C235F29E3A']

Samples per session:


Unnamed: 0_level_0,sample_index
session_id,Unnamed: 1_level_1
AC3E7102-65EC-42C3-8782-F5C235F29E3A,465


In [81]:
# NOTE(review): an equivalent summarize_session_metrics is defined again in the
# next cell; when the cells run in order, that later definition is the one in
# effect by the time the pipeline is called.
def summarize_session_metrics(rep_metrics: pd.DataFrame) -> pd.Series:
    """
    Collapse a table of per-rep timings into one summary record.

    rep_metrics: one row per rep. When it is non-empty it must contain the
                 columns "eccentric_duration", "concentric_duration" and
                 "total_duration".

    Returns a Series with the rep count, the mean and the population standard
    deviation (ddof=0) of each duration column, and the total time under
    tension (sum of eccentric durations plus sum of concentric durations).
    For an empty input the count is 0, every statistic is NaN and the time
    under tension is 0.0.
    """
    if not rep_metrics.empty:
        ecc_durations = rep_metrics["eccentric_duration"]
        con_durations = rep_metrics["concentric_duration"]
        rep_durations = rep_metrics["total_duration"]

        populated = {
            "rep_count": len(rep_metrics),
            "avg_eccentric_duration": float(ecc_durations.mean()),
            "avg_concentric_duration": float(con_durations.mean()),
            "avg_total_duration": float(rep_durations.mean()),
            "std_eccentric_duration": float(ecc_durations.std(ddof=0)),
            "std_concentric_duration": float(con_durations.std(ddof=0)),
            "std_total_duration": float(rep_durations.std(ddof=0)),
            "total_time_under_tension": float(ecc_durations.sum() + con_durations.sum()),
        }
        return pd.Series(populated)

    # Nothing detected: keep the same keys so every summary has the same layout.
    placeholder = {
        "rep_count": 0,
        "avg_eccentric_duration": np.nan,
        "avg_concentric_duration": np.nan,
        "avg_total_duration": np.nan,
        "std_eccentric_duration": np.nan,
        "std_concentric_duration": np.nan,
        "std_total_duration": np.nan,
        "total_time_under_tension": 0.0,
    }
    return pd.Series(placeholder)


In [82]:
def get_session_df(df, session_id):
    """
    Return the rows of one session, ordered by sample_index, with a fresh
    0..n-1 index and an added "t_rel" column (timestamp minus the session's
    smallest timestamp). The input frame is not modified.
    """
    in_session = df["session_id"] == session_id
    return (
        df.loc[in_session]
        .sort_values("sample_index")
        .reset_index(drop=True)
        .assign(t_rel=lambda frame: frame["timestamp"] - frame["timestamp"].min())
    )

def dominant_axis(session_df):
    """
    Pick the rotation column whose samples vary the most.

    Compares the variance of "rotX", "rotY" and "rotZ", prints the winning
    column name and returns it. On a tie the earliest of the three wins.
    """
    candidates = ("rotX", "rotY", "rotZ")
    spread = {name: session_df[name].var() for name in candidates}
    winner = max(spread, key=lambda name: spread[name])
    print("Dominant axis:", winner)
    return winner

def smooth_signal(signal, window=7):
    """
    Centered moving average of `signal` over `window` samples, returned as a
    NumPy array of the same length. Positions where the centered window does
    not fit entirely inside the data come back as NaN.
    """
    series = pd.Series(signal)
    centered_windows = series.rolling(window=window, center=True)
    return centered_windows.mean().to_numpy()

def detect_reps_v5(session_df, axis=None, smooth_window=13,
                   peak_height=0.55, valley_height=0.35,
                   min_distance=50, prominence=0.25,
                   min_concentric_time=0.4):
    """
    Detect reps as valley -> peak pairs on a smoothed rotation signal.

    session_df: DataFrame for a single session with a "t_rel" column and the
                chosen axis column.
    axis: column to analyse; when None the axis returned by dominant_axis()
          is used.
    smooth_window: window length passed to smooth_signal().
    peak_height / valley_height: minimum height for a peak of the smoothed
          signal / of the negated smoothed signal.
    min_distance: minimum spacing, in samples, between neighbouring peaks
          (and between neighbouring valleys).
    prominence: minimum prominence required of both peaks and valleys.
    min_concentric_time: a valley -> peak pair is kept only if it spans more
          than this much t_rel time.

    The defaults reproduce the previously hard-coded tuning.

    Returns (reps, smooth, peaks, valleys). `smooth` is the smoothed signal,
    `peaks` / `valleys` are index arrays from find_peaks, and `reps` is a list
    of dicts with:
      valley_idx, peak_idx : sample positions of the pair
      ecc_time : t_rel elapsed from the last detected peak before the valley
                 up to the valley, or None when no peak precedes the valley
      con_time : t_rel elapsed from the valley to its peak
    """
    if axis is None:
        axis = dominant_axis(session_df)

    raw = session_df[axis].to_numpy()
    t = session_df["t_rel"].to_numpy()

    # smoother signal
    smooth = smooth_signal(raw, window=smooth_window)

    # Peaks of the smoothed signal.
    peaks, _ = find_peaks(
        smooth,
        height=peak_height,
        distance=min_distance,
        prominence=prominence,
    )

    # Valleys are peaks of the negated signal.
    valleys, _ = find_peaks(
        -smooth,
        height=valley_height,
        distance=min_distance,
        prominence=prominence,
    )

    # Pair each valley with the first peak that follows it. Both index arrays
    # are ascending, so the peak cursor `p` only ever moves forward.
    reps = []
    p = 0
    for v in valleys:
        while p < len(peaks) and peaks[p] < v:
            p += 1
        if p >= len(peaks):
            break
        peak = int(peaks[p])

        con_time = float(t[peak] - t[v])
        if con_time <= min_concentric_time:
            # Too short to count as a rep.
            continue

        # A peak may close only one rep. When two valleys occur with no peak
        # between them, keep the valley nearest the peak; otherwise the later
        # rep would start before the earlier one ends and downstream
        # eccentric durations would come out negative.
        if reps and reps[-1]["peak_idx"] == peak:
            reps.pop()

        # Eccentric time runs from the last peak before this valley to the
        # valley; it is undefined when the signal starts with a valley.
        ecc_time = float(t[v] - t[peaks[p - 1]]) if p > 0 else None

        reps.append({
            "valley_idx": int(v),
            "peak_idx": peak,
            "ecc_time": ecc_time,
            "con_time": con_time,
        })

    print("Detected reps:", len(reps))
    return reps, smooth, peaks, valleys

def plot_reps_peak_valley(session_df, smooth, peaks, valleys):
    """
    Plot the smoothed signal against session_df["t_rel"] and mark the detected
    peaks (red) and valleys (blue), then show the figure.
    """
    time_axis = session_df["t_rel"].to_numpy()

    _, ax = plt.subplots(figsize=(12, 4))
    ax.plot(time_axis, smooth, label="smoothed")

    markers = (
        (peaks, "red", "peaks"),
        (valleys, "blue", "valleys"),
    )
    for indices, colour, tag in markers:
        ax.scatter(time_axis[indices], smooth[indices], c=colour, label=tag)

    ax.set_title("Peak/Valley Rep Detection")
    ax.legend()
    plt.show()

def label_phases_for_session(session_df, reps, sample_rate_hz=50, pre_valley_ecc_time=0.25):
    """
    Tag every sample of a session with a movement phase.

    Phase ids: 0 = neutral, 1 = eccentric, 2 = concentric.

    session_df: DataFrame for a single session (one row per sample).
    reps: list of dicts carrying 'valley_idx' and 'peak_idx'.
    sample_rate_hz: approximate recording rate, used to turn
                    pre_valley_ecc_time into a number of samples.
    pre_valley_ecc_time: for the first rep only, how many seconds before the
                         valley are counted as eccentric.

    Returns a copy of session_df with two extra columns, "phase_id" and
    "phase_label".
    """
    phase_names = ("neutral", "eccentric", "concentric")

    phase_ids = np.zeros(len(session_df), dtype=int)  # everything starts neutral
    lead_in_samples = int(pre_valley_ecc_time * sample_rate_hz)

    previous_peak = None
    for rep in reps:
        valley = int(rep["valley_idx"])
        peak = int(rep["peak_idx"])

        # Eccentric: from the previous peak (or, for the first rep, a fixed
        # lead-in before the valley) through the valley. This re-labels the
        # previous peak sample as eccentric.
        if previous_peak is not None:
            ecc_from = previous_peak
        else:
            ecc_from = max(0, valley - lead_in_samples)
        phase_ids[ecc_from:valley + 1] = 1

        # Concentric: valley through peak. Written second, so the valley
        # sample itself ends up concentric.
        phase_ids[valley:peak + 1] = 2

        previous_peak = peak

    result = session_df.copy()
    result["phase_id"] = phase_ids
    result["phase_label"] = [phase_names[code] for code in phase_ids]
    return result

def compute_rep_metrics(session_df, reps, axis="rotZ",
                        sample_rate_hz=50,
                        pre_valley_ecc_time=0.25):
    """
    Compute per-rep metrics for a single session.

    session_df: DataFrame with at least columns:
        - t_rel (seconds)
        - axis column (e.g. rotZ)
    reps: list of dicts with 'valley_idx' and 'peak_idx'
    axis: which column to use for amplitude metrics
    sample_rate_hz: sampling frequency (approx.)
    pre_valley_ecc_time: how far before valley to approximate eccentric start for first rep

    Returns a DataFrame with one row per rep. The columns are always present,
    even when `reps` is empty, so callers can select columns without first
    checking for an empty result.
    """
    # Fixed output schema; also used when there are no reps at all.
    columns = [
        "rep_index",
        "start_idx",
        "valley_idx",
        "peak_idx",
        "end_idx",
        "start_time",
        "bottom_time",
        "end_time",
        "eccentric_duration",
        "concentric_duration",
        "total_duration",
        "valley_value",
        "peak_value",
    ]

    t = session_df["t_rel"].to_numpy()
    sig = session_df[axis].to_numpy()

    pre_valley_samples = int(pre_valley_ecc_time * sample_rate_hz)

    rows = []
    last_peak_idx = None

    for i, rep in enumerate(reps):
        v = int(rep["valley_idx"])
        p = int(rep["peak_idx"])

        # approximate eccentric start
        if last_peak_idx is None:
            # first rep: start a fixed window before valley
            start_idx = max(0, v - pre_valley_samples)
        else:
            # later reps: start at previous peak
            start_idx = last_peak_idx

        end_idx = p

        start_time = float(t[start_idx])
        bottom_time = float(t[v])
        end_time = float(t[end_idx])

        rows.append({
            "rep_index": i,
            "start_idx": start_idx,
            "valley_idx": v,
            "peak_idx": p,
            "end_idx": end_idx,
            "start_time": start_time,
            "bottom_time": bottom_time,
            "end_time": end_time,
            "eccentric_duration": bottom_time - start_time,
            "concentric_duration": end_time - bottom_time,
            "total_duration": end_time - start_time,
            "valley_value": float(sig[v]),
            "peak_value": float(sig[p]),
        })

        last_peak_idx = p

    return pd.DataFrame(rows, columns=columns)

def summarize_session_metrics(rep_metrics: pd.DataFrame) -> pd.Series:
    """
    Collapse a table of per-rep timings into one summary record.

    rep_metrics: one row per rep. When it is non-empty it must contain the
                 columns "eccentric_duration", "concentric_duration" and
                 "total_duration".

    Returns a Series with the rep count, the mean and the population standard
    deviation (ddof=0) of each duration column, and the total time under
    tension (sum of eccentric durations plus sum of concentric durations).
    For an empty input the count is 0, every statistic is NaN and the time
    under tension is 0.0.
    """
    if not rep_metrics.empty:
        ecc_durations = rep_metrics["eccentric_duration"]
        con_durations = rep_metrics["concentric_duration"]
        rep_durations = rep_metrics["total_duration"]

        populated = {
            "rep_count": len(rep_metrics),
            "avg_eccentric_duration": float(ecc_durations.mean()),
            "avg_concentric_duration": float(con_durations.mean()),
            "avg_total_duration": float(rep_durations.mean()),
            "std_eccentric_duration": float(ecc_durations.std(ddof=0)),
            "std_concentric_duration": float(con_durations.std(ddof=0)),
            "std_total_duration": float(rep_durations.std(ddof=0)),
            "total_time_under_tension": float(ecc_durations.sum() + con_durations.sum()),
        }
        return pd.Series(populated)

    # Nothing detected: keep the same keys so every summary has the same layout.
    placeholder = {
        "rep_count": 0,
        "avg_eccentric_duration": np.nan,
        "avg_concentric_duration": np.nan,
        "avg_total_duration": np.nan,
        "std_eccentric_duration": np.nan,
        "std_concentric_duration": np.nan,
        "std_total_duration": np.nan,
        "total_time_under_tension": 0.0,
    }
    return pd.Series(placeholder)

def analyze_single_session(df, session_id, axis="rotZ", detection_fn=detect_reps_v5):
    """
    Run the full pipeline for one session.

    df: frame holding samples of one or more sessions.
    session_id: which session to analyse.
    axis: signal column used for detection and amplitude metrics. Pass None
          to pick the column via dominant_axis().
    detection_fn: callable invoked as detection_fn(session_df, axis=axis) and
                  returning (reps, smooth, peaks, valleys).

    Returns the tuple
      (reps, labeled_session, rep_metrics, session_summary,
       smooth, peaks, valleys, session_df).
    """
    session_df = get_session_df(df, session_id)

    # Resolve an automatic axis once, so detection and metrics read the same
    # column (compute_rep_metrics cannot take axis=None).
    if axis is None:
        axis = dominant_axis(session_df)

    reps, smooth, peaks, valleys = detection_fn(session_df, axis=axis)
    labeled_session = label_phases_for_session(session_df, reps)
    rep_metrics = compute_rep_metrics(session_df, reps, axis=axis)
    session_summary = summarize_session_metrics(rep_metrics)
    return reps, labeled_session, rep_metrics, session_summary, smooth, peaks, valleys, session_df

def compute_metrics_for_all_sessions(
    df,
    axis="rotZ",
    detection_fn=detect_reps_v5,
):
    """
    Loops over all session_ids in df and computes:
      - per-rep metrics (combined into a single DataFrame)
      - per-session summary metrics (one row per session)

    axis: signal column used for detection and amplitude metrics. Pass None
          to pick the column per session via dominant_axis().
    detection_fn: callable invoked as detection_fn(session_df, axis=...) and
                  returning (reps, smooth, peaks, valleys).

    Returns:
      rep_metrics_all : DataFrame (empty, without columns, if no session
                        produced any rep)
      session_summaries : DataFrame
    """
    all_rep_rows = []
    all_session_rows = []

    for sid in df["session_id"].unique():
        # --- Extract session & sort ---
        session_df = get_session_df(df, sid)

        # Resolve an automatic axis once per session, so detection and metrics
        # read the same column (compute_rep_metrics cannot take axis=None).
        session_axis = dominant_axis(session_df) if axis is None else axis

        # --- Detect reps (only the rep list is needed here) ---
        reps, _, _, _ = detection_fn(session_df, axis=session_axis)

        # --- Compute rep-level metrics ---
        rep_metrics = compute_rep_metrics(session_df, reps, axis=session_axis)

        # Add session_id to rep metrics
        if not rep_metrics.empty:
            rep_metrics["session_id"] = sid
            all_rep_rows.append(rep_metrics)

        # --- Compute session-level summary ---
        summary = summarize_session_metrics(rep_metrics)
        summary["session_id"] = sid
        summary["rep_count_detected"] = len(reps)
        all_session_rows.append(summary)

    # Combine all sessions
    rep_metrics_all = (
        pd.concat(all_rep_rows, ignore_index=True) if all_rep_rows else pd.DataFrame()
    )

    session_summaries = pd.DataFrame(all_session_rows)

    return rep_metrics_all, session_summaries

In [83]:
# Run rep detection and metric computation for every session in df, using the
# function's defaults (axis "rotZ", detector detect_reps_v5).
rep_metrics_all, session_summaries = compute_metrics_for_all_sessions(df)

Detected reps: 3


In [84]:
# Preview the combined per-rep metrics (one row per detected rep).
rep_metrics_all.head()

Unnamed: 0,rep_index,start_idx,valley_idx,peak_idx,end_idx,start_time,bottom_time,end_time,eccentric_duration,concentric_duration,total_duration,valley_value,peak_value,session_id
0,0,43,55,79,79,0.838885,1.077055,1.554293,0.23817,0.477238,0.715408,-0.403796,0.850898,AC3E7102-65EC-42C3-8782-F5C235F29E3A
1,1,79,112,138,138,1.554293,2.210265,2.72739,0.655972,0.517125,1.173097,-1.450951,1.07172,AC3E7102-65EC-42C3-8782-F5C235F29E3A
2,2,138,243,267,267,2.72739,4.815308,5.29196,2.087918,0.476652,2.56457,-1.293643,1.391616,AC3E7102-65EC-42C3-8782-F5C235F29E3A


In [85]:
# One summary row per session.
session_summaries

Unnamed: 0,rep_count,avg_eccentric_duration,avg_concentric_duration,avg_total_duration,std_eccentric_duration,std_concentric_duration,std_total_duration,total_time_under_tension,session_id,rep_count_detected
0,3.0,0.99402,0.490338,1.484358,0.792086,0.018943,0.786347,4.453075,AC3E7102-65EC-42C3-8782-F5C235F29E3A,3


In [86]:
# Export the per-rep metrics to a CSV (relative path), without the index column.
rep_metrics_all.to_csv("liftempo_rep_metrics_all.csv", index=False)

In [87]:
# Export the per-session summaries to a CSV (relative path), without the index column.
session_summaries.to_csv("liftempo_session_summaries.csv", index=False)