In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json

def find_footsteps_improved(df_data, window_size=3, min_distance=10, 
                           jump_threshold_percentile=75, debug=True):
    """
    Improved footstep detection that combines multiple approaches
    """
    y_values = df_data['right_heel_y'].values
    frames = df_data['frame_number'].values
    timestamps = df_data['timestamp'].values
    n = len(y_values)
    
    if debug:
        print(f"Processing {n} frames")
    
    # Phase 1: Find all potential minima with enhanced metrics
    potential_minima = []
    
    for i in range(window_size, n - window_size):
        current_val = y_values[i]
        
        # Check if it's a local minimum
        is_minimum = True
        for j in range(i - window_size, i + window_size + 1):
            if j != i and y_values[j] <= current_val:
                is_minimum = False
                break
        
        if is_minimum:
            # Enhanced change analysis
            look_back = min(20, i)  # Increased from 15
            look_forward = min(20, n - i - 1)
            
            if look_back > 0 and look_forward > 0:
                before_y = np.mean(y_values[i-look_back:i])
                after_y = np.mean(y_values[i+1:i+look_forward+1])
                
                # Calculate multiple metrics
                drop_magnitude = before_y - current_val
                rise_magnitude = after_y - current_val
                total_change = drop_magnitude + rise_magnitude
                
                # Additional metrics for better classification
                variance_before = np.var(y_values[i-look_back:i])
                variance_after = np.var(y_values[i+1:i+look_forward+1])
                
                potential_minima.append({
                    'index': i,
                    'frame_number': frames[i],
                    'timestamp': timestamps[i],
                    'y_value': current_val,
                    'drop_magnitude': drop_magnitude,
                    'rise_magnitude': rise_magnitude,
                    'total_change': total_change,
                    'variance_before': variance_before,
                    'variance_after': variance_after
                })
    
    if not potential_minima:
        return []
    
    # Phase 2: Classify swing vs stance phases
    total_changes = [m['total_change'] for m in potential_minima]
    change_threshold = np.percentile(total_changes, jump_threshold_percentile)
    
    swing_phases = [m for m in potential_minima if m['total_change'] > change_threshold]
    stance_candidates = [m for m in potential_minima if m['total_change'] <= change_threshold]
    
    if debug:
        print(f"Found {len(swing_phases)} swing phases, {len(stance_candidates)} stance candidates")
        print(f"Change threshold: {change_threshold:.4f}")
    
    # Phase 3: Advanced gait cycle detection
    detected_steps = []
    last_detected_frame = -1
    
    # Sort all potential minima by frame number
    all_minima_sorted = sorted(potential_minima, key=lambda x: x['frame_number'])
    
    for candidate in stance_candidates:
        current_frame = candidate['frame_number']
        current_y = candidate['y_value']
        
        if debug:
            print(f"\nProcessing candidate at frame {current_frame}")
        
        # Find preceding swing phases within reasonable distance
        preceding_swings = [
            s for s in swing_phases 
            if s['frame_number'] < current_frame 
            and current_frame - s['frame_number'] < 80  # Increased window
            and s['y_value'] < current_y - 0.03  # Reduced threshold for more sensitivity
        ]
        
        # Find following swing phases to validate this is indeed a step
        following_swings = [
            s for s in swing_phases 
            if s['frame_number'] > current_frame 
            and s['frame_number'] - current_frame < 80
            and s['y_value'] < current_y - 0.03
        ]
        
        # Enhanced validation
        has_preceding_swing = len(preceding_swings) > 0
        has_following_swing = len(following_swings) > 0
        distance_ok = current_frame - last_detected_frame >= min_distance
        
        # For early frames, be more lenient about preceding swings
        is_early_frame = current_frame < 100
        
        if debug:
            print(f"  Preceding swings: {len(preceding_swings)}")
            print(f"  Following swings: {len(following_swings)}")
            print(f"  Distance OK: {distance_ok}")
            print(f"  Is early frame: {is_early_frame}")
        
        # Decision logic
        should_detect = False
        
        if is_early_frame:
            # For early frames, just need following swing or be a clear minimum
            should_detect = (has_following_swing or 
                           candidate['rise_magnitude'] > 0.01) and distance_ok
        else:
            # For later frames, need both preceding and following evidence
            should_detect = (has_preceding_swing and 
                           (has_following_swing or candidate['rise_magnitude'] > 0.01)) and distance_ok
        
        if should_detect:
            detected_steps.append({
                'index': candidate['index'],
                'frame_number': candidate['frame_number'],
                'timestamp': candidate['timestamp'],
                'y_value': candidate['y_value']
            })
            last_detected_frame = current_frame
            if debug:
                print(f"  ✅ DETECTED step at frame {current_frame}")
        else:
            if debug:
                print(f"  ❌ REJECTED step at frame {current_frame}")
    
    return detected_steps

def analyze_detection_accuracy(detected_steps, ground_truth_steps, tolerance_frames=5):
    """
    Analyze detection accuracy with frame tolerance
    """
    gt_frames = [step['frame'] for step in ground_truth_steps]
    detected_frames = [step['frame_number'] for step in detected_steps]
    
    true_positives = 0
    false_positives = 0
    false_negatives = 0
    
    matched_gt = set()
    matched_detected = set()
    
    # Find true positives
    for i, det_frame in enumerate(detected_frames):
        for j, gt_frame in enumerate(gt_frames):
            if abs(det_frame - gt_frame) <= tolerance_frames:
                if j not in matched_gt:
                    true_positives += 1
                    matched_gt.add(j)
                    matched_detected.add(i)
                    break
    
    false_positives = len(detected_frames) - len(matched_detected)
    false_negatives = len(gt_frames) - len(matched_gt)
    
    precision = true_positives / len(detected_frames) if detected_frames else 0
    recall = true_positives / len(gt_frames) if gt_frames else 0
    f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
    
    return {
        'true_positives': true_positives,
        'false_positives': false_positives,
        'false_negatives': false_negatives,
        'precision': precision,
        'recall': recall,
        'f1_score': f1_score
    }

# Example usage (you would load your actual data):

# Load your data
df = pd.read_csv('/Users/yejinbang/Documents/GitHub/sfx-project/data/walk4_frame_data_v1.csv')
with open('/Users/yejinbang/Documents/GitHub/sfx-project/data/test_videos/walk5_ground_truth.json', 'r') as f:
    gt_data = json.load(f)

df_filtered = df[df['right_heel_usable'] == True].copy()
gt_right_steps = [step for step in gt_data['annotations'] if step['foot'] == 'right']

# Run improved detection
detected_steps = find_footsteps_improved(df_filtered, debug=True)

# Analyze accuracy
accuracy_metrics = analyze_detection_accuracy(detected_steps, gt_right_steps)
print(f"Precision: {accuracy_metrics['precision']:.3f}")
print(f"Recall: {accuracy_metrics['recall']:.3f}")  
print(f"F1-Score: {accuracy_metrics['f1_score']:.3f}")


Processing 322 frames
Found 3 swing phases, 8 stance candidates
Change threshold: 0.1192

Processing candidate at frame 4
  Preceding swings: 0
  Following swings: 0
  Distance OK: False
  Is early frame: True
  ❌ REJECTED step at frame 4

Processing candidate at frame 39
  Preceding swings: 0
  Following swings: 1
  Distance OK: True
  Is early frame: True
  ✅ DETECTED step at frame 39

Processing candidate at frame 46
  Preceding swings: 0
  Following swings: 1
  Distance OK: False
  Is early frame: True
  ❌ REJECTED step at frame 46

Processing candidate at frame 122
  Preceding swings: 1
  Following swings: 1
  Distance OK: True
  Is early frame: False
  ✅ DETECTED step at frame 122

Processing candidate at frame 186
  Preceding swings: 1
  Following swings: 1
  Distance OK: True
  Is early frame: False
  ✅ DETECTED step at frame 186

Processing candidate at frame 200
  Preceding swings: 1
  Following swings: 1
  Distance OK: True
  Is early frame: False
  ✅ DETECTED step at frame 