In [22]:
import pandas as pd
import numpy as np

In [23]:
# Load the merged sensor dataset and the merged window labels from ./data.
raw_df, window_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "./data/merged_dataset_with_concepts.csv",
        "./data/merged_window_labels.csv",
    )
)

In [24]:
# Report (rows, columns) for the sensor table, then for the window table.
for loaded_frame in (raw_df, window_df):
    print(loaded_frame.shape)

(8802, 10)
(150, 8)


In [25]:
# Rule-based concepts that are appended to the exported CSVs.
rule_based_concepts = ["movement_variability", "movement_consistency"]

# Concepts that must NOT be written to the export CSVs
# (i.e. concepts we do not want used for training).
rules_to_exclude_from_saving = ["static_posture"]


def _columns_to_save(existing_columns, extra_columns, excluded_columns):
    """Return existing + extra column names, in order, without duplicates or excluded names.

    De-duplicating keeps this cell idempotent: if it is re-run after the concept
    columns were already added to a frame, each concept is still listed once,
    so ``to_csv(columns=...)`` does not write the same column twice.
    """
    excluded = set(excluded_columns)
    # dict.fromkeys drops repeats while preserving first-seen order.
    ordered_unique = dict.fromkeys([*existing_columns, *extra_columns])
    return [col for col in ordered_unique if col not in excluded]


raw_df_cols_to_save = _columns_to_save(raw_df.columns, rule_based_concepts, rules_to_exclude_from_saving)
window_df_cols_to_save = _columns_to_save(window_df.columns, rule_based_concepts, rules_to_exclude_from_saving)
print("Raw data columns to save:", raw_df_cols_to_save)
print("Window data columns to save:", window_df_cols_to_save)

Raw data columns to save: ['user', 'activity', 'timestamp', 'x-axis', 'y-axis', 'z-axis', 'time_s', 'periodicity', 'temporal_stability', 'coordination', 'movement_variability', 'movement_consistency']
Window data columns to save: ['window_idx', 'user', 'activity', 'start_time', 'end_time', 'periodicity', 'temporal_stability', 'coordination', 'movement_variability', 'movement_consistency']


In [26]:
# Calculate concepts for both raw data and window data

def calculate_raw_data_movement_variability(df, window_size=50, min_samples=5):
    """Score every row by the spread of acceleration magnitude around it.

    For row ``i`` the window is the positional slice
    ``[i - window_size // 2, i + window_size // 2)`` clipped to the frame.
    Windows are purely positional: they do not stop at user or activity
    boundaries. The raw score is ``Series.std()`` of
    ``sqrt(x**2 + y**2 + z**2)`` over the window; raw scores are then min-max
    normalised across all rows.

    Args:
        df: DataFrame with 'x-axis', 'y-axis' and 'z-axis' columns.
        window_size: Nominal window length in rows.
        min_samples: Windows with fewer rows get a raw score of 0.0. That
            placeholder takes part in the min-max normalisation like any
            other raw score.

    Returns:
        numpy float array of length ``len(df)`` with values in [0, 1];
        all zeros when every raw score is identical; empty for an empty frame.
    """
    n_rows = len(df)
    if n_rows == 0:
        # .min()/.max() on a zero-size array raise ValueError, so bail out early.
        return np.zeros(0)

    # The magnitude of a sample does not depend on the window: compute it once.
    magnitude = np.sqrt(df['x-axis']**2 + df['y-axis']**2 + df['z-axis']**2)

    half_window = window_size // 2
    variability_scores = np.zeros(n_rows)
    for i in range(n_rows):
        start_idx = max(0, i - half_window)
        end_idx = min(n_rows, i + half_window)
        if end_idx - start_idx < min_samples:
            continue  # too few rows: keep the 0.0 placeholder
        variability_scores[i] = magnitude.iloc[start_idx:end_idx].std()

    # Normalize to [0, 1]
    lowest = variability_scores.min()
    highest = variability_scores.max()
    if highest > lowest:
        return (variability_scores - lowest) / (highest - lowest)
    return np.zeros_like(variability_scores)

def calculate_raw_data_movement_consistency(df, window_size=50, min_samples=10):
    """Score every row by the lag-1 autocorrelation of magnitude around it.

    For row ``i`` the window is the positional slice
    ``[i - window_size // 2, i + window_size // 2)`` clipped to the frame.
    Windows are purely positional: they do not stop at user or activity
    boundaries. The raw score is the Pearson correlation between the window's
    magnitude series and the same series shifted by one sample; raw scores
    are then min-max normalised across all rows.

    Args:
        df: DataFrame with 'x-axis', 'y-axis' and 'z-axis' columns.
        window_size: Nominal window length in rows.
        min_samples: Windows with fewer rows get a raw score of 0.0. At least
            3 rows are always required, because the lagged pair needs two
            points each for a correlation to be defined.

    Returns:
        numpy float array of length ``len(df)`` with values in [0, 1];
        all zeros when every raw score is identical; empty for an empty frame.

    Note:
        The 0.0 placeholder (short window or undefined correlation) is a raw
        score and is normalised with the real correlations, so it does not
        necessarily map to 0.0 in the output.
    """
    n_rows = len(df)
    if n_rows == 0:
        # .min()/.max() on a zero-size array raise ValueError, so bail out early.
        return np.zeros(0)

    # The magnitude of a sample does not depend on the window: compute it once.
    magnitude = np.sqrt(df['x-axis']**2 + df['y-axis']**2 + df['z-axis']**2).to_numpy()

    half_window = window_size // 2
    required = max(min_samples, 3)
    consistency_scores = np.zeros(n_rows)

    # A constant window has zero variance, so corrcoef divides 0 by 0 and
    # returns NaN. That case is mapped to 0.0 below; silence the warning.
    with np.errstate(divide='ignore', invalid='ignore'):
        for i in range(n_rows):
            start_idx = max(0, i - half_window)
            end_idx = min(n_rows, i + half_window)
            if end_idx - start_idx < required:
                continue  # too few rows: keep the 0.0 placeholder
            window = magnitude[start_idx:end_idx]
            correlation = np.corrcoef(window[:-1], window[1:])[0, 1]
            if not np.isnan(correlation):
                consistency_scores[i] = correlation

    # Normalize to [0, 1]
    lowest = consistency_scores.min()
    highest = consistency_scores.max()
    if highest > lowest:
        return (consistency_scores - lowest) / (highest - lowest)
    return np.zeros_like(consistency_scores)

def calculate_movement_variability(df_sensor, df_windows, min_samples=5):
    """Min-max normalised standard deviation of acceleration magnitude per window.

    For each row of ``df_windows`` the sensor rows with the same 'user' and
    'activity' and with ``start_time <= time_s <= end_time`` (both ends
    inclusive) are selected, and ``Series.std()`` of
    ``sqrt(x**2 + y**2 + z**2)`` is taken as the raw score. Raw scores are
    normalised with the min and max over all usable windows.

    Args:
        df_sensor: DataFrame with 'user', 'activity', 'time_s', 'x-axis',
            'y-axis' and 'z-axis' columns.
        df_windows: DataFrame with 'user', 'activity', 'start_time' and
            'end_time' columns.
        min_samples: Windows with fewer matching sensor rows are unusable.

    Returns:
        numpy float array with one value per window, in window order. Usable
        windows are in [0, 1]. Unusable windows (too few rows, or a
        non-finite standard deviation) are 0.0. All values are 0.0 when the
        usable raw scores are all equal.
    """
    n_windows = len(df_windows)
    if n_windows == 0:
        return np.zeros(0)

    # Single pass: NaN marks windows with no usable raw score.
    raw_std = np.full(n_windows, np.nan)
    window_specs = zip(df_windows['user'], df_windows['activity'],
                       df_windows['start_time'], df_windows['end_time'])
    for pos, (user, activity, start_time, end_time) in enumerate(window_specs):
        in_window = ((df_sensor['user'] == user) &
                     (df_sensor['activity'] == activity) &
                     (df_sensor['time_s'] >= start_time) &
                     (df_sensor['time_s'] <= end_time))
        window_data = df_sensor[in_window]
        if len(window_data) < min_samples:
            continue
        magnitude = np.sqrt(window_data['x-axis']**2 + window_data['y-axis']**2 + window_data['z-axis']**2)
        raw_std[pos] = magnitude.std()

    variability_scores = np.zeros(n_windows)
    # Only finite values may define min/max. A NaN std (e.g. NaN samples) fed
    # into min()/max() would make the result depend on window order and could
    # zero out every window.
    usable = np.isfinite(raw_std)
    if usable.any():
        min_var = raw_std[usable].min()
        var_range = raw_std[usable].max() - min_var
        if var_range > 0:
            variability_scores[usable] = (raw_std[usable] - min_var) / var_range
    return variability_scores

def calculate_movement_consistency(df_sensor, df_windows, min_samples=10):
    """Min-max normalised lag-1 autocorrelation of acceleration magnitude per window.

    For each row of ``df_windows`` the sensor rows with the same 'user' and
    'activity' and with ``start_time <= time_s <= end_time`` (both ends
    inclusive) are selected. The raw score is the Pearson correlation between
    the window's ``sqrt(x**2 + y**2 + z**2)`` series and the same series
    shifted by one sample. Raw scores are normalised with the min and max
    over all usable windows.

    Args:
        df_sensor: DataFrame with 'user', 'activity', 'time_s', 'x-axis',
            'y-axis' and 'z-axis' columns.
        df_windows: DataFrame with 'user', 'activity', 'start_time' and
            'end_time' columns.
        min_samples: Windows with fewer matching sensor rows are unusable.
            At least 3 rows are always required, because the lagged pair
            needs two points each for a correlation to be defined.

    Returns:
        numpy float array with one value per window, in window order. Usable
        windows are in [0, 1]. Unusable windows (too few rows, or an
        undefined correlation such as a constant signal) are 0.0. All values
        are 0.0 when the usable raw scores are all equal.
    """
    n_windows = len(df_windows)
    if n_windows == 0:
        return np.zeros(0)

    required = max(min_samples, 3)

    # Single pass: NaN marks windows with no usable raw score.
    raw_corr = np.full(n_windows, np.nan)
    window_specs = zip(df_windows['user'], df_windows['activity'],
                       df_windows['start_time'], df_windows['end_time'])

    # A constant window has zero variance, so corrcoef divides 0 by 0 and
    # returns NaN. That case is treated as unusable; silence the warning.
    with np.errstate(divide='ignore', invalid='ignore'):
        for pos, (user, activity, start_time, end_time) in enumerate(window_specs):
            in_window = ((df_sensor['user'] == user) &
                         (df_sensor['activity'] == activity) &
                         (df_sensor['time_s'] >= start_time) &
                         (df_sensor['time_s'] <= end_time))
            window_data = df_sensor[in_window]
            if len(window_data) < required:
                continue
            magnitude = np.sqrt(
                window_data['x-axis']**2 + window_data['y-axis']**2 + window_data['z-axis']**2
            ).to_numpy()
            raw_corr[pos] = np.corrcoef(magnitude[:-1], magnitude[1:])[0, 1]

    consistency_scores = np.zeros(n_windows)
    usable = np.isfinite(raw_corr)
    if usable.any():
        min_consistency = raw_corr[usable].min()
        consistency_range = raw_corr[usable].max() - min_consistency
        if consistency_range > 0:
            consistency_scores[usable] = (raw_corr[usable] - min_consistency) / consistency_range
    return consistency_scores

def detect_static_posture(df, window_size=10, variance_threshold=0.5):
    """Flag rows whose surrounding samples barely vary.

    For row ``i`` the window is the positional slice
    ``[i - window_size // 2, i + window_size // 2)`` clipped to the frame.
    The variance of each axis is taken over the window and the three
    variances are averaged. The detection uses this signal variance, not a
    magnitude-based motion intensity.

    Args:
        df: DataFrame with 'x-axis', 'y-axis' and 'z-axis' columns.
        window_size: Nominal window length in rows.
        variance_threshold: A row is static when the mean per-axis variance
            is strictly below this value.

    Returns:
        numpy float array of length ``len(df)``: 1.0 for static rows, else 0.0.
        Rows whose window is empty are 0.0.
    """
    # Pull the three axes out once instead of re-selecting columns per row.
    axes = df[['x-axis', 'y-axis', 'z-axis']].values
    n_rows = len(axes)
    half_window = window_size // 2

    static_posture = np.zeros(n_rows)
    for i in range(n_rows):
        start_idx = max(0, i - half_window)
        end_idx = min(n_rows, i + half_window)
        if end_idx <= start_idx:
            # Empty window: the variance would be NaN, which is never below
            # the threshold, so the row stays 0.0.
            continue

        # Mean of the per-axis variances (not magnitude-based)
        signal_variance = np.var(axes[start_idx:end_idx], axis=0).mean()
        if signal_variance < variance_threshold:
            static_posture[i] = 1.0

    return static_posture

# Row-level static posture flag, derived from per-axis signal variance.
raw_df["static_posture"] = detect_static_posture(raw_df)

# Row-level rule-based concepts (sliding windows over the raw samples).
raw_concept_steps = (
    ("Calculating movement variability for raw data...", 'movement_variability',
     calculate_raw_data_movement_variability),
    ("Calculating movement consistency for raw data...", 'movement_consistency',
     calculate_raw_data_movement_consistency),
)
for progress_message, concept_column, compute_concept in raw_concept_steps:
    print(progress_message)
    raw_df[concept_column] = compute_concept(raw_df)

print("New rule-based concepts computed for raw data!")

# Window-level rule-based concepts (fixed windows taken from window_df).
window_concept_steps = (
    ("\nCalculating movement variability for window data...", 'movement_variability',
     calculate_movement_variability),
    ("Calculating movement consistency for window data...", 'movement_consistency',
     calculate_movement_consistency),
)
for progress_message, concept_column, compute_concept in window_concept_steps:
    print(progress_message)
    window_df[concept_column] = compute_concept(raw_df, window_df)

print("New rule-based concepts computed for window data!")

# Print the observed min/max of each concept so the normalisation can be checked by eye.
print(f"\n=== NORMALIZATION VERIFICATION ===")
print(f"Raw data - Movement variability range: {raw_df['movement_variability'].min():.4f} to {raw_df['movement_variability'].max():.4f}")
print(f"Raw data - Movement consistency range: {raw_df['movement_consistency'].min():.4f} to {raw_df['movement_consistency'].max():.4f}")
print(f"Raw data - Static posture range: {raw_df['static_posture'].min():.4f} to {raw_df['static_posture'].max():.4f}")
print(f"\nWindow data - Movement variability range: {window_df['movement_variability'].min():.4f} to {window_df['movement_variability'].max():.4f}")
print(f"Window data - Movement consistency range: {window_df['movement_consistency'].min():.4f} to {window_df['movement_consistency'].max():.4f}")

# Define contextual relationships - which features should use static posture contextually
def get_contextual_features_config():
    """
    Return a fresh mapping of feature name -> whether it uses static posture as context.

    Every feature is currently marked independent (False).
    """
    independent_features = (
        'periodicity',           # periodic patterns don't depend on static posture
        'temporal_stability',    # about consistency, not static periods
        'coordination',          # about limb synchronization, not static posture
        'movement_variability',  # about motion patterns, not static posture
        'movement_consistency',  # about temporal patterns, not static posture
    )
    return {feature_name: False for feature_name in independent_features}

# Build the configuration and list, per feature, whether it depends on static posture.
contextual_config = get_contextual_features_config()
print("Contextual Features Configuration:")
for feature in contextual_config:
    uses_context = contextual_config[feature]
    print(f"  {feature}: {'Uses static posture context' if uses_context else 'Independent'}")

def compute_window_features(df, start, end, user=None, activity=None, variance_threshold=0.5):
    """Compute the static-posture flag for one time window.

    Rows with ``start <= time_s <= end`` (both ends inclusive) are selected.
    By default the selection is by time only, so rows of every user and
    activity that fall in the range are included. Pass ``user`` and/or
    ``activity`` to restrict the selection to that recording.

    Args:
        df: DataFrame with 'time_s', 'x-axis', 'y-axis' and 'z-axis' columns
            (plus 'user' / 'activity' when those filters are used).
        start: Window start, compared against 'time_s'.
        end: Window end, compared against 'time_s'.
        user: Optional value to match in the 'user' column.
        activity: Optional value to match in the 'activity' column.
        variance_threshold: The window is static when the mean per-axis
            variance is strictly below this value.

    Returns:
        pandas.Series with a single entry 'static_posture': 1.0 (static),
        0.0 (not static) or NaN when no rows match.
    """
    selected = (df["time_s"] >= start) & (df["time_s"] <= end)
    if user is not None:
        selected &= df["user"] == user
    if activity is not None:
        selected &= df["activity"] == activity
    segment = df[selected]

    if len(segment) == 0:
        return pd.Series({
            "static_posture": np.nan
        })

    # Static posture is based on signal variance, not motion intensity.
    signal_variance = np.var(segment[['x-axis', 'y-axis', 'z-axis']].values, axis=0).mean()
    static_posture = 1.0 if signal_variance < variance_threshold else 0.0

    return pd.Series({
        "static_posture": static_posture
    })

# Window-level static posture.
# Each window is evaluated only on the sensor rows of its own user and
# activity, the same selection the window-level variability/consistency
# functions use. Filtering on time_s alone would also pull in other
# recordings that overlap the same time range.
window_features = window_df.apply(
    lambda row: compute_window_features(
        raw_df[(raw_df["user"] == row["user"]) & (raw_df["activity"] == row["activity"])],
        row["start_time"],
        row["end_time"],
    ),
    axis=1,
)
# Assign rather than concat so re-running the cell overwrites the column
# instead of appending a duplicate 'static_posture' column each time.
window_df["static_posture"] = window_features["static_posture"]

# Make sure the concepts are discrete: bins=3 splits each column's observed
# range into three equal-width bins, labelled 0.0 / 0.5 / 1.0.
for frame in (raw_df, window_df):
    for concept in ("movement_variability", "movement_consistency"):
        frame[concept] = pd.cut(
            frame[concept],
            bins=3,
            labels=[0.0, 0.5, 1.0]
        ).astype(float)

# Export only the configured columns ('static_posture' is not among them).
raw_df.to_csv("./data/final_dataset.csv", columns=raw_df_cols_to_save, index=False)
window_df.to_csv("./data/final_window_labels.csv", columns=window_df_cols_to_save, index=False)

Calculating movement variability for raw data...
Calculating movement consistency for raw data...
New rule-based concepts computed for raw data!

Calculating movement variability for window data...
Calculating movement consistency for window data...
New rule-based concepts computed for window data!

=== NORMALIZATION VERIFICATION ===
Raw data - Movement variability range: 0.0000 to 1.0000
Raw data - Movement consistency range: 0.0000 to 1.0000
Raw data - Static posture range: 0.0000 to 1.0000

Window data - Movement variability range: 0.0000 to 1.0000
Window data - Movement consistency range: 0.0000 to 1.0000
Contextual Features Configuration:
  periodicity: Independent
  temporal_stability: Independent
  coordination: Independent
  movement_variability: Independent
  movement_consistency: Independent
