In [8]:
import pandas as pd
import numpy as np

In [9]:
# Read the two input tables: the per-sample dataset and the per-window label table.
raw_df, window_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "./data/merged_dataset_with_concepts.csv",
        "./data/merged_window_labels.csv",
    )
)

In [10]:
# Print (rows, columns) of each loaded table as a quick sanity check.
for loaded_frame in (raw_df, window_df):
    print(loaded_frame.shape)

(8802, 10)
(150, 8)


In [11]:
# Rule-based concepts computed by this notebook.
calculated_rule_concepts = [
    "motion_intensity",
    "vertical_dominance",
    "static_posture",
    "directional_variability",
    "burstiness",
]

# Computed concepts that are left out of the exported CSVs
# (i.e. concepts we do not want to be used for training).
rules_to_exclude_from_saving = [
    "static_posture",
    "directional_variability",
    "burstiness",
]

# Export column lists: the columns already present in each table followed by
# the computed concepts, minus the excluded ones.
raw_df_cols_to_save = [
    name
    for name in [*raw_df.columns, *calculated_rule_concepts]
    if name not in rules_to_exclude_from_saving
]
window_df_cols_to_save = [
    name
    for name in [*window_df.columns, *calculated_rule_concepts]
    if name not in rules_to_exclude_from_saving
]
print(raw_df_cols_to_save)
print(window_df_cols_to_save)

['user', 'activity', 'timestamp', 'x-axis', 'y-axis', 'z-axis', 'time_s', 'periodicity', 'temporal_stability', 'coordination', 'motion_intensity', 'vertical_dominance']
['window_idx', 'user', 'activity', 'start_time', 'end_time', 'periodicity', 'temporal_stability', 'coordination', 'motion_intensity', 'vertical_dominance']


In [12]:
# Per-sample acceleration magnitude: Euclidean norm of the three axis readings.
raw_df["magnitude"] = np.sqrt(
    raw_df["x-axis"] ** 2 + raw_df["y-axis"] ** 2 + raw_df["z-axis"] ** 2
)

# Motion intensity: magnitude min-max scaled over the whole raw table.
magnitude_floor = raw_df["magnitude"].min()
magnitude_span = raw_df["magnitude"].max() - magnitude_floor
raw_df["motion_intensity"] = (raw_df["magnitude"] - magnitude_floor) / magnitude_span

# L2 norm of each full axis column.
# NOTE(review): these three values are not referenced anywhere else in the
# visible part of the notebook - confirm before removing.
x_norm, y_norm, z_norm = (
    np.linalg.norm(raw_df[axis_name]) for axis_name in ("x-axis", "y-axis", "z-axis")
)

# Vertical dominance: share of |z| in the summed absolute axis readings.
# The 1e-9 term keeps the denominator non-zero for all-zero samples.
raw_df["vertical_dominance"] = raw_df["z-axis"].abs() / (
    raw_df["x-axis"].abs() + raw_df["y-axis"].abs() + raw_df["z-axis"].abs() + 1e-9
)

def compute_directional_variability(df, start, end):
    """Calculate directional variability within a time window.

    The acceleration vector of every sample with ``start <= time_s <= end`` is
    normalised to unit length; the result is ``1 - mean(cosine similarity)``
    between consecutive unit vectors, so higher values mean more directional
    change.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``time_s``, ``x-axis``, ``y-axis``, ``z-axis``.
    start, end : float
        Inclusive bounds of the window, compared against ``time_s``.

    Returns
    -------
    float
        The directional variability, or ``np.nan`` when the window holds fewer
        than 3 samples or fewer than 2 samples with magnitude above ``1e-6``.
    """
    in_window = (df["time_s"] >= start) & (df["time_s"] <= end)
    segment = df.loc[in_window, ["x-axis", "y-axis", "z-axis"]]
    if len(segment) < 3:
        return np.nan

    # Work on one (n, 3) array instead of fetching rows one at a time.
    vectors = segment.to_numpy(dtype=float)
    magnitudes = np.sqrt((vectors ** 2).sum(axis=1))

    # Skip (near-)zero vectors: they have no direction and would divide by zero.
    # NaN magnitudes compare False here and are skipped as well.
    usable = magnitudes > 1e-6
    if np.count_nonzero(usable) < 2:
        return np.nan

    directions = vectors[usable] / magnitudes[usable, np.newaxis]

    # Row-wise dot product of each unit vector with its successor.
    cos_similarities = np.einsum("ij,ij->i", directions[:-1], directions[1:])

    return 1 - cos_similarities.mean()

# NEW: Burstiness - measures how impulsive/spiky the motion is
def compute_burstiness(df, start, end):
    """Calculate burstiness (how impulsive/spiky the motion is) within a time window.

    Two measures are computed on the acceleration magnitude of the samples with
    ``start <= time_s <= end`` and then averaged:

    * the coefficient of variation of the magnitude (``std / mean``), and
    * the energy of the first differences of the magnitude divided by the
      total energy of the magnitude.

    Either measure is taken as 0 when its denominator is not above ``1e-6``.
    Returns ``np.nan`` when the window contains fewer than 10 samples.
    """
    window = df[(df["time_s"] >= start) & (df["time_s"] <= end)]
    if len(window) < 10:
        # Too few samples for a meaningful estimate.
        return np.nan

    # Magnitude time series of the window.
    mag = np.sqrt(window["x-axis"] ** 2 + window["y-axis"] ** 2 + window["z-axis"] ** 2)

    # Measure 1: coefficient of variation (spikiness).
    mean_mag = mag.mean()
    cv_burstiness = mag.std() / mean_mag if mean_mag > 1e-6 else 0

    # Measure 2: energy of sample-to-sample changes relative to total energy.
    total_energy = np.sum(mag ** 2)
    high_freq_energy = np.sum(np.diff(mag) ** 2)
    freq_burstiness = high_freq_energy / total_energy if total_energy > 1e-6 else 0

    # Plain average of the two measures.
    return (cv_burstiness + freq_burstiness) / 2

# FIXED: Static posture based on signal variance, not motion intensity
# This removes the circular dependency
def detect_static_posture(df, window_size=10, variance_threshold=0.5):
    """
    Detect static posture based on signal variance, not motion intensity.
    This removes the circular dependency.

    For every row ``i`` the rows ``[i - window_size // 2, i + window_size // 2)``
    (clipped to the table; note the end is exclusive) are taken, the variance of
    each axis over that window is computed, and the three variances are
    averaged. The row is flagged static when that mean variance is below
    ``variance_threshold``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``x-axis``, ``y-axis``, ``z-axis``.
    window_size : int, default 10
        Number of neighbouring rows considered around each row.
    variance_threshold : float, default 0.5
        Mean per-axis variance below which a row counts as static.

    Returns
    -------
    numpy.ndarray
        Float array of length ``len(df)`` holding 1.0 (static) or 0.0.
    """
    # Convert once up front; slicing the array inside the loop is far cheaper
    # than re-slicing the DataFrame for every row.
    axes = df[['x-axis', 'y-axis', 'z-axis']].to_numpy()
    n_rows = len(axes)
    half_window = window_size // 2

    static_posture = np.zeros(n_rows)

    for i in range(n_rows):
        start_idx = max(0, i - half_window)
        end_idx = min(n_rows, i + half_window)

        # Mean of the per-axis variances over the window (not magnitude-based).
        signal_variance = np.var(axes[start_idx:end_idx], axis=0).mean()

        # Static if variance is very low (indicating minimal movement).
        # An empty window (window_size < 2) yields NaN, which compares False.
        static_posture[i] = 1.0 if signal_variance < variance_threshold else 0.0

    return static_posture

# Flag every raw sample as static (1.0) or not (0.0) with the variance-based
# detector, using its default window size.
raw_df["static_posture"] = detect_static_posture(raw_df)

# Progress message printed before the per-sample directional variability /
# burstiness pass over the raw data.
print("Computing directional variability and burstiness for raw data...")
# For raw data, we'll compute these features using a sliding window approach
def compute_raw_features_with_window(df, window_size=10):
    """Compute directional variability and burstiness for every raw sample.

    For each row, a window of ``window_size`` (in ``time_s`` units) centred on
    that row's ``time_s`` is evaluated with ``compute_directional_variability``
    and ``compute_burstiness``.

    Returns a tuple ``(directional_variability, burstiness)`` of two lists with
    one entry per row of ``df``, in row order.
    """
    half_width = window_size / 2

    # One (directional variability, burstiness) pair per sample.
    per_sample = [
        (
            compute_directional_variability(df, center - half_width, center + half_width),
            compute_burstiness(df, center - half_width, center + half_width),
        )
        for center in df["time_s"]
    ]

    directional_variability = [pair[0] for pair in per_sample]
    burstiness = [pair[1] for pair in per_sample]
    return directional_variability, burstiness

# Run the sliding-window computation over the raw data (default window size);
# it returns one value per row for each of the two features.
raw_dir_var, raw_burst = compute_raw_features_with_window(raw_df)
# Store the per-row results as new columns of the raw table.
raw_df["directional_variability"] = raw_dir_var
raw_df["burstiness"] = raw_burst

print("New features computed for raw data!")

# Define contextual relationships - which features should use static posture contextually
def get_contextual_features_config():
    """
    Describe which motion concepts are interpreted in the context of static posture.

    Returns a new dict mapping each concept name to a bool: True when the
    concept uses static posture context, False when it is treated as
    independent of it.
    """
    # Use static posture context: it helps distinguish low motion from
    # transitions, and static periods affect how vertical dominance is read.
    posture_dependent = ("motion_intensity", "vertical_dominance")

    # Independent of static posture: periodic patterns, temporal consistency,
    # limb synchronization, directional change and motion spikiness.
    posture_independent = (
        "periodicity",
        "temporal_stability",
        "coordination",
        "directional_variability",
        "burstiness",
    )

    return {
        **dict.fromkeys(posture_dependent, True),
        **dict.fromkeys(posture_independent, False),
    }

# Build the contextual configuration and list it, one concept per line.
contextual_config = get_contextual_features_config()
print("Contextual Features Configuration:")
for feature, uses_context in contextual_config.items():
    if uses_context:
        context_label = 'Uses static posture context'
    else:
        context_label = 'Independent'
    print(f"  {feature}: {context_label}")

def compute_window_features(df, start, end):
    """Compute the rule-based features for one time window of ``df``.

    The window consists of the rows with ``start <= time_s <= end``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``time_s``, ``x-axis``, ``y-axis``, ``z-axis``.
    start, end : float
        Inclusive bounds of the window, compared against ``time_s``.

    Returns
    -------
    pandas.Series
        Indexed by ``motion_intensity``, ``vertical_dominance``,
        ``static_posture``, ``magnitude``, ``directional_variability`` and
        ``burstiness``. Every value is NaN when the window is empty.

    Notes
    -----
    Motion intensity is the window's mean magnitude min-max scaled with the
    magnitude range of *all* rows of ``df``. The bounds are derived from the
    ``df`` argument itself rather than from a notebook-level table, so the
    function does not depend on a temporary ``magnitude`` column existing
    elsewhere.
    """
    feature_names = [
        "motion_intensity",
        "vertical_dominance",
        "static_posture",
        "magnitude",
        "directional_variability",
        "burstiness",
    ]

    in_window = (df["time_s"] >= start) & (df["time_s"] <= end)
    segment = df[in_window]
    if len(segment) == 0:
        return pd.Series(np.nan, index=feature_names)

    # Magnitude of every row of df: gives the normalisation bounds, and the
    # window's own magnitudes via the same mask.
    all_mag = np.sqrt(df["x-axis"]**2 + df["y-axis"]**2 + df["z-axis"]**2)
    mag_min = all_mag.min()
    mag_range = all_mag.max() - mag_min
    magnitude_mean = all_mag[in_window].mean()

    # Calculate motion intensity (independent of static posture).
    # A zero (or NaN) range means there is nothing to scale against.
    if mag_range > 0:
        motion_intensity = (magnitude_mean - mag_min) / mag_range
    else:
        motion_intensity = np.nan

    # Calculate vertical dominance (independent of static posture): share of
    # the z-axis L2 norm in the summed per-axis L2 norms of the window.
    z_norm = np.linalg.norm(segment["z-axis"])
    vert_dom = z_norm / (
        np.linalg.norm(segment["x-axis"]) + np.linalg.norm(segment["y-axis"]) + z_norm + 1e-9
    )

    # Static posture based on signal variance, not motion intensity.
    signal_variance = np.var(segment[['x-axis', 'y-axis', 'z-axis']].values, axis=0).mean()
    static_posture = 1.0 if signal_variance < 0.5 else 0.0

    # Directional variability and burstiness re-select the window from df.
    directional_variability = compute_directional_variability(df, start, end)
    burstiness = compute_burstiness(df, start, end)

    return pd.Series({
        "motion_intensity": motion_intensity,
        "vertical_dominance": vert_dom,
        "static_posture": static_posture,
        "magnitude": magnitude_mean,
        "directional_variability": directional_variability,
        "burstiness": burstiness
    })

# Compute the rule-based features for every labelled window from the raw samples.
window_features = window_df.apply(
    lambda row: compute_window_features(raw_df, row["start_time"], row["end_time"]),
    axis=1,
)

# Attach the features to the window table. Feature columns left over from a
# previous run of this cell are dropped first so a re-run does not create
# duplicate column names; "magnitude" is only an intermediate and is removed.
window_df = pd.concat(
    [window_df.drop(columns=window_features.columns, errors="ignore"), window_features],
    axis=1,
).drop(columns=["magnitude"])

# "magnitude" was only a helper column on the raw table as well.
raw_df = raw_df.drop(columns=["magnitude"])

# Export only the selected columns (excluded concepts are not written).
raw_df.to_csv("./data/final_dataset.csv", columns=raw_df_cols_to_save, index=False)
window_df.to_csv("./data/final_window_labels.csv", columns=window_df_cols_to_save, index=False)

Computing directional variability and burstiness for raw data...
New features computed for raw data!
Contextual Features Configuration:
  motion_intensity: Uses static posture context
  vertical_dominance: Uses static posture context
  periodicity: Independent
  temporal_stability: Independent
  coordination: Independent
  directional_variability: Independent
  burstiness: Independent


In [13]:
print("Available features:", calculated_rule_concepts)

# Summary statistics of the computed concepts, first for the raw table and
# then for the window table.
for stats_heading, stats_frame in (
    ("\nRaw data statistics:", raw_df),
    ("\nWindow data statistics:", window_df),
):
    print(stats_heading)
    print(stats_frame[calculated_rule_concepts].describe())

# Final (rows, columns) of both tables.
print(f"\nData shapes:")
print(f"Raw data: {raw_df.shape}")
print(f"Window data: {window_df.shape}")

Available features: ['motion_intensity', 'vertical_dominance', 'static_posture', 'directional_variability', 'burstiness']

Raw data statistics:
       motion_intensity  vertical_dominance  static_posture  \
count       8802.000000         8802.000000     8802.000000   
mean           0.331265            0.243082        0.317201   
std            0.146701            0.213129        0.465413   
min            0.000000            0.000000        0.000000   
25%            0.270740            0.084025        0.000000   
50%            0.295913            0.177345        0.000000   
75%            0.387405            0.358235        1.000000   
max            1.000000            0.916763        1.000000   

       directional_variability   burstiness  
count              8802.000000  8802.000000  
mean                  0.162346     0.267708  
std                   0.133657     0.093572  
min                   0.008382     0.015741  
25%                   0.082459     0.218222  
50%         

In [14]:
# Show the first rows of the computed concepts for each table.
for head_title, head_frame in (
    ("Raw data head:", raw_df),
    ("\nWindow data head:", window_df),
):
    print(head_title)
    print(head_frame[calculated_rule_concepts].head())

Raw data head:
   motion_intensity  vertical_dominance  static_posture  \
0          0.063145            0.420366             0.0   
1          0.075010            0.526316             0.0   
2          0.245287            0.407080             0.0   
3          0.793429            0.390757             0.0   
4          0.342695            0.199557             0.0   

   directional_variability  burstiness  
0                 0.164655    0.454181  
1                 0.163803    0.441930  
2                 0.161684    0.436336  
3                 0.161471    0.431552  
4                 0.159401    0.431536  

Window data head:
   motion_intensity  vertical_dominance  static_posture  \
0          0.316815            0.221105             0.0   
1          0.302850            0.291116             0.0   
2          0.303036            0.181147             0.0   
3          0.313779            0.305797             0.0   
4          0.408648            0.262989             0.0   

   directi