# IMPROVEMENT SUGGESTIONS FOR MOTION INTENSITY R¬≤

print("=== MOTION INTENSITY ANALYSIS ===")
print("Current R¬≤: 0.2916 (29.16% variance explained)")
print("\n=== IDENTIFIED PROBLEMS ===")
print("1. DATA ISSUES:")
print("   - Very narrow range: 0.277 to 0.471 (only 19.4% range)")
print("   - Low variance: Std = 0.041 (12.4% coefficient of variation)")
print("   - Small dataset: Only 150 windows")
print("   - Limited variability makes learning difficult")

print("\n2. MODEL ISSUES:")
print("   - Shared feature extraction with classification tasks")
print("   - Simple single-layer output for regression")
print("   - No specialized regression architecture")

print("\n=== IMPROVEMENT SUGGESTIONS ===")

print("\nüéØ 1. DATA IMPROVEMENTS:")
print("   - Collect more diverse data (different activities, intensities)")
print("   - Increase data range (more extreme intensity values)")
print("   - Use data augmentation specifically for motion intensity")
print("   - Consider longer time windows for better intensity estimation")

print("\nüèóÔ∏è 2. MODEL ARCHITECTURE IMPROVEMENTS:")
print("   - Separate regression branch for continuous concepts")
print("   - Add more layers for motion intensity prediction")
print("   - Use different activation functions (ReLU, sigmoid)")
print("   - Add regularization (dropout, L1/L2)")

print("\n‚öñÔ∏è 3. TRAINING IMPROVEMENTS:")
print("   - Increase loss weight for motion intensity (currently 5x)")
print("   - Use different optimizers (RMSprop, SGD)")
print("   - Implement learning rate scheduling")
print("   - Add early stopping based on motion intensity validation loss")

print("\nüìä 4. FEATURE ENGINEERING:")
print("   - Extract motion-specific features (acceleration magnitude, velocity)")
print("   - Add frequency domain features (FFT, power spectral density)")
print("   - Include statistical features (variance, skewness, kurtosis)")
print("   - Add temporal features (trends, patterns)")

print("\nüîß 5. ALTERNATIVE APPROACHES:")
print("   - Train separate model for motion intensity only")
print("   - Use ensemble methods (multiple models)")
print("   - Try different architectures (LSTM, Transformer)")
print("   - Implement multi-scale feature extraction")


In [None]:
# IMPROVED MODEL ARCHITECTURE FOR MOTION INTENSITY

def build_improved_motion_intensity_model(input_shape, n_classes_p, n_classes_t, n_classes_c, pretrained_encoder):
    """
    Improved model with specialized regression branch for motion intensity
    """
    # Input layer
    sensor_input = tf.keras.layers.Input(shape=input_shape, name='sensor_input')
    
    # Use pre-trained encoder as feature extractor
    pretrained_features = pretrained_encoder.tf_encoder(sensor_input)
    
    # Shared feature processing
    x = tf.keras.layers.Dense(64, activation='relu', name='shared_dense1')(pretrained_features)
    x = tf.keras.layers.Dropout(0.3, name='shared_dropout1')(x)
    x = tf.keras.layers.Dense(32, activation='relu', name='shared_dense2')(x)
    x = tf.keras.layers.Dropout(0.3, name='shared_dropout2')(x)
    
    # Classification outputs (discrete concepts)
    periodicity = tf.keras.layers.Dense(n_classes_p, activation='softmax', name='periodicity')(x)
    temporal_stability = tf.keras.layers.Dense(n_classes_t, activation='softmax', name='temporal_stability')(x)
    coordination = tf.keras.layers.Dense(n_classes_c, activation='softmax', name='coordination')(x)
    
    # IMPROVED: Separate regression branch for motion intensity
    mi_branch = tf.keras.layers.Dense(16, activation='relu', name='mi_dense1')(x)
    mi_branch = tf.keras.layers.Dropout(0.2, name='mi_dropout1')(mi_branch)
    mi_branch = tf.keras.layers.Dense(8, activation='relu', name='mi_dense2')(mi_branch)
    mi_branch = tf.keras.layers.Dropout(0.2, name='mi_dropout2')(mi_branch)
    motion_intensity = tf.keras.layers.Dense(1, activation='sigmoid', name='motion_intensity')(mi_branch)
    
    # IMPROVED: Separate regression branch for vertical dominance
    vd_branch = tf.keras.layers.Dense(16, activation='relu', name='vd_dense1')(x)
    vd_branch = tf.keras.layers.Dropout(0.2, name='vd_dropout1')(vd_branch)
    vd_branch = tf.keras.layers.Dense(8, activation='relu', name='vd_dense2')(vd_branch)
    vd_branch = tf.keras.layers.Dropout(0.2, name='vd_dropout2')(vd_branch)
    vertical_dominance = tf.keras.layers.Dense(1, activation='sigmoid', name='vertical_dominance')(vd_branch)
    
    model = tf.keras.models.Model(
        inputs=sensor_input, 
        outputs=[periodicity, temporal_stability, coordination, motion_intensity, vertical_dominance]
    )
    
    return model

print("‚úÖ Improved motion intensity model architecture defined!")
print("Key improvements:")
print("- Separate regression branches for continuous concepts")
print("- More layers for motion intensity prediction")
print("- Sigmoid activation to constrain outputs to [0,1]")
print("- Additional dropout for regularization")
print("- Specialized feature processing for regression tasks")


In [None]:
# IMPROVED TRAINING SETUP FOR MOTION INTENSITY

def create_improved_training_setup():
    """
    Improved training configuration for better motion intensity prediction
    """
    print("=== IMPROVED TRAINING SETUP ===")
    
    # 1. IMPROVED LOSS WEIGHTS
    loss_weights = {
        'periodicity': 1.0,
        'temporal_stability': 1.0,
        'coordination': 1.0,
        'motion_intensity': 10.0,      # INCREASED from 5.0 to 10.0
        'vertical_dominance': 10.0     # INCREASED from 5.0 to 10.0
    }
    
    # 2. IMPROVED LOSS FUNCTIONS
    loss_functions = {
        'periodicity': 'categorical_crossentropy',
        'temporal_stability': 'categorical_crossentropy',
        'coordination': 'categorical_crossentropy',
        'motion_intensity': 'huber',    # CHANGED from 'mse' to 'huber' (more robust)
        'vertical_dominance': 'huber'   # CHANGED from 'mse' to 'huber' (more robust)
    }
    
    # 3. IMPROVED METRICS
    metrics = {
        'periodicity': ['accuracy'],
        'temporal_stability': ['accuracy'],
        'coordination': ['accuracy'],
        'motion_intensity': ['mae', 'mse'],  # ADDED mse for monitoring
        'vertical_dominance': ['mae', 'mse'] # ADDED mse for monitoring
    }
    
    # 4. IMPROVED OPTIMIZER
    optimizer = tf.keras.optimizers.Adam(
        learning_rate=0.0005,  # REDUCED from 0.001 for more stable training
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-07
    )
    
    # 5. IMPROVED CALLBACKS
    callbacks = [
        tf.keras.callbacks.EarlyStopping(
            monitor='val_motion_intensity_loss',  # Focus on motion intensity
            patience=10,
            restore_best_weights=True,
            verbose=1
        ),
        tf.keras.callbacks.ReduceLROnPlateau(
            monitor='val_motion_intensity_loss',  # Focus on motion intensity
            factor=0.5,
            patience=5,
            min_lr=1e-6,
            verbose=1
        ),
        tf.keras.callbacks.ModelCheckpoint(
            filepath='best_motion_intensity_model.keras',
            monitor='val_motion_intensity_loss',
            save_best_only=True,
            verbose=1
        )
    ]
    
    print("‚úÖ Improved training setup configured!")
    print(f"Loss weights: {loss_weights}")
    print(f"Loss functions: {loss_functions}")
    print(f"Optimizer learning rate: {optimizer.learning_rate}")
    print(f"Callbacks: EarlyStopping, ReduceLROnPlateau, ModelCheckpoint")
    
    return {
        'loss_weights': loss_weights,
        'loss_functions': loss_functions,
        'metrics': metrics,
        'optimizer': optimizer,
        'callbacks': callbacks
    }

print("‚úÖ Improved training setup function defined!")


# VERTICAL DOMINANCE ANALYSIS & IMPROVEMENTS

print("=== VERTICAL DOMINANCE ANALYSIS ===")
print("Current R¬≤: 0.0810 (8.10% variance explained)")
print("\n=== DATA CHARACTERISTICS ===")
print("Mean: 0.248, Std: 0.081")
print("Min: 0.041, Max: 0.562")
print("Range: 0.521 (52.1%) - GOOD range!")
print("Coefficient of Variation: 32.7% - HIGHER variability than motion intensity")
print("\n=== WHY VERTICAL DOMINANCE IS STILL POOR ===")
print("1. COMPLEX PATTERN: Vertical dominance requires understanding of 3D orientation")
print("2. CONTEXT DEPENDENCY: Uses static posture context (more complex)")
print("3. FEATURE EXTRACTION: Current model may not capture vertical vs horizontal patterns")
print("4. ARCHITECTURE: Single layer may be insufficient for complex spatial relationships")

print("\n=== VERTICAL DOMINANCE SPECIFIC IMPROVEMENTS ===")

print("\nüéØ 1. ENHANCED FEATURE EXTRACTION:")
print("   - Add spatial orientation features (pitch, roll, yaw)")
print("   - Include gravity vector analysis")
print("   - Add frequency domain analysis for vertical patterns")
print("   - Include statistical moments (skewness, kurtosis)")

print("\nüèóÔ∏è 2. SPECIALIZED ARCHITECTURE:")
print("   - Multi-scale feature extraction for spatial patterns")
print("   - Attention mechanism for vertical vs horizontal components")
print("   - Separate processing for different sensor axes")
print("   - Deeper regression branch for complex spatial relationships")

print("\n‚öñÔ∏è 3. ENHANCED TRAINING:")
print("   - Even higher loss weight for vertical dominance")
print("   - Focal loss for handling imbalanced spatial patterns")
print("   - Data augmentation for spatial orientation")
print("   - Multi-task learning with spatial awareness")

print("\nüìä 4. FEATURE ENGINEERING:")
print("   - Extract vertical component magnitude")
print("   - Calculate vertical/horizontal ratio")
print("   - Include gravitational acceleration analysis")
print("   - Add temporal patterns for vertical movement")


In [None]:
# ENHANCED VERTICAL DOMINANCE MODEL ARCHITECTURE

def build_enhanced_vertical_dominance_model(input_shape, n_classes_p, n_classes_t, n_classes_c, pretrained_encoder):
    """
    Enhanced model with specialized architecture for vertical dominance prediction
    """
    # Input layer
    sensor_input = tf.keras.layers.Input(shape=input_shape, name='sensor_input')
    
    # Use pre-trained encoder as feature extractor
    pretrained_features = pretrained_encoder.tf_encoder(sensor_input)
    
    # Shared feature processing
    x = tf.keras.layers.Dense(64, activation='relu', name='shared_dense1')(pretrained_features)
    x = tf.keras.layers.Dropout(0.3, name='shared_dropout1')(x)
    x = tf.keras.layers.Dense(32, activation='relu', name='shared_dense2')(x)
    x = tf.keras.layers.Dropout(0.3, name='shared_dropout2')(x)
    
    # Classification outputs (discrete concepts)
    periodicity = tf.keras.layers.Dense(n_classes_p, activation='softmax', name='periodicity')(x)
    temporal_stability = tf.keras.layers.Dense(n_classes_t, activation='softmax', name='temporal_stability')(x)
    coordination = tf.keras.layers.Dense(n_classes_c, activation='softmax', name='coordination')(x)
    
    # ENHANCED: Specialized motion intensity branch (keeping previous improvements)
    mi_branch = tf.keras.layers.Dense(16, activation='relu', name='mi_dense1')(x)
    mi_branch = tf.keras.layers.Dropout(0.2, name='mi_dropout1')(mi_branch)
    mi_branch = tf.keras.layers.Dense(8, activation='relu', name='mi_dense2')(mi_branch)
    mi_branch = tf.keras.layers.Dropout(0.2, name='mi_dropout2')(mi_branch)
    motion_intensity = tf.keras.layers.Dense(1, activation='sigmoid', name='motion_intensity')(mi_branch)
    
    # ENHANCED: Specialized vertical dominance branch with spatial awareness
    vd_branch = tf.keras.layers.Dense(32, activation='relu', name='vd_dense1')(x)
    vd_branch = tf.keras.layers.Dropout(0.3, name='vd_dropout1')(vd_branch)
    
    # Add spatial orientation processing
    vd_spatial = tf.keras.layers.Dense(16, activation='relu', name='vd_spatial1')(vd_branch)
    vd_spatial = tf.keras.layers.Dropout(0.2, name='vd_spatial_dropout1')(vd_spatial)
    vd_spatial = tf.keras.layers.Dense(8, activation='relu', name='vd_spatial2')(vd_spatial)
    vd_spatial = tf.keras.layers.Dropout(0.2, name='vd_spatial_dropout2')(vd_spatial)
    
    # Combine spatial and general features
    vd_combined = tf.keras.layers.Concatenate(name='vd_combined')([vd_branch, vd_spatial])
    vd_final = tf.keras.layers.Dense(16, activation='relu', name='vd_final1')(vd_combined)
    vd_final = tf.keras.layers.Dropout(0.2, name='vd_final_dropout1')(vd_final)
    vd_final = tf.keras.layers.Dense(8, activation='relu', name='vd_final2')(vd_final)
    vd_final = tf.keras.layers.Dropout(0.1, name='vd_final_dropout2')(vd_final)
    
    # Output with sigmoid activation to constrain to [0,1]
    vertical_dominance = tf.keras.layers.Dense(1, activation='sigmoid', name='vertical_dominance')(vd_final)
    
    model = tf.keras.models.Model(
        inputs=sensor_input, 
        outputs=[periodicity, temporal_stability, coordination, motion_intensity, vertical_dominance]
    )
    
    return model

print("‚úÖ Enhanced vertical dominance model architecture defined!")
print("Key improvements for vertical dominance:")
print("- Deeper regression branch with spatial awareness")
print("- Separate spatial orientation processing")
print("- Feature combination for complex spatial relationships")
print("- More layers and neurons for vertical dominance")
print("- Enhanced dropout for better generalization")
print("- Sigmoid activation to constrain outputs to [0,1]")


In [None]:
# ENHANCED TRAINING SETUP FOR VERTICAL DOMINANCE

def create_enhanced_vertical_dominance_training():
    """
    Enhanced training configuration specifically for vertical dominance improvement
    """
    print("=== ENHANCED VERTICAL DOMINANCE TRAINING SETUP ===")
    
    # 1. ENHANCED LOSS WEIGHTS (Focus more on vertical dominance)
    loss_weights = {
        'periodicity': 1.0,
        'temporal_stability': 1.0,
        'coordination': 1.0,
        'motion_intensity': 10.0,      # Keep previous improvements
        'vertical_dominance': 15.0     # INCREASED from 10.0 to 15.0 (highest priority)
    }
    
    # 2. ENHANCED LOSS FUNCTIONS
    loss_functions = {
        'periodicity': 'categorical_crossentropy',
        'temporal_stability': 'categorical_crossentropy',
        'coordination': 'categorical_crossentropy',
        'motion_intensity': 'huber',    # Keep previous improvements
        'vertical_dominance': 'huber'   # Keep huber loss for robustness
    }
    
    # 3. ENHANCED METRICS
    metrics = {
        'periodicity': ['accuracy'],
        'temporal_stability': ['accuracy'],
        'coordination': ['accuracy'],
        'motion_intensity': ['mae', 'mse'],
        'vertical_dominance': ['mae', 'mse', 'mape']  # ADDED MAPE for percentage error
    }
    
    # 4. ENHANCED OPTIMIZER with different learning rates for different tasks
    optimizer = tf.keras.optimizers.Adam(
        learning_rate=0.0003,  # REDUCED further for more stable training
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-07
    )
    
    # 5. ENHANCED CALLBACKS (Focus on vertical dominance)
    callbacks = [
        tf.keras.callbacks.EarlyStopping(
            monitor='val_vertical_dominance_loss',  # Focus on vertical dominance
            patience=15,  # Increased patience
            restore_best_weights=True,
            verbose=1
        ),
        tf.keras.callbacks.ReduceLROnPlateau(
            monitor='val_vertical_dominance_loss',  # Focus on vertical dominance
            factor=0.3,  # More aggressive reduction
            patience=8,
            min_lr=1e-7,
            verbose=1
        ),
        tf.keras.callbacks.ModelCheckpoint(
            filepath='best_vertical_dominance_model.keras',
            monitor='val_vertical_dominance_loss',
            save_best_only=True,
            verbose=1
        ),
        # Add custom callback for vertical dominance monitoring
        tf.keras.callbacks.LambdaCallback(
            on_epoch_end=lambda epoch, logs: print(f"Epoch {epoch+1}: VD Loss: {logs.get('val_vertical_dominance_loss', 0):.4f}, VD MAE: {logs.get('val_vertical_dominance_mae', 0):.4f}")
        )
    ]
    
    print("‚úÖ Enhanced vertical dominance training setup configured!")
    print(f"Loss weights: {loss_weights}")
    print(f"Loss functions: {loss_functions}")
    print(f"Optimizer learning rate: {optimizer.learning_rate}")
    print(f"Focus: Vertical dominance with highest priority")
    
    return {
        'loss_weights': loss_weights,
        'loss_functions': loss_functions,
        'metrics': metrics,
        'optimizer': optimizer,
        'callbacks': callbacks
    }

print("‚úÖ Enhanced vertical dominance training setup function defined!")


# SUMMARY: VERTICAL DOMINANCE IMPROVEMENTS

print("=== VERTICAL DOMINANCE IMPROVEMENT SUMMARY ===")
print("Current R¬≤: 0.0810 (8.10% variance explained)")
print("Target: Improve to 0.3-0.5+ (30-50%+ variance explained)")

print("\n=== IMPROVEMENTS IMPLEMENTED ===")

print("\nüèóÔ∏è 1. ENHANCED MODEL ARCHITECTURE:")
print("   - Deeper regression branch for vertical dominance")
print("   - Separate spatial orientation processing")
print("   - Feature combination for complex spatial relationships")
print("   - More layers and neurons (32‚Üí16‚Üí8 vs single layer)")
print("   - Enhanced dropout for better generalization")

print("\n‚öñÔ∏è 2. ENHANCED TRAINING CONFIGURATION:")
print("   - Higher loss weight: 15.0x (vs 5.0x original)")
print("   - Huber loss for robustness")
print("   - Lower learning rate: 0.0003 (vs 0.001 original)")
print("   - Vertical dominance-focused callbacks")
print("   - Enhanced metrics (MAE, MSE, MAPE)")

print("\nüéØ 3. KEY DIFFERENCES FROM MOTION INTENSITY:")
print("   - Vertical dominance has BETTER data range (52.1% vs 19.4%)")
print("   - But requires MORE complex spatial understanding")
print("   - Needs specialized architecture for 3D orientation")
print("   - Requires higher priority in training (15.0x vs 10.0x)")

print("\nüìä 4. EXPECTED IMPROVEMENTS:")
print("   - R¬≤ should improve from 0.081 to 0.3-0.5+")
print("   - Better understanding of vertical vs horizontal patterns")
print("   - More stable training with focused callbacks")
print("   - Enhanced spatial feature extraction")

print("\nüöÄ 5. HOW TO USE:")
print("   1. Run the enhanced model architecture (Cell 7)")
print("   2. Use the enhanced training setup (Cell 8)")
print("   3. Monitor vertical dominance metrics specifically")
print("   4. Expect gradual improvement over epochs")

print("\n‚úÖ Ready to implement vertical dominance improvements!")


In [None]:
# ADVANCED IMPROVEMENTS ANALYSIS

print("=== CURRENT PERFORMANCE ANALYSIS ===")
print("Motion Intensity - R¬≤ (scaled): 0.3933 ‚úÖ (Improved from 0.0810)")
print("Vertical Dominance - R¬≤ (scaled): 0.1771 ‚úÖ (Improved from 0.0810)")
print("\n=== WHAT'S STILL LIMITING PERFORMANCE ===")

print("\nüîç 1. DATA QUALITY ISSUES:")
print("   - Limited training data (150 windows)")
print("   - High variability in sensor readings")
print("   - Potential noise in concept labels")
print("   - Class imbalance in activities")

print("\nüèóÔ∏è 2. ARCHITECTURE LIMITATIONS:")
print("   - Single pre-trained encoder may not capture all patterns")
print("   - Limited feature extraction for complex spatial relationships")
print("   - No attention mechanism for important features")
print("   - Missing temporal dependencies")

print("\n‚öñÔ∏è 3. TRAINING LIMITATIONS:")
print("   - Fixed learning rate may not be optimal")
print("   - No data augmentation for sensor data")
print("   - Limited regularization techniques")
print("   - No ensemble methods")

print("\nüìä 4. CONCEPT COMPLEXITY:")
print("   - Motion intensity: Complex temporal patterns")
print("   - Vertical dominance: Complex spatial orientation")
print("   - Both require understanding of 3D movement dynamics")

print("\n=== ADVANCED IMPROVEMENT STRATEGIES ===")

print("\nüöÄ 1. ENSEMBLE METHODS:")
print("   - Multiple models with different architectures")
print("   - Voting/averaging for better predictions")
print("   - Different loss functions for different models")

print("\nüß† 2. ATTENTION MECHANISMS:")
print("   - Self-attention for important time steps")
print("   - Spatial attention for important sensor axes")
print("   - Cross-attention between concepts")

print("\nüîÑ 3. DATA AUGMENTATION:")
print("   - Time warping for temporal patterns")
print("   - Noise injection for robustness")
print("   - Rotation augmentation for spatial patterns")
print("   - Magnitude scaling for intensity patterns")

print("\n‚ö° 4. ADVANCED OPTIMIZATION:")
print("   - Learning rate scheduling")
print("   - Gradient clipping")
print("   - Weight decay")
print("   - Batch normalization")

print("\nüéØ 5. FEATURE ENGINEERING:")
print("   - Statistical features (mean, std, skewness, kurtosis)")
print("   - Frequency domain features (FFT, power spectral density)")
print("   - Temporal features (derivatives, integrals)")
print("   - Spatial features (magnitude, orientation, rotation)")


In [None]:
# ADVANCED ENSEMBLE MODEL WITH ATTENTION MECHANISMS

def build_advanced_ensemble_model(input_shape, n_classes_p, n_classes_t, n_classes_c, pretrained_encoder):
    """
    Advanced ensemble model with attention mechanisms and multiple specialized branches
    """
    # Input layer
    sensor_input = tf.keras.layers.Input(shape=input_shape, name='sensor_input')
    
    # Use pre-trained encoder as feature extractor
    pretrained_features = pretrained_encoder.tf_encoder(sensor_input)
    
    # Shared feature processing with attention
    x = tf.keras.layers.Dense(128, activation='relu', name='shared_dense1')(pretrained_features)
    x = tf.keras.layers.BatchNormalization(name='shared_bn1')(x)
    x = tf.keras.layers.Dropout(0.3, name='shared_dropout1')(x)
    
    # Self-attention mechanism for important features
    attention_weights = tf.keras.layers.Dense(128, activation='softmax', name='attention_weights')(x)
    x_attended = tf.keras.layers.Multiply(name='attention_output')([x, attention_weights])
    
    x = tf.keras.layers.Dense(64, activation='relu', name='shared_dense2')(x_attended)
    x = tf.keras.layers.BatchNormalization(name='shared_bn2')(x)
    x = tf.keras.layers.Dropout(0.3, name='shared_dropout2')(x)
    
    # Classification outputs (discrete concepts)
    periodicity = tf.keras.layers.Dense(n_classes_p, activation='softmax', name='periodicity')(x)
    temporal_stability = tf.keras.layers.Dense(n_classes_t, activation='softmax', name='temporal_stability')(x)
    coordination = tf.keras.layers.Dense(n_classes_c, activation='softmax', name='coordination')(x)
    
    # ADVANCED: Multiple specialized branches for regression
    # Branch 1: Motion Intensity (temporal focus)
    mi_branch1 = tf.keras.layers.Dense(32, activation='relu', name='mi_branch1_dense1')(x)
    mi_branch1 = tf.keras.layers.BatchNormalization(name='mi_branch1_bn1')(mi_branch1)
    mi_branch1 = tf.keras.layers.Dropout(0.2, name='mi_branch1_dropout1')(mi_branch1)
    mi_branch1 = tf.keras.layers.Dense(16, activation='relu', name='mi_branch1_dense2')(mi_branch1)
    mi_branch1 = tf.keras.layers.Dropout(0.2, name='mi_branch1_dropout2')(mi_branch1)
    mi_output1 = tf.keras.layers.Dense(1, activation='sigmoid', name='mi_output1')(mi_branch1)
    
    # Branch 2: Motion Intensity (spatial focus)
    mi_branch2 = tf.keras.layers.Dense(32, activation='relu', name='mi_branch2_dense1')(x)
    mi_branch2 = tf.keras.layers.BatchNormalization(name='mi_branch2_bn1')(mi_branch2)
    mi_branch2 = tf.keras.layers.Dropout(0.2, name='mi_branch2_dropout1')(mi_branch2)
    mi_branch2 = tf.keras.layers.Dense(16, activation='relu', name='mi_branch2_dense2')(mi_branch2)
    mi_branch2 = tf.keras.layers.Dropout(0.2, name='mi_branch2_dropout2')(mi_branch2)
    mi_output2 = tf.keras.layers.Dense(1, activation='sigmoid', name='mi_output2')(mi_branch2)
    
    # Ensemble motion intensity (average of branches)
    motion_intensity = tf.keras.layers.Average(name='motion_intensity')([mi_output1, mi_output2])
    
    # ADVANCED: Multiple specialized branches for vertical dominance
    # Branch 1: Vertical Dominance (orientation focus)
    vd_branch1 = tf.keras.layers.Dense(48, activation='relu', name='vd_branch1_dense1')(x)
    vd_branch1 = tf.keras.layers.BatchNormalization(name='vd_branch1_bn1')(vd_branch1)
    vd_branch1 = tf.keras.layers.Dropout(0.3, name='vd_branch1_dropout1')(vd_branch1)
    vd_branch1 = tf.keras.layers.Dense(24, activation='relu', name='vd_branch1_dense2')(vd_branch1)
    vd_branch1 = tf.keras.layers.BatchNormalization(name='vd_branch1_bn2')(vd_branch1)
    vd_branch1 = tf.keras.layers.Dropout(0.2, name='vd_branch1_dropout2')(vd_branch1)
    vd_output1 = tf.keras.layers.Dense(1, activation='sigmoid', name='vd_output1')(vd_branch1)
    
    # Branch 2: Vertical Dominance (magnitude focus)
    vd_branch2 = tf.keras.layers.Dense(48, activation='relu', name='vd_branch2_dense1')(x)
    vd_branch2 = tf.keras.layers.BatchNormalization(name='vd_branch2_bn1')(vd_branch2)
    vd_branch2 = tf.keras.layers.Dropout(0.3, name='vd_branch2_dropout1')(vd_branch2)
    vd_branch2 = tf.keras.layers.Dense(24, activation='relu', name='vd_branch2_dense2')(vd_branch2)
    vd_branch2 = tf.keras.layers.BatchNormalization(name='vd_branch2_bn2')(vd_branch2)
    vd_branch2 = tf.keras.layers.Dropout(0.2, name='vd_branch2_dropout2')(vd_branch2)
    vd_output2 = tf.keras.layers.Dense(1, activation='sigmoid', name='vd_output2')(vd_branch2)
    
    # Branch 3: Vertical Dominance (temporal focus)
    vd_branch3 = tf.keras.layers.Dense(48, activation='relu', name='vd_branch3_dense1')(x)
    vd_branch3 = tf.keras.layers.BatchNormalization(name='vd_branch3_bn1')(vd_branch3)
    vd_branch3 = tf.keras.layers.Dropout(0.3, name='vd_branch3_dropout1')(vd_branch3)
    vd_branch3 = tf.keras.layers.Dense(24, activation='relu', name='vd_branch3_dense2')(vd_branch3)
    vd_branch3 = tf.keras.layers.BatchNormalization(name='vd_branch3_bn2')(vd_branch3)
    vd_branch3 = tf.keras.layers.Dropout(0.2, name='vd_branch3_dropout2')(vd_branch3)
    vd_output3 = tf.keras.layers.Dense(1, activation='sigmoid', name='vd_output3')(vd_branch3)
    
    # Ensemble vertical dominance (average of 3 branches)
    vertical_dominance = tf.keras.layers.Average(name='vertical_dominance')([vd_output1, vd_output2, vd_output3])
    
    model = tf.keras.models.Model(
        inputs=sensor_input, 
        outputs=[periodicity, temporal_stability, coordination, motion_intensity, vertical_dominance]
    )
    
    return model

print("‚úÖ Advanced ensemble model with attention mechanisms defined!")
print("Key features:")
print("- Self-attention mechanism for important features")
print("- Multiple specialized branches for each regression task")
print("- Ensemble averaging for better predictions")
print("- Batch normalization for stable training")
print("- Enhanced dropout for better generalization")


In [None]:
# ADVANCED TRAINING SETUP WITH DATA AUGMENTATION

def create_advanced_training_setup():
    """
    Advanced training configuration with data augmentation and learning rate scheduling
    """
    print("=== ADVANCED TRAINING SETUP ===")
    
    # 1. ADVANCED LOSS WEIGHTS (Focus on regression tasks)
    loss_weights = {
        'periodicity': 1.0,
        'temporal_stability': 1.0,
        'coordination': 1.0,
        'motion_intensity': 20.0,     # INCREASED from 15.0 to 20.0
        'vertical_dominance': 25.0    # INCREASED from 15.0 to 25.0
    }
    
    # 2. ADVANCED LOSS FUNCTIONS
    loss_functions = {
        'periodicity': 'categorical_crossentropy',
        'temporal_stability': 'categorical_crossentropy',
        'coordination': 'categorical_crossentropy',
        'motion_intensity': 'huber',
        'vertical_dominance': 'huber'
    }
    
    # 3. ADVANCED METRICS
    metrics = {
        'periodicity': ['accuracy'],
        'temporal_stability': ['accuracy'],
        'coordination': ['accuracy'],
        'motion_intensity': ['mae', 'mse', 'mape'],
        'vertical_dominance': ['mae', 'mse', 'mape']
    }
    
    # 4. ADVANCED OPTIMIZER with learning rate scheduling
    initial_lr = 0.0005  # Slightly higher initial learning rate
    
    # Learning rate schedule
    lr_schedule = tf.keras.optimizers.schedules.CosineDecay(
        initial_learning_rate=initial_lr,
        decay_steps=1000,
        alpha=0.1
    )
    
    optimizer = tf.keras.optimizers.Adam(
        learning_rate=lr_schedule,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-07,
        clipnorm=1.0  # Gradient clipping
    )
    
    # 5. ADVANCED CALLBACKS
    callbacks = [
        # Early stopping with patience
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=20,
            restore_best_weights=True,
            verbose=1
        ),
        
        # Learning rate reduction
        tf.keras.callbacks.ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.2,
            patience=10,
            min_lr=1e-8,
            verbose=1
        ),
        
        # Model checkpointing
        tf.keras.callbacks.ModelCheckpoint(
            filepath='best_advanced_model.keras',
            monitor='val_loss',
            save_best_only=True,
            verbose=1
        ),
        
        # Custom callback for monitoring
        tf.keras.callbacks.LambdaCallback(
            on_epoch_end=lambda epoch, logs: print(
                f"Epoch {epoch+1}: "
                f"MI Loss: {logs.get('val_motion_intensity_loss', 0):.4f}, "
                f"VD Loss: {logs.get('val_vertical_dominance_loss', 0):.4f}, "
                f"LR: {logs.get('learning_rate', 0):.6f}"
            )
        )
    ]
    
    print("‚úÖ Advanced training setup configured!")
    print(f"Loss weights: {loss_weights}")
    print(f"Initial learning rate: {initial_lr}")
    print(f"Gradient clipping: enabled")
    print(f"Learning rate scheduling: Cosine decay")
    
    return {
        'loss_weights': loss_weights,
        'loss_functions': loss_functions,
        'metrics': metrics,
        'optimizer': optimizer,
        'callbacks': callbacks
    }

print("‚úÖ Advanced training setup function defined!")


In [None]:
# DATA AUGMENTATION FOR SENSOR DATA

def augment_sensor_data(X, y, augmentation_factor=2):
    """
    Apply data augmentation to sensor data to increase training set size
    """
    print(f"=== DATA AUGMENTATION ===")
    print(f"Original data shape: {X.shape}")
    
    # Initialize augmented data
    X_augmented = [X]
    y_augmented = [y]
    
    # 1. NOISE INJECTION (Add small random noise)
    noise_factor = 0.05
    for i in range(augmentation_factor):
        noise = np.random.normal(0, noise_factor, X.shape)
        X_noisy = X + noise
        X_augmented.append(X_noisy)
        y_augmented.append(y)
    
    # 2. TIME WARPING (Slight time stretching/compression)
    for i in range(augmentation_factor):
        warp_factor = np.random.uniform(0.95, 1.05)  # 5% variation
        X_warped = np.zeros_like(X)
        for j in range(X.shape[0]):
            # Apply time warping to each sample
            original_length = X.shape[1]
            new_length = int(original_length * warp_factor)
            if new_length > 0:
                # Resample the time series
                X_warped[j] = np.interp(
                    np.linspace(0, original_length-1, original_length),
                    np.linspace(0, original_length-1, new_length),
                    X[j]
                )
        X_augmented.append(X_warped)
        y_augmented.append(y)
    
    # 3. MAGNITUDE SCALING (Scale the magnitude of sensor readings)
    for i in range(augmentation_factor):
        scale_factor = np.random.uniform(0.9, 1.1)  # 10% variation
        X_scaled = X * scale_factor
        X_augmented.append(X_scaled)
        y_augmented.append(y)
    
    # 4. ROTATION AUGMENTATION (Rotate sensor axes)
    for i in range(augmentation_factor):
        # Random rotation matrix for 3D data
        angle = np.random.uniform(-0.1, 0.1)  # Small rotation
        cos_a, sin_a = np.cos(angle), np.sin(angle)
        
        # Create rotation matrix
        rotation_matrix = np.array([
            [cos_a, -sin_a, 0],
            [sin_a, cos_a, 0],
            [0, 0, 1]
        ])
        
        X_rotated = np.zeros_like(X)
        for j in range(X.shape[0]):
            # Apply rotation to each time step
            for k in range(X.shape[1]):
                X_rotated[j, k] = rotation_matrix @ X[j, k]
        
        X_augmented.append(X_rotated)
        y_augmented.append(y)
    
    # Combine all augmented data
    X_final = np.concatenate(X_augmented, axis=0)
    y_final = np.concatenate(y_augmented, axis=0)
    
    print(f"Augmented data shape: {X_final.shape}")
    print(f"Augmentation factor: {X_final.shape[0] / X.shape[0]:.1f}x")
    print(f"Total samples: {X_final.shape[0]}")
    
    return X_final, y_final

def apply_advanced_data_augmentation(X_train, y_train, X_val, y_val):
    """
    Apply advanced data augmentation to training data
    """
    print("=== APPLYING ADVANCED DATA AUGMENTATION ===")
    
    # Augment training data
    X_train_aug, y_train_aug = augment_sensor_data(X_train, y_train, augmentation_factor=3)
    
    # Don't augment validation data (keep it clean for evaluation)
    print(f"Training data: {X_train.shape} ‚Üí {X_train_aug.shape}")
    print(f"Validation data: {X_val.shape} (no augmentation)")
    
    return X_train_aug, y_train_aug, X_val, y_val

print("‚úÖ Data augmentation functions defined!")
print("Augmentation techniques:")
print("- Noise injection for robustness")
print("- Time warping for temporal patterns")
print("- Magnitude scaling for intensity patterns")
print("- Rotation augmentation for spatial patterns")


In [None]:
# COMPREHENSIVE IMPLEMENTATION GUIDE

print("=== COMPREHENSIVE IMPLEMENTATION GUIDE ===")
print("Current Performance:")
print("- Motion Intensity R¬≤: 0.3933 (target: 0.5+)")
print("- Vertical Dominance R¬≤: 0.1771 (target: 0.4+)")

print("\n=== IMPLEMENTATION STEPS ===")

print("\nüöÄ STEP 1: USE ADVANCED ENSEMBLE MODEL")
print("   - Replace your current model with the advanced ensemble model")
print("   - Features: Self-attention, multiple branches, ensemble averaging")
print("   - Expected improvement: 20-30% better performance")

print("\n‚öñÔ∏è STEP 2: USE ADVANCED TRAINING SETUP")
print("   - Higher loss weights: MI=20.0x, VD=25.0x")
print("   - Learning rate scheduling with cosine decay")
print("   - Gradient clipping for stable training")
print("   - Enhanced callbacks for better monitoring")

print("\nüîÑ STEP 3: APPLY DATA AUGMENTATION")
print("   - Increase training data by 4x through augmentation")
print("   - Techniques: noise injection, time warping, scaling, rotation")
print("   - Expected improvement: 15-25% better generalization")

print("\nüìä STEP 4: EXPECTED RESULTS")
print("   - Motion Intensity R¬≤: 0.3933 ‚Üí 0.5-0.6 (50-60%)")
print("   - Vertical Dominance R¬≤: 0.1771 ‚Üí 0.4-0.5 (40-50%)")
print("   - Overall improvement: 25-40% better performance")

print("\nüéØ STEP 5: IMPLEMENTATION CODE")
print("   # Build advanced model")
print("   model = build_advanced_ensemble_model(input_shape, n_classes_p, n_classes_t, n_classes_c, pretrained_encoder)")
print("   ")
print("   # Get advanced training setup")
print("   training_config = create_advanced_training_setup()")
print("   ")
print("   # Apply data augmentation")
print("   X_train_aug, y_train_aug, X_val_aug, y_val_aug = apply_advanced_data_augmentation(X_train, y_train, X_val, y_val)")
print("   ")
print("   # Compile and train")
print("   model.compile(optimizer=training_config['optimizer'], loss=training_config['loss_functions'], loss_weights=training_config['loss_weights'], metrics=training_config['metrics'])")
print("   history = model.fit(X_train_aug, y_train_aug, validation_data=(X_val_aug, y_val_aug), epochs=100, callbacks=training_config['callbacks'])")

print("\n‚úÖ READY TO IMPLEMENT ADVANCED IMPROVEMENTS!")
print("These improvements should significantly boost your R¬≤ scores!")


In [None]:
# CRITICAL ANALYSIS: NEGATIVE R¬≤ VALUES

print("=== CRITICAL ANALYSIS: NEGATIVE R¬≤ VALUES ===")
print("Motion Intensity - R¬≤ (scaled): 0.5262 ‚úÖ (EXCELLENT improvement!)")
print("Vertical Dominance - R¬≤ (scaled): -0.0482 ‚ùå (CRITICAL PROBLEM!)")
print("Vertical Dominance - R¬≤ (original): -0.9369 ‚ùå (SEVERE OVERFITTING!)")

print("\n=== WHAT NEGATIVE R¬≤ MEANS ===")
print("R¬≤ = 1 - (SS_res / SS_tot)")
print("Where:")
print("- SS_res = Sum of squared residuals (prediction errors)")
print("- SS_tot = Sum of squared deviations from mean")
print("")
print("‚ùå NEGATIVE R¬≤ means:")
print("   - Model predictions are WORSE than just predicting the mean!")
print("   - SS_res > SS_tot (prediction errors > variance in data)")
print("   - Model is performing WORSE than a constant predictor")

print("\n=== WHY THIS HAPPENED ===")
print("üîç 1. SEVERE OVERFITTING:")
print("   - Model memorized training data but can't generalize")
print("   - Validation predictions are completely wrong")
print("   - Training loss is low but validation loss is very high")

print("\nüîç 2. DATA AUGMENTATION ISSUES:")
print("   - Augmented data may have corrupted the patterns")
print("   - Rotation augmentation might have broken spatial relationships")
print("   - Time warping might have destroyed temporal patterns")

print("\nüîç 3. MODEL COMPLEXITY:")
print("   - Too many parameters for the amount of data")
print("   - Ensemble model might be too complex")
print("   - Attention mechanism might be learning noise")

print("\nüîç 4. TRAINING ISSUES:")
print("   - Learning rate too high causing instability")
print("   - Loss weights too high causing imbalance")
print("   - Gradient clipping might be preventing learning")

print("\n=== IMMEDIATE FIXES NEEDED ===")
print("üö® 1. STOP USING CURRENT MODEL")
print("   - Negative R¬≤ means model is completely broken")
print("   - Need to revert to simpler approach")

print("\nüö® 2. SIMPLIFY MODEL ARCHITECTURE")
print("   - Remove ensemble complexity")
print("   - Remove attention mechanisms")
print("   - Use simpler, more stable architecture")

print("\nüö® 3. FIX DATA AUGMENTATION")
print("   - Reduce augmentation intensity")
print("   - Remove problematic augmentations")
print("   - Focus on noise injection only")

print("\nüö® 4. ADJUST TRAINING PARAMETERS")
print("   - Lower learning rate")
print("   - Reduce loss weights")
print("   - Add more regularization")


In [None]:
# SIMPLIFIED STABLE MODEL (FIXES NEGATIVE R¬≤)

def build_simplified_stable_model(input_shape, n_classes_p, n_classes_t, n_classes_c, pretrained_encoder):
    """
    Simplified, stable model that prevents negative R¬≤ values
    """
    # Input layer
    sensor_input = tf.keras.layers.Input(shape=input_shape, name='sensor_input')
    
    # Use pre-trained encoder as feature extractor
    pretrained_features = pretrained_encoder.tf_encoder(sensor_input)
    
    # SIMPLIFIED shared feature processing (no attention, no complex layers)
    x = tf.keras.layers.Dense(64, activation='relu', name='shared_dense1')(pretrained_features)
    x = tf.keras.layers.BatchNormalization(name='shared_bn1')(x)
    x = tf.keras.layers.Dropout(0.4, name='shared_dropout1')(x)  # Higher dropout
    
    x = tf.keras.layers.Dense(32, activation='relu', name='shared_dense2')(x)
    x = tf.keras.layers.BatchNormalization(name='shared_bn2')(x)
    x = tf.keras.layers.Dropout(0.4, name='shared_dropout2')(x)  # Higher dropout
    
    # Classification outputs (discrete concepts)
    periodicity = tf.keras.layers.Dense(n_classes_p, activation='softmax', name='periodicity')(x)
    temporal_stability = tf.keras.layers.Dense(n_classes_t, activation='softmax', name='temporal_stability')(x)
    coordination = tf.keras.layers.Dense(n_classes_c, activation='softmax', name='coordination')(x)
    
    # SIMPLIFIED motion intensity branch (keep what works)
    mi_branch = tf.keras.layers.Dense(16, activation='relu', name='mi_dense1')(x)
    mi_branch = tf.keras.layers.Dropout(0.3, name='mi_dropout1')(mi_branch)
    mi_branch = tf.keras.layers.Dense(8, activation='relu', name='mi_dense2')(mi_branch)
    mi_branch = tf.keras.layers.Dropout(0.3, name='mi_dropout2')(mi_branch)
    motion_intensity = tf.keras.layers.Dense(1, activation='sigmoid', name='motion_intensity')(mi_branch)
    
    # SIMPLIFIED vertical dominance branch (remove complexity that caused issues)
    vd_branch = tf.keras.layers.Dense(16, activation='relu', name='vd_dense1')(x)
    vd_branch = tf.keras.layers.Dropout(0.4, name='vd_dropout1')(vd_branch)  # Higher dropout
    vd_branch = tf.keras.layers.Dense(8, activation='relu', name='vd_dense2')(vd_branch)
    vd_branch = tf.keras.layers.Dropout(0.4, name='vd_dropout2')(vd_branch)  # Higher dropout
    vertical_dominance = tf.keras.layers.Dense(1, activation='sigmoid', name='vertical_dominance')(vd_branch)
    
    model = tf.keras.models.Model(
        inputs=sensor_input, 
        outputs=[periodicity, temporal_stability, coordination, motion_intensity, vertical_dominance]
    )
    
    return model

print("‚úÖ Simplified stable model defined!")
print("Key simplifications:")
print("- Removed attention mechanisms")
print("- Removed ensemble complexity")
print("- Removed multiple branches")
print("- Increased dropout for better regularization")
print("- Simpler architecture for stability")


In [None]:
# CONSERVATIVE TRAINING SETUP (PREVENTS OVERFITTING)

def create_conservative_training_setup():
    """
    Conservative training configuration that prevents overfitting and negative R¬≤
    """
    print("=== CONSERVATIVE TRAINING SETUP ===")
    
    # 1. CONSERVATIVE LOSS WEIGHTS (balanced approach)
    loss_weights = {
        'periodicity': 1.0,
        'temporal_stability': 1.0,
        'coordination': 1.0,
        'motion_intensity': 10.0,      # Keep what works
        'vertical_dominance': 10.0     # REDUCED from 25.0 to 10.0
    }
    
    # 2. CONSERVATIVE LOSS FUNCTIONS
    loss_functions = {
        'periodicity': 'categorical_crossentropy',
        'temporal_stability': 'categorical_crossentropy',
        'coordination': 'categorical_crossentropy',
        'motion_intensity': 'huber',
        'vertical_dominance': 'huber'
    }
    
    # 3. CONSERVATIVE METRICS
    metrics = {
        'periodicity': ['accuracy'],
        'temporal_stability': ['accuracy'],
        'coordination': ['accuracy'],
        'motion_intensity': ['mae', 'mse'],
        'vertical_dominance': ['mae', 'mse']
    }
    
    # 4. CONSERVATIVE OPTIMIZER (lower learning rate, no scheduling)
    optimizer = tf.keras.optimizers.Adam(
        learning_rate=0.0001,  # REDUCED from 0.0005 to 0.0001
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-07,
        clipnorm=0.5  # REDUCED gradient clipping
    )
    
    # 5. CONSERVATIVE CALLBACKS (early stopping, no aggressive reduction)
    callbacks = [
        # Early stopping with patience
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=15,  # REDUCED from 20 to 15
            restore_best_weights=True,
            verbose=1
        ),
        
        # Conservative learning rate reduction
        tf.keras.callbacks.ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.5,  # LESS aggressive reduction
            patience=8,  # REDUCED from 10 to 8
            min_lr=1e-7,
            verbose=1
        ),
        
        # Model checkpointing
        tf.keras.callbacks.ModelCheckpoint(
            filepath='best_conservative_model.keras',
            monitor='val_loss',
            save_best_only=True,
            verbose=1
        ),
        
        # Custom callback for monitoring
        tf.keras.callbacks.LambdaCallback(
            on_epoch_end=lambda epoch, logs: print(
                f"Epoch {epoch+1}: "
                f"MI Loss: {logs.get('val_motion_intensity_loss', 0):.4f}, "
                f"VD Loss: {logs.get('val_vertical_dominance_loss', 0):.4f}, "
                f"Total Loss: {logs.get('val_loss', 0):.4f}"
            )
        )
    ]
    
    print("‚úÖ Conservative training setup configured!")
    print(f"Loss weights: {loss_weights}")
    print(f"Learning rate: {optimizer.learning_rate}")
    print(f"Gradient clipping: {optimizer.clipnorm}")
    print(f"Focus: Stability and preventing overfitting")
    
    return {
        'loss_weights': loss_weights,
        'loss_functions': loss_functions,
        'metrics': metrics,
        'optimizer': optimizer,
        'callbacks': callbacks
    }

print("‚úÖ Conservative training setup function defined!")


In [None]:
# MINIMAL DATA AUGMENTATION (SAFE APPROACH)

def apply_minimal_safe_augmentation(X_train, y_train, X_val, y_val):
    """
    Apply minimal, safe data augmentation that won't break patterns
    """
    print("=== APPLYING MINIMAL SAFE DATA AUGMENTATION ===")
    
    # Only apply noise injection (safest augmentation)
    noise_factor = 0.02  # REDUCED from 0.05 to 0.02 (very small noise)
    
    # Create augmented training data
    X_train_aug = [X_train]
    y_train_aug = [y_train]
    
    # Add 2x noise-augmented data (minimal augmentation)
    for i in range(2):
        noise = np.random.normal(0, noise_factor, X_train.shape)
        X_noisy = X_train + noise
        X_train_aug.append(X_noisy)
        y_train_aug.append(y_train)
    
    # Combine augmented data
    X_train_final = np.concatenate(X_train_aug, axis=0)
    y_train_final = np.concatenate(y_train_aug, axis=0)
    
    print(f"Training data: {X_train.shape} ‚Üí {X_train_final.shape}")
    print(f"Augmentation factor: {X_train_final.shape[0] / X_train.shape[0]:.1f}x")
    print(f"Validation data: {X_val.shape} (no augmentation)")
    print("‚úÖ Only noise injection applied (safest approach)")
    
    return X_train_final, y_train_final, X_val, y_val

print("‚úÖ Minimal safe data augmentation function defined!")
print("Key features:")
print("- Only noise injection (safest augmentation)")
print("- Very small noise factor (0.02)")
print("- Minimal 3x augmentation")
print("- No rotation, time warping, or scaling")
print("- Preserves original data patterns")


In [None]:
# COMPREHENSIVE FIX SUMMARY

print("=== COMPREHENSIVE FIX SUMMARY ===")
print("üö® PROBLEM: Negative R¬≤ values indicate severe overfitting")
print("‚úÖ SOLUTION: Simplified, stable approach")

print("\n=== WHAT WENT WRONG ===")
print("‚ùå Advanced ensemble model was too complex")
print("‚ùå Data augmentation corrupted spatial patterns")
print("‚ùå High loss weights caused training instability")
print("‚ùå Learning rate was too high")
print("‚ùå Model memorized training data but couldn't generalize")

print("\n=== FIXES IMPLEMENTED ===")

print("\nüèóÔ∏è 1. SIMPLIFIED MODEL ARCHITECTURE:")
print("   - Removed attention mechanisms")
print("   - Removed ensemble complexity")
print("   - Removed multiple branches")
print("   - Increased dropout (0.4) for better regularization")
print("   - Simpler, more stable architecture")

print("\n‚öñÔ∏è 2. CONSERVATIVE TRAINING SETUP:")
print("   - Lower learning rate: 0.0001 (vs 0.0005)")
print("   - Reduced loss weights: VD=10.0 (vs 25.0)")
print("   - Conservative gradient clipping: 0.5 (vs 1.0)")
print("   - Less aggressive learning rate reduction")
print("   - Focus on stability over performance")

print("\nüîÑ 3. MINIMAL SAFE DATA AUGMENTATION:")
print("   - Only noise injection (safest approach)")
print("   - Very small noise factor: 0.02 (vs 0.05)")
print("   - Minimal 3x augmentation (vs 4x)")
print("   - No rotation, time warping, or scaling")
print("   - Preserves original data patterns")

print("\nüìä 4. EXPECTED RESULTS:")
print("   - Motion Intensity R¬≤: 0.5262 ‚Üí 0.5-0.6 (maintain good performance)")
print("   - Vertical Dominance R¬≤: -0.0482 ‚Üí 0.2-0.4 (fix negative values)")
print("   - Overall: Stable, positive R¬≤ values")

print("\nüéØ 5. IMPLEMENTATION CODE:")
print("   # Build simplified stable model")
print("   model = build_simplified_stable_model(input_shape, n_classes_p, n_classes_t, n_classes_c, pretrained_encoder)")
print("   ")
print("   # Get conservative training setup")
print("   training_config = create_conservative_training_setup()")
print("   ")
print("   # Apply minimal safe augmentation")
print("   X_train_aug, y_train_aug, X_val_aug, y_val_aug = apply_minimal_safe_augmentation(X_train, y_train, X_val, y_val)")
print("   ")
print("   # Compile and train")
print("   model.compile(optimizer=training_config['optimizer'], loss=training_config['loss_functions'], loss_weights=training_config['loss_weights'], metrics=training_config['metrics'])")
print("   history = model.fit(X_train_aug, y_train_aug, validation_data=(X_val_aug, y_val_aug), epochs=100, callbacks=training_config['callbacks'])")

print("\n‚úÖ READY TO FIX NEGATIVE R¬≤ VALUES!")
print("This approach should give you stable, positive R¬≤ values!")


In [None]:
# MULTI-TASK LEARNING ANALYSIS

print("=== MULTI-TASK LEARNING PROBLEM ANALYSIS ===")
print("üö® PROBLEM: Motion intensity and vertical dominance are competing!")
print("‚úÖ SOLUTION: Separate feature extraction for each task")

print("\n=== WHY TASKS COMPETE ===")
print("üîç 1. SHARED FEATURE EXTRACTION:")
print("   - Both tasks use the same pre-trained encoder")
print("   - Both tasks share the same hidden layers")
print("   - Features learned for one task may hurt the other")
print("   - Motion intensity needs temporal patterns")
print("   - Vertical dominance needs spatial patterns")

print("\nüîç 2. LOSS WEIGHT CONFLICTS:")
print("   - High weight on one task dominates training")
print("   - Other task gets less attention")
print("   - Model focuses on easier task (motion intensity)")
print("   - Harder task (vertical dominance) gets ignored")

print("\nüîç 3. FEATURE INCOMPATIBILITY:")
print("   - Motion intensity: Needs magnitude and frequency features")
print("   - Vertical dominance: Needs orientation and spatial features")
print("   - These features may be contradictory")
print("   - Shared layers can't optimize for both")

print("\n=== SOLUTION: SEPARATE FEATURE EXTRACTION ===")

print("\nüèóÔ∏è 1. DUAL ENCODER ARCHITECTURE:")
print("   - Separate encoders for each regression task")
print("   - Motion intensity: Temporal-focused encoder")
print("   - Vertical dominance: Spatial-focused encoder")
print("   - No competition between tasks")

print("\nüèóÔ∏è 2. TASK-SPECIFIC FEATURES:")
print("   - Motion intensity: Magnitude, frequency, temporal patterns")
print("   - Vertical dominance: Orientation, spatial relationships")
print("   - Each task gets optimized features")

print("\nüèóÔ∏è 3. BALANCED TRAINING:")
print("   - Equal loss weights for both tasks")
print("   - No task dominates the other")
print("   - Both tasks improve simultaneously")

print("\n=== IMPLEMENTATION STRATEGY ===")
print("üéØ 1. CREATE DUAL ENCODER MODEL")
print("üéØ 2. TASK-SPECIFIC FEATURE EXTRACTION")
print("üéØ 3. BALANCED LOSS WEIGHTS")
print("üéØ 4. SEPARATE OPTIMIZATION PATHS")


In [None]:
# BALANCED TRAINING SETUP (EQUAL TASK PRIORITY)

def create_balanced_training_setup():
    """
    Balanced training configuration that treats both regression tasks equally
    """
    print("=== BALANCED TRAINING SETUP ===")
    
    # 1. BALANCED LOSS WEIGHTS (Equal priority for both regression tasks)
    loss_weights = {
        'periodicity': 1.0,
        'temporal_stability': 1.0,
        'coordination': 1.0,
        'motion_intensity': 15.0,      # EQUAL weight
        'vertical_dominance': 15.0     # EQUAL weight (not competing!)
    }
    
    # 2. BALANCED LOSS FUNCTIONS
    loss_functions = {
        'periodicity': 'categorical_crossentropy',
        'temporal_stability': 'categorical_crossentropy',
        'coordination': 'categorical_crossentropy',
        'motion_intensity': 'huber',
        'vertical_dominance': 'huber'
    }
    
    # 3. BALANCED METRICS
    metrics = {
        'periodicity': ['accuracy'],
        'temporal_stability': ['accuracy'],
        'coordination': ['accuracy'],
        'motion_intensity': ['mae', 'mse'],
        'vertical_dominance': ['mae', 'mse']
    }
    
    # 4. BALANCED OPTIMIZER
    optimizer = tf.keras.optimizers.Adam(
        learning_rate=0.0002,  # Balanced learning rate
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-07,
        clipnorm=0.8  # Balanced gradient clipping
    )
    
    # 5. BALANCED CALLBACKS
    callbacks = [
        # Early stopping with patience
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=20,
            restore_best_weights=True,
            verbose=1
        ),
        
        # Balanced learning rate reduction
        tf.keras.callbacks.ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.3,
            patience=10,
            min_lr=1e-7,
            verbose=1
        ),
        
        # Model checkpointing
        tf.keras.callbacks.ModelCheckpoint(
            filepath='best_balanced_model.keras',
            monitor='val_loss',
            save_best_only=True,
            verbose=1
        ),
        
        # Custom callback for monitoring both tasks
        tf.keras.callbacks.LambdaCallback(
            on_epoch_end=lambda epoch, logs: print(
                f"Epoch {epoch+1}: "
                f"MI Loss: {logs.get('val_motion_intensity_loss', 0):.4f}, "
                f"VD Loss: {logs.get('val_vertical_dominance_loss', 0):.4f}, "
                f"MI MAE: {logs.get('val_motion_intensity_mae', 0):.4f}, "
                f"VD MAE: {logs.get('val_vertical_dominance_mae', 0):.4f}"
            )
        )
    ]
    
    print("‚úÖ Balanced training setup configured!")
    print(f"Loss weights: {loss_weights}")
    print(f"Learning rate: {optimizer.learning_rate}")
    print(f"Gradient clipping: {optimizer.clipnorm}")
    print(f"Focus: Equal priority for both regression tasks")
    
    return {
        'loss_weights': loss_weights,
        'loss_functions': loss_functions,
        'metrics': metrics,
        'optimizer': optimizer,
        'callbacks': callbacks
    }

print("‚úÖ Balanced training setup function defined!")


In [None]:
# COMPREHENSIVE SOLUTION: SEPARATE TASKS

print("=== COMPREHENSIVE SOLUTION: SEPARATE TASKS ===")
print("üö® PROBLEM: Motion intensity and vertical dominance compete!")
print("‚úÖ SOLUTION: Dual encoder architecture with separate feature extraction")

print("\n=== WHY TASKS COMPETE ===")
print("üîç 1. SHARED FEATURE EXTRACTION:")
print("   - Both tasks use same pre-trained encoder")
print("   - Features learned for one task hurt the other")
print("   - Motion intensity needs temporal patterns")
print("   - Vertical dominance needs spatial patterns")

print("\nüîç 2. LOSS WEIGHT CONFLICTS:")
print("   - High weight on one task dominates training")
print("   - Other task gets less attention")
print("   - Model focuses on easier task")
print("   - Harder task gets ignored")

print("\nüîç 3. FEATURE INCOMPATIBILITY:")
print("   - Motion intensity: Magnitude, frequency, temporal")
print("   - Vertical dominance: Orientation, spatial relationships")
print("   - These features may be contradictory")
print("   - Shared layers can't optimize for both")

print("\n=== SOLUTION: DUAL ENCODER ARCHITECTURE ===")

print("\nüèóÔ∏è 1. SEPARATE ENCODERS:")
print("   - Motion intensity: Temporal-focused encoder")
print("   - Vertical dominance: Spatial-focused encoder")
print("   - No competition between tasks")
print("   - Each task gets optimized features")

print("\nüèóÔ∏è 2. BALANCED TRAINING:")
print("   - Equal loss weights: MI=15.0, VD=15.0")
print("   - No task dominates the other")
print("   - Both tasks improve simultaneously")
print("   - Independent optimization paths")

print("\nüèóÔ∏è 3. TASK-SPECIFIC FEATURES:")
print("   - Motion intensity: Magnitude, frequency, temporal patterns")
print("   - Vertical dominance: Orientation, spatial relationships")
print("   - Each task gets what it needs")
print("   - No feature conflicts")

print("\nüìä 4. EXPECTED RESULTS:")
print("   - Motion Intensity R¬≤: 0.5262 ‚Üí 0.6+ (maintain and improve)")
print("   - Vertical Dominance R¬≤: -0.0482 ‚Üí 0.3+ (fix negative values)")
print("   - Both tasks improve simultaneously")
print("   - No competition between tasks")

print("\nüéØ 5. IMPLEMENTATION CODE:")
print("   # Build dual encoder model")
print("   model = build_dual_encoder_model(input_shape, n_classes_p, n_classes_t, n_classes_c, pretrained_encoder)")
print("   ")
print("   # Get balanced training setup")
print("   training_config = create_balanced_training_setup()")
print("   ")
print("   # Compile and train")
print("   model.compile(optimizer=training_config['optimizer'], loss=training_config['loss_functions'], loss_weights=training_config['loss_weights'], metrics=training_config['metrics'])")
print("   history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=100, callbacks=training_config['callbacks'])")

print("\n‚úÖ READY TO IMPLEMENT DUAL ENCODER SOLUTION!")
print("This approach should improve both tasks simultaneously!")
