# In [None]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from azureml.core import Workspace, Dataset, Run

# Define constants
# Gesture labels. Each name is also the sub-directory that is searched for that
# class's CSV files, and its position in the list is the integer label.
GESTURE_CLASSES = ["O", "V"]
# Samples per sequence: longer recordings are truncated to this length,
# shorter ones are zero-padded up to it.
SEQUENCE_LENGTH = 100  # Adjust based on your data
# File name the trained Keras model is saved under.
MODEL_SAVE_PATH = "wand_model.h5"

def load_data_from_directory(directory, classes=None, sequence_length=None):
    """Load gesture recordings from per-class sub-directories.

    For every class name, ``<directory>/<class_name>`` is scanned for files
    ending in ``.csv``. The ``x``, ``y`` and ``z`` columns of each file form
    one sequence, which is truncated or zero-padded to ``sequence_length``
    rows. Recordings with 10 rows or fewer are discarded when they are
    shorter than ``sequence_length``.

    Args:
        directory: Root directory containing one sub-directory per class.
        classes: Ordered class names; the position of a name is its integer
            label. Defaults to ``GESTURE_CLASSES``.
        sequence_length: Number of rows every returned sequence has.
            Defaults to ``SEQUENCE_LENGTH``.

    Returns:
        A ``(features, labels)`` tuple of NumPy arrays. ``features`` has shape
        ``(n_samples, sequence_length, 3)`` and ``labels`` has shape
        ``(n_samples,)``. Both are empty when nothing was loaded.

    Raises:
        KeyError: If a CSV file lacks an ``x``, ``y`` or ``z`` column.
    """
    if classes is None:
        classes = GESTURE_CLASSES
    if sequence_length is None:
        sequence_length = SEQUENCE_LENGTH

    features = []
    labels = []

    for idx, class_name in enumerate(classes):
        class_dir = os.path.join(directory, class_name)

        # A class without a directory simply contributes no samples.
        if not os.path.isdir(class_dir):
            continue

        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sample order (and any seeded split done later) reproducible.
        for file_name in sorted(os.listdir(class_dir)):
            if not file_name.endswith('.csv'):
                continue

            df = pd.read_csv(os.path.join(class_dir, file_name))

            # Extract IMU data (x, y, z columns)
            sequence = df[['x', 'y', 'z']].values
            n_rows = len(sequence)

            if n_rows >= sequence_length:
                # Keep only the first sequence_length samples.
                sequence = sequence[:sequence_length]
            elif n_rows > 10:  # Minimum sequence length
                # Zero-pad at the end, along the time axis only.
                sequence = np.pad(
                    sequence,
                    ((0, sequence_length - n_rows), (0, 0)),
                    mode='constant',
                    constant_values=0,
                )
            else:
                # Too short to be a usable recording.
                continue

            features.append(sequence)
            labels.append(idx)

    return np.array(features), np.array(labels)

def preprocess_data(features, labels, return_scaler=False):
    """Flatten, split and standardize the data for training.

    Each sample is flattened to one row, the rows are split 80/20 into
    stratified train and test sets (fixed seed 42), and a ``StandardScaler``
    is fitted on the training rows only before being applied to both sets.
    Fitting on the training split alone keeps test-set statistics out of the
    normalization, so the test score is not optimistically biased.

    Args:
        features: Array whose first axis indexes samples; remaining axes are
            flattened into a single feature axis.
        labels: Per-sample class labels, used for stratification.
        return_scaler: When true, the fitted scaler is appended to the
            returned tuple so the same normalization can be reused later.

    Returns:
        ``(X_train, X_test, y_train, y_test)``, or
        ``(X_train, X_test, y_train, y_test, scaler)`` when ``return_scaler``
        is true.
    """
    # Reshape features to (samples, sequence_length * 3)
    n_samples = features.shape[0]
    features_flat = features.reshape(n_samples, -1)

    # Split first so the scaler never sees the held-out rows.
    X_train, X_test, y_train, y_test = train_test_split(
        features_flat, labels, test_size=0.2, random_state=42, stratify=labels
    )

    # Learn mean/variance from the training rows, then reuse them for the test rows.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    if return_scaler:
        return X_train, X_test, y_train, y_test, scaler
    return X_train, X_test, y_train, y_test

def build_model(input_shape, num_classes):
    """Create and compile the dense classifier for gesture recognition.

    Args:
        input_shape: Number of features per sample; it is wrapped into a
            one-element tuple for the first layer.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A compiled ``tf.keras.Sequential`` model using the Adam optimizer,
        sparse categorical cross-entropy loss and an accuracy metric.
    """
    keras_layers = tf.keras.layers

    # Two ReLU hidden layers, each followed by dropout, then a softmax head.
    stack = [
        keras_layers.Dense(128, activation='relu', input_shape=(input_shape,)),
        keras_layers.Dropout(0.3),
        keras_layers.Dense(64, activation='relu'),
        keras_layers.Dropout(0.2),
        keras_layers.Dense(num_classes, activation='softmax'),
    ]
    network = tf.keras.Sequential(stack)

    # The sparse loss matches integer class labels (no one-hot encoding).
    network.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

def main():
    """Train, evaluate and save the gesture classifier.

    Resolves the workspace, mounts the file dataset from the default
    datastore, loads and preprocesses the recordings, trains the model with
    early stopping, logs test loss and accuracy to the run, and saves the
    model to ``MODEL_SAVE_PATH``. The dataset is unmounted when the function
    leaves the mount context, including on early return or error.
    """
    run = Run.get_context()

    # A run exposing ``experiment`` is treated as an AzureML job; otherwise we
    # are running locally and the workspace comes from config.json.
    ws = run.experiment.workspace if hasattr(run, "experiment") else Workspace.from_config()

    data_path = (ws.get_default_datastore(), 'UI/2025-04-07_211604_UTC/**')
    dataset = Dataset.File.from_files(path=data_path)

    # Entering the context starts the mount; leaving it always stops it.
    with dataset.mount() as mount_context:
        print("Loading data...")
        samples, targets = load_data_from_directory(mount_context.mount_point)

        if not len(samples):
            print("No data found. Please check your data directory.")
            return

        print(f"Loaded {len(samples)} samples across {len(GESTURE_CLASSES)} classes")

        X_train, X_test, y_train, y_test = preprocess_data(samples, targets)

        # One input unit per flattened feature column.
        model = build_model(X_train.shape[1], len(GESTURE_CLASSES))

        # Stop once validation loss has not improved for 10 epochs and roll
        # back to the best weights seen.
        early_stop = tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=10,
            restore_best_weights=True,
        )

        print("Training model...")
        model.fit(
            X_train,
            y_train,
            epochs=50,
            batch_size=32,
            validation_split=0.2,
            callbacks=[early_stop],
        )

        print("Evaluating model...")
        test_loss, test_acc = model.evaluate(X_test, y_test)
        print(f"Test accuracy: {test_acc:.4f}")

        # Log metrics to Azure ML
        run.log("Test Accuracy", test_acc)
        run.log("Test Loss", test_loss)

        model.save(MODEL_SAVE_PATH)
        print(f"Model saved to {MODEL_SAVE_PATH}")

# Run the training pipeline only when executed as a script, not on import.
if __name__ == "__main__":
    main()