In [9]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, roc_auc_score
from sklearn.preprocessing import label_binarize
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.models import load_model
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier

In [10]:
df_windows = pd.read_csv('./data/final_window_labels.csv') 

In [11]:
# Define concept columns
discrete_columns = ['periodicity', 'temporal_stability', 'coordination', 'static_posture']
continuous_columns = ['motion_intensity', 'vertical_dominance']

# Combine directly (already numeric and bounded)
X_final = df_windows[discrete_columns + continuous_columns].values

# Prepare labels
y_label_str = df_windows['activity'].values
activity_mapping = {act: i for i, act in enumerate(np.unique(y_label_str))}
y_label = np.array([activity_mapping[act] for act in y_label_str])

# Split train/test
X_train, X_test, y_train, y_test = train_test_split(
    X_final, y_label, test_size=0.25, random_state=42, stratify=y_label
)

# Helper function to calculate AUROC for multi-class problems
def calculate_auroc(y_true, y_pred_proba, num_classes):
    """
    Calculate AUROC for multi-class classification
    """
    # Binarize the labels for multi-class AUROC calculation
    y_true_bin = label_binarize(y_true, classes=range(num_classes))
    
    # Calculate AUROC using one-vs-rest approach
    if y_true_bin.shape[1] == 1:
        # Binary case
        return roc_auc_score(y_true_bin, y_pred_proba[:, 1])
    else:
        # Multi-class case
        return roc_auc_score(y_true_bin, y_pred_proba, multi_class='ovr', average='macro')

In [12]:
# ---------- Neural Network ----------
num_classes = len(activity_mapping)
y_train_cat = tf.keras.utils.to_categorical(y_train, num_classes=num_classes)
y_test_cat = tf.keras.utils.to_categorical(y_test, num_classes=num_classes)

model = Sequential([
    Dense(16, activation='relu', input_shape=(X_final.shape[1],)),
    Dense(16, activation='relu'),
    Dense(num_classes, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
history = model.fit(
    X_train, y_train_cat,
    validation_data=(X_test, y_test_cat),
    epochs=200,
    batch_size=16,
    verbose=0
)

y_pred_probs = model.predict(X_test)
y_pred_nn = np.argmax(y_pred_probs, axis=1)

print("=== Neural Network ===")
print("Accuracy:", accuracy_score(y_test, y_pred_nn))
print("AUROC:", calculate_auroc(y_test, y_pred_probs, num_classes))
print(classification_report(y_test, y_pred_nn))
print(confusion_matrix(y_test, y_pred_nn))
print()

# ---------- Decision Tree ----------
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)
y_pred_dt = dt.predict(X_test)
y_pred_proba_dt = dt.predict_proba(X_test)

print("=== Decision Tree ===")
print("Accuracy:", accuracy_score(y_test, y_pred_dt))
print("AUROC:", calculate_auroc(y_test, y_pred_proba_dt, num_classes))
print(classification_report(y_test, y_pred_dt))
print(confusion_matrix(y_test, y_pred_dt))
print()

# ---------- Logistic Regression ----------
lr = LogisticRegression(max_iter=1000, random_state=42)
lr.fit(X_train, y_train)
y_pred_lr = lr.predict(X_test)
y_pred_proba_lr = lr.predict_proba(X_test)

print("=== Logistic Regression ===")
print("Accuracy:", accuracy_score(y_test, y_pred_lr))
print("AUROC:", calculate_auroc(y_test, y_pred_proba_lr, num_classes))
print(classification_report(y_test, y_pred_lr))
print(confusion_matrix(y_test, y_pred_lr))

# ---------- Random Forest ----------
rf = RandomForestClassifier(
    n_estimators=300,
    max_depth=None,
    min_samples_split=2,
    min_samples_leaf=1,
    random_state=42,
)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)
y_pred_proba_rf = rf.predict_proba(X_test)

print("=== Random Forest ===")
print("Accuracy:", accuracy_score(y_test, y_pred_rf))
print("AUROC:", calculate_auroc(y_test, y_pred_proba_rf, num_classes))
print(classification_report(y_test, y_pred_rf))
print(confusion_matrix(y_test, y_pred_rf))
print()

# ---------- XGBoost ----------
xgb = XGBClassifier(
    n_estimators=300,
    learning_rate=0.05,
    max_depth=4,
    subsample=0.9,
    colsample_bytree=0.9,
    random_state=42,
    use_label_encoder=False,
    eval_metric='mlogloss'
)
xgb.fit(X_train, y_train)
y_pred_xgb = xgb.predict(X_test)
y_pred_proba_xgb = xgb.predict_proba(X_test)

print("=== XGBoost ===")
print("Accuracy:", accuracy_score(y_test, y_pred_xgb))
print("AUROC:", calculate_auroc(y_test, y_pred_proba_xgb, num_classes))
print(classification_report(y_test, y_pred_xgb))
print(confusion_matrix(y_test, y_pred_xgb))
print()

# ---------- SVM (RBF Kernel) ----------
svm = SVC(kernel='rbf', C=10, gamma='scale', probability=True, random_state=42)
svm.fit(X_train, y_train)
y_pred_svm = svm.predict(X_test)
y_pred_proba_svm = svm.predict_proba(X_test)

print("=== SVM (RBF) ===")
print("Accuracy:", accuracy_score(y_test, y_pred_svm))
print("AUROC:", calculate_auroc(y_test, y_pred_proba_svm, num_classes))
print(classification_report(y_test, y_pred_svm))
print(confusion_matrix(y_test, y_pred_svm))


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
=== Neural Network ===
Accuracy: 0.39473684210526316
AUROC: 0.8397297427035331
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         6
           1       0.25      0.29      0.27         7
           2       0.83      0.83      0.83         6
           3       0.56      0.83      0.67         6
           4       0.38      0.50      0.43         6
           5       0.00      0.00      0.00         7

    accuracy                           0.39        38
   macro avg       0.34      0.41      0.37        38
weighted avg       0.32      0.39      0.35        38

[[0 0 0 2 3 1]
 [1 2 0 0 1 3]
 [0 0 5 1 0 0]
 [0 0 1 5 0 0]
 [0 0 0 1 3 2]
 [0 6 0 0 1 0]]

=== Decision Tree ===
Accuracy: 0.5789473684210527
AUROC: 0.7456117191500256
              precision    recall  f1-score   support

           0       0.33      0.50      0.40         6
           1       0.80     

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


=== XGBoost ===
Accuracy: 0.5789473684210527
AUROC: 0.8517865143369177
              precision    recall  f1-score   support

           0       0.33      0.33      0.33         6
           1       1.00      0.71      0.83         7
           2       0.71      0.83      0.77         6
           3       0.75      0.50      0.60         6
           4       0.50      0.50      0.50         6
           5       0.40      0.57      0.47         7

    accuracy                           0.58        38
   macro avg       0.62      0.58      0.58        38
weighted avg       0.62      0.58      0.59        38

[[2 0 0 0 2 2]
 [0 5 0 0 0 2]
 [0 0 5 1 0 0]
 [2 0 1 3 0 0]
 [0 0 1 0 3 2]
 [2 0 0 0 1 4]]

=== SVM (RBF) ===
Accuracy: 0.5263157894736842
AUROC: 0.8597630248335895
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         6
           1       0.57      0.57      0.57         7
           2       0.83      0.83      0.83         6
    

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [13]:
model.save("concepts_to_true_labels_model.keras")
print("\nModel saved as 'concepts_to_true_labels_model.keras'")


Model saved as 'concepts_to_true_labels_model.keras'
