In [17]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns

def classify_activity(features_path, features_list, classification_method):
    # Load data
    data = pd.read_csv(features_path)
    X = data[features_list]
    y = data['Activity']
    
    # Split data into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    # Select classifier based on the specified method
    if classification_method == 'decision_tree':
        classifier = DecisionTreeClassifier(random_state=42)
    elif classification_method == 'random_forest':
        classifier = RandomForestClassifier(random_state=42)
    elif classification_method == 'svm':
        classifier = SVC(random_state=42)
    else:
        raise ValueError("Invalid classification method")
    
    # Train the classifier and predict
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)
    
    # Evaluate the model
    conf_matrix = confusion_matrix(y_test, y_pred)
    accuracy = accuracy_score(y_test, y_pred)
    
    # Display confusion matrix
    # plt.figure(figsize=(8, 6))
    # sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", 
    #             xticklabels=classifier.classes_, yticklabels=classifier.classes_)
    # plt.xlabel("Predicted Label")
    # plt.ylabel("True Label")
    # plt.title(f"Confusion Matrix - {classification_method.capitalize()}")
    #plt.show()
    
    #print(f"\nAccuracy ({classification_method.capitalize()}):\n", accuracy)
    
    return accuracy

# Example usage:
print("Decision Tree Results:")
classify_activity('features_window4.csv', ['mean_x', 'std_x', 'mean_y', 'std_y', 'mean_z', 'std_z', 'median_x', 'median_y', 'median_z', 'root_mean_square_x', 'root_mean_square_y', 'root_mean_square_z'], 'decision_tree')

print("\nRandom Forest Results:")
classify_activity('features.csv', ['mean_x', 'std_x', 'mean_y', 'std_y', 'mean_z', 'std_z'], 'random_forest')

print("\nSVM Results:")
classify_activity('features.csv', ['mean_x', 'std_x', 'mean_y', 'std_y', 'mean_z', 'std_z'], 'svm')


Decision Tree Results:

Random Forest Results:

SVM Results:


0.9172413793103448

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.metrics import accuracy_score

def classify_with_sfs(features_path, features_list, time_interval):
    # Load data
    data = pd.read_csv(features_path)
    X = data[features_list]
    y = data['Activity']
    
    # Split data into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    # Initialize classifier (Decision Tree in this case)
    classifier = DecisionTreeClassifier(random_state=42)
    
    # Sequential Feature Selector to find the best feature subset
    sfs = SequentialFeatureSelector(classifier, n_features_to_select="auto", direction="forward", scoring="accuracy", cv=None)
    sfs.fit(X_train, y_train)
    
    # Get the best feature subset
    best_features = list(X_train.columns[sfs.get_support()])
    print("Best feature subset:", best_features)
    
    # Train the classifier on the selected features
    classifier.fit(X_train[best_features], y_train)
    y_pred = classifier.predict(X_test[best_features])
    
    # Evaluate accuracy
    accuracy = accuracy_score(y_test, y_pred)
    print("\nAccuracy with best feature subset:", accuracy)
    
    return best_features, accuracy

# Example usage with the best time interval and initial feature set
best_features, accuracy = classify_with_sfs('features_window4.csv', 
                                            ['mean_x', 'std_x', 'mean_y', 'std_y', 'mean_z', 'std_z', 'median_x', 'median_y', 'median_z', 'root_mean_square_x', 'root_mean_square_y', 'root_mean_square_z'], 
                                            time_interval=4)


In [None]:
def sequential_feature_selection(features_path, candidate_features, classification_method='decision_tree'):
    best_features = []
    best_accuracy = 0
    feature_accuracies = {}

    for i in range(len(candidate_features)):
        best_feature_in_round = None
        for feature in candidate_features:
            if feature in best_features:
                continue
            # Test this feature combination
            current_features = best_features + [feature]
            accuracy = classify_activity(features_path, current_features, classification_method)
            
            # Track best feature in this round
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_feature_in_round = feature

        # If no improvement, stop the selection
        if best_feature_in_round is None:
            break
        
        # Update best features and accuracy
        best_features.append(best_feature_in_round)
        feature_accuracies[tuple(best_features)] = best_accuracy
        print(f"Selected feature: {best_feature_in_round}, Accuracy: {best_accuracy}")
        
candidate_features = ['mean_x', 'std_x', 'median_x', 'root_mean_square_x', 'mean_y', 'std_y', 'median_y', 'root_mean_square_y', 'mean_z', 'std_z', 'median_z', 'root_mean_square_z']


print("Decision Tree Results:\n")
sequential_feature_selection('features_window4.csv', candidate_features, classification_method='decision_tree')
print("\n")

print("Random Forest Results:\n")
sequential_feature_selection('features_window4.csv', candidate_features, classification_method='random_forest')
print("\n")


print("SVM Results:\n")
sequential_feature_selection('features_window4.csv', candidate_features, classification_method='svm')
print("\n")



Decision Tree Results:

Selected feature: mean_x, Accuracy: 0.8586206896551725
Selected feature: mean_z, Accuracy: 0.906896551724138
Selected feature: root_mean_square_y, Accuracy: 0.9344827586206896
Selected feature: median_z, Accuracy: 0.9448275862068966
Selected feature: median_x, Accuracy: 0.9586206896551724
Random Forest Results:

Selected feature: mean_x, Accuracy: 0.8586206896551725
Selected feature: median_z, Accuracy: 0.9310344827586207
Selected feature: mean_y, Accuracy: 0.9448275862068966
Selected feature: mean_z, Accuracy: 0.9586206896551724
Selected feature: std_z, Accuracy: 0.9620689655172414
SVM Results:

Selected feature: median_x, Accuracy: 0.896551724137931
Selected feature: mean_z, Accuracy: 0.9241379310344827
Selected feature: std_y, Accuracy: 0.9379310344827586


In [None]:
['mean_y', 'std_y', 'mean_z', 'std_z', 'median_x', 'root_mean_square_z']
