In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, accuracy_score
import sklearn
from sklearn.linear_model import Lasso, RidgeClassifier
import numpy as np
from sklearn.utils import resample

In [None]:
# Location of the posture dataset; change this if the CSV lives elsewhere.
file_path = "posture_data.csv"
data = pd.read_csv(file_path)

# Tally how many rows carry each value of the 'posture' label.
class_counts = data["posture"].value_counts()

# Report both classes; a label that does not occur in the data is shown as 0.
standing_total = class_counts.get("Standing", 0)
sitting_total = class_counts.get("Sitting", 0)
print(f"Number of 'Standing' samples: {standing_total}")
print(f"Number of 'Sitting' samples: {sitting_total}")


Number of 'Standing' samples: 467
Number of 'Sitting' samples: 511


In [None]:
# Encoder instance created here; it is fitted later when the labels are encoded.
label_encoder = LabelEncoder()

# Split the raw frame into one sub-frame per posture class.
posture = data['posture']
sitting_data = data[posture == 'Sitting']
standing_data = data[posture == 'Standing']

class_counts = posture.value_counts()

# Downsample (without replacement) whichever class is larger so that both
# classes end up with the size of the smaller one. Equal sizes are left as-is.
n_sitting, n_standing = len(sitting_data), len(standing_data)
if n_sitting > n_standing:
    sitting_data = resample(sitting_data, replace=False, n_samples=n_standing, random_state=42)
elif n_standing > n_sitting:
    standing_data = resample(standing_data, replace=False, n_samples=n_sitting, random_state=42)

# Stack the two (now equally sized) classes back into a single frame.
balanced_data = pd.concat([sitting_data, standing_data])

# Re-derive the per-class frames from the balanced frame and report their sizes.
balanced_posture = balanced_data['posture']
sitting_data = balanced_data[balanced_posture == 'Sitting']
standing_data = balanced_data[balanced_posture == 'Standing']
print(f"Sitting samples B: {len(sitting_data)}")
print(f"Standing samples B : {len(standing_data)}")


Sitting samples B: 467
Standing samples B : 467


In [None]:

# Prepare features (X) and target (y).
# The label is selected by name, the same way the loading and balancing cells
# address it, instead of by position: positional slicing would silently put the
# label into X if 'posture' were ever not the last column of the CSV.
X = balanced_data.drop(columns=['posture'])
y = balanced_data['posture']

# Encode the string labels as integers.
y_encoded = label_encoder.fit_transform(y)

# Hold out 20% of the rows as a test set. No separate validation split is made.
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Scale features: the scaler is fitted on the training rows only and then
# applied unchanged to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Display the encoded training labels.
y_train

array([0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0,
       0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1,
       0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
       0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1,
       0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0,
       0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0,
       1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0,
       0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1,
       1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1,
       1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0,
       0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0,
       0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0,

In [None]:
# Candidate models, keyed by the display name that is used in the reports and
# as the lookup key for the output file names.
# Estimators with internal randomness (forests, trees, boosting) get a fixed
# random_state, matching the seed used for resampling and the train/test split,
# so that re-running the notebook reproduces the same fitted models.
classifiers = {
    "Random Forest": RandomForestClassifier(random_state=42),
    "Logistic Regression": LogisticRegression(max_iter=1000, C=1.0),
    "K-Nearest Neighbors": KNeighborsClassifier(n_neighbors=5),
    "Support Vector Classifier (SVC)": SVC(),
    "Decision Tree": DecisionTreeClassifier(max_depth=5, random_state=42),
    "Gradient Boosting": GradientBoostingClassifier(n_estimators=100, max_depth=5, random_state=42),

    "Ridge": RidgeClassifier(),
    "GBC": GradientBoostingClassifier(random_state=42),
    # Ensemble over four base estimators; voting mode is left at the library default.
    "Voting Classifier": sklearn.ensemble.VotingClassifier(
        estimators=[
            ('rf', RandomForestClassifier(random_state=42)),
            ('lr', LogisticRegression()),
            ('rc', RidgeClassifier()),
            ('gb', GradientBoostingClassifier(random_state=42)),
        ]
    ),
}


In [None]:
# Hold-out accuracy per model, keyed by the model's display name.
results = {}
for name, model in classifiers.items():
    print(f"\nModel: {name}")

    # Cross-validate on the training split only. The scorer is 'f1', so the
    # printed label says F1 (it previously claimed to be accuracy).
    cv_scores = cross_val_score(model, X_train, y_train, scoring='f1')
    print(f"Cross-Validation F1: {cv_scores.mean():.2f} (+/- {cv_scores.std():.2f})")

    # Fit on the whole training split; this fitted instance is the one kept in
    # `classifiers` for later cells.
    model.fit(X_train, y_train)

    # Score on the held-out X_test / y_test split, which is the only hold-out
    # set this notebook creates.
    y_val_pred = model.predict(X_test)
    val_accuracy = accuracy_score(y_test, y_val_pred)
    results[name] = val_accuracy
    print(f"Validation Set Accuracy: {val_accuracy:.2f}")
    print("Classification Report:\n", classification_report(y_test, y_val_pred, target_names=label_encoder.classes_))



Model: Random Forest
Cross-Validation Accuracy: 1.00 (+/- 0.00)
Validation Set Accuracy: 1.00
Classification Report:
               precision    recall  f1-score   support

     Sitting       1.00      1.00      1.00        95
    Standing       1.00      1.00      1.00        92

    accuracy                           1.00       187
   macro avg       1.00      1.00      1.00       187
weighted avg       1.00      1.00      1.00       187


Model: Logistic Regression
Cross-Validation Accuracy: 1.00 (+/- 0.00)
Validation Set Accuracy: 1.00
Classification Report:
               precision    recall  f1-score   support

     Sitting       1.00      1.00      1.00        95
    Standing       1.00      1.00      1.00        92

    accuracy                           1.00       187
   macro avg       1.00      1.00      1.00       187
weighted avg       1.00      1.00      1.00       187


Model: K-Nearest Neighbors
Cross-Validation Accuracy: 1.00 (+/- 0.00)
Validation Set Accuracy: 1.00
C

In [None]:
# Recap: one line per model with the accuracy that was stored in `results`.
print("\nFinal Model Comparison:")
for model_name, score in results.items():
    print(f"{model_name}: {score:.2f}")


Final Model Comparison:
Random Forest: 1.00
Logistic Regression: 1.00
K-Nearest Neighbors: 1.00
Support Vector Classifier (SVC): 1.00
Decision Tree: 1.00
Gradient Boosting: 1.00
Ridge: 1.00
GBC: 1.00
Voting Classifier: 1.00


In [None]:
import joblib
from google.colab import files

def save_models(classifiers, file_names):
    """Write every model in ``classifiers`` to its own ``.joblib`` file.

    Args:
        classifiers: Mapping of display name -> model object to serialize.
        file_names: Mapping of display name -> file stem (without extension).
            It must contain an entry for every key of ``classifiers``.

    Raises:
        KeyError: If a model's display name has no entry in ``file_names``.

    Prints one confirmation line per saved model and returns nothing.
    """
    for label in classifiers:
        # The ".joblib" extension is appended to the configured stem.
        target = "{}.joblib".format(file_names[label])
        joblib.dump(classifiers[label], target)
        print(f"Model '{label}' saved as {target}")



# Output file stem for each classifier, keyed by the same display names that
# are used in `classifiers`; the ".joblib" extension is added when saving.
file_names = {
    "Random Forest": "random_forest_model",
    "Logistic Regression": "logistic_regression_model",
    "K-Nearest Neighbors": "knn_model",
    "Support Vector Classifier (SVC)": "svc_model",
    "Decision Tree": "decision_tree_model",
    "Gradient Boosting": "gradient_boosting_model",
    "Ridge": "RidgeClassifier",
    "GBC": "GradientBoostingClassifier",
    "Voting Classifier": "voter",
}

# Persist every model to disk under its configured stem.
save_models(classifiers, file_names)

# Hand each saved file to the Colab `files.download` helper.
for stem in file_names.values():
    files.download(f"{stem}.joblib")


Model 'Random Forest' saved as random_forest_model.joblib
Model 'Logistic Regression' saved as logistic_regression_model.joblib
Model 'K-Nearest Neighbors' saved as knn_model.joblib
Model 'Support Vector Classifier (SVC)' saved as svc_model.joblib
Model 'Decision Tree' saved as decision_tree_model.joblib
Model 'Gradient Boosting' saved as gradient_boosting_model.joblib
Model 'Ridge' saved as RidgeClassifier.joblib
Model 'GBC' saved as GradientBoostingClassifier.joblib
Model 'Voting Classifier' saved as voter.joblib


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Pull the fitted Random Forest out of the model dictionary and read its
# per-feature importance scores.
rf_model = classifiers["Random Forest"]
feature_importances = rf_model.feature_importances_

# Feature names are every column of the raw frame except the final one, which
# is presumed to be the 'posture' label — verify if the CSV layout changes.
feature_names = data.columns[:-1]

print("\nFeature Importances (Random Forest):")
for column, weight in zip(feature_names, feature_importances):
    print(f"{column}: {weight:.4f}")