In [1]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
import joblib
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import classification_report

In [2]:
def _parse_keypoint(text):
    """Convert a string such as ``"(0.12, 0.34)"`` into a tuple of floats."""
    return tuple(float(part) for part in text.strip('()').split(','))


def preprocess_dataset(dataset_path, num_keypoints=21, label_column='Label'):
    """Load a keypoint CSV and flatten it into numeric x/y feature columns.

    The CSV must contain a ``label_column`` column plus columns named
    ``Keypoint_1`` .. ``Keypoint_<num_keypoints>`` whose cells are strings of
    comma-separated numbers wrapped in parentheses, e.g. ``"(0.1, 0.2)"``.
    Only the first two components of each keypoint are kept.

    Columns are selected by name rather than by position, so the result does
    not depend on where the label column sits in the file, and any column
    that is neither the label nor a requested keypoint is ignored.

    Parameters
    ----------
    dataset_path : path accepted by ``pandas.read_csv``.
    num_keypoints : int, default 21
        Number of ``Keypoint_<i>`` columns to expand (numbered from 1).
    label_column : str, default 'Label'
        Name of the column holding the target labels.

    Returns
    -------
    (features, labels)
        ``features`` is a DataFrame with columns ``Keypoint_1_x``,
        ``Keypoint_1_y``, ..., in keypoint order; ``labels`` is the label
        column as a Series. Both share the index of the loaded CSV.

    Raises
    ------
    KeyError
        If the label column or any requested keypoint column is missing.
    """
    df = pd.read_csv(dataset_path)
    labels = df[label_column]

    # Collect the new columns in a dict (insertion-ordered) and build the
    # frame once, instead of inserting columns one by one and dropping the
    # originals afterwards.
    coordinates = {}
    for i in range(1, num_keypoints + 1):
        points = df[f'Keypoint_{i}'].apply(_parse_keypoint)
        coordinates[f'Keypoint_{i}_x'] = points.apply(lambda point: point[0])
        coordinates[f'Keypoint_{i}_y'] = points.apply(lambda point: point[1])

    features = pd.DataFrame(coordinates, index=df.index)
    return features, labels


In [3]:

def save_model(model, model_filename):
    """Serialize ``model`` to the file ``model_filename`` with ``joblib.dump``.

    Any existing file at that path is handled however ``joblib.dump`` handles
    it; this wrapper adds no checks of its own and returns nothing.
    """
    joblib.dump(value=model, filename=model_filename)


In [4]:
def train_multiple_models(features, labels):
    """Fit a fixed roster of classifiers and report on a held-out split.

    Steps, in order:
      1. Integer-encode ``labels`` with a ``LabelEncoder`` and save the fitted
         encoder to ``../label_encoder.joblib``.
      2. Split the data 80/20 with ``random_state=42``.
      3. For each classifier: fit on the 80% part, print a classification
         report for the 20% part, and save the fitted estimator to
         ``../trained_<name with spaces replaced by underscores>.joblib``.

    Nothing is returned; results are only printed and written to disk.
    """
    # Persist the encoder before any training so the integer classes in the
    # printed reports can be mapped back to the original labels.
    encoder = LabelEncoder()
    encoded_labels = encoder.fit_transform(labels)
    save_model(encoder, '../label_encoder.joblib')

    X_train, X_test, y_train, y_test = train_test_split(
        features, encoded_labels, test_size=0.2, random_state=42
    )

    # (display name, unfitted estimator) pairs, trained in this order.
    candidates = [
        ('Random Forest', RandomForestClassifier(n_estimators=100, random_state=42)),
        ('Decision Tree', DecisionTreeClassifier(random_state=42)),
        ('SVM', SVC(kernel='linear', random_state=42)),
        ('KNN', KNeighborsClassifier()),
        ('GBM', GradientBoostingClassifier(random_state=42)),
        ('Logistic Regression', LogisticRegression(max_iter=1000, random_state=42)),
        ('Neural Network', MLPClassifier(max_iter=1000, random_state=42)),
    ]

    for name, estimator in candidates:
        print(f"Training {name}...")
        estimator.fit(X_train, y_train)
        print(classification_report(y_test, estimator.predict(X_test)))
        save_model(estimator, f'../trained_{name.replace(" ", "_")}.joblib')


In [5]:
# Relative locations of the keypoint CSV files consumed by this cell.
train_csv_path = '../train_data.csv'
test_csv_path = '../test_data.csv'

# Parse each CSV into a (features, labels) pair via preprocess_dataset.
train_features, train_labels = preprocess_dataset(train_csv_path)
# NOTE(review): test_features / test_labels are not used in the cells visible
# here. The reports printed below come from the 20% split that
# train_multiple_models holds out of the *training* data -- confirm whether a
# later cell evaluates the saved models on this separate test set.
test_features, test_labels = preprocess_dataset(test_csv_path)

# Fits every candidate classifier, prints a classification report for each,
# and writes the label encoder and fitted models to '../*.joblib' files.
train_multiple_models(train_features, train_labels)


Training Random Forest...
              precision    recall  f1-score   support

           0       0.95      0.91      0.93       106
           1       0.98      1.00      0.99       125
           2       0.98      0.98      0.98       128
           3       0.95      0.94      0.95       124
           4       0.95      0.95      0.95       131
           5       0.99      0.99      0.99       164
           6       0.97      0.97      0.97       148
           7       0.97      0.99      0.98       142
           8       1.00      0.99      0.99        94
           9       0.93      0.99      0.96        92
          10       0.99      0.98      0.98       131
          11       1.00      0.99      0.99       134
          12       0.89      0.90      0.90        84
          13       0.98      0.98      0.98       126
          14       0.89      0.96      0.93        97
          15       0.91      0.93      0.92        68
          16       1.00      0.90      0.95        30
 