In [9]:
%load_ext cuml.accel

In [25]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.svm import SVC
from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, classification_report

In [11]:
cols = ['Frame', 'Right Forearm x', 'Right Forearm y', 'Right Forearm z', 'Left Forearm x', 'Left Forearm y', 'Left Forearm z']
boning = pd.read_csv('Boning.csv', usecols=cols)
slicing = pd.read_csv('Slicing.csv', usecols=cols)
boning['class'] = 0
slicing['class'] = 1
df = pd.concat([boning, slicing], ignore_index=True)
df.to_csv('combined_data.csv', index=False)

In [12]:
rf = cols[1:4]
lf = cols[4:7]

def compute_features(df, cols, prefix):
    x, y, z = df[cols[0]], df[cols[1]], df[cols[2]]
    new_features = pd.DataFrame({
        f'{prefix}_RMS_xy': np.sqrt((x**2 + y**2) / 2),
        f'{prefix}_RMS_yz': np.sqrt((y**2 + z**2) / 2),
        f'{prefix}_RMS_zx': np.sqrt((z**2 + x**2) / 2),
        f'{prefix}_RMS_xyz': np.sqrt((x**2 + y**2 + z**2) / 3),
        f'{prefix}_Roll': np.degrees(np.arctan2(y, np.sqrt(x**2 + z**2))),
        f'{prefix}_Pitch': np.degrees(np.arctan2(x, np.sqrt(y**2 + z**2))),
    })
    return new_features

features_rf = compute_features(df, rf, "RF")
features_lf = compute_features(df, lf, "LF")

df_full = pd.concat([df, features_rf, features_lf], axis=1)

ordered_cols = (["Frame"] + rf + lf + list(features_rf.columns) + list(features_lf.columns) + ["class"])
df_full = df_full[ordered_cols]

df_full.to_csv("features_data", index=False)

In [30]:
def extract_features(df, frame_col="Frame", class_col="class", window_size=60):
    feature_list = []
    columns = df.columns[1:-1]
    
    num_windows = len(df) // window_size
    
    for w in range(num_windows):
        start = w * window_size
        end = start + window_size
        window = df.iloc[start:end]
        
        feats = {}
        for col in columns:
            x = window[col].values
            
            feats[f"{col}_mean"] = np.mean(x)
            feats[f"{col}_std"] = np.std(x)
            feats[f"{col}_min"] = np.min(x)
            feats[f"{col}_max"] = np.max(x)
            feats[f"{col}_auc"] = np.trapezoid(x)
            feats[f"{col}_peaks"] = np.sum(
                (np.diff(np.sign(np.diff(x))) < 0).astype(int)
            )
        
        feats[class_col] = window[class_col].mode()[0]
        feats[frame_col] = window[frame_col].iloc[0]
        
        feature_list.append(feats)
    
    feature_df = pd.DataFrame(feature_list)
    
    scaler = StandardScaler()
    num_cols = [c for c in feature_df.columns if c not in [frame_col, class_col]]
    feature_df[num_cols] = scaler.fit_transform(feature_df[num_cols]).astype(float)
    
    return feature_df


feature_df = extract_features(df)
print(feature_df.shape)
print(feature_df.head())


(1201, 38)
   Right Forearm x_mean  Right Forearm x_std  Right Forearm x_min  \
0              0.352959            -0.899852             0.657669   
1             -0.000169            -0.829807             0.603389   
2             -0.283665            -0.351956             0.277242   
3              1.101244            -0.133396             0.284152   
4              0.281863             0.898797            -0.910422   

   Right Forearm x_max  Right Forearm x_auc  Right Forearm x_peaks  \
0            -0.629073             0.378916               0.183689   
1            -0.607178            -0.006398               0.183689   
2            -0.010425            -0.490921              -0.150569   
3            -0.263541             1.265812              -0.819084   
4             0.884435             0.409506               0.852204   

   Right Forearm y_mean  Right Forearm y_std  Right Forearm y_min  \
0              0.040663            -0.846723             0.719695   
1              

In [32]:
def evaluate_model(model, X, y, cv=10):
    # train/test split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42, stratify=y
    )
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc_split = accuracy_score(y_test, y_pred)

    # 10-fold CV
    scores = cross_val_score(model, X, y, cv=cv)
    acc_cv = np.mean(scores)

    return acc_split, acc_cv

In [None]:

def run_svm_experiments(X, y):
    results = []

    acc_split, acc_cv = evaluate_model(SVC(), X, y)
    results.append(["SVM (baseline)", acc_split, acc_cv])

    param_grid = {
        "C": [0.1, 1, 10],
        "gamma": [0.01, 0.1, 1],
        "kernel": ["rbf"]
    }
    grid = GridSearchCV(SVC(), param_grid, cv=5)
    grid.fit(X, y)
    best_svm = grid.best_estimator_

    acc_split, acc_cv = evaluate_model(best_svm, X, y)
    results.append(["SVM (hyper-tuned)", acc_split, acc_cv])

    selector = SelectKBest(score_func=f_classif, k=10)
    X_new = selector.fit_transform(X, y)
    acc_split, acc_cv = evaluate_model(best_svm, X_new, y)
    results.append(["SVM (tuned + KBest 10)", acc_split, acc_cv])

    pca = PCA(n_components=10)
    X_pca = pca.fit_transform(X)
    acc_split, acc_cv = evaluate_model(best_svm, X_pca, y)
    results.append(["SVM (tuned + PCA 10)", acc_split, acc_cv])

    return pd.DataFrame(results, columns=["Model", "Train-test split", "10-fold CV"])

def run_other_classifiers(X, y):
    results = []

    for name, model in [
        ("SGD", SGDClassifier(random_state=1)),
        ("RandomForest", RandomForestClassifier(random_state=1)),
        ("MLP", MLPClassifier(hidden_layer_sizes=(50,50), max_iter=2000, random_state=1))
    ]:
        acc_split, acc_cv = evaluate_model(model, X, y)
        results.append([name, acc_split, acc_cv])

    return pd.DataFrame(results, columns=["Model", "Train-test split", "10-fold CV"])

1 fits failed out of a total of 45.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/home/crep/miniforge3/envs/rapids/lib/python3.13/site-packages/sklearn/model_selection/_validation.py", line 859, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/crep/miniforge3/envs/rapids/lib/python3.13/site-packages/cuml/accel/estimator_proxy.py", line 175, in method
    return self._call_method(name, *args, **kwargs)
           ~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/crep/miniforge3/envs/rapids/lib/python3.13/site-packages/cuml/accel/estimator_proxy.py", line 298, in _call

SVM Results:
                    Model  Train-test split  10-fold CV
0          SVM (baseline)          0.836565    0.847638
1       SVM (hyper-tuned)          0.844875    0.852631
2  SVM (tuned + KBest 10)          0.797784    0.808457
3    SVM (tuned + PCA 10)          0.803324    0.806026


In [None]:
X = feature_df.drop(columns=["Frame", "class"]).values
y = feature_df["class"].values

svm_results = run_svm_experiments(X, y)
print("SVM Results:")
print(svm_results)

other_results = run_other_classifiers(X, y)
print("\nOther Classifiers:")
print(other_results)