In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import pairwise_distances as distance
import json

In [None]:
# Read stand.csv; the two keypoint columns hold JSON text, so decode every
# cell into Python objects before use.
stand_csv = pd.read_csv("stand.csv")
for json_column in ("keypoint_coords", "keypoint_scores"):
    stand_csv[json_column] = stand_csv[json_column].apply(json.loads)
stand_csv

In [None]:
# Read crunch.csv; each keypoint cell is JSON text that is decoded and then
# materialised as a list.
crunch_csv = pd.read_csv("crunch.csv")
for json_column in ("keypoint_coords", "keypoint_scores"):
    crunch_csv[json_column] = crunch_csv[json_column].apply(
        lambda raw: list(json.loads(raw))
    )
crunch_csv

In [None]:
# Exploratory check on a single pose: distance of every keypoint from the
# centroid of the pose, min-max scaled.
pose = stand_csv.iloc[0, 1]  # first row, second column; iterated below as (x, y) pairs

# Accumulate the coordinate sums, then divide by the number of keypoints.
total_x = total_y = 0
for point_x, point_y in pose:
    total_x += point_x
    total_y += point_y
center = [total_x / len(pose), total_y / len(pose)]

features = distance([center], pose)[0]
mn, mx = min(features), max(features)
features = (features - mn) / (mx - mn)
features

In [None]:
import posenet.constants as const 
def extract_feature(keypoint_scores, keypoint_coords):
    """Turn one pose into a min-max normalised distance vector.

    The feature for keypoint ``i`` (``i >= 1``) is its distance from
    keypoint 0, as computed by ``distance``, rescaled so that the smallest
    distance maps to 0 and the largest to 1.

    Args:
        keypoint_scores: Not used by the computation; kept so existing
            callers keep working.
        keypoint_coords: Sequence of keypoint coordinates. The first entry
            is the reference point the distances are measured from.

    Returns:
        1-D float numpy array with ``len(keypoint_coords) - 1`` entries.
        When every distance is identical (including the all-zero case) the
        range is zero and normalisation is undefined, so an all-zero vector
        is returned instead of NaN.
    """
    features = np.asarray(
        distance(keypoint_coords[0:1], keypoint_coords[1:])[0], dtype=float
    )

    # normalize
    mn = features.min()
    span = features.max() - mn
    if span == 0:
        # Guarding on the range (not just on max == 0) avoids the 0/0 that
        # would otherwise fill the whole vector with NaN.
        return np.zeros(len(features), dtype=float)

    return (features - mn) / span

# Derive the "features" column from the keypoints, row by row, for both datasets.
for pose_frame in (stand_csv, crunch_csv):
    pose_frame["features"] = pose_frame.apply(
        lambda row: extract_feature(row["keypoint_scores"], row["keypoint_coords"]),
        axis=1,
    )


In [None]:
# Assemble the two-class dataset: feature vectors go into ds_x and labels
# into ds_y (0 for rows from stand_csv, 1 for rows from crunch_csv).
ds_x = [list(vector) for vector in stand_csv["features"]]
ds_x += [list(vector) for vector in crunch_csv["features"]]
ds_y = [0] * len(stand_csv) + [1] * len(crunch_csv)

In [None]:
# Remove NaN rows
# NaN appears when min-max normalization divides zero by zero, i.e. when all distances in a row are equal
from sklearn.model_selection import train_test_split
# Convert both containers to arrays, then keep only the rows in which no
# feature is NaN (labels are filtered with the same mask).
ds_x, ds_y = np.array(ds_x), np.array(ds_y)

selected_non_nan = (~np.isnan(ds_x)).all(axis=1)
ds_x, ds_y = ds_x[selected_non_nan], ds_y[selected_non_nan]

In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score, log_loss

# Running totals of accuracy and log loss over the folds scored so far.
s_acc = s_loss = 0
def scorer(model, X_test, y_test):
    """Score one fitted model on one test fold and update the running totals.

    Has the ``(estimator, X, y)`` signature that ``cross_val_score`` expects
    from a ``scoring`` callable. Besides returning a value it adds the fold's
    accuracy and log loss to the module-level ``s_acc`` / ``s_loss``.

    Args:
        model: Fitted classifier providing ``predict`` and ``predict_proba``.
        X_test: Feature rows of the test fold.
        y_test: True labels of the test fold.

    Returns:
        The accuracy of this fold (not the running total).
    """
    global s_acc, s_loss
    fold_acc = accuracy_score(y_test, model.predict(X_test))

    # Passing the classes the model was fitted on keeps log_loss usable when
    # a test fold happens to contain a single class.
    fold_loss = log_loss(
        y_test,
        model.predict_proba(X_test),
        labels=getattr(model, "classes_", None),
    )

    s_acc += fold_acc
    s_loss += fold_loss
    return fold_acc

def score_dataset(dataset_name, model, X, y, cv=None):
    """Cross-validate ``model`` on ``(X, y)`` and print the result.

    With a truthy ``cv`` the model is cross-validated once using ``scorer``,
    which accumulates accuracy and log loss in the module-level ``s_acc`` /
    ``s_loss``; both totals are reset here first and then averaged over the
    folds that were run.

    Without ``cv`` the model is cross-validated for every fold count from 2
    to 10 with the estimator's default scoring, and only the mean score of
    each run is printed.

    Args:
        dataset_name: Label printed in front of each result line.
        model: Estimator passed to ``cross_val_score``.
        X: Feature matrix.
        y: Target labels.
        cv: Anything ``cross_val_score`` accepts as ``cv`` (fold count or a
            splitter). ``None`` or another falsy value selects the sweep.

    Returns:
        ``(mean_accuracy, mean_log_loss)`` when ``cv`` is given, else ``None``.
    """
    global s_acc, s_loss
    if cv:
        s_acc = s_loss = 0
        scores = cross_val_score(model, X, y, cv=cv, scoring=scorer)
        # Average over the folds actually run; `cv` itself need not be an int.
        n_folds = len(scores)
        mean_acc = s_acc / n_folds
        mean_loss = s_loss / n_folds
        print("%12s %2d-folds = %.3f with loss = %.3f" % (dataset_name, n_folds, mean_acc, mean_loss))
        return mean_acc, mean_loss

    for nFold in range(2, 10+1):
        scores = cross_val_score(model, X, y, cv=nFold)
        print("%s %2d-folds = %.3f" % (dataset_name, nFold, np.mean(scores)))
    return None

In [None]:
import pandas as pd
from sklearn import tree
from sklearn import svm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import RandomForestClassifier

# Classifiers to compare, keyed by the name used in the results table.
# Only uncommented entries are evaluated; the others are kept as ready-made
# alternatives.
models = dict(
    # kNN=KNeighborsClassifier(n_neighbors=2),
    # GaussianNB=GaussianNB(),
    # DecisionTree=tree.DecisionTreeClassifier(criterion="gini"),
    # Bagging=BaggingClassifier(base_estimator=tree.DecisionTreeClassifier(), n_estimators=100),
    # AdamBoost=AdaBoostClassifier(base_estimator=tree.DecisionTreeClassifier(max_depth=2), n_estimators=100),
    RandomForest=RandomForestClassifier(n_estimators=100),
    # SVM_Linear=svm.SVC(kernel='linear', C=1000, probability=True),
    # SVM_Poly=svm.SVC(kernel='poly', C=100000, probability=True),
    # SVM_RBF=svm.SVC(kernel='rbf', C=100000, gamma=0.01, probability=True),
    # SVM_Sigmoid=svm.SVC(kernel='sigmoid', C=100000, gamma=0.0001, probability=True),
)

# Logging for Visual Comparison
# Collect one result row per classifier and build the table once at the end:
# DataFrame.append was removed in pandas 2.0, and appending row by row also
# left every row with the same index label.
log_cols=["Classifier", "Accuracy", "Log Loss"]
log_rows = []

for name, estimator in models.items():
    mean_acc, mean_loss = score_dataset(name, estimator, ds_x, ds_y, cv=10)
    # Accuracy is stored as a percentage, log loss as returned.
    log_rows.append([name, mean_acc*100, mean_loss])

log = pd.DataFrame(log_rows, columns=log_cols)

In [None]:
# Show the comparison table, then save it to log.csv without the index column.
print(log)
log.to_csv(path_or_buf='log.csv', index=False)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Two side-by-side horizontal bar charts built from `log`:
# accuracy on the left, log loss on the right.
plt.rcParams['figure.figsize'] = (16, 6)
fig, (ax_acc, ax_loss) = plt.subplots(1, 2)
fig.subplots_adjust(wspace=0.5)

# Remap the single-letter colour codes ("b", "g") to seaborn's muted palette.
sns.set_color_codes("muted")

# Plot Accuracy Figure
sns.barplot(x='Accuracy', y='Classifier', data=log, color="b", ax=ax_acc)
ax_acc.set_xlabel('Accuracy %')
ax_acc.set_title('Classifier Accuracy')

# Plot Log Loss Figure
sns.barplot(x='Log Loss', y='Classifier', data=log, color="g", ax=ax_loss)
ax_loss.set_xlabel('Log Loss')
ax_loss.set_title('Classifier Log Loss')

In [None]:
from sklearn.model_selection import train_test_split

# Fit the final classifier on the whole dataset (no hold-out split is made here).
model = RandomForestClassifier(n_estimators=50).fit(ds_x, ds_y)

# Serialise the fitted model to disk.
import pickle
file_name = 'stand_crunch.model'
with open(file_name, 'wb') as model_file:
    model_file.write(pickle.dumps(model))