In [108]:
import pandas as pd
import numpy as np
from sklearn.metrics import pairwise_distances as distance
import json

In [109]:
# Load the "stand" pose samples; the keypoint columns are stored in the CSV
# as JSON text, so decode each of them back into Python lists.
stand_csv = pd.read_csv("stand.csv")
for json_column in ("keypoint_coords", "keypoint_scores"):
    stand_csv[json_column] = stand_csv[json_column].apply(json.loads)
stand_csv

Unnamed: 0.1,Unnamed: 0,keypoint_coords,keypoint_scores
0,0,"[[41.17484540717547, 424.118377831564], [18.37...","[0.9492308497428894, 0.9189706444740295, 0.951..."
1,1,"[[65.42472422400186, 418.31286562686904], [31....","[0.960146427154541, 0.8650374412536621, 0.9544..."
2,2,"[[62.49335976534112, 408.9349744878888], [32.9...","[0.9485349059104919, 0.8589192628860474, 0.948..."
3,3,"[[59.07950964639353, 403.96868692060406], [35....","[0.9301652908325195, 0.8851189613342285, 0.948..."
4,4,"[[57.207370048345524, 397.7698863636364], [33....","[0.9177314043045044, 0.8564090132713318, 0.945..."
...,...,...,...
2318,2318,"[[82.09326500116393, 988.3409228165184], [12.7...","[0.818300724029541, 0.3718859553337097, 0.7361..."
2319,2319,"[[93.70561102933662, 986.9426278292277], [31.7...","[0.8564101457595825, 0.5420486927032471, 0.797..."
2320,2320,"[[78.38059891101926, 1002.6481942354777], [-5....","[0.7710866928100586, 0.16072413325309753, 0.71..."
2321,2321,"[[53.604439668877184, 997.8381508275082], [-13...","[0.760976254940033, 0.1255340278148651, 0.6220..."


In [110]:
# Load the "crunch" pose samples; the keypoint columns are stored in the CSV
# as JSON text, so decode each cell and materialise it as a list.
crunch_csv = pd.read_csv("crunch.csv")
for json_column in ("keypoint_coords", "keypoint_scores"):
    crunch_csv[json_column] = crunch_csv[json_column].apply(json.loads).apply(list)
crunch_csv

Unnamed: 0.1,Unnamed: 0,keypoint_coords,keypoint_scores
0,0,"[[179.8922788043355, 626.91329737029], [171.97...","[0.9750418663024902, 0.9369086027145386, 0.767..."
1,1,"[[201.41093785263772, 617.0603745747982], [185...","[0.9682409763336182, 0.9340900182723999, 0.962..."
2,2,"[[215.5135328437007, 609.2460128565154], [198....","[0.9566459655761719, 0.9162812232971191, 0.944..."
3,3,"[[238.0826816558838, 614.4344318882701], [219....","[0.9483922719955444, 0.8992764949798584, 0.933..."
4,4,"[[246.39222340251123, 615.9106883363861], [226...","[0.9549083709716797, 0.9076463580131531, 0.941..."
...,...,...,...
1795,1795,"[[439.6655964740487, 694.0105106043474], [416....","[0.949302613735199, 0.8536398410797119, 0.9496..."
1796,1796,"[[438.34243146763293, 696.7001079942621], [416...","[0.9452518224716187, 0.8464301824569702, 0.947..."
1797,1797,"[[442.92277307288595, 702.4441615930584], [419...","[0.9377988576889038, 0.8237844705581665, 0.945..."
1798,1798,"[[444.2876731739488, 698.6436338287791], [419....","[0.9339144229888916, 0.8247878551483154, 0.939..."


In [112]:
def extract_feature(keypoint_coords):
    """Turn one pose's keypoints into a min-max normalised distance vector.

    The raw features are the pairwise distances (scikit-learn's default
    metric) from the first keypoint to each of the remaining keypoints.
    They are then rescaled so the smallest distance maps to 0 and the
    largest to 1.

    Args:
        keypoint_coords: sequence of keypoint coordinates, one entry per
            keypoint. The first entry is used as the reference point.

    Returns:
        A 1-D numpy array with one value per non-reference keypoint. When
        all distances are identical the range is zero and the normalisation
        is undefined; in that case an all-NaN vector is returned (without
        triggering a divide warning) so callers can filter the sample out.
    """
    features = distance(keypoint_coords[0:1], keypoint_coords[1:])[0]
    # normalize
    lowest = features.min()
    spread = features.max() - lowest
    if spread == 0:
        # 0/0 would produce NaN plus a RuntimeWarning; emit the NaNs explicitly.
        return np.full_like(features, np.nan)
    return (features - lowest) / spread

# Derive the per-sample feature vector from the keypoints of both classes
# and store it in a new "features" column on each frame.
for pose_frame in (stand_csv, crunch_csv):
    pose_frame["features"] = pose_frame.apply(
        lambda sample: extract_feature(sample["keypoint_coords"]),
        axis=1,
    )


  import sys


In [113]:
# Build the two-class dataset: every "stand" sample is labelled 0 and every
# "crunch" sample is labelled 1, with stand samples first.
stand_features = [list(vector) for vector in stand_csv["features"]]
crunch_features = [list(vector) for vector in crunch_csv["features"]]

ds_x = stand_features + crunch_features
ds_y = [0] * len(stand_features) + [1] * len(crunch_features)

In [114]:
# Drop every sample whose feature vector contains a NaN.
# NaNs appear when all distances of a sample are equal, because the min-max
# normalisation then divides by a zero range.
from sklearn.model_selection import train_test_split
ds_x = np.array(ds_x)
ds_y = np.array(ds_y)

row_has_nan = np.any(np.isnan(ds_x), axis=1)
selected_non_nan = np.logical_not(row_has_nan)
ds_x, ds_y = ds_x[selected_non_nan], ds_y[selected_non_nan]

In [116]:
from sklearn.svm import SVC
def new_model():
    """Create a fresh, unfitted SVC with the notebook's fixed hyperparameters.

    Returns:
        A new ``SVC`` instance configured with ``C=1000`` and ``coef0=0.01``.
    """
    # NOTE(review): scikit-learn documents coef0 as only significant for the
    # 'poly' and 'sigmoid' kernels; with the default kernel it presumably has
    # no effect — confirm whether it is intentional.
    hyperparameters = {"C": 1000, "coef0": 0.01}
    return SVC(**hyperparameters)

In [117]:
from sklearn.model_selection import KFold 
from sklearn.metrics import accuracy_score, precision_score, recall_score

# 10-fold cross-validation: a fresh model is fitted on each training split
# and accuracy / precision / recall / F1 (all in percent) are printed for the
# held-out split, followed by the accuracy averaged over all folds.
sum_acc = 0

# Seed the shuffle so the fold assignment, and therefore every metric printed
# below, is identical on each Restart & Run All.
kf = KFold(n_splits=10, shuffle=True, random_state=42)

for count, (train_index, test_index) in enumerate(kf.split(ds_x), start=1):
    print("Evaluating %d-th" % count)
    clf = new_model()
    train_x, test_x = ds_x[train_index], ds_x[test_index]
    train_y, test_y = ds_y[train_index], ds_y[test_index]
    clf.fit(train_x, train_y)
    # evaluate model
    pred_y = clf.predict(test_x)
    acc = accuracy_score(test_y, pred_y) * 100
    precision = precision_score(test_y, pred_y) * 100
    recall = recall_score(test_y, pred_y) * 100
    # F1 is the harmonic mean of precision and recall; report 0 instead of
    # dividing by zero when both are 0 for a fold.
    precision_plus_recall = precision + recall
    if precision_plus_recall > 0:
        f1 = 2 * precision * recall / precision_plus_recall
    else:
        f1 = 0.0
    sum_acc += acc
    print("accuracy : %.2f%%" % acc)
    print("precision: %.2f%%" % precision)
    print("recall   : %.2f%%" % recall)
    print("f1       : %.2f%%" % f1)
    print("=====")

print("Avg acc: %.2f" % (sum_acc / kf.n_splits))

Evaluating 1-th
accuracy : 99.52%
precision: 100.00%
recall   : 98.88%
f1       : 99.44%
=====
Evaluating 2-th
accuracy : 100.00%
precision: 100.00%
recall   : 100.00%
f1       : 100.00%
=====
Evaluating 3-th
accuracy : 99.76%
precision: 100.00%
recall   : 99.41%
f1       : 99.70%
=====
Evaluating 4-th
accuracy : 98.79%
precision: 98.82%
recall   : 98.24%
f1       : 98.53%
=====
Evaluating 5-th
accuracy : 100.00%
precision: 100.00%
recall   : 100.00%
f1       : 100.00%
=====
Evaluating 6-th
accuracy : 99.76%
precision: 100.00%
recall   : 99.44%
f1       : 99.72%
=====
Evaluating 7-th
accuracy : 99.27%
precision: 98.92%
recall   : 99.46%
f1       : 99.19%
=====
Evaluating 8-th
accuracy : 99.51%
precision: 98.94%
recall   : 100.00%
f1       : 99.47%
=====
Evaluating 9-th
accuracy : 99.51%
precision: 98.74%
recall   : 100.00%
f1       : 99.37%
=====
Evaluating 10-th
accuracy : 99.76%
precision: 100.00%
recall   : 99.47%
f1       : 99.73%
=====
Avg acc: 99.59


In [118]:
# Fit the production-ready model on the complete (NaN-filtered) dataset.
# fit() returns the estimator itself, so the call can be chained; the bare
# name on the last line keeps the estimator displayed as the cell output.
clf = new_model().fit(ds_x, ds_y)
clf

SVC(C=1000, coef0=0.01)

In [120]:
# Serialise the fitted classifier to disk so it can be loaded elsewhere.
import pickle
file_name = 'stand_crunch.model'
with open(file_name, mode='wb') as model_file:
    pickle.dump(clf, file=model_file)