In [1]:
import cv2
import traceback
import mediapipe as mp
import cv2
import numpy as np
# Short aliases for the two MediaPipe sub-modules the notebook relies on.
mp_pose = mp.solutions.pose                # pose estimator and POSE_CONNECTIONS
mp_drawing = mp.solutions.drawing_utils    # draw_landmarks overlay helper

# Detecting Poses with MediaPipe

In [2]:
# Live preview: run MediaPipe Pose on the default webcam and draw the skeleton.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Without this message the loop below is skipped and the cell appears to do nothing.
    print("Could not open camera 0")

with mp_pose.Pose(min_detection_confidence=.5, min_tracking_confidence=.5) as pose:
    try:
        # Loop through the video frames
        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                break

            # OpenCV delivers BGR frames, whereas MediaPipe expects RGB input.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = pose.process(rgb_frame)

            # Draw on the original BGR frame, since that is what imshow displays.
            if results.pose_landmarks:
                mp_drawing.draw_landmarks(frame, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)

            cv2.imshow("", frame)

            # Break the loop if 'q' is pressed
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break

    except Exception:
        # Print the error but still fall through to the cleanup below.
        traceback.print_exc()
    finally:
        # Release the video capture object and close the display window
        cap.release()
        cv2.destroyAllWindows()



# Real-Time Labeling: Capture Landmarks and Export to CSV

In [16]:
import csv
import os
import numpy as np
from matplotlib import pyplot as plt

In [17]:
# Create header for csv file: a 'class' label column followed by
# x/y/z/visibility columns for each of the 33 keypoints (x1, y1, z1, v1, ...).
filename = 'pushup-form-data.csv'

landmarks = ['class']
for val in range(1, 33+1): # 33 keypoints
    landmarks += [s + str(val) for s in 'xyzv']

# Only write the header when the file is missing or empty, so re-running this
# cell does not wipe samples that were already labeled. Delete the file by
# hand to start a fresh dataset.
if not os.path.exists(filename) or os.path.getsize(filename) == 0:
    with open(filename, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(landmarks)

In [18]:
def export_landmarks(results, classification, writer):
    """Append one labeled pose sample as a CSV row.

    The row is ``[classification, x1, y1, z1, v1, x2, ...]``, built from every
    entry of ``results.pose_landmarks.landmark``.

    Args:
        results: Pose result object. Its ``pose_landmarks.landmark`` items must
            expose ``x``, ``y``, ``z`` and ``visibility``.
        classification: Label stored in the first column of the row.
        writer: Object with a ``writerow`` method (e.g. a ``csv.writer``).

    Returns:
        True if a row was written, False if the sample was skipped.
    """
    pose_landmarks = getattr(results, 'pose_landmarks', None)
    if not pose_landmarks:
        # No pose in this frame: say so rather than surfacing an AttributeError.
        print(f"No pose detected; skipping '{classification}' sample")
        return False

    try:
        row = [classification]
        for point in pose_landmarks.landmark:
            row.extend((float(point.x), float(point.y), float(point.z), float(point.visibility)))
        writer.writerow(row)
    except Exception as e:
        # Best effort: one bad sample must not abort the labeling session.
        print(f"Could not export '{classification}' sample: {e}")
        return False
    return True

In [21]:
import time

delayBetweenFrames = .05 # in seconds

# Keys used to label the frame currently on screen.
key_to_label = {
    ord('1'): 'correct',
    ord('2'): 'too high',
    ord('3'): 'too low',
}

video_path = '../videos/pushups-in-class.mp4'

with open(filename, mode='a', newline='') as file:
    writer = csv.writer(file)

    # Start video capture
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        # Otherwise the loop below is skipped without any feedback.
        print(f"Could not open video '{video_path}'")

    with mp_pose.Pose(min_detection_confidence=.5, min_tracking_confidence=.5) as pose:
        try:
            # Loop through the video frames
            while cap.isOpened():
                # Read a frame from the video
                success, frame = cap.read()

                if not success: break

                # NOTE(review): MediaPipe expects RGB but OpenCV yields BGR.
                # The channel order is left unchanged here because it must
                # match the inference cell and the rows already collected;
                # switch both together and re-collect the data.
                results = pose.process(frame)

                if results.pose_landmarks:
                    mp_drawing.draw_landmarks(frame, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)

                cv2.imshow("", frame)

                time.sleep(delayBetweenFrames)

                # Take the first pending key press, then drain the rest.
                key = cv2.pollKey()
                while cv2.pollKey() != -1: pass # IIRC paint events are only called when the queue is empty
                if key != -1:
                    # Mask to the low byte, as the other loops do, so platform
                    # modifier bits do not break the comparisons below.
                    key &= 0xFF

                # Classify pushup form
                label = key_to_label.get(key)
                if label is not None:
                    if results.pose_landmarks:
                        export_landmarks(results, label, writer)
                    else:
                        print(f"No pose detected; skipping '{label}' sample")

                # Break the loop if 'q' is pressed
                if key == ord("q"):
                    break

        except Exception:
            traceback.print_exc()
        finally:
            # Release the video capture object and close the display window
            cap.release()
            cv2.destroyAllWindows()



# Train a Custom Model Using scikit-learn

In [22]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [23]:
# Read the labeled landmark rows back from the CSV written during labeling.
df = pd.read_csv(filepath_or_buffer=filename)
df  # bare expression so the notebook renders the frame

Unnamed: 0,class,x1,y1,z1,v1,x2,y2,z2,v2,x3,...,z31,v31,x32,y32,z32,v32,x33,y33,z33,v33
0,correct,0.228465,0.402040,-0.193817,0.997524,0.218100,0.374460,-0.196640,0.997424,0.219094,...,0.173811,0.641723,0.897886,0.805056,-0.246175,0.954093,0.840877,0.738776,0.118001,0.700969
1,correct,0.228905,0.402087,-0.173807,0.997417,0.217052,0.374032,-0.181596,0.997093,0.217822,...,0.257706,0.580411,0.888719,0.797005,-0.204214,0.938830,0.827460,0.732985,0.203206,0.643513
2,correct,0.227769,0.402103,-0.175530,0.997449,0.215786,0.374105,-0.183545,0.997103,0.216443,...,0.292627,0.572552,0.886678,0.796181,-0.207838,0.937560,0.822506,0.733136,0.245341,0.636662
3,correct,0.222370,0.402109,-0.143537,0.997512,0.211764,0.374100,-0.156263,0.997171,0.212423,...,0.294387,0.563024,0.885340,0.795752,-0.214304,0.937075,0.819029,0.733662,0.244294,0.631308
4,correct,0.219876,0.402115,-0.140136,0.997630,0.209658,0.374114,-0.152775,0.997290,0.210258,...,0.256177,0.561078,0.885094,0.795512,-0.199895,0.935624,0.819996,0.734973,0.197646,0.629004
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1829,too high,0.127981,0.346994,-0.118772,0.999361,0.106165,0.319714,-0.142981,0.999283,0.106259,...,0.417956,0.521658,0.817693,0.765944,-0.215383,0.909630,0.782099,0.715701,0.391903,0.557115
1830,too high,0.119469,0.349396,-0.172626,0.999398,0.099061,0.319909,-0.195910,0.999326,0.099864,...,0.409455,0.521683,0.817754,0.765782,-0.139053,0.907888,0.777658,0.722370,0.398759,0.555908
1831,too high,0.118769,0.355052,-0.102192,0.999385,0.097508,0.327009,-0.144487,0.999320,0.098197,...,0.402042,0.520848,0.817683,0.764904,-0.075068,0.904150,0.778390,0.722249,0.389635,0.551315
1832,too high,0.111496,0.357493,-0.123791,0.999408,0.089759,0.327907,-0.157509,0.999350,0.090814,...,0.399080,0.517505,0.817345,0.764544,-0.112449,0.900149,0.766946,0.693730,0.383059,0.545417


In [25]:
# Separate the label column from the landmark feature columns.
y = df['class']
X = df.drop(columns='class')

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
# NOTE(review): rows presumably come from consecutive video frames, so near-duplicate
# frames may land in both splits and inflate test scores — consider a time/group split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=101
)

In [26]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

from sklearn.linear_model import SGDClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import LinearSVC

![image](https://1.bp.blogspot.com/-ME24ePzpzIM/UQLWTwurfXI/AAAAAAAAANw/W3EETIroA80/s1600/drop_shadows_background.png)

In [31]:
# Candidate classifiers, each behind a StandardScaler so features share one scale.
# The estimators with internal randomness get a fixed seed so that re-running
# the notebook reproduces the same fitted models and scores.
RANDOM_STATE = 101

pipelines = {
    'lr': make_pipeline(StandardScaler(), LogisticRegression()),
    'rc': make_pipeline(StandardScaler(), RidgeClassifier()),
    'rf': make_pipeline(StandardScaler(), RandomForestClassifier(random_state=RANDOM_STATE)),
    'gb': make_pipeline(StandardScaler(), GradientBoostingClassifier(random_state=RANDOM_STATE)),
    'nn': make_pipeline(StandardScaler(), KNeighborsClassifier()),  # k-nearest neighbours
    'svc': make_pipeline(StandardScaler(), LinearSVC(random_state=RANDOM_STATE)),
    'sgd': make_pipeline(StandardScaler(), SGDClassifier(random_state=RANDOM_STATE)),
}

In [32]:
# Fit every candidate pipeline on the training split, keyed by its short name.
# Pipeline.fit returns the pipeline itself, so these are the same objects as in `pipelines`.
fit_models = {
    name: pipeline.fit(X_train, y_train)
    for name, pipeline in pipelines.items()
}



# Evaluate and Save

In [33]:
from sklearn.metrics import accuracy_score, precision_score, recall_score
import pickle

In [34]:
# Print the held-out accuracy of each fitted pipeline, one line per model.
y_true = y_test.values
for algo, model in fit_models.items():
    y_pred = model.predict(X_test)
    print(algo, accuracy_score(y_true, y_pred))

lr 1.0
rc 1.0
rf 1.0
gb 0.9945553539019963
nn 1.0
svc 0.9981851179673321
sgd 1.0


In [101]:
# Persist the LinearSVC pipeline for the live-inference section.
svc_pipeline = fit_models['svc']
with open('pushup_butt_height.pkl', mode='wb') as f:
    pickle.dump(svc_pipeline, f)

# Run Model

In [13]:
# Restore the saved pipeline. Unpickling can execute arbitrary code, so only
# load a .pkl file that this notebook (or another trusted source) produced.
with open('pushup_butt_height.pkl', mode='rb') as f:
    model = pickle.load(f)

In [15]:
# Live inference: classify push-up form on webcam frames with the saved model.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Without this message the loop below is skipped and the cell appears to do nothing.
    print("Could not open camera 0")

with mp_pose.Pose(min_detection_confidence=.5, min_tracking_confidence=.5) as pose:
    try:
        # Loop through the video frames
        while cap.isOpened():
            # Read a frame from the video
            success, frame = cap.read()

            if not success: break

            # Run inference on the frame
            # NOTE(review): MediaPipe expects RGB but OpenCV yields BGR. The
            # channel order is left unchanged here because it must match how
            # the training rows were captured; change both together and retrain.
            results = pose.process(frame)

            if results.pose_landmarks:
                mp_drawing.draw_landmarks(frame, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)

                # Same flattening as used for the CSV rows: x, y, z, visibility per keypoint.
                keypoints = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten().tolist()
                # Use a dedicated name so the training feature matrix `X` is not overwritten.
                sample = pd.DataFrame([keypoints], columns=landmarks[1:])
                class_ = model.predict(sample)[0]
                # str() hands OpenCV a plain Python string rather than a NumPy scalar.
                cv2.putText(frame, str(class_), (95, 40), cv2.FONT_HERSHEY_SIMPLEX, .5, (255,255,255), 2, cv2.LINE_AA)

            cv2.imshow("", frame)

            # Break the loop if 'q' is pressed
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break

    except Exception:
        # Print the error but still fall through to the cleanup below.
        traceback.print_exc()
    finally:
        # Release the video capture object and close the display window
        cap.release()
        cv2.destroyAllWindows()

