# Body Language Decoder

## Table of Contents
0. [Import Dependencies](#install)
1. [Detection using MediaPipe](#detection) 
2. [Feature Extraction](#feature-extraction) 
    1. [Write Column Headers to CSV File](#csv-header)
    2. [Extract Features of Assigned Class](#save-coordinates)
3. [Train Custom Model Using Scikit Learn](#model)
    1. [Load and Preprocess Input Data](#load-input)
    2. [Train Machine Learning Classification Models](#training)
    3. [Evaluate and Serialize Model](#evaluate)
4. [Real-time Detections with Model](#real-time-detection)

## 0. Import Dependencies <a id="install"></a>

In [1]:
from mediapipe import solutions as mp
import cv2
import csv
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
import pickle
import os

## 1. Detection using MediaPipe <a id="detection"></a>

In [3]:
# Inspect the Holistic class (IPython `??` help) to see which parameters it accepts
mp.holistic.Holistic??

In [2]:
cap = cv2.VideoCapture(0)

# One entry per landmark group, drawn in this order:
# (attribute on the holistic result, connection set, colour, line thickness, landmark circle radius)
# NOTE(review): newer MediaPipe releases reportedly expose FACEMESH_* constants instead of
# FACE_CONNECTIONS - confirm against the installed version.
draw_styles = (
    ("face_landmarks", mp.holistic.FACE_CONNECTIONS, (242, 216, 175), 1, 1),
    ("right_hand_landmarks", mp.holistic.HAND_CONNECTIONS, (181, 137, 82), 2, 4),
    ("left_hand_landmarks", mp.holistic.HAND_CONNECTIONS, (181, 137, 82), 2, 4),
    ("pose_landmarks", mp.holistic.POSE_CONNECTIONS, (65, 53, 31), 2, 2),
)

# try/finally guarantees the camera and the preview window are released even when the loop
# raises or the cell is interrupted.
try:
    # Initiate holistic model
    with mp.holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:

        while cap.isOpened():
            success, frame = cap.read()

            # Stop as soon as the camera delivers no frame
            if not success:
                break

            # Recolor feed (BGR -> RGB) before running detection
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Make detections
            results = holistic.process(image)

            # Recolor back (RGB -> BGR) for rendering
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            # Draw face, right hand, left hand and pose landmarks on the frame
            for attr, connections, color, thickness, radius in draw_styles:
                mp.drawing_utils.draw_landmarks(
                    image, getattr(results, attr), connections,
                    mp.drawing_utils.DrawingSpec(
                        color=color, thickness=thickness, circle_radius=radius),
                    mp.drawing_utils.DrawingSpec(color=color, thickness=thickness))

            cv2.imshow("Holistic Model Detection", image)

            # Press "q" to exit
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    cap.release()
    cv2.destroyAllWindows()

In [3]:
# Display the pose landmarks stored in `results` by the last frame processed in the loop above
results.pose_landmarks

landmark {
  x: 0.5843711
  y: 0.56119466
  z: -1.611466
  visibility: 0.99913055
}
landmark {
  x: 0.62362343
  y: 0.49369425
  z: -1.6057202
  visibility: 0.9985631
}
landmark {
  x: 0.6421812
  y: 0.49956048
  z: -1.6053894
  visibility: 0.9985148
}
landmark {
  x: 0.6663171
  y: 0.50675076
  z: -1.6062084
  visibility: 0.9984906
}
landmark {
  x: 0.562326
  y: 0.4816672
  z: -1.6020733
  visibility: 0.99849623
}
landmark {
  x: 0.54021674
  y: 0.47851017
  z: -1.6030219
  visibility: 0.9984915
}
landmark {
  x: 0.51813495
  y: 0.47513095
  z: -1.6035877
  visibility: 0.99841464
}
landmark {
  x: 0.702864
  y: 0.5499426
  z: -1.2511183
  visibility: 0.9983488
}
landmark {
  x: 0.4867033
  y: 0.51005733
  z: -1.2308837
  visibility: 0.998183
}
landmark {
  x: 0.61857384
  y: 0.66376466
  z: -1.4530115
  visibility: 0.99908644
}
landmark {
  x: 0.5398247
  y: 0.6480311
  z: -1.4496695
  visibility: 0.99903387
}
landmark {
  x: 0.842206
  y: 0.90674096
  z: -0.8773053
  visibility: 0.9

## 2. Feature Extraction <a id="feature-extraction"></a>

### Write Column Headers to CSV File <a id="csv-header"></a>

In [4]:
# Total number of landmarks (pose + face) found in the last processed frame;
# each landmark later becomes four CSV columns (x, y, z, v).
num_coords = sum(
    len(group.landmark)
    for group in (results.pose_landmarks, results.face_landmarks)
)
num_coords

501

In [5]:
# CSV header: the label column, then x/y/z/v columns for every landmark (numbered from 1)
landmarks = ['class'] + [
    '{}{}'.format(axis, val)
    for val in range(1, num_coords + 1)
    for axis in 'xyzv'
]

In [6]:
# Create the output folder if needed; exist_ok replaces the separate existence check
os.makedirs("data", exist_ok=True)

# mode="w" starts a fresh file, discarding any samples collected earlier;
# newline="" leaves line-ending handling to the csv module.
with open("data/body_language_coords.csv", mode="w", newline="") as f:
    csv_writer = csv.writer(f, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL)
    csv_writer.writerow(landmarks)

### Extract Features of Assigned Class <a id="save-coordinates"></a>

- Repeat this step to save features of different targets.
- Make sure your samples cover different scenarios of the target.

In [9]:
# Label stored in the first CSV column of each recorded sample.
# NOTE: the capture cell below assigns class_name again, so that later value is the one used.
class_name = "call me"

In [11]:
# Label written into the first column of every row recorded by this run
class_name = "okay"
cap = cv2.VideoCapture(0)

# One entry per landmark group, drawn in this order:
# (attribute on the holistic result, connection set, colour, line thickness, landmark circle radius)
draw_styles = (
    ("face_landmarks", mp.holistic.FACE_CONNECTIONS, (242, 216, 175), 1, 1),
    ("right_hand_landmarks", mp.holistic.HAND_CONNECTIONS, (181, 137, 82), 2, 4),
    ("left_hand_landmarks", mp.holistic.HAND_CONNECTIONS, (181, 137, 82), 2, 4),
    ("pose_landmarks", mp.holistic.POSE_CONNECTIONS, (65, 53, 31), 2, 2),
)

# try/finally guarantees the camera and the preview window are released even when the loop
# raises or the cell is interrupted.
try:
    with mp.holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        # Open the CSV once in append mode instead of reopening it for every frame
        with open("data/body_language_coords.csv", mode="a", newline="") as f:
            csv_writer = csv.writer(f, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL)

            while cap.isOpened():
                success, frame = cap.read()

                # Stop as soon as the camera delivers no frame
                if not success:
                    break

                # Recolor feed (BGR -> RGB) before running detection
                image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                # Make detections
                results = holistic.process(image)

                # Recolor back (RGB -> BGR) for rendering
                image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

                # Draw face, right hand, left hand and pose landmarks on the frame
                for attr, connections, color, thickness, radius in draw_styles:
                    mp.drawing_utils.draw_landmarks(
                        image, getattr(results, attr), connections,
                        mp.drawing_utils.DrawingSpec(
                            color=color, thickness=thickness, circle_radius=radius),
                        mp.drawing_utils.DrawingSpec(color=color, thickness=thickness))

                # Export coordinates only when both pose and face were detected, so each row
                # holds the pose values followed by the face values.
                # Hand landmarks are drawn above but are not exported.
                if results.pose_landmarks and results.face_landmarks:
                    # Extract pose landmarks as a flat [x, y, z, visibility, ...] list
                    pose = results.pose_landmarks.landmark
                    pose_row = list(np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility]
                                              for landmark in pose]).flatten())

                    # Extract face landmarks in the same layout
                    face = results.face_landmarks.landmark
                    face_row = list(np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility]
                                              for landmark in face]).flatten())

                    # Concatenate the row: class name first, then pose and face values
                    row = [class_name] + pose_row + face_row

                    # Export to CSV
                    csv_writer.writerow(row)

                cv2.imshow("Holistic Model Detection", image)

                # Press "q" to exit
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    break
finally:
    cap.release()
    cv2.destroyAllWindows()

## 3. Train Custom Model Using Scikit Learn <a id="model"></a>

### Load and Preprocess Input Data <a id="load-input"></a>

In [12]:
# Load the recorded samples: a "class" label column plus the landmark feature columns
df = pd.read_csv("data/body_language_coords.csv")
# Bare expression so the notebook renders a preview of the frame
df

Unnamed: 0,class,x1,y1,z1,v1,x2,y2,z2,v2,x3,...,z499,v499,x500,y500,z500,v500,x501,y501,z501,v501
0,hello,0.398455,0.600279,-1.055155,0.999973,0.433070,0.539880,-1.045254,0.999941,0.453345,...,-0.004736,0.0,0.478617,0.538527,0.007579,0.0,0.484838,0.533906,0.007439,0.0
1,hello,0.397785,0.600784,-1.266790,0.999941,0.433034,0.540222,-1.254986,0.999885,0.453347,...,-0.003540,0.0,0.477038,0.530903,0.009073,0.0,0.482944,0.526155,0.008975,0.0
2,hello,0.397164,0.600634,-1.310272,0.999861,0.432457,0.539895,-1.300587,0.999757,0.453116,...,-0.003812,0.0,0.471257,0.522302,0.007390,0.0,0.477177,0.517328,0.007134,0.0
3,hello,0.395314,0.600402,-1.245809,0.999812,0.429674,0.539133,-1.231977,0.999691,0.451041,...,-0.002065,0.0,0.468842,0.516510,0.010164,0.0,0.474540,0.511191,0.010064,0.0
4,hello,0.394182,0.599554,-1.247712,0.999774,0.427174,0.537644,-1.235662,0.999635,0.449047,...,-0.001986,0.0,0.465738,0.513554,0.010410,0.0,0.471411,0.508272,0.010352,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
294,okay,0.510028,0.611455,-1.216142,0.999992,0.537551,0.544752,-1.198435,0.999992,0.557770,...,-0.009392,0.0,0.574701,0.535402,0.004498,0.0,0.579662,0.529117,0.004538,0.0
295,okay,0.511650,0.608248,-1.252818,0.999992,0.539802,0.541962,-1.233470,0.999992,0.560164,...,-0.007518,0.0,0.582852,0.521825,0.007331,0.0,0.588122,0.515606,0.007415,0.0
296,okay,0.513868,0.602867,-1.304193,0.999992,0.542164,0.536599,-1.288110,0.999992,0.562877,...,-0.005146,0.0,0.587349,0.516700,0.011592,0.0,0.592424,0.511196,0.011807,0.0
297,okay,0.514789,0.595165,-1.269664,0.999993,0.544227,0.529418,-1.250440,0.999992,0.565324,...,-0.002919,0.0,0.590335,0.510601,0.014454,0.0,0.595509,0.505495,0.014687,0.0


In [13]:
# Separate the label column (target) from the remaining landmark columns (features)
y = df["class"]
X = df.drop(columns=["class"])

In [14]:
# Number of recorded samples per class label
y.value_counts()

call me    114
hello      101
okay        84
Name: class, dtype: int64

In [15]:
# Hold out 20% of the samples for evaluation; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=47,
)

### Train Machine Learning Classification Models <a id="training"></a>

In [16]:
# One candidate pipeline per algorithm; each standardises the features before the classifier.
pipelines = {
    # max_iter is raised above the default because the default run stopped with
    # "TOTAL NO. of ITERATIONS REACHED LIMIT" on this data; raise it further if the
    # convergence warning still appears.
    "lr": make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000)),
    "svc": make_pipeline(StandardScaler(), SVC()),
    "rf": make_pipeline(StandardScaler(), RandomForestClassifier()),
    "knn": make_pipeline(StandardScaler(), KNeighborsClassifier()),
}

In [17]:
# Fit every candidate pipeline on the training split and keep it under its short name.
fit_models = {}
for algo, pipeline in pipelines.items():
    # The label Series is already one-dimensional, so ravel() (deprecated on pandas Series)
    # is not needed; to_numpy() hands the estimator the same 1-D array of labels.
    model = pipeline.fit(X_train, y_train.to_numpy())
    fit_models[algo] = model

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


### Evaluate and Serialize Model <a id="evaluate"></a>

In [18]:
# Print each fitted pipeline's accuracy on the held-out test split
for name, fitted in fit_models.items():
    predictions = fitted.predict(X_test)
    score = accuracy_score(y_test, predictions)
    print(name, score)

lr 1.0
svc 1.0
rf 1.0
knn 1.0


In [19]:
# Print each fitted pipeline's confusion matrix on the held-out test split,
# with the pipeline name on its own line and a blank line after each matrix
for name, fitted in fit_models.items():
    matrix = confusion_matrix(y_test, fitted.predict(X_test))
    print(name, matrix, sep="\n", end="\n\n")

lr
[[22  0  0]
 [ 0 28  0]
 [ 0  0 10]]

svc
[[22  0  0]
 [ 0 28  0]
 [ 0  0 10]]

rf
[[22  0  0]
 [ 0 28  0]
 [ 0  0 10]]

knn
[[22  0  0]
 [ 0 28  0]
 [ 0  0 10]]



In [20]:
# Create the output folder if needed; exist_ok replaces the separate existence check
os.makedirs("generated_model", exist_ok=True)

# Key in fit_models of the pipeline to serialize.
# NOTE: the real-time detection cell calls predict_proba on the saved model, so pick a
# pipeline whose classifier provides it (SVC() as configured above does not, unless it is
# created with probability=True).
model_to_save = "rf"

# Save model to file
with open("generated_model/body_language_model.pkl", "wb") as f:
    pickle.dump(fit_models[model_to_save], f)

## 4. Real-time Detections with Model <a id="real-time-detection"></a>

In [21]:
# Reload the serialized pipeline for inference.
# NOTE: pickle.load can execute code embedded in the file; only load model files you created yourself.
with open("generated_model/body_language_model.pkl", "rb") as f:
    model_inference = pickle.load(f)

In [None]:
cap = cv2.VideoCapture(0)

# One entry per landmark group, drawn in this order:
# (attribute on the holistic result, connection set, colour, line thickness, landmark circle radius)
draw_styles = (
    ("face_landmarks", mp.holistic.FACE_CONNECTIONS, (242, 216, 175), 1, 1),
    ("right_hand_landmarks", mp.holistic.HAND_CONNECTIONS, (181, 137, 82), 2, 4),
    ("left_hand_landmarks", mp.holistic.HAND_CONNECTIONS, (181, 137, 82), 2, 4),
    ("pose_landmarks", mp.holistic.POSE_CONNECTIONS, (65, 53, 31), 2, 2),
)

# Feature column names (x1, y1, z1, v1, x2, ...) matching the CSV header the model was
# trained on; built once, when the first row reveals how many values there are.
feature_columns = None

# try/finally guarantees the camera and the preview window are released even when the loop
# raises or the cell is interrupted.
try:
    with mp.holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:

        while cap.isOpened():
            success, frame = cap.read()

            # Stop as soon as the camera delivers no frame
            if not success:
                break

            # Recolor feed (BGR -> RGB) before running detection
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Make detections
            results = holistic.process(image)

            # Recolor back (RGB -> BGR) for rendering
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            # Draw face, right hand, left hand and pose landmarks on the frame
            for attr, connections, color, thickness, radius in draw_styles:
                mp.drawing_utils.draw_landmarks(
                    image, getattr(results, attr), connections,
                    mp.drawing_utils.DrawingSpec(
                        color=color, thickness=thickness, circle_radius=radius),
                    mp.drawing_utils.DrawingSpec(color=color, thickness=thickness))

            # Classify the frame only when both pose and face were detected, because the
            # feature row is the pose values followed by the face values.
            if results.pose_landmarks and results.face_landmarks:
                # Extract pose landmarks as a flat [x, y, z, visibility, ...] list
                pose = results.pose_landmarks.landmark
                pose_row = list(np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility]
                                          for landmark in pose]).flatten())

                # Extract face landmarks in the same layout
                face = results.face_landmarks.landmark
                face_row = list(np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility]
                                          for landmark in face]).flatten())

                # Concatenate into one feature row
                row = pose_row + face_row

                if feature_columns is None:
                    feature_columns = ['{}{}'.format(axis, idx)
                                       for idx in range(1, len(row) // 4 + 1)
                                       for axis in 'xyzv']

                # Predict using inference model. The frame is named `sample` (not `X`) so the
                # training feature matrix from the earlier cells is not overwritten, and it
                # carries the training column names instead of bare integer positions.
                sample = pd.DataFrame([row], columns=feature_columns)
                pred = model_inference.predict(sample)[0]
                prob = np.max(model_inference.predict_proba(sample)[0]).round(2)
                print(pred, prob)

                # Display result: filled banner with the probability on the left and the class on the right
                cv2.rectangle(image, (0,0), (250,60), (245, 117, 16), -1)
                cv2.putText(image, "CLASS", (95,12), cv2.FONT_HERSHEY_SIMPLEX,
                           0.5, (0,0,0), 1, cv2.LINE_AA)
                cv2.putText(image, pred, (90,40), cv2.FONT_HERSHEY_SIMPLEX,
                           1, (255, 255, 255), 2, cv2.LINE_AA)
                cv2.putText(image, "PROB", (15,12), cv2.FONT_HERSHEY_SIMPLEX,
                           0.5, (0,0,0), 1, cv2.LINE_AA)
                cv2.putText(image, str(prob), (10,40), cv2.FONT_HERSHEY_SIMPLEX,
                           1, (255, 255, 255), 2, cv2.LINE_AA)

            cv2.imshow("Holistic Model Detection", image)

            # Press "q" to exit
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    cap.release()
    cv2.destroyAllWindows()

hello 0.6
hello 0.62
hello 0.63
call me 0.49
call me 0.65
call me 0.56
call me 0.5
call me 0.56
call me 0.62
call me 0.73
call me 0.63
call me 0.52
hello 0.54
hello 0.63
hello 0.73
hello 0.67
hello 0.71
hello 0.68
hello 0.78
hello 0.78
hello 0.78
hello 0.81
hello 0.74
hello 0.83
hello 0.8
hello 0.77
hello 0.81
hello 0.82
hello 0.84
hello 0.77
hello 0.81
hello 0.74
hello 0.78
hello 0.77
hello 0.83
hello 0.92
hello 0.87
hello 0.85
hello 0.85
hello 0.78
hello 0.74
hello 0.61
hello 0.61
hello 0.6
hello 0.56
hello 0.68
hello 0.63
hello 0.73
hello 0.79
hello 0.8
hello 0.85
hello 0.68
hello 0.72
hello 0.73
hello 0.75
hello 0.72
hello 0.76
hello 0.85
hello 0.83
hello 0.77
hello 0.82
hello 0.76
hello 0.75
hello 0.66
hello 0.56
hello 0.51
hello 0.54
hello 0.54
hello 0.44
okay 0.41
hello 0.46
okay 0.36
call me 0.5
call me 0.43
call me 0.55
call me 0.6
call me 0.63
call me 0.58
call me 0.66
call me 0.69
call me 0.67
call me 0.69
call me 0.75
call me 0.78
call me 0.76
call me 0.77
call me 0.85
call