In [None]:
#Clear every user-defined name from the kernel namespace; -f skips the confirmation prompt
%reset -f

In [None]:
!pip install --upgrade pip
!pip install tensorflow #follow https://www.tensorflow.org/install/pip
!pip install numba
!pip install sklearn
!pip install mediapipe
!pip install tqdm
!pip install seaborn

!conda install graphviz
!conda install pydotplus

In [None]:
import tensorflow as tf
import sklearn
import mediapipe as mp
import cv2
import numpy as np
import seaborn as sns

from sklearn import datasets
from sklearn import model_selection
from numba import cuda
from matplotlib import pyplot as plt
from tqdm import tqdm

#Python libraries
import os
import random
from datetime import datetime
from pathlib import Path
from collections import Counter

In [None]:
#Seed every random number generator in use, to ensure that between runs the results would not differ
seed = 1
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)
tf.experimental.numpy.random.seed(seed)
#These two are on/off switches, not seeds: they must stay "1" whatever value the seed has
os.environ["TF_CUDNN_DETERMINISTIC"] = "1"
os.environ["TF_DETERMINISTIC_OPS"] = "1"
#NOTE: Python reads PYTHONHASHSEED at interpreter start-up, so this only reaches child processes
os.environ["PYTHONHASHSEED"] = str(seed)
#Run ops on a single thread so that the scheduling order cannot change the results
tf.config.threading.set_inter_op_parallelism_threads(1)
tf.config.threading.set_intra_op_parallelism_threads(1)

In [None]:
#Check we are running tensorflow in GPU mode.
#Only reset the CUDA device when one exists: numba raises on machines without a usable GPU
if cuda.is_available():
    device = cuda.get_current_device()
    device.reset()
gpus = tf.config.list_physical_devices("GPU")
if gpus:
    try:
        #Memory growth has to be configured on every GPU before the runtime is initialised
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        #Listing the logical devices initialises the runtime, so do it once, after the loop
        logical_gpus = tf.config.list_logical_devices("GPU")
        print("{} physical GPU and {} logical GPU".format(len(gpus), len(logical_gpus)))
    except RuntimeError as e:
        #Raised when the devices were already initialised before this cell ran
        print(e)

### Data Loading

In [None]:
#Load the data as files.
#Only the paths and the per-folder targets are needed, so load_content = False avoids reading
#the bytes of every video into memory.

raw_data = sklearn.datasets.load_files(os.getcwd() + r"/data", load_content = False, shuffle = False, random_state = seed)
files = raw_data["filenames"]
targets = raw_data["target"]

#One class per sub-folder of data/; the class index is the position of its name in class_names
class_names = np.array(raw_data["target_names"])
number_of_classes = len(class_names)
label_map = {label:num for num, label in enumerate(class_names)} #{"class_name": 0, ...}

### Drawing Code and Helper Functions

In [None]:
#A function to pad the data array with 0s
def pad_array(input_array, new_shape):
    """Copy a 2D array into the top-left corner of a zero-filled array of shape new_shape.

    Parameters:
        input_array: 2D array-like of shape (rows, columns).
        new_shape: (rows, columns) of the padded result.

    Returns:
        A new float array of shape new_shape; cells not covered by input_array are 0.

    Raises:
        ValueError: if input_array is not 2D or does not fit inside new_shape.
    """
    input_array = np.asarray(input_array)
    if input_array.ndim != 2:
        raise ValueError("pad_array expects a 2D array, got {} dimension(s)".format(input_array.ndim))
    first, second = input_array.shape
    #Fail with a readable message instead of numpy's generic broadcasting error
    if first > new_shape[0] or second > new_shape[1]:
        raise ValueError("input of shape {} exceeds the target shape {}".format((first, second), tuple(new_shape)))
    output_array = np.zeros(new_shape)
    output_array[0:first, 0:second] = input_array
    return output_array

In [None]:
#Normalise function to normalise the keypoint data so it's easier for the network
#Every feature column is rescaled into the range [10, 11]. The scaler is re-fitted with
#fit_transform on each video, so no statistics are shared between videos.
#NOTE(review): presumably the range avoids 0 so real values differ from the zero padding - confirm
scaler = sklearn.preprocessing.MinMaxScaler(feature_range=(10, 11))

In [None]:
#Mediapipe parameters and variables
#These four values are passed to every mp_pose.Pose(...) created in this notebook and also
#form the name of the folder written by save_frames
min_detect = 0.7 #non-default
min_track = 0.7 #non-default
segmentation = False #default
model_complex = 1 #default

#Short aliases for the MediaPipe modules used by the detection and drawing helpers
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_pose = mp.solutions.pose

In [None]:
#Add keypoint data to each frame
def mediapipe_detection(image, pose):
    """Run pose estimation on one BGR frame and draw the detected landmarks on it.

    Parameters:
        image: frame as read by OpenCV (BGR channel order).
        pose: an open mp_pose.Pose instance.

    Returns:
        (annotated BGR frame, the results object returned by pose.process).
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    #The frame is flagged read-only for the duration of the pose.process call
    rgb_frame.flags.writeable = False
    results = pose.process(rgb_frame)
    rgb_frame.flags.writeable = True

    annotated = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
    landmark_style = mp_drawing_styles.get_default_pose_landmarks_style()
    mp_drawing.draw_landmarks(annotated, results.pose_landmarks, mp_pose.POSE_CONNECTIONS,
                              landmark_drawing_spec = landmark_style)
    return annotated, results

In [None]:
#Extract keypoint data to each frame
def extract_keypoints(results):
    """Flatten the pose landmarks of one frame into a 1D array.

    Returns x, y, z, visibility for each landmark in order, or None when
    results.pose_landmarks is empty/missing (no pose found in the frame).
    """
    if not results.pose_landmarks:
        return None
    return np.array([
        value
        for landmark in results.pose_landmarks.landmark
        for value in (landmark.x, landmark.y, landmark.z, landmark.visibility)
    ])

In [None]:
#Forms a 3D array of (samples, timestep, keypoints_length) of all input samples
def keypoint_maker(video_list, shape):
    """Turn every video into a scaled, zero-padded keypoint matrix and stack them.

    Parameters:
        video_list: iterable of video file paths.
        shape: (timesteps, keypoints_length) every video is padded to.

    Returns:
        Array of shape (len(video_list), *shape); frames without a detected pose are skipped.

    Raises:
        ValueError: if no pose is detected in any frame of a video (the sample would be
            empty), or if a video yields more frames than shape allows.
    """
    video_array = []
    with mp_pose.Pose(min_detection_confidence = min_detect, min_tracking_confidence = min_track,
                      model_complexity = model_complex, enable_segmentation = segmentation) as pose:

        for video in tqdm(video_list):
            cap = cv2.VideoCapture(video)
            keypoints_list = []
            try:
                while cap.isOpened():
                    ret, frame = cap.read()
                    if not ret:
                        break
                    _, results = mediapipe_detection(frame, pose)
                    keypoints = extract_keypoints(results)
                    if keypoints is not None:
                        keypoints_list.append(keypoints)
            finally:
                #Release the capture even when detection fails part-way through a video
                cap.release()

            if not keypoints_list:
                #np.stack would fail here with a message that does not say which video is at fault
                raise ValueError("No pose landmarks were detected in any frame of {}".format(video))

            sample = np.stack(keypoints_list, axis = 0)
            sample = scaler.fit_transform(sample) #the scaler is re-fitted for every video
            sample = pad_array(sample, shape) #pads to largest video
            #Turns (frame_size, keypoints_length) into (1, frame_size, keypoints_length)
            video_array.append(np.expand_dims(sample, axis=0))

    if not video_array:
        #np.vstack rejects an empty list; return an empty tensor with the right trailing shape
        return np.zeros((0, *shape))
    return np.vstack(video_array)

In [None]:
#Helper function to see all the frames as images in a folder to check them
def save_frames(video_list):
    """Write every frame of every video, with the pose drawn on it, as a JPEG.

    The images go to <cwd>/frames_<min_detect>_<min_track>_<segmentation>_<model_complex>/
    and are named <video stem>_<frame number>.jpeg.

    Parameters:
        video_list: iterable of video file paths.
    """
    frame_folder = "frames_{}_{}_{}_{}".format(min_detect, min_track, segmentation, model_complex)
    frame_path = os.path.join(os.getcwd(), frame_folder)
    #exist_ok makes re-running the cell safe without a separate existence check
    os.makedirs(frame_path, exist_ok = True)

    with mp_pose.Pose(min_detection_confidence = min_detect, min_tracking_confidence = min_track,
                      model_complexity = model_complex, enable_segmentation = segmentation) as pose:

        for video in tqdm(video_list):
            cap = cv2.VideoCapture(video)
            count = 0
            try:
                while cap.isOpened():
                    ret, frame = cap.read()
                    if not ret:
                        break
                    #Only the annotated image is needed; the keypoints are not extracted here
                    image, _ = mediapipe_detection(frame, pose)
                    file_name = os.path.join(frame_path, "{}_{}.jpeg".format(Path(video).stem, count))
                    cv2.imwrite(file_name, image)
                    count += 1
            finally:
                #Release the capture even when a frame fails to process
                cap.release()

In [None]:
#Gives information on the videos such as FPS and the video with most frames.
def get_video_frame_data(video_list):
    """Read the frame count and FPS that OpenCV reports for every video.

    Parameters:
        video_list: iterable of video file paths.

    Returns:
        (frame_array, fps_array): two lists in the order of video_list. Both are empty
        when video_list is empty.
    """
    frame_array = []
    fps_array = []
    #Only container metadata is read, so no pose model needs to be created here
    for video in tqdm(video_list):
        cap = cv2.VideoCapture(video)
        try:
            frame_array.append(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)))
            fps_array.append(cap.get(cv2.CAP_PROP_FPS))
        finally:
            cap.release()
    return frame_array, fps_array

### Data Splitting/Processing

In [None]:
#Fractions of the dataset used for the training, validation and test sets
train_ratio = 0.80
validation_ratio = 0.10
test_ratio = 0.10

In [None]:
#First splits off the training set; the remainder (1 - train_ratio) is held in test_files for now.
#Both splits are stratified so that every set keeps the class proportions of the full dataset.
train_files, test_files, train_targets, test_targets = model_selection.train_test_split(files, 
                                                                                        targets, 
                                                                                        test_size=1 - train_ratio, 
                                                                                        random_state = seed, 
                                                                                        stratify = targets)
#Then splits that remainder into validation and test; test_size is the test share of the remainder.
valid_files, test_files, valid_targets, test_targets = model_selection.train_test_split(test_files,
                                                                                        test_targets,
                                                                                        test_size = test_ratio / (test_ratio + validation_ratio),
                                                                                        random_state = seed,
                                                                                        stratify = test_targets)

In [None]:
#Useful information: longest video (in frames) and the range of frame rates in the dataset
video_data = get_video_frame_data(files)
fps_array = video_data[1]
frame_limit = max(video_data[0])
fps_min, fps_max = min(fps_array), max(fps_array)

In [None]:
#Takes a long time (writes every annotated frame of every video to disk); comment this call
#out while iterating and re-enable it for the final run
save_frames(files)

In [None]:
#Useful information printed, one value per line: frame limit, lowest FPS, highest FPS
print(frame_limit, fps_min, fps_max, sep="\n")

In [None]:
#Ensure each set has an equally split number of classes, otherwise network is potentially biased to certain classes.
#One line per set (train, validation, test); each line holds the number of samples per class index.
print(*(np.bincount(split_targets) for split_targets in (train_targets, valid_targets, test_targets)), sep="\n")

In [None]:
#Set the shape and form the train, validation, and test sets of data as keypoint arrays

#132 values per frame: extract_keypoints stores x, y, z and visibility for every landmark
#NOTE(review): presumably 33 pose landmarks * 4 values - confirm against the MediaPipe model in use
shape = list((len(files), frame_limit, 132)) #number of videos, number of frames, number of keypoints
#Every video is padded to shape[1:] = [frame_limit, 132] so the three tensors share their trailing dimensions
x_train = keypoint_maker(train_files, shape[1:])
x_valid = keypoint_maker(valid_files, shape[1:])
x_test = keypoint_maker(test_files, shape[1:])
#One-hot encode the integer class indices to match the softmax output / categorical cross-entropy loss
y_train = tf.keras.utils.to_categorical(train_targets, num_classes=number_of_classes)
y_valid = tf.keras.utils.to_categorical(valid_targets, num_classes=number_of_classes)
y_test = tf.keras.utils.to_categorical(test_targets, num_classes=number_of_classes)

In [None]:
#Report the shape of every tensor, one "caption value" line each
print(*("{} {}".format(caption, value) for caption, value in (
    ("Shape of all data:", shape),
    ("Shape of training data:", x_train.shape),
    ("Shape of training labels:", y_train.shape),
    ("Shape of validation data:", x_valid.shape),
    ("Shape of validation labels:", y_valid.shape),
    ("Shape of testing data:", x_test.shape),
    ("Shape of testing labels:", y_test.shape),
)), sep="\n")

### Model Layers & Parameters

In [None]:
#Hyper-parameters shared by the three Conv1D -> LSTM -> BatchNormalization stages
strides = 1
padding = "same"
activation_conv = "tanh"
activation_lstm = "tanh"
filters_conv = [128, 64, 32]
filters_lstm = [128, 64, 32]
kernel_size = [5, 3, 1]
return_sequences = True

#Layer list: masking of the zero padding, three stages built from the lists above (created in
#the same order as writing them out by hand), then a softmax classifier over the flattened sequence
MODEL = [
    tf.keras.layers.Masking(mask_value = 0., input_shape = shape[1:]),
    *(
        layer
        for conv_filters, conv_kernel, lstm_units in zip(filters_conv, kernel_size, filters_lstm)
        for layer in (
            tf.keras.layers.Conv1D(conv_filters, kernel_size = conv_kernel, strides = strides,
                                   padding = padding, activation = activation_conv),
            tf.keras.layers.LSTM(lstm_units, activation = activation_lstm,
                                 return_sequences = return_sequences),
            tf.keras.layers.BatchNormalization(),
        )
    ),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(number_of_classes, activation = "softmax"),
]

In [None]:
#Chain the layers of MODEL into a sequential network and print its layer/parameter table
model = tf.keras.Sequential(MODEL)
model.summary()

In [None]:
#Save a diagram of the network, with tensor shapes and activations, to model.png
#NOTE(review): presumably this is what the graphviz / pydotplus installs at the top are for - confirm
tf.keras.utils.plot_model(model,to_file = "model.png", show_shapes = True, show_layer_activations = True)

In [None]:
#Full-batch training: the batch holds every training sample, so there is one update per epoch
batch_size = x_train.shape[0]
epochs = 2000
learning_rate = 0.00001
optimizer = tf.keras.optimizers.Adam(learning_rate = learning_rate)
#Categorical cross-entropy matches the one-hot labels produced by to_categorical
loss = tf.keras.losses.CategoricalCrossentropy()
metrics = ["accuracy"]
model_file = "model.hdf5"

model.compile(optimizer = optimizer, loss = loss, metrics = metrics)

#model_file is only overwritten when val_loss improves, so it holds the best epoch seen so far
checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath = model_file, monitor = "val_loss", save_best_only = True)

callbacks = [checkpoint]

#history keeps the per-epoch loss/accuracy curves that are plotted further down
history=model.fit(x_train, y_train, batch_size = batch_size, epochs = epochs, validation_data = (x_valid, y_valid),
                  callbacks = callbacks, verbose = 1)

### Evaluating Model

In [None]:
#Restore the checkpointed weights, then score them on the held-out test set
model.load_weights(model_file)
loss, accuracy = model.evaluate(x_test, y_test, batch_size = batch_size, verbose = 1)
print("Accuracy on test data: {:.2f}%".format(100 * accuracy))

### Loss and Accuracy 

In [None]:
#Per-epoch training curves recorded by model.fit; epochs is the 1-based x axis for the plots
loss, val_loss = history.history["loss"], history.history["val_loss"]
acc, val_acc = history.history["accuracy"], history.history["val_accuracy"]
epochs = range(1, len(loss) + 1)

In [None]:
#Training (blue) against validation (yellow) accuracy for every epoch
plt.figure(figsize = (20, 16))
plt.plot(epochs, acc, "b", label = "Training accuracy")
plt.plot(epochs, val_acc, "y", label = "Validation accuracy")
plt.gca().set(title = "Training and validation accuracy", xlabel = "Epochs", ylabel = "Accuracy")
plt.legend()
plt.show()

In [None]:
#Training (blue) against validation (yellow) loss for every epoch
plt.figure(figsize = (20, 16))
plt.plot(epochs, loss, "b", label = "Training loss")
plt.plot(epochs, val_loss, "y", label = "Validation loss")
plt.gca().set(title = "Training and validation loss", xlabel = "Epochs", ylabel = "Loss")
plt.legend()
plt.show()

In [None]:
#For every curve: its name, its best value and the 0-based index of the epoch where it occurred
print("Loss", np.min(loss), np.argmin(loss), sep="\n")
print("Validation Loss", np.min(val_loss), np.argmin(val_loss), sep="\n")
print("Accuracy", np.max(acc), np.argmax(acc), sep="\n")
print("Validation Accuracy", np.max(val_acc), np.argmax(val_acc), sep="\n")

In [None]:
#True class indices and the highest-probability class predicted for every test sample
labels = [*test_targets]
predictions = list(model.predict(x_test).argmax(axis = 1))
confusion_matrix = tf.math.confusion_matrix(labels, predictions)

In [None]:
#True labels on the first line, predicted labels on the second
print(labels, predictions, sep="\n")

In [None]:
#Rows are the true classes and columns the predicted classes (tf.math.confusion_matrix convention)
sns.heatmap(confusion_matrix, annot=True, cmap="Purples", xticklabels = class_names, yticklabels = class_names).set(title = "Confusion Matrix of Overall Accuracy")

### Rep Counter

In [None]:
#Debug check: shape was built with list(...), so shape[1:] used by Rep_Counter is a list slice
print(type(shape))

In [None]:
def Rep_Counter(file_array, index, output_dir, log_output_filename):
    """Classify one video frame by frame, count the detected moves and save the evidence.

    For every frame in which a pose is found, the keypoints collected so far are scaled,
    padded and passed to the global model. A move is logged when the prediction is stable
    and confident and differs from the previously logged move. Three artefacts are written
    to output_dir: the annotated video, an appended text log and a probability graph.

    Uses the notebook globals model, scaler, shape, class_names, label_map,
    number_of_classes, seed and the MediaPipe settings.

    Parameters:
        file_array: sequence of video file paths.
        index: position in file_array of the video to process.
        output_dir: directory prefix (ending in a path separator) for all outputs.
        log_output_filename: name of the log file, appended to inside output_dir.

    Returns:
        Index of the class that was predicted for the largest number of frames.
    """
    filename = Path(file_array[index])
    default_font = cv2.FONT_HERSHEY_SIMPLEX

    #One reproducible random BGR colour per class for the probability bars
    class_colours = []
    random.seed(seed)
    for _ in class_names:
        B = random.randint(0, 255)
        G = random.randint(0, 255)
        R = random.randint(0, 255)
        class_colours.append((B, G, R))

    sequence = [] #keypoints of the frames in which a pose was found
    logger = [] #sequential log, gives exact order
    predictions = [] #predicted class index of every readable frame
    graph_data = [[] for _ in range(number_of_classes)] #probability of each class over time

    threshold = 0.99 #minimum probability for a prediction to be logged
    stable_frames = 10 #how many of the latest predictions have to agree
    max_frames = shape[1] #longest sequence the network input can hold
    frame_index = 0 #0-based position of the frame being processed

    cap = cv2.VideoCapture(file_array[index])
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    #Read once, up front, so the value exists even if no frame can be read
    number_of_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fourcc = cv2.VideoWriter_fourcc(*"XVID")
    out = cv2.VideoWriter(output_dir + filename.name, fourcc, fps, (width, height))

    try:
        with mp_pose.Pose(min_detection_confidence=min_detect, min_tracking_confidence=min_track,
                          model_complexity = model_complex, enable_segmentation=segmentation) as pose:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                #Start time of this frame; guarded because OpenCV reports 0 FPS for unreadable files
                exact_time_in_seconds = round(frame_index / fps, 2) if fps else 0.0
                frame_index += 1

                image, results = mediapipe_detection(frame, pose)
                keypoints = extract_keypoints(results)
                if keypoints is None:
                    continue

                sequence.append(keypoints)
                if len(sequence) > max_frames:
                    #Keep a sliding window of the newest frames so padding can never overflow
                    sequence.pop(0)

                sequence_np = scaler.fit_transform(np.array(sequence))
                sequence_np = pad_array(sequence_np, shape[1:])
                result = model.predict(np.expand_dims(sequence_np, axis=0))[0]

                for i in range(number_of_classes):
                    graph_data[i].append(result[i])

                predicted_class_index = np.argmax(result) #0, 1, 2
                predicted_class_percentage = result[predicted_class_index] #e.g. 0.58
                predicted_class_percentage_rounded = round(predicted_class_percentage, 2)
                predicted_class_name = class_names[predicted_class_index] #catpass etc
                predictions.append(predicted_class_index)

                #Stable when all of the latest predictions (up to stable_frames of them) are the same class
                is_stable = len(set(predictions[-stable_frames:])) == 1
                is_new_move = not logger or predicted_class_name != logger[-1][0]
                if is_stable and predicted_class_percentage > threshold and is_new_move:
                    logger.append([predicted_class_name, exact_time_in_seconds, predicted_class_percentage_rounded])

                #This does the probability boxes for each class
                for class_index, probability in enumerate(result):
                    start_point = (0, 60 + class_index * 40)
                    end_point = (int(probability * 100), 90 + class_index * 40)
                    cv2.rectangle(image, start_point, end_point, class_colours[class_index], -1)

                    text = "{}:{}%".format(class_names[class_index], int(probability*100))
                    org = (0, 85 + class_index * 40)
                    cv2.putText(image, text, org, default_font, 1, (255,255,255), 2, cv2.LINE_AA)

                #Running log of the detected moves along the top of the frame
                cv2.putText(image, "{}".format(logger), (3, 30), default_font, 0.6, (255,255,255), 2, cv2.LINE_AA)
                cv2.imshow("Rep-Tracker", image)
                out.write(image)
                if cv2.waitKey(10) & 0xFF == ord("q"):
                    break
    finally:
        #Always free the capture, the writer and the preview window, even after an error
        cap.release()
        out.release()
        cv2.destroyAllWindows()

    #Number of logged moves per class; computed once here so it exists even for unreadable videos
    move_count_array = [0] * len(class_names)
    for log_value in logger: #triple e.g. catpass, 0.01 seconds, 0.98
        move_count_array[label_map[log_value[0]]] += 1

    #Number of frames predicted as each class
    count_array = [predictions.count(i) for i in range(len(class_names))]

    #The context manager closes the log file once this video's report is written
    with open(output_dir + log_output_filename, "a", encoding="utf-8") as log:
        print("#############################################################", file=log)
        print("Video {}, {}".format(index+1,filename.name), file=log)
        print("###Repetiton Counter###", file=log)
        for i in range(len(class_names)):
            print("{}={}".format(class_names[i], move_count_array[i]), file=log)

        print("###Frames###", file=log)
        for i in range(len(class_names)):
            print("{}={}".format(class_names[i], count_array[i]), file=log)

        print("###Logger###", file=log)
        print(str(logger), file=log)

        print("Readable Frames: {}/{}".format(len(graph_data[0]),number_of_frames), file=log)
        print("Estimate: \"{}\" with {}/{} frames.".format(class_names[np.argmax(count_array)], np.max(count_array),number_of_frames), file=log)

    figure = plt.figure(figsize=(20, 16))
    for i in range(number_of_classes):
        plt.plot(graph_data[i], label=class_names[i])

    plt.title("Graph for {}".format(filename.name))
    plt.xlabel("Frame Number")
    plt.ylabel("Accuracy")
    plt.legend()
    plt.savefig(output_dir+filename.stem)
    #Close the figure: one is created per video and they would otherwise pile up in memory
    plt.close(figure)
    return np.argmax(count_array)

In [None]:
def rep_counter_accuracy(file_list, log_output_filename):
    """Run Rep_Counter on every video in file_list and collect the predicted class indices.

    All outputs are written to <cwd>/results/, which is created when missing.

    Parameters:
        file_list: sequence of video file paths to process.
        log_output_filename: name of the log file shared by all videos.

    Returns:
        List with one predicted class index per video, in the order of file_list.
    """
    output = os.getcwd() + r"/results/"
    os.makedirs(output, exist_ok=True)

    #Process the list that was passed in, not the global test_files
    return [Rep_Counter(file_list, index, output, log_output_filename)
            for index in range(len(file_list))]

In [None]:
def similarity(labels, outputs):
    """Return the percentage of positions at which labels and outputs hold the same value.

    Parameters:
        labels: sequence of expected values.
        outputs: sequence of predicted values, at least as long as labels.

    Returns:
        Percentage (0-100) of matching positions; 0.0 when labels is empty.

    Raises:
        IndexError: if outputs is shorter than labels.
    """
    total = len(labels)
    if total == 0:
        #Nothing to compare; avoids dividing by zero
        return 0.0
    if len(outputs) < total:
        raise IndexError("outputs has {} items but labels has {}".format(len(outputs), total))
    matches = sum(1 for expected, predicted in zip(labels, outputs) if expected == predicted)
    return (matches/total)*100

### Rep Counter Testing

In [None]:
#Run the rep counter on every test video; the log file is named after the current date and time
labels = [*test_targets]
accuracy_array = rep_counter_accuracy(test_files, datetime.now().strftime("%d-%m-%Y-%H-%M-%S.txt"))

In [None]:
#Percentage of test videos whose most frequently predicted class equals the true class
similarity_percentage = similarity(labels, accuracy_array)

In [None]:
#True labels, per-video rep counter predictions and their agreement in percent, one per line
print(labels, accuracy_array, similarity_percentage, sep="\n")

In [None]:
#Confusion matrix of the per-video rep counter estimates (rows: true class, columns: predicted class)
#Note: this replaces the confusion_matrix computed for the whole-sequence predictions above
confusion_matrix = tf.math.confusion_matrix(labels, accuracy_array)
sns.heatmap(confusion_matrix, annot=True, cmap="Blues", xticklabels=class_names, yticklabels=class_names).set(title="Confusion Matrix of Frame Prediction Accuracy")