In [23]:
import json
import os
from ast import literal_eval

import cv2
import numpy as np
import pandas as pd

## Global Variables and Paths

In [268]:
# --- directory layout -------------------------------------------------------
# Everything lives under ~/Desktop/QHACKS; THIS_PATH records the kernel's
# working directory at the time this cell runs.
THIS_PATH = os.getcwd()
HOME_PATH = os.path.expanduser('~')
QHACKS_PATH = os.path.join(HOME_PATH, 'Desktop', 'QHACKS')

# raw input clips
DEADLIFT_VIDEO_PATH = os.path.join(QHACKS_PATH, 'raw_deadlift_videos')

# frames extracted from the clips, one folder per class
BAD_DEADLIFT_IMAGE_PATH, GOOD_DEADLIFT_IMAGE_PATH = (
    os.path.join(QHACKS_PATH, folder)
    for folder in ('bad_deadlift_images', 'good_deadlift_images')
)

# cropped + resized frames, one folder per class
BAD_LABELED_DEADLIFT_IMAGE_PATH, GOOD_LABELED_DEADLIFT_IMAGE_PATH = (
    os.path.join(QHACKS_PATH, folder)
    for folder in ('bad_labeled_deadlift_images_resized',
                   'good_labeled_deadlift_images_resized')
)

# folder holding the annotation csv files
CSV_PATH = os.path.join(QHACKS_PATH, 'annotations')

# --- output image size (pixels) ---------------------------------------------
# resized images are square: 368x368
IMG_HEIGHT = IMG_WIDTH = 368

## Convert Videos into Images

In [3]:
# Collect every .mp4 clip in the raw-video directory (both classes live in the
# same folder). The extension is matched as a case-insensitive suffix so that
# names merely containing '.mp4' (e.g. 'clip.mp4.bak') are not picked up.
all_deadlift_videos = [
    video
    for video in os.listdir(DEADLIFT_VIDEO_PATH)
    if video.lower().endswith('.mp4')
]
all_deadlift_videos

['BadDeadlift11.mp4',
 'BadDeadlift10.mp4',
 'BadDeadlift9.mp4',
 'BadDeadlift8.mp4',
 'Deadlifts4.mp4',
 'Deadlift14.mp4',
 'Deadlift9.mp4',
 'Deadlift10.mp4',
 'Deadlifts3.mp4',
 'Deadlift8.mp4',
 'Deadlift13.mp4',
 'Deadlifts1.mp4',
 'Deadlift12.mp4',
 'Deadlift6.mp4',
 'Deadlift7.mp4',
 'Deadlift5.mp4',
 'BadDeadlift5.mp4',
 'BadDeadlift4.mp4',
 'BadDeadlift6.mp4',
 'BadDeadlift7.mp4',
 'BadDeadlift3.mp4',
 'BadDeadlift2.mp4',
 'BadDeadlift1.mp4']

In [4]:
def get_frames(video_path, location, count, sec):
    """Grab the frame at ``sec`` seconds of a video and save it as a JPEG.

    The image is written to ``<location>/<video stem>_<count>.jpg``, where the
    stem is the video's file name without its final extension.

    Parameters
    ----------
    video_path : str
        Path of the video file to read.
    location : str
        Directory the JPEG is written to (created if it does not exist).
    count : int
        Sequence number appended to the output file name.
    sec : float
        Timestamp, in seconds, of the frame to extract.

    Returns
    -------
    bool
        The success flag returned by ``VideoCapture.read()``; falsy once no
        frame could be read at the requested position.
    """
    # basename/splitext instead of splitting on '/' and '.': works with the
    # platform's separator and keeps dotted stems such as 'clip.v2' intact.
    filename = os.path.splitext(os.path.basename(video_path))[0]

    vidcap = cv2.VideoCapture(video_path)
    try:
        # CAP_PROP_POS_MSEC expects milliseconds
        vidcap.set(cv2.CAP_PROP_POS_MSEC, sec * 1000)
        hasFrames, image = vidcap.read()
    finally:
        # this function is called once per frame, so an unreleased capture
        # would leak one handle per extracted image
        vidcap.release()

    if hasFrames:
        # cv2.imwrite does not create missing directories
        os.makedirs(location, exist_ok=True)
        image_name = filename + "_" + str(count) + ".jpg"
        cv2.imwrite(os.path.join(location, image_name), image)

    return hasFrames

In [5]:
def convert_video_to_images(video_path, location, frame_rate):
    """Sample a video at a fixed time step and save each sampled frame.

    ``get_frames`` is called for timestamps ``0, frame_rate, 2*frame_rate, ...``
    until it reports that no frame could be read.

    Parameters
    ----------
    video_path : str
        Path of the video file to sample.
    location : str
        Directory the frames are written to.
    frame_rate : float
        Despite the name, this is the interval between sampled frames in
        seconds (e.g. 0.1 -> 10 images per second of video). Must be > 0.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``frame_rate`` is not a positive number (the sampling position
        would never advance).
    """
    if not frame_rate > 0:
        raise ValueError(f'frame_rate must be a positive interval in seconds, got {frame_rate!r}')

    count = 1
    while True:
        # Derive the timestamp from the frame index instead of accumulating and
        # rounding to 2 decimals: that rounding turned 0.033 into 0.03 and made
        # any step below 0.005 collapse to 0.0 (an endless loop on frame 0).
        sec = round((count - 1) * frame_rate, 6)
        if not get_frames(video_path, location, count, sec):
            break
        count += 1

    return None

In [None]:
# Extract frames from every clip; the class folder is chosen from the file name.
for video in all_deadlift_videos:
    video_path = os.path.join(DEADLIFT_VIDEO_PATH, video)
    # clips whose name contains 'Bad' go to the bad-form folder, the rest to good
    location = BAD_DEADLIFT_IMAGE_PATH if 'Bad' in video else GOOD_DEADLIFT_IMAGE_PATH

    # frame_rate is the time step between sampled frames, in seconds:
    # 0.1 s -> 10 imgs per second of video (a step of ~0.033 s would be ~30 imgs/sec)
    convert_video_to_images(
        video_path=video_path,
        location=location,
        frame_rate=0.1,
    )

## Preprocessing
- resize
- center crop images
- rename images to prevent target leakage

In [249]:
# Full paths of all annotation csv files. The extension is matched as a
# case-insensitive suffix, and the list is sorted because os.listdir returns
# entries in arbitrary order -- without sorting, indexing into this list
# (e.g. annotation_csv_paths[0]) would not be reproducible across machines.
annotation_csv_paths = sorted(
    os.path.join(CSV_PATH, csv_name)
    for csv_name in os.listdir(CSV_PATH)
    if csv_name.lower().endswith('.csv')
)

In [280]:
def _parse_region_json(text):
    """Turn one annotation cell (a JSON object stored as text) into a dict."""
    if not isinstance(text, str):
        # empty csv cells are read by pandas as NaN; treat them as "no attributes"
        return {}
    try:
        return json.loads(text)
    except ValueError:
        # fall back to the previous behaviour for cells written as Python literals
        return literal_eval(text.replace('true', '1'))


def _good_flag(value):
    """Map a raw 'good' attribute (True/1/False/0/NaN) to 0 or 1; missing -> 0."""
    return int(bool(value)) if pd.notnull(value) else 0


def get_annotations(csv_path):
    """Load a rectangle-annotation csv and return one row per annotated box.

    The csv must contain the columns ``filename``, ``region_shape_attributes``
    and ``region_attributes``; the two attribute columns hold JSON objects as
    text. Rows whose shape attributes have no ``x`` value (images without a
    box) are dropped.

    Parameters
    ----------
    csv_path : str
        Path of the annotation csv file.

    Returns
    -------
    pandas.DataFrame
        Columns ``filename``, ``good`` (int, 0 or 1) and the box corners
        ``x1``, ``y1``, ``x2``, ``y2`` (int pixels, top-left and bottom-right),
        plus any other region attribute columns present in the csv.
        ``good`` is taken from the ``class.good`` attribute; rows (or whole
        files) without that attribute get 0 -- NOTE(review): this assumes an
        annotation lacking ``class.good`` means "not good"; confirm against
        the annotation scheme.
    """
    # read csv file and keep only the columns needed for the rectangle coords
    raw_df = pd.read_csv(csv_path, sep=',')
    raw_df = raw_df[['filename', 'region_shape_attributes', 'region_attributes']]

    # parse the JSON text of each cell and expand the keys into columns;
    # nested keys are flattened with '.', e.g. {"class": {"good": ...}} -> 'class.good'
    region_shape_df = pd.json_normalize(raw_df['region_shape_attributes'].map(_parse_region_json).tolist())
    region_attr_df = pd.json_normalize(raw_df['region_attributes'].map(_parse_region_json).tolist())
    annotation_df = pd.concat([raw_df, region_shape_df, region_attr_df], axis=1)

    # get rid of blank images (no rectangle -> NaN coords); copy so the
    # assignments below do not write into a view of the unfiltered frame
    annotation_df = annotation_df[pd.notnull(annotation_df['x'])].copy()
    annotation_df = annotation_df.rename(columns={'class.good': 'good'})
    if 'good' not in annotation_df.columns:
        annotation_df['good'] = 0
    annotation_df['good'] = annotation_df['good'].map(_good_flag).astype('int')

    # convert rect (x, y, width, height) to corner coordinates
    annotation_df['x1'] = annotation_df['x'].astype('int')
    annotation_df['y1'] = annotation_df['y'].astype('int')
    annotation_df['x2'] = (annotation_df['x'] + annotation_df['width']).astype('int')
    # the bottom edge is y + height (the previous code added height to x)
    annotation_df['y2'] = (annotation_df['y'] + annotation_df['height']).astype('int')

    # drop the raw columns that are no longer needed
    annotation_df = annotation_df.drop(
        columns=['region_shape_attributes', 'region_attributes', 'name', 'x', 'y', 'width', 'height']
    )

    return annotation_df

In [283]:
def resize_image(csv_path):
    """Crop every annotated image to its box, resize it and save the result.

    For each row returned by ``get_annotations(csv_path)`` the source image is
    read, cropped to ``[y1:y2, x1:x2]``, resized to ``IMG_WIDTH`` x
    ``IMG_HEIGHT`` and written to the labeled-image folder of its class.

    Rows with a truthy ``good`` value are read from ``GOOD_DEADLIFT_IMAGE_PATH``
    and written to ``GOOD_LABELED_DEADLIFT_IMAGE_PATH`` under the same name.
    All other rows use the corresponding ``BAD_*`` folders, and the output name
    is the original name starting at its first ``'D'`` (so ``BadDeadlift1_3.jpg``
    becomes ``Deadlift1_3.jpg``).

    Images that cannot be read, cropped, resized or written are reported with
    ``print`` and skipped; processing continues with the next row.

    Parameters
    ----------
    csv_path : str
        Path of the annotation csv file.

    Returns
    -------
    None
    """
    annotation_df = get_annotations(csv_path)
    for _, row in annotation_df.iterrows():
        img_name = row['filename']

        # Decide per row: checking for a 'good' *column* is always true for a
        # frame produced by get_annotations, which made the bad branch unreachable.
        if row.get('good', 0):
            img_path = os.path.join(GOOD_DEADLIFT_IMAGE_PATH, img_name)
            new_img_path = os.path.join(GOOD_LABELED_DEADLIFT_IMAGE_PATH, img_name)
        else:
            # strip whatever precedes the first 'D'; keep the full name when
            # there is none (find() == -1 would otherwise leave only the last char)
            first_d = img_name.find('D')
            out_name = img_name[first_d:] if first_d >= 0 else img_name
            img_path = os.path.join(BAD_DEADLIFT_IMAGE_PATH, img_name)
            new_img_path = os.path.join(BAD_LABELED_DEADLIFT_IMAGE_PATH, out_name)

        # crop image to the box coordinates and then resize it
        try:
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread signals a missing/unreadable file by returning None
                raise FileNotFoundError(f'could not read {img_path}')

            # clamp to 0: a negative start index would wrap around to the far edge
            x1, y1 = max(int(row['x1']), 0), max(int(row['y1']), 0)
            x2, y2 = int(row['x2']), int(row['y2'])
            cropped_img = img[y1:y2, x1:x2]
            if cropped_img.size == 0:
                raise ValueError(f'empty crop for box ({x1}, {y1}, {x2}, {y2})')

            # cv2.resize takes the target size as (width, height)
            resized_img = cv2.resize(cropped_img, (IMG_WIDTH, IMG_HEIGHT), interpolation=cv2.INTER_AREA)

            # save new image; imwrite reports failure through its return value
            if not cv2.imwrite(new_img_path, resized_img):
                raise OSError(f'could not write {new_img_path}')

        except Exception as err:
            # Exception (not a bare except) so Ctrl-C still interrupts the loop
            print(f'Image Error: {img_name} ({err})')

    return None

In [284]:
# crop + resize the images listed in the first annotation csv only
resize_image(annotation_csv_paths[0])