## Setup

In [24]:
import cv2
import numpy as np
import os
import pandas as pd
from PIL import Image
from tqdm import tqdm

In [2]:
## Data paths
# Folder holding the raw images, expressed relative to the notebook's working
# directory: <parent dir>/data/raw/images.
images_path = os.path.join(os.pardir, 'data', 'raw', 'images')

In [3]:
## configs
# Class name -> integer id (the value stored in the dataset's 'label' column).
labels_to_index = dict(not_fencing=0, epee=1, foil=2, sabre=3)
# Reverse lookup: integer id -> class name.
index_to_labels = dict((index, name) for name, index in labels_to_index.items())

# Target image shape handed to read_image_into_array through its `size` argument.
IMAGE_SIZE = (128, 128, 3)

## Functions

In [26]:
def read_image_into_array(image_path, size=(64,64,3), norm=255):
    """Open an image file, resize it to ``size`` and return a normalized array.

    Parameters
    ----------
    image_path : str or path-like
        Path of the image file, passed to ``PIL.Image.open``.
    size : tuple or None, optional
        Target shape, read as ``(height, width)`` or
        ``(height, width, channels)``. When ``channels`` is 1, 3 or 4 the
        image is converted to grayscale ('L'), 'RGB' or 'RGBA' respectively,
        so every returned array has the same shape regardless of the
        source file's mode. Pass ``None`` to keep the image's native size
        and mode.
    norm : int or float, optional
        Divisor applied to every pixel value (255 maps 8-bit data to [0, 1]).

    Returns
    -------
    numpy.ndarray
        ``float32`` array of pixel values divided by ``norm``.
    """
    channels_to_mode = {1: 'L', 3: 'RGB', 4: 'RGBA'}
    wants_channels = size is not None and len(size) > 2

    # The context manager closes the underlying file handle; the pixel data
    # is copied into the numpy array before the block exits.
    with Image.open(image_path) as source:
        image = source
        if size is not None:
            target_height, target_width = size[0], size[1]

            mode = channels_to_mode.get(size[2]) if wants_channels else None
            if mode is not None and image.mode != mode:
                image = image.convert(mode)

            # PIL expresses sizes as (width, height), the reverse of the
            # (rows, cols) order used by numpy arrays.
            if image.size != (target_width, target_height):
                image = image.resize((target_width, target_height))

        pixels = np.array(image, dtype=np.float32)

    # Single-channel PIL images convert to 2-D arrays; add the trailing axis
    # so the result matches a requested (height, width, 1) shape.
    if wants_channels and size[2] == 1 and pixels.ndim == 2:
        pixels = pixels[:, :, np.newaxis]

    return pixels / norm

In [None]:
def displayVideo(clip_path, frame_rate=40):
    """Play a video clip in a window, looping until the user presses 'q'.

    Parameters
    ----------
    clip_path : str
        Path of the clip, passed to ``cv2.VideoCapture``.
    frame_rate : int or float, optional
        Playback speed in frames per second. The per-frame delay is
        ``int(1000 / frame_rate)`` milliseconds, clamped to at least 1 ms
        because ``cv2.waitKey(0)`` would block until a key is pressed.

    Raises
    ------
    IOError
        If a freshly opened capture yields no frame at all (bad path,
        unsupported codec, empty file). Without this check the loop would
        reopen the clip forever.
    """
    delay_ms = max(1, int(1000 / frame_rate))

    clip = cv2.VideoCapture(clip_path)
    frames_this_pass = 0
    try:
        while True:
            # Capture frame-by-frame
            ret, frame = clip.read()

            if ret:
                frames_this_pass += 1
                # Display the resulting frame
                cv2.imshow('Frame', frame)

                # Press q on keyboard to exit
                if cv2.waitKey(delay_ms) & 0xFF == ord('q'):
                    break
            else:
                if frames_this_pass == 0:
                    raise IOError('Could not read any frame from {!r}'.format(clip_path))

                # End of clip: release the exhausted capture before opening
                # a new one so handles do not accumulate on every loop.
                clip.release()
                clip = cv2.VideoCapture(clip_path)
                frames_this_pass = 0
    finally:
        # Always free the capture and close the window, including on errors
        # or a keyboard interrupt.
        clip.release()
        cv2.destroyAllWindows()

In [None]:
def takeFrames(clip, num_frames=10, verbose=False):
    """Extract up to ``num_frames`` regularly spaced frames from a clip.

    The clip is read sequentially from its current position. With
    ``sample_rate = frame_count // (num_frames + 2)``, every frame whose
    position is a positive multiple of ``sample_rate`` is kept, so the very
    first frame is never sampled.

    Parameters
    ----------
    clip : cv2.VideoCapture
        Opened capture object (anything exposing ``get`` and ``read`` with
        the same contract works).
    num_frames : int, optional
        Maximum number of frames to return.
    verbose : bool, optional
        When True, print a line for every frame read and every frame kept.
        Off by default to avoid flooding notebook output.

    Returns
    -------
    list
        The sampled frames, in reading order. May hold fewer than
        ``num_frames`` items when the clip is short or ends early.
    """
    frame_list = []
    if num_frames <= 0:
        return frame_list

    clip_size = int(clip.get(cv2.CAP_PROP_FRAME_COUNT))
    # Clamp to 1: a clip shorter than num_frames + 2 (or one whose frame
    # count is reported as 0) would otherwise give a sample rate of 0 and a
    # ZeroDivisionError in the modulo below.
    sample_rate = max(1, clip_size // (num_frames + 2))

    count = 0
    while len(frame_list) < num_frames:
        if verbose:
            print('reading frame:', count)
        ret, frame = clip.read()
        if not ret:
            # End of stream (or read error): return what was collected.
            break

        if count > 0 and count % sample_rate == 0:
            frame_list.append(frame)
            if verbose:
                print('saved! list size now is ', len(frame_list))
        count += 1

    return frame_list

## Read data

In [25]:
# Build the dataset index: one row per image file, holding its path and the
# integer label of the class folder it was found in.
per_label_frames = []

# sorted() makes the row order reproducible; os.listdir order is arbitrary.
for local_path in sorted(os.listdir(images_path)):
    full_path = os.path.join(images_path, local_path)
    if not os.path.isdir(full_path):
        continue

    list_of_file_paths = [os.path.join(full_path, f) for f in sorted(os.listdir(full_path))]
    if not list_of_file_paths:
        # An empty class folder contributes no rows.
        continue

    local_df = pd.DataFrame({'file_path': list_of_file_paths})
    # Raises KeyError for a folder name missing from labels_to_index.
    local_df['label'] = labels_to_index[local_path]
    per_label_frames.append(local_df)

# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every call; concatenate once instead. ignore_index gives each row a unique
# index rather than restarting at 0 for every class folder.
if per_label_frames:
    full_dataset = pd.concat(per_label_frames, ignore_index=True)
else:
    full_dataset = pd.DataFrame(columns=['file_path', 'label'])

In [27]:
# Register tqdm's `progress_apply` on pandas objects, then load every image
# listed in 'file_path' into a new 'image' column while showing a progress bar.
tqdm.pandas(desc='opening images')
image_paths = full_dataset['file_path']
full_dataset['image'] = image_paths.progress_apply(
    lambda path: read_image_into_array(path, size=IMAGE_SIZE)
)

opening images: 100%|████████████████████████████████████████████████████████████████| 244/244 [00:09<00:00, 24.95it/s]


In [28]:
# Preview the first rows: file path, integer label and the loaded image array.
full_dataset.head()

Unnamed: 0,file_path,label,image
0,..\data\raw\images\epee\000000000000.jpg,1,"[[[0.5372549, 0.5019608, 0.33333334], [0.52549..."
1,..\data\raw\images\epee\000000000001.jpg,1,"[[[0.0, 0.003921569, 0.011764706], [0.0, 0.003..."
2,..\data\raw\images\epee\000000000002.jpg,1,"[[[0.003921569, 0.003921569, 0.003921569], [0...."
3,..\data\raw\images\epee\000000000003.jpg,1,"[[[0.0, 0.19607843, 0.39215687], [0.003921569,..."
4,..\data\raw\images\epee\000000000004.jpg,1,"[[[0.019607844, 0.019607844, 0.019607844], [0...."
