# Lecture 59: Activity recognition using 3D-CNN

## Frame extraction and dataset preparation for training CNN
#### Dataset: [UCF101](https://www.crcv.ucf.edu/research/data-sets/ucf101/)

Download the dataset and unzip it in the current directory.

In [None]:
import os
import shutil
import numpy as np
import pickle

### Extract frames from all videos of the dataset

Install ffmpeg if it is not already installed.

In [None]:
import subprocess

# Decode every video under UCF-101/<class>/ into JPEG frames written to
# frames/<class>/<video name>/frame_0001.jpg, frame_0002.jpg, ...
srcPath = 'UCF-101/'
classes = os.listdir(srcPath)
for c in classes:
    # Skip stray files (anything that is not a class folder)
    if not os.path.isdir(srcPath+c):
        continue
    files = os.listdir(srcPath+c)
    for f in files:
        filename = srcPath+c+'/'+f
        # Drop the extension, whatever its length, to name the output folder
        dstPath = 'frames/'+c+'/'+os.path.splitext(f)[0]
        os.makedirs(dstPath, exist_ok=True)
        # Arguments are passed as a list (no shell), so paths containing
        # spaces or shell metacharacters are handled correctly.
        # The exit status is not checked: a video that fails to decode
        # does not abort the extraction of the remaining videos.
        subprocess.run(['ffmpeg', '-i', filename, dstPath+'/frame_%04d.jpg'])
       

### Truncating the dataset to 5 classes

In [None]:
# Keep only the first five class folders, in sorted (alphabetical) order
path = 'frames'
classes = sorted(os.listdir(path))
classes_5 = classes[0:5]
print(classes_5)

In [None]:
# Each activity in the dataset is performed by 25 individuals.
# Per class, the first 20 individuals encountered (in sorted video order)
# go to training; videos of any further individual go to testing.
trainList = []
testList = []
for c in classes_5:
    trainIndvList = []  # individuals already assigned to training for this class
    vidList = sorted(os.listdir(path+'/'+c))
    for item in vidList:
        # video name eg: v_ApplyEyeMakeup_g01_c01 => g01
        user = item.split('_')[2]
        if user in trainIndvList:
            # Individual already belongs to the training set
            trainList.append(item)
            continue
        if len(trainIndvList) >= 20:
            # Training quota of individuals is full: new individual goes to test
            testList.append(item)
            continue
        # New individual and quota not yet reached: register and add to train
        trainIndvList.append(user)
        trainList.append(item)

In [None]:
# Save the train and test video-name lists to pickle files
for pcklName, vidNames in (('trainList_5class.pckl', trainList),
                           ('testList_5class.pckl', testList)):
    with open(pcklName, 'wb') as f:
        pickle.dump(vidNames, f)

### Deleting videos in the train and test lists that have more than one dropped frame

In [None]:
# Filtering train set: delete the frame folder of every training video
# that has more than one missing frame.
# NOTE: only folders on disk are removed; trainList itself is not modified.
delList = []
for count, item in enumerate(trainList, start=1):
    print(str(count)+'/'+str(len(trainList)))
    # video name eg: v_ApplyEyeMakeup_g01_c01 => ApplyEyeMakeup
    cl = item.split('_')[1]
    srcPath = 'frames/'+cl+'/'+item
    # Folder may already be gone if this cell is re-run after a deletion
    if not os.path.isdir(srcPath):
        continue
    fNames = os.listdir(srcPath)
    # frame file name eg: frame_0012.jpg => 12
    fNums = [int(x[:-4].split('_')[-1]) for x in fNames]
    # Highest frame number minus the number of frames present gives the
    # count of missing frames. A folder with no frames at all is discarded
    # as well rather than failing on an empty list.
    if not fNums or max(fNums)-len(fNames) > 1:
        delList.append(srcPath)
for item in delList:
    shutil.rmtree(item)

In [None]:
# Filtering test set: delete the frame folder of every test video
# that has more than one missing frame.
# NOTE: only folders on disk are removed; testList itself is not modified.
testDelList = []
for count, item in enumerate(testList, start=1):
    print(str(count)+'/'+str(len(testList)))
    # video name eg: v_ApplyEyeMakeup_g21_c01 => ApplyEyeMakeup
    cl = item.split('_')[1]
    srcPath = 'frames/'+cl+'/'+item
    # Folder may already be gone if this cell is re-run after a deletion
    if not os.path.isdir(srcPath):
        continue
    fNames = os.listdir(srcPath)
    # frame file name eg: frame_0012.jpg => 12
    fNums = [int(x[:-4].split('_')[-1]) for x in fNames]
    # Highest frame number minus the number of frames present gives the
    # count of missing frames. A folder with no frames at all is discarded
    # as well rather than failing on an empty list.
    if not fNums or max(fNums)-len(fNames) > 1:
        testDelList.append(srcPath)
for item in testDelList:
    shutil.rmtree(item)

In [None]:
# Copy each class's frame folders into train_5class/<class>/ and
# test_5class/<class>/ using an 80/20 split by video count.
# NOTE: the split is positional over the sorted folder names; it does not
# consult trainList/testList.
for item in classes_5:
    print(item)
    srcPath = path+'/'+item
    # os.listdir returns entries in arbitrary order; sort so the split is
    # reproducible from run to run
    files = sorted(os.listdir(srcPath))
    trainNum = int(len(files)*0.8)  # first 80% (rounded down) go to train
    testNum = len(files)-trainNum
    for vid in files[:trainNum]:
        trainDst = 'train_5class/'+item+'/'+vid
        shutil.copytree(srcPath+'/'+vid, trainDst)

    for vid in files[trainNum:]:
        testDst = 'test_5class/'+item+'/'+vid
        shutil.copytree(srcPath+'/'+vid, testDst)
       
        