## Build train/test list
1. Set the ratio of clips used for training (you can change this).
2. Go to the rgb-images folder and collect the folder names (clip_ref) for training and testing into lists.
3. Save the lists to txt files.

### How to run this
1. Run video_preprocessing first, so that all frames and labels are already extracted into folders.
2. Create a folder called splitfiles under data.
3. Run the cells below.

In [80]:
import os
import pdb

In [81]:
def removeDS(array):
    '''
    Strip the macOS '.DS_Store' entry from a directory listing.

    The list is modified in place (only the first matching entry is removed)
    and the very same list object is handed back to the caller.
    '''
    ds_store = '.DS_Store'

    # Nothing to clean up: return the listing untouched.
    if ds_store not in array:
        return array

    array.remove(ds_store)
    return array

In [82]:
def get_Nclasses(path, N):
    '''
    Pick the N classes that have the most clips.

    Every sub-directory of `path` is treated as one class, and every entry
    inside it (except '.DS_Store') is counted as one clip.

    path: directory holding one sub-directory per class; a trailing
          separator is accepted but not required
    N:    number of classes to keep

    Returns a dict {class name: clip count} with at most N items, ordered
    from the largest class to the smallest. Classes with the same clip count
    are ordered alphabetically.
    '''
    clip_nums = {}

    # os.listdir returns entries in arbitrary order; sorting first makes the
    # tie-break between equally sized classes identical on every machine.
    for label in sorted(os.listdir(path)):
        label_path = os.path.join(path, label)

        # Stray files next to the class folders ('.DS_Store', notes, ...)
        # are not classes and cannot be listed.
        if label == '.DS_Store' or not os.path.isdir(label_path):
            continue

        clips = [clip for clip in os.listdir(label_path) if clip != '.DS_Store']
        clip_nums[label] = len(clips)

    # sorted() is stable (also with reverse=True), so the alphabetical order
    # established above is kept among classes with equal counts.
    sortedClasses = sorted(clip_nums.items(), key=lambda item: item[1], reverse=True)

    return dict(sortedClasses[:N])

In [83]:
def _list_entries(directory):
    '''Return the entries of `directory` in sorted order, without '.DS_Store'.'''
    # os.listdir gives no ordering guarantee; sorting keeps the train/test
    # split and the frame order reproducible across runs and machines.
    return sorted(entry for entry in os.listdir(directory) if entry != '.DS_Store')


def _write_lines(file_path, lines):
    '''Write `lines` to `file_path`, newline-separated, without a trailing newline.'''
    text = '\n'.join(lines)
    # The context manager closes the file even if the write raises.
    with open(file_path, 'w') as out:
        out.write(text)


def build_lists(path, list_root, Actions, test_train_split):
    '''
    Build the training and testing lists and save them as text files.

    For every class in `Actions`, the clips found under `path`/<class> are
    sorted by name; the first int(num_clips * test_train_split) of them go to
    the training lists and the remaining ones to the testing lists.

    path:             directory holding one sub-directory per class, each
                      containing one sub-directory per clip
    list_root:        existing directory where the list files are written
    Actions:          iterable of class names (a dict keyed by class name,
                      such as the one returned by get_Nclasses, also works)
    test_train_split: fraction of each class's clips used for TRAINING
                      (despite the name), e.g. 0.7

    Files written under `list_root` (one entry per line, '/'-separated, no
    trailing newline):
      trainlist.txt   / testlist.txt    -> class/clip/frame-file
      trainlist01.txt / testlist01.txt  -> class/clip
    '''
    train_refs_full = []
    test_refs_full = []

    train_frames_full = []
    test_frames_full = []

    for label in Actions:
        label_path = os.path.join(path, label)
        clips = _list_entries(label_path)

        train_num = int(len(clips) * test_train_split)

        # Both halves are processed the same way; only the destination lists differ.
        splits = (
            (clips[:train_num], train_refs_full, train_frames_full),
            (clips[train_num:], test_refs_full, test_frames_full),
        )
        for clip_names, refs, frames in splits:
            for clip_name in clip_names:
                # The written references always use '/', independent of the OS.
                clip_ref = label + '/' + clip_name
                refs.append(clip_ref)

                for frame in _list_entries(os.path.join(label_path, clip_name)):
                    frames.append(clip_ref + '/' + frame)

    _write_lines(os.path.join(list_root, 'trainlist.txt'), train_frames_full)
    _write_lines(os.path.join(list_root, 'testlist.txt'), test_frames_full)
    _write_lines(os.path.join(list_root, 'trainlist01.txt'), train_refs_full)
    _write_lines(os.path.join(list_root, 'testlist01.txt'), test_refs_full)
        

In [85]:
# Directory with one sub-folder per class; read by get_Nclasses and build_lists.
# Keep the trailing '/' on both directories.
path = 'ucf24/labels/'
# Existing folder in which build_lists writes the four list files.
list_root = 'ucf24/splitfiles/'
# Number of classes to keep, chosen by clip count (largest classes first).
N = 24
# Fraction of each class's clips that goes to the training lists;
# the remaining clips go to the testing lists.
test_train_split = 0.7
# Dict of the selected classes: {class name: number of clips}.
Actions = get_Nclasses(path, N)
# Writes trainlist.txt, testlist.txt, trainlist01.txt and testlist01.txt under list_root.
build_lists(path, list_root, Actions, test_train_split)

### datasetName.names file
These files are stored in 'YOWO/data/', one per dataset; each contains the names of all the classes used for training on that dataset.

In [55]:
def build_class_names(NClasses_names, path, fileName):
    '''
    Write the class names to a .names file, one name per line.

    NClasses_names: iterable of class-name strings
    path:           existing directory to write into; a trailing separator
                    is accepted but not required
    fileName:       name of the file to create inside `path`

    An existing file is overwritten. The output has no trailing newline.
    '''
    # Build the content first so a bad input cannot leave a truncated file behind.
    names_str = '\n'.join(NClasses_names)
    names_path = os.path.join(path, fileName)

    # The context manager closes the file even if the write raises.
    with open(names_path, 'w') as names_file:
        names_file.write(names_str)

# path: folder in which the datasetName.names file is saved
# NClasses_names: list of class names to write, one per line
path = '../YOWO/data/'
fileName = 'restaurant.names'
# NClasses_names was never assigned anywhere in this notebook, so the call
# below raised NameError. Use the names of the classes selected by
# get_Nclasses (the keys of Actions, largest class first).
NClasses_names = list(Actions)
build_class_names(NClasses_names, path, fileName)