### Get instructions

In [18]:
# Integer command codes, keyed by the direction name each one stands for.
instruction_dict = {'up': 0, 'right': 1, 'down': 2, 'left': 3}
# Inverse table: command code -> direction name.
reverse_instruction_dict = {code: name for name, code in instruction_dict.items()}


def decode(sequence):
    """Translate an iterable of command codes into their direction names.

    Raises KeyError if an element is not a key of reverse_instruction_dict.
    """
    return list(map(reverse_instruction_dict.__getitem__, sequence))

In [24]:
import cv2
def get_instruction_btw_frame(prev_frame, cur_frame):
    """Infer the one-cell move that turns ``prev_frame`` into ``cur_frame``.

    Cells where any channel differs by at least 20 between the two frames
    are collected. The first two of them (in sorted order) are taken as the
    endpoints of the move: the one that is *not* close to white
    (255, 255, 255) in ``prev_frame`` is the start, the other is the target.

    Args:
        prev_frame: numpy array of the earlier frame; the first two axes are
            used as the cell coordinates (assumed rows, cols, channels --
            TODO confirm against crop_and_resize).
        cur_frame: numpy array of the later frame, same shape.

    Returns:
        0 (up), 1 (right), 2 (down) or 3 (left) -- the codes used by
        ``instruction_dict`` -- or ``None`` when the two cells are not
        direct horizontal/vertical neighbours.

    Raises:
        ValueError: if fewer than two cells changed between the frames
            (e.g. duplicate frames), so no move can be inferred.
    """
    change_threshold = 20   # minimum per-channel change for a cell to count
    white_tolerance = 20    # maximum summed distance from pure white

    delta = np.abs(prev_frame.astype(np.int32) - cur_frame.astype(np.int32))
    changed = np.argwhere(delta >= change_threshold)

    # argwhere yields one index row per changed *channel*; reduce them to
    # distinct cell coordinates. Sorting makes the choice of the two cells
    # deterministic instead of depending on set iteration order.
    points = sorted({(int(idx[0]), int(idx[1])) for idx in changed})

    if len(points) < 2:
        raise ValueError(
            'expected at least 2 changed cells between frames, found %d'
            % len(points)
        )

    first, second = points[0], points[1]

    # If the first cell was not white in prev_frame it is where the move
    # starts; otherwise it is the (previously empty) target cell.
    distance_from_white = np.sum(
        np.abs(prev_frame[first].astype(np.int32) - np.array([255, 255, 255]))
    )
    if distance_from_white > white_tolerance:
        source, target = first, second
    else:
        source, target = second, first

    # (first-axis step, second-axis step) -> command code.
    step_to_code = {
        (-1, 0): 0,  # up
        (0, 1): 1,   # right
        (1, 0): 2,   # down
        (0, -1): 3,  # left
    }
    step = (target[0] - source[0], target[1] - source[1])
    # Anything other than a single-cell axis-aligned step has no code.
    return step_to_code.get(step)
        

def get_instructions(video_path, debug=True):
    """Extract the sequence of moves shown in a video.

    Every frame is passed through ``crop_and_resize``; each pair of
    consecutive processed frames is handed to ``get_instruction_btw_frame``
    and the resulting codes are translated to direction names with
    ``decode``.

    Args:
        video_path: path of the video file to open with OpenCV.
        debug: when true, print the frame count and the decoded moves.

    Returns:
        List of direction names, one per pair of consecutive frames (empty
        if the video cannot be opened or has fewer than two frames).

    Raises:
        KeyError: from ``decode`` if a frame pair produced a value that is
            not a known command code (e.g. ``None``).
    """
    cap = cv2.VideoCapture(video_path)

    prev_frame = None
    instructions = []
    counter = 0

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No more frames (or a read error): stop.
                break

            c_frame = crop_and_resize(frame)

            # Get the instruction between prev_frame and frame
            if prev_frame is not None:
                instructions.append(
                    get_instruction_btw_frame(prev_frame, c_frame)
                )

            counter += 1
            prev_frame = c_frame
    finally:
        # Always free the capture handle; this function is called once per
        # video and would otherwise leak one open capture per call.
        cap.release()

    # Decode once and reuse the result for both the debug print and return.
    decoded = decode(instructions)

    if debug:
        print('There are %d frames ' % counter)
        print('The list of instructions are %s ' % decoded)

    return decoded

ModuleNotFoundError: No module named 'cv2'

### Get puzzle_to_instructions dict

In [21]:
from collections import defaultdict
import os

# Maps a puzzle number to the list of annotation texts written for it.
puzzle_to_instructions = defaultdict(list)

# Both annotation files are parsed the same way, so one loop handles them
# in order: annotation.csv first, then annotation2.csv.
for annotation_name in ('annotation.csv', 'annotation2.csv'):
    with open(os.path.join('..', annotation_name), 'r') as fh:
        for line in fh:
            parts = line.split(',')

            # Second field is the puzzle number; everything after it is the
            # annotation text (commas inside the text become spaces).
            puzzle = int(parts[1])
            text = ' '.join(parts[2:]).replace('-', ' ').strip()

            # Skip rows whose annotation is empty.
            if text:
                puzzle_to_instructions[puzzle].append(text)

### Create training dataset

In [25]:
# Training split: for puzzles 0-199, every annotation except the last one is
# written out, each paired with the command sequence read from the puzzle's
# video.
src_file = 'train_instructions.txt'
tgt_file = 'train_commands.txt'

with open(os.path.join('..', 'new_data', src_file), 'w') as fh, \
        open(os.path.join('..', 'new_data', tgt_file), 'w') as fh2:

    for puzzle in range(200):
        instructions = puzzle_to_instructions[puzzle]

        # Videos are stored in sub-directories of 100 puzzles each.
        directory = puzzle // 100
        video_path = os.path.join('..', 'target', str(directory), str(puzzle) + '.mp4')

        commands = get_instructions(video_path, debug = False)
        # The same command line accompanies every annotation of the puzzle.
        command_line = ' '.join(commands) + '\n'

        for instruction in instructions[:-1]:
            fh.write(instruction.lower() + '\n')
            fh2.write(command_line)

### Create validation dataset

In [None]:
# Validation split: for puzzles 0-199, the last annotation of each puzzle
# (the one the training split leaves out with instructions[:-1]) is written
# out, paired with the command sequence read from the puzzle's video.
src_file = 'eval_instructions.txt'
tgt_file = 'eval_commands.txt'
with open(os.path.join('..', 'data', src_file), 'w') as fh:
    with open(os.path.join('..', 'data', tgt_file), 'w') as fh2:

        for puzzle in range(200):
            instructions = puzzle_to_instructions[puzzle]

            # Videos are stored in sub-directories of 100 puzzles each.
            directory = puzzle // 100

            commands = get_instructions(os.path.join('..', 'target', str(directory), str(puzzle) + '.mp4'), debug = False)

            # Slice with [-1:] rather than index with [-1]: indexing returns
            # the annotation *string*, so the loop would iterate over its
            # characters (one character per output line) and would raise
            # IndexError for a puzzle with no annotations. The slice yields
            # a list holding the last annotation, or an empty list.
            for instruction in instructions[-1:]:
                fh.write(instruction.lower())
                fh.write('\n')
                fh2.write(' '.join(commands))
                fh2.write('\n')

### Create testing dataset

In [None]:
# Testing split: for puzzles 200-299, the last annotation of each puzzle is
# written out, paired with the command sequence read from the puzzle's video.
# NOTE(review): only the last annotation is kept, as in the validation
# split -- confirm whether the test split should use every annotation.
src_file = 'test_instructions.txt'
tgt_file = 'test_commands.txt'
with open(os.path.join('..', 'data', src_file), 'w') as fh:
    with open(os.path.join('..', 'data', tgt_file), 'w') as fh2:

        for puzzle in range(200, 300):
            instructions = puzzle_to_instructions[puzzle]

            # Videos are stored in sub-directories of 100 puzzles each.
            directory = puzzle // 100

            commands = get_instructions(os.path.join('..', 'target', str(directory), str(puzzle) + '.mp4'), debug = False)

            # Slice with [-1:] rather than index with [-1]: indexing returns
            # the annotation *string*, so the loop would iterate over its
            # characters (one character per output line) and would raise
            # IndexError for a puzzle with no annotations. The slice yields
            # a list holding the last annotation, or an empty list.
            for instruction in instructions[-1:]:
                fh.write(instruction.lower())
                fh.write('\n')
                fh2.write(' '.join(commands))
                fh2.write('\n')