# Data Pre-Processing and Data Augmentation

In this notebook we read the data of all participants. Using OpenCV, we detect the touch blob in each capacitive touch matrix, crop it out of the original matrix, and store the crop in a new column of the same DataFrame. 
After cropping, we filter the cropped images to remove noise and empty images (crop area of 5 pixels or fewer). 

In [1]:
from numpy import *
from PIL import Image
import cv2
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import time
import os, sys

In [2]:
def blob_detection(cap_matrix, noise_threshold=30):
    """Locate the largest touch blob in a capacitive matrix.

    The matrix is binarised with ``noise_threshold`` and the contour with the
    largest area is taken as the touch blob.

    Parameters
    ----------
    cap_matrix : numpy.ndarray
        2-D capacitive image. NOTE: negative entries are set to 0 *in place*,
        so the caller's array is modified. This is kept on purpose because the
        calling code crops from that same array afterwards.
    noise_threshold : int, optional
        Pixels whose value is not strictly greater than this are treated as
        noise (default 30).

    Returns
    -------
    list
        ``[(min_x, min_y), (max_x, max_y)]``: the inclusive corners of the
        blob's bounding box, where x is the column and y is the row.
        ``[(0, 0), (0, 0)]`` is returned when no contour is found.
    """
    cap_matrix[cap_matrix < 0] = 0 #Negative pixel values are set to 0.

    # Saturate at 255 instead of letting larger values wrap around in the
    # uint8 cast (a wrapped value could fall below the noise threshold).
    image = np.clip(cap_matrix, 0, 255).astype(np.uint8)

    _, binary = cv2.threshold(image, noise_threshold, 255, cv2.THRESH_BINARY)

    # findContours returns (image, contours, hierarchy) on OpenCV 3.x but
    # (contours, hierarchy) on 2.x/4.x; contours is always second to last.
    contours = cv2.findContours(binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # len() works whether contours is a list (3.x) or a tuple (4.x).
    if len(contours) == 0:
        return [(0, 0), (0, 0)]

    largest = max(contours, key=cv2.contourArea) #Determines maximum area contour.

    # Each contour is indexed as [point, 0, (x, y)]; the bounding box is simply
    # the min/max of every coordinate along the contour.
    xs = largest[:, 0, 0]
    ys = largest[:, 0, 1]

    extreme_left  = (int(xs.min()), int(ys.min()))
    extreme_right = (int(xs.max()), int(ys.max()))

    return [extreme_left, extreme_right]

In [3]:
def data_augmentation(n, df=None, output_dir='DataSet_Phalanx/03_Augmented_DataSet'):
    """Quadruple a participant's data set by flipping every cropped matrix.

    The result contains, in order: the original rows, the rows flipped
    up/down, and then both of those sets flipped left/right. It is written to
    ``<output_dir>/data<n>.pkl``.

    Parameters
    ----------
    n : int
        Participant number; used in the progress message and the file name.
    df : pandas.DataFrame, optional
        Frame with a ``Cropped_Matrix`` column of 2-D arrays. When omitted the
        notebook-level ``df_new`` is used, so existing ``data_augmentation(i)``
        calls keep working.
    output_dir : str, optional
        Directory the augmented pickle is written to.
    """
    source = df_new if df is None else df

    def _with_flipped(frame, flip):
        # Copy of `frame` whose Cropped_Matrix entries are passed through `flip`.
        # Series.map keeps the frame's own index, so no 0..n-1 index is assumed.
        flipped = frame.copy(deep=True)
        flipped['Cropped_Matrix'] = frame['Cropped_Matrix'].map(flip)
        return flipped

    df_original_axes = pd.concat([source, _with_flipped(source, np.flipud)],
                                 ignore_index=True)
    df_final = pd.concat([df_original_axes, _with_flipped(df_original_axes, np.fliplr)],
                         ignore_index=True)
    print('Writing data for Participant '+str(n))

    #Save augmented data file for individual participant.
    df_final.to_pickle(os.path.join(output_dir, 'data'+str(n)+'.pkl'))


In [None]:
%%time

#Read data files of all participants for Pre-processing.
rec_pts = np.zeros(2)

for i in range(1,26):
    
    DATA_PATH ='DataSet_Phalanx/01_Input_Data/data'+str(i)+'.pkl'
    print('Reading data from Participant '+str(i)+'.')
    data_frame = pd.read_pickle(DATA_PATH)
    cropped = []
    
    for j,matrix in enumerate(data_frame.Matrix):
       
        cap_matrix = reshape(matrix,(27,15))
        rec_pts = blob_detection(cap_matrix) #Run blob detection on reshaped capacitive matrix.  
        
        #Cropping capacitive matrix with blob coordinates with offset of 1 extra row and column.
        crop_matrix = cap_matrix[rec_pts[0][1]-1:rec_pts[1][1]+1,rec_pts[0][0]-1:rec_pts[1][0]+1]
        cropped.append(crop_matrix)
        
    data_frame['Cropped_Matrix'] = pd.Series(cropped)
    
    df_new = data_frame.copy(deep=False)
            
    counter =0
    
    for k,cm in enumerate(df_new.Cropped_Matrix):
        area_min = cm.shape[0]*cm.shape[1]
        if(area_min>5):
            pass # If area is greater than 5 pixel proceed as normal. 
        else:
            counter = counter +1 #will keep track of deleted images.
            df_new.drop(k, inplace=True) # Drop the corresponding row from data frame.
            
    df_new = df_new.reset_index()
    print("Number of noisy images deleted are - ",counter,'.')
    
    # Store individual participant's pre processed data as pickle before augmentation. 
    df_new.to_pickle('DataSet_Phalanx/02_Pre_Processed_Data/data_new'+str(i)+'.pkl')
    
    # Before running augmentation on data set, drop the following columns which are not necessary for training set.
    List = ['Handedness','Finger','index','Timestamp','Matrix']
    df_new=df_new.drop(List, axis=1)
    data_augmentation(i) 
    


Reading data from Participant 1.
Number of noisy images deleted are -  9559 .
Writing data for Participant 3
Reading data from Participant 4.
Number of noisy images deleted are -  12650 .
Writing data for Participant 4
Reading data from Participant 5.
Number of noisy images deleted are -  14757 .
Writing data for Participant 5
Reading data from Participant 6.
Number of noisy images deleted are -  15881 .
Writing data for Participant 6
Reading data from Participant 7.
Number of noisy images deleted are -  18617 .
Writing data for Participant 7
Reading data from Participant 8.
Number of noisy images deleted are -  11551 .
Writing data for Participant 8
Reading data from Participant 9.
Number of noisy images deleted are -  12818 .
Writing data for Participant 9
Reading data from Participant 10.
Number of noisy images deleted are -  13694 .
Writing data for Participant 10
Reading data from Participant 11.
Number of noisy images deleted are -  11618 .
Writing data for Participant 11
Reading