# Data Prep for FYP17 robotic grasping

In [1]:
import math
import numpy as np
import pandas as pd
import os
from collections import Counter
from random import shuffle
from tqdm import tqdm
import cv2
# Root directory that holds one sub-folder per batch of recorded grasps.
# Raw string: the backslash is a literal path separator, not the start of an
# escape sequence ('\F' is an invalid escape and triggers a warning).
TRAIN_DIR = r'G:\FYPDATA5'
# Side length, in pixels, that every image is resized to (IMG_SIZE x IMG_SIZE).
IMG_SIZE = 80

## Creating Training Data for Image prediction
### 1.
In order to create training data, we run through every folder in the directory, and then every file within that folder. Conditional statements identify whether the current file is an image or a csv file.

There is an index offset which increases every time the folder changes. This is to differentiate files with the same name in different folders. The offset grows by 2000 every time this happens.

### 2. 
For every csv file, the angle, success flag, item name and index are recorded. The raw angle input is from -180 to 180; we shift it to 0 to 360 for ease of analysis and round it to the nearest multiple of 30. Furthermore, the success flag is converted into a one-hot encoded label format ([0, 1] for a success, [1, 0] otherwise).
### 3. 
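For every image file (the first colour image of each grasp), the image is read in grayscale and resized to an m by m matrix (m = IMG_SIZE); this matrix and the index are recorded.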

In [2]:
##need to filter successful grasps only. 
def split_to_index(img_label):
    """Return the grasp number embedded in a file name as an int.

    The name is cut at its first '.', the remaining stem is split on '_',
    and the second field is converted to int. For example,
    'grasp_0_0_colour.jpg' gives 0.
    """
    stem = img_label.split('.')[0]
    fields = stem.split('_')
    return int(fields[1])

In [3]:
def create_train_data():
    """Walk TRAIN_DIR and collect grasp labels and grasp images.

    Every entry of TRAIN_DIR is treated as a folder of grasp files. The k-th
    folder (in os.listdir order, starting at 0) adds 2000*k to the index
    parsed from each file name, so that identical file names in different
    folders get different ids.

    Returns:
        A pair (label_data, img_data).

        label_data: list of [angle, success, item, index], one entry per
            file ending in ".csv". angle is the 'Angle' value shifted by
            +180 and rounded to a multiple of 30; success is [0, 1] when the
            'success' value equals 1 and [1, 0] otherwise; item is the
            'item' value.
        img_data: list of [img, index], one entry per file ending in
            "0_colour.jpg" that OpenCV could read. img is the grayscale
            image resized to IMG_SIZE x IMG_SIZE.
    """
    label_data = []
    img_data = []
    for n, folder in enumerate(tqdm(os.listdir(TRAIN_DIR))):
        # Each new folder shifts the ids by another 2000.
        folder_increment = 2000 * n
        folder_path = os.path.join(TRAIN_DIR, folder)
        for file in os.listdir(folder_path):
            file_path = os.path.join(folder_path, file)

            if file.endswith(".csv"):
                index = split_to_index(file) + folder_increment
                grasp_csv = pd.read_csv(file_path)
                # All values are taken from the row labelled 1.
                # NOTE(review): confirm this is the intended row of the CSV.
                angle = grasp_csv['Angle'][1] + 180
                # Snap to the nearest multiple of 30.
                # NOTE(review): if raw angles span -180..180 inclusive, both
                # 0 and 360 can occur as labels.
                angle = int(round(angle / 30) * 30)
                # One-hot success flag: [0, 1] for success, [1, 0] otherwise.
                success = [0, 1] if grasp_csv['success'][1] == 1 else [1, 0]
                item = grasp_csv['item'][1]
                label_data.append([angle, success, item, index])

            # grab first image of every grasp
            if file.endswith("0_colour.jpg"):
                index = split_to_index(file) + folder_increment
                img = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
                # cv2.imread returns None for files it cannot read. Test with
                # "is not None": "!=" on an ndarray compares element-wise and
                # its result cannot be used as a single truth value.
                if img is not None:
                    img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
                    img_data.append([img, index])
    return label_data, img_data

In [4]:
# Run the directory walk once and keep both result lists.
label_data, img_data = create_train_data()

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 34/34 [04:00<00:00,  3.58s/it]


## Merge label_data and img_data

### 1. 
label_data and img_data are each stored in a pd.DataFrame with labelled columns.
### 2.  
The DataFrames are then merged via the id column. 
### 3. 
All grasp data is selected. There is no filtering on success/failure, as this data is only used to train the image recognition model, which is separate from determining the angle. Note that, as a result, the data is not restricted to success stories; a success-only filter (so that a model isn't 'picking' up any bad habits) would have to be applied separately.
### 4. 
Finally, the DataFrame is converted to an np.array via the .values attribute.
### 5.
It is then shuffled and saved to a .npy format.

In [5]:
# Sanity check: show the first processed image in an OpenCV window.
cv2.imshow('iameg', img_data[0][0])
# Block until a key is pressed while the window has focus.
cv2.waitKey()
# Close the window explicitly; otherwise it stays open after the key press
# for the rest of the session.
cv2.destroyAllWindows()

-1

In [6]:
# Wrap the [image, id] pairs in a DataFrame so they can be joined to the
# labels on the id column.
img_data_df = pd.DataFrame(data=img_data, columns=['image', 'id'])
# Disabled check: len(label_data_df.id.unique())
# Preview the first rows.
img_data_df.head()

Unnamed: 0,image,id
0,"[[254, 254, 254, 254, 254, 254, 254, 254, 254,...",0
1,"[[254, 254, 254, 254, 254, 254, 254, 254, 254,...",100
2,"[[254, 254, 254, 254, 254, 254, 254, 254, 254,...",101
3,"[[254, 254, 254, 254, 254, 254, 254, 254, 254,...",102
4,"[[254, 254, 254, 254, 254, 254, 254, 254, 254,...",103


In [7]:
# Wrap the [angle, success, item, id] records in a DataFrame with named columns.
label_data_df = pd.DataFrame(
    data=label_data,
    columns=['angle', 'success', 'item', 'id'],
)
# Disabled check: len(label_data_df.id.unique())
# Preview the first rows.
label_data_df.head()

Unnamed: 0,angle,success,item,id
0,60,"[1, 0]",ice_tray_2,0
1,240,"[1, 0]",ice_tray_2,1
2,150,"[1, 0]",ice_tray_2,10
3,150,"[0, 1]",ice_tray_2,100
4,210,"[1, 0]",ice_tray_2,101


In [8]:
# Inner join on id: keep only grasps that have both an image and a label row.
final_data_df = pd.merge(img_data_df, label_data_df, how='inner', on='id')
final_data_df.head()


Unnamed: 0,image,id,angle,success,item
0,"[[254, 254, 254, 254, 254, 254, 254, 254, 254,...",0,60,"[1, 0]",ice_tray_2
1,"[[254, 254, 254, 254, 254, 254, 254, 254, 254,...",100,150,"[0, 1]",ice_tray_2
2,"[[254, 254, 254, 254, 254, 254, 254, 254, 254,...",101,210,"[1, 0]",ice_tray_2
3,"[[254, 254, 254, 254, 254, 254, 254, 254, 254,...",102,270,"[0, 1]",ice_tray_2
4,"[[254, 254, 254, 254, 254, 254, 254, 254, 254,...",103,270,"[1, 0]",ice_tray_2


In [9]:
# Build a lookup table that maps every distinct item name to a one-hot
# vector whose length is the number of distinct items.
unique_items = final_data_df['item'].unique()
count_unique = len(unique_items)
bum = []
for n, item in enumerate(unique_items):
    item_onehot = np.zeros(count_unique, dtype=int)
    # Position n (order of first appearance) marks this item.
    item_onehot[n] = 1
    bum.append([item, item_onehot])
bum_df = pd.DataFrame(bum, columns=['item', 'item_onehot'])
bum_df

Unnamed: 0,item,item_onehot
0,ice_tray_2,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
1,action_figurine,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
2,egg_tray,"[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]"
3,water_bottle,"[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0]"
4,calculator,"[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0]"
5,nothing,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]"
6,hairbrush,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0]"
7,black_box,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]"
8,scissors,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]"
9,tennis_tube,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]"


In [10]:
# Attach the one-hot item vector to every grasp row by joining on the item name.
final_data_df = pd.merge(final_data_df, bum_df, how='outer', on='item')
final_data_df.head()

Unnamed: 0,image,id,angle,success,item,item_onehot
0,"[[254, 254, 254, 254, 254, 254, 254, 254, 254,...",0,60,"[1, 0]",ice_tray_2,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
1,"[[254, 254, 254, 254, 254, 254, 254, 254, 254,...",100,150,"[0, 1]",ice_tray_2,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
2,"[[254, 254, 254, 254, 254, 254, 254, 254, 254,...",101,210,"[1, 0]",ice_tray_2,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
3,"[[254, 254, 254, 254, 254, 254, 254, 254, 254,...",102,270,"[0, 1]",ice_tray_2,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
4,"[[254, 254, 254, 254, 254, 254, 254, 254, 254,...",103,270,"[1, 0]",ice_tray_2,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"


In [11]:
# Convert the merged table to a NumPy array with one row per grasp.
final_data = final_data_df.values
# Shuffle the rows in place. np.random.shuffle permutes whole rows along the
# first axis. random.shuffle must not be used here: it swaps elements with
# "x[i], x[j] = x[j], x[i]", and on a 2-D ndarray x[i] and x[j] are views, so
# the swap overwrites one row with the other, duplicating some rows and
# dropping others.
np.random.shuffle(final_data)
# NOTE(review): the array presumably has object dtype (its cells hold arrays
# and lists), in which case np.load needs allow_pickle=True to read it back.
np.save('training_data_all_360.npy', final_data)

In [5]:
# Scratch check of the angle binning expression used in create_train_data:
# 344/30 is about 11.47, which rounds to 11, so 344 maps to the 330 bin.
int(round(344/30)*30)

330