In [None]:
%matplotlib inline

import numpy as np
import datetime, os
import pandas as pd
import math
import pickle
import matplotlib.pyplot as plt
import cv2
from scipy.misc import imresize

from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.decomposition import PCA

## Loading global functions from external file

In [None]:
# Execute the companion notebook functions.ipynb via IPython's %run magic.
# Presumably this is where the helpers used by later cells (rescaleDepthImage,
# cropImage, part_histograms, train) are defined - they are not defined in
# this notebook. TODO confirm against functions.ipynb.
%run functions.ipynb

print('importing functions done')

## Loading data

In [None]:
# Load the pickled train and test datasets.
# Later cells index these objects by the keys 'rgb', 'depth', 'segmentation',
# 'subjectLabels' and (train only) 'gestureLabels'.
#
# NOTE(security): pickle.load can execute arbitrary code while deserialising;
# only load .pkl files that come from a trusted source.
# The context managers make sure the file handles are closed even when
# unpickling fails.
with open("a1_dataTrain.pkl", 'rb') as train_file:
    dataTrain = pickle.load(train_file)
with open("a1_dataTest.pkl", 'rb') as test_file:
    dataTest = pickle.load(test_file)

print('data loading done')

## Checking for empty (all-zero) segmentation masks

In [None]:
# Number of samples per split, taken from the length of the 'rgb' entries.
train_count = len(dataTrain['rgb'])
test_count = len(dataTest['rgb'])

# Keep only the sample positions whose segmentation mask has a non-zero sum;
# samples with an all-zero mask sum are left out of both index lists.
good_train_indices = list(filter(
    lambda idx: dataTrain['segmentation'][idx].sum() != 0,
    range(train_count),
))
good_test_indices = list(filter(
    lambda idx: dataTest['segmentation'][idx].sum() != 0,
    range(test_count),
))

print('found data with good segmentation masks')

## Generating features - local histograms on depth images

In [None]:
%run functions.ipynb

train_count = len(dataTrain['rgb'])
test_count = len(dataTest['rgb'])

# Number of parts in which to divide each depth image
x_parts = 8
y_parts = 6
bins = 5
std = 3
# Plus 1 because of adding subject label
n_features = x_parts * y_parts * bins + 1

X_train = np.zeros((train_count, n_features))
X_test = np.zeros((test_count, n_features))

for i in good_train_indices:
    img_depth = dataTrain['depth'][i]
    img_segmentation = dataTrain['segmentation'][i]
    subject_train = dataTrain['subjectLabels'][i]
    
    img_depth = rescaleDepthImage(img_depth, img_segmentation, std)
    img_depth, mask = cropImage(img_depth, img_segmentation)
    img_depth = imresize(img_depth*mask,(120,90))
    img_depth[90:,:] = 0
    img_depth[75:, 30:60] = 0
    
    X_train[i,:n_features-1] = part_histograms(img_depth, x_parts, y_parts, bins)
    X_train[i,n_features-1] = subject_train
    
    if i%1000 == 0: print(i)
    
for i in good_test_indices:
    img_depth = dataTest['depth'][i]
    img_segmentation = dataTest['segmentation'][i]
    subject_test = dataTest['subjectLabels'][i]

    img_depth = rescaleDepthImage(img_depth, img_segmentation, std)
    img_depth, mask = cropImage(img_depth, img_segmentation)
    img_depth = imresize(img_depth*mask,(120,90))
    img_depth[90:,:] = 0
    img_depth[75:, 30:60] = 0
    
    X_test[i,:n_features-1] = part_histograms(img_depth, x_parts, y_parts, bins)
    X_test[i,n_features-1] = subject_test
    
    if i%1000 == 0: print(i)
        
print('features generation done')

## Saving features to pickle files

In [None]:
# Persist the feature matrices so they can be reloaded later.
# The context managers guarantee the files are flushed and closed once the
# dump finishes (or fails).
with open("train_features_histograms.p", "wb") as train_features_file:
    pickle.dump(X_train, train_features_file)
with open("test_features_histograms.p", "wb") as test_features_file:
    pickle.dump(X_test, test_features_file)

print('saving features done')

## Training classifier - Random Forest

In [None]:
# Classification targets: the gesture labels of the training split.
labels_train = dataTrain['gestureLabels'][:]

# train() is not defined in this notebook (presumably it comes from
# functions.ipynb); 'rf' is the model selector passed to it.
clf = train(X_train, labels_train, 'rf')

# Report the selected estimator, its parameters and its score.
for search_result in (clf.best_estimator_, clf.best_params_, clf.best_score_):
    print(search_result)

print('training done')

## Predicting labels for test data

In [None]:
# Predict a label for every row of X_test with the classifier fitted above.
# NOTE(review): rows of X_test belonging to samples outside good_test_indices
# were never filled in and are all-zero, yet they are predicted as well.
labels_test = clf.predict(X_test)

print('prediction done')

## Saving data

In [None]:
# Timestamp shared by both output files so a run's log and submission match.
today = datetime.datetime.today()
date_format = "%d%m%y_%H%M%S"
date = today.strftime(date_format)

# Create the output directory when it is missing; otherwise open() and
# to_csv() below fail on a fresh checkout.
results_dir = "results"
if not os.path.isdir(results_dir):
    os.makedirs(results_dir)

# Text log with the feature settings and the results of the classifier search.
savepath = os.path.join(results_dir, "output_" + date + ".txt")
with open(savepath, "w") as text_file:
    text_file.write("Date: " + str(today) + "\n")
    text_file.write("Parts: " + str(x_parts) + " " + str(y_parts) + "\n")
    text_file.write("Std: " + str(std) + "\n")
    text_file.write("Best estimator: " + str(clf.best_estimator_) + "\n")
    text_file.write("Best params: " + str(clf.best_params_) + "\n")
    text_file.write("Score: " + str(clf.best_score_) + "\n")

# Flatten the predictions to 1-D so they form a single DataFrame column.
labels_test = labels_test.ravel()

# Submission ids are 1-based and cover every test sample.
nr = np.arange(1,test_count+1)

savepath = os.path.join(results_dir, "submission_" + date + ".csv")
data = {"Id" : nr, "Prediction" : labels_test}
df = pd.DataFrame(data, columns=['Id', 'Prediction'])
df.to_csv(savepath, index=False)

print('results saved')