In [1]:
import numpy as np

from sklearn import linear_model
from tools.data_loading import load_images, load_labels, dummy_code, load_images_resized, store_resized_images
from tools.feature_learning import pins_generation
from tools.hog import hog
from tools.kernels import kernel_matrix
from tools.optimization import find_f
from tools.prediction import pred
from tools.process_images import process_images
from tools.quantization import kmeans
from tools.submission import labels_to_csv
from tools.visualization import imshow, dump_as_png, reshape_as_images

# Data loading

In [2]:
# Load the resized training images and their labels.
X_train = load_images_resized(type="train")
n_train = X_train.shape[0]
Y_labels_train = load_labels()
# presumably one indicator column per class — confirm in tools.data_loading;
# the number of classes is read from axis 1 of the coded labels.
Y_train = dummy_code(Y_labels_train)
n_classes = Y_train.shape[1]

# Min-max scaling along axis 0, i.e. independently for every pixel position
# across the whole set of images.
X_train = X_train - X_train.min(axis=0)
pixel_range = X_train.max(axis=0)
# A position whose value is identical in every image has a range of 0 and
# would yield 0/0 = NaN. Divide by 1 there instead so those entries stay 0.
pixel_range[pixel_range == 0] = 1
X_train = X_train / pixel_range

In [3]:
# Copy the single-channel images into three identical channels.
# The output shape is derived from X_train instead of being hard-coded, so the
# cell keeps working when the number of images or their resolution changes.
# NOTE(review): assumes X_train is (n_images, height, width) — confirm.
n_channels = 3
X_train2 = np.zeros(X_train.shape + (n_channels,))
# Broadcasting the added trailing axis fills every channel with the same image.
X_train2[...] = X_train[..., np.newaxis]
image_list = X_train2

# Visual features

In [25]:
# Settings forwarded, under the same keyword names, to every hog() call below.
# NOTE(review): exact meaning of each value is defined in tools.hog — confirm.
hog_cell_size = 15
disc_grid = 16
filter_shape = 5
filter_sigma = 0.1

In [26]:
# Number of images = length of the first axis of image_list.
n_images = len(image_list)

In [None]:
# Compute one HOG descriptor per image, then stack the descriptors into a
# 2-D matrix with one row per image.
hog_params = dict(
    filter_sigma=filter_sigma,
    filter_shape=filter_shape,
    hog_cell_size=hog_cell_size,
    disc_grid=disc_grid,
)
hog_list = [hog(image_list[idx, :, :, :], **hog_params) for idx in range(n_images)]

# Every descriptor is flattened to the size of the first one.
n_features = hog_list[0].size
X_hog = np.array(hog_list).reshape((n_images, n_features))


In [None]:
# Visual sanity check: show the first image of image_list.
imshow(image_list[0])

In [None]:
# Bare expression so the notebook displays (n_images, n_features).
X_hog.shape

# Data separation

In [11]:
# Random 80/20 split of the HOG features into a training sample and a
# held-out test set. The generator is seeded so that the split, and therefore
# every score reported below, is reproducible after a kernel restart.
split_seed = 0
train_fraction = 0.8

# The split point is computed from n_train while the permutation covers the
# rows of X_hog; fail early if the two ever disagree.
assert X_hog.shape[0] == n_train, "X_hog row count differs from n_train"

rng = np.random.RandomState(split_seed)
indices = rng.permutation(X_hog.shape[0])
n_split = int(train_fraction * n_train)
training_idx, test_idx = indices[:n_split], indices[n_split:]

# Training sample: features, dummy-coded targets and raw labels.
X_sample = X_hog[training_idx, :]
n_sample = X_sample.shape[0]
Y_sample = Y_train[training_idx, :]
Y_labels_sample = Y_labels_train[training_idx]

# Held-out test set: features and raw labels.
X_test = X_hog[test_idx, :]
n_test = X_test.shape[0]
Y_labels_test = Y_labels_train[test_idx]

# Training

## Kernel choice

In [None]:
# Kernel name; reused by every pred() call and by labels_to_csv() below.
kernel_type = "hellinger"
# presumably the pairwise kernel (Gram) matrix of the training sample —
# confirm against tools.kernels.kernel_matrix.
K_sample = kernel_matrix(X_sample, kernel_type=kernel_type)

In [None]:
# Bare expression so the notebook displays the shape of the kernel matrix.
K_sample.shape

## Classifier choice

In [None]:
# Fit one coefficient vector per class: row `dig` of alpha is obtained from
# find_f using column `dig` of the dummy-coded targets.
classifier_type = "svm"
solver_options = dict(prob_type=classifier_type, lamb=1, n_iter=90000)

alpha = np.zeros((n_classes, n_sample))
for dig in range(n_classes):
    class_targets = Y_sample[:, dig]
    alpha[dig] = find_f(K_sample, class_targets, **solver_options)

# Evaluation

### Training error

In [None]:
# Only the first 500 rows of the training sample are scored here.
X_check = X_sample[:500]
labels_check = Y_labels_sample[:500]

# One column of scores per class, filled from the matching row of alpha.
Y_pred_train = np.zeros((len(X_check), n_classes))
for dig, alpha_dig in enumerate(alpha):
    Y_pred_train[:, dig] = pred(X_sample, X_check, alpha_dig, kernel_type=kernel_type)

# Predicted label = index of the class with the highest score.
Y_labels_pred_train = np.argmax(Y_pred_train, axis=1)
# Fraction of rows whose predicted label equals the true label.
prec = np.mean(Y_labels_pred_train == labels_check)
print("The precision on the train set is of {}".format(prec))

### Testing error

In [None]:
# Score the held-out test rows: one column of scores per class, filled from
# the matching row of alpha.
n_test_rows = len(X_test)
Y_pred = np.zeros((n_test_rows, n_classes))
for dig, alpha_dig in enumerate(alpha):
    Y_pred[:, dig] = pred(X_sample, X_test, alpha_dig, kernel_type=kernel_type)

# Predicted label = index of the class with the highest score.
Y_labels_pred = np.argmax(Y_pred, axis=1)
# Fraction of test rows whose predicted label equals the true label.
prec = np.mean(Y_labels_pred == Y_labels_test)
print("The precision on the test set is of {}".format(prec))

# Prediction

In [17]:
# Disabled call, kept for reference.
# NOTE(review): presumably a one-off step that produces the files read by
# load_images_resized(type="test") — confirm before re-enabling or removing.
#store_resized_images("test")

In [18]:
# Load the resized images of the "test" set, used for the submission.
X_eval = load_images_resized(type="test")

In [19]:
# Min-max scaling along axis 0, i.e. independently for every pixel position
# across the evaluation images.
# NOTE(review): the statistics come from X_eval itself, not from the training
# set, so train and eval features are scaled differently — confirm intended.
X_eval = X_eval - X_eval.min(axis=0)
eval_pixel_range = X_eval.max(axis=0)
# A position whose value is identical in every image has a range of 0 and
# would yield 0/0 = NaN. Divide by 1 there instead so those entries stay 0.
eval_pixel_range[eval_pixel_range == 0] = 1
X_eval = X_eval / eval_pixel_range

In [20]:
n_eval = X_eval.shape[0]

# Copy the single-channel images into three identical channels. The shape is
# derived from X_eval instead of being hard-coded, so the cell keeps working
# when the number of images or their resolution changes.
# NOTE(review): assumes X_eval is (n_images, height, width) — confirm.
X_eval2 = np.zeros(X_eval.shape + (3,))
X_eval2[...] = X_eval[..., np.newaxis]
image_list_eval = X_eval2

# Visual features for submission
# Same hog() settings as for the training images.
hog_list_eval = []
for image in image_list_eval:
    hog_list_eval.append(
        hog(
            image,
            filter_sigma=filter_sigma,
            filter_shape=filter_shape,
            hog_cell_size=hog_cell_size,
            disc_grid=disc_grid))

# NOTE: this rebinds n_features, which was also set when X_hog was built.
n_features = hog_list_eval[0].size
X_hog_eval = np.array(hog_list_eval).reshape((n_eval, n_features))

In [21]:
# Bare expression so the notebook displays (n_eval, n_features).
X_hog_eval.shape

(2000, 392)

In [22]:
# Score every evaluation image: one column of scores per class, filled from
# the matching row of alpha.
Y_eval = np.zeros((n_eval, n_classes))
for dig, alpha_dig in enumerate(alpha):
    Y_eval[:, dig] = pred(X_sample, X_hog_eval, alpha_dig, kernel_type=kernel_type)

# Predicted label = index of the class with the highest score.
Y_labels_eval = np.argmax(Y_eval, axis=1)

# Submission
# presumably writes the predicted labels to a CSV file — confirm in
# tools.submission.labels_to_csv.
labels_to_csv(
    Y_labels_eval,
    kernel=kernel_type,
    algo="svm_hog_63",
    user="Pierre",
)