In [26]:
import numpy as np

from tools.data_loading import load_images, load_labels, dummy_code
from tools.feature_learning import pins_generation
from tools.kernels import kernel_matrix
from tools.optimization import find_f
from tools.prediction import pred
from tools.process_images import process_images
from tools.quantization import kmeans
from tools.submission import labels_to_csv
from tools.visualization import imshow, dump_as_png

# Data loading

In [27]:
# Class labels of the training images, and their dummy-coded matrix whose
# second dimension is used as the number of classes.
Y_labels_train = load_labels()
Y_train = dummy_code(Y_labels_train)
n_classes = Y_train.shape[1]

# The raw training images are not loaded in this cell, so the training-set
# size is set by hand rather than read from an image array.
# NOTE(review): 5000 is presumably the number of training images -- confirm
# it matches the number of rows of Y_train.
n_train = 5000

# Data separation

In [28]:
# Random hold-out split of the image indices: the first 90% of a random
# permutation is used for training, the remaining 10% for testing.
#
# The global NumPy seed is fixed first so that the split (and any later
# np.random call in this notebook) is reproducible under
# "Restart Kernel -> Run All".
SPLIT_SEED = 0
TRAIN_FRACTION = 0.9

np.random.seed(SPLIT_SEED)
indices = np.random.permutation(n_train)
n_kept = int(TRAIN_FRACTION * n_train)  # position where the permutation is cut
training_idx, test_idx = indices[:n_kept], indices[n_kept:]

# Visual features

In [29]:
# Pin-extraction settings. They are kept in module-level variables because the
# prediction cell reuses the same four values for the test images.
ratio = 10      # passed as ratio_pins_per_image
patch = 16      # passed as patch_size
filter_s = 1    # passed as filter_sigma
resized = True  # passed as resized

# Generate the pins for the training indices only.
pins_dict_train = pins_generation(
    training_idx=training_idx,
    ratio_pins_per_image=ratio,
    patch_size=patch,
    filter_sigma=filter_s,
    resized=resized,
)

# Unpack the three entries of the returned dictionary that are used below.
pins_train, train_pins, pin_to_im_train = (
    pins_dict_train[key] for key in ("pins", "train_pins", "pin_to_im")
)

# Stack the training pins vertically into a single 2-D matrix.
pins_mat = np.vstack(train_pins)

In [30]:
# Learn the visual features by running k-means on a random quarter of the pins.
N_VISUAL_FEATURES = 100  # second argument given to kmeans

n_pins = pins_mat.shape[0]
# replace=False: np.random.choice samples WITH replacement by default, which
# would feed duplicated pins to k-means and waste part of the sample budget.
sample_indices = np.random.choice(n_pins, n_pins // 4, replace=False)
visual_features = kmeans(pins_mat[sample_indices], N_VISUAL_FEATURES)

Wrongly clusterized pins: 40409
Wrongly clusterized pins: 16809
Wrongly clusterized pins: 8417
Wrongly clusterized pins: 5248
Wrongly clusterized pins: 3731
Wrongly clusterized pins: 2937
Wrongly clusterized pins: 2456
Wrongly clusterized pins: 2019
Wrongly clusterized pins: 1647
Wrongly clusterized pins: 1298
Wrongly clusterized pins: 1102
Wrongly clusterized pins: 931
Wrongly clusterized pins: 812
Wrongly clusterized pins: 708
Wrongly clusterized pins: 612
Wrongly clusterized pins: 540
Wrongly clusterized pins: 468
Wrongly clusterized pins: 382
Wrongly clusterized pins: 318
Wrongly clusterized pins: 290
Wrongly clusterized pins: 257
Wrongly clusterized pins: 224
Wrongly clusterized pins: 217
Wrongly clusterized pins: 192
Wrongly clusterized pins: 213
Wrongly clusterized pins: 215
Wrongly clusterized pins: 214
Wrongly clusterized pins: 226
Wrongly clusterized pins: 209
Wrongly clusterized pins: 225
Wrongly clusterized pins: 201
Wrongly clusterized pins: 183
Wrongly clusterized pins: 1

# Data processing

In [31]:
# Build the feature matrix of the training images from the learned visual
# features and the generated pins.
X_train = process_images(
    n_train,
    visual_features,
    pins_train,
    pin_to_im_train,
)

# Refresh n_train from the matrix actually produced and record its number of
# columns.
n_train, n_var = X_train.shape

# Data separation (bis): applying the train/test split to the processed features

In [41]:
# Apply the index split computed earlier to the processed feature matrix:
# "sample" rows are used for training, "test" rows are held out.
X_sample, X_test = X_train[training_idx, :], X_train[test_idx, :]
n_sample, n_test = X_sample.shape[0], X_test.shape[0]

# Dummy-coded targets for the training rows only.
Y_sample = Y_train[training_idx, :]


# Normalization

In [37]:
def _sqrt_l2_normalize(features):
    """Return the element-wise square root of `features`, L2-normalized per row.

    Rows whose norm is zero are left as all-zero rows instead of being divided
    by zero (which would produce NaN and contaminate the kernel matrix).
    """
    rooted = np.sqrt(features)
    norms = np.linalg.norm(rooted, axis=1)[:, None]
    norms[norms == 0] = 1.0  # avoid 0/0 on empty rows
    return rooted / norms


# Recompute both matrices from X_train rather than updating X_sample / X_test
# in place: the cell is then idempotent, i.e. running it twice no longer
# applies the square root and the normalization a second time.
# NOTE(review): the recorded execution counts show this cell was run before the
# split cell; it must run after it, otherwise the split cell overwrites the
# normalized matrices with raw ones.
X_sample = _sqrt_l2_normalize(X_train[training_idx, :])
X_test = _sqrt_l2_normalize(X_train[test_idx, :])


# Training

## Kernel choice

In [50]:
# Kernel settings; both are reused by the evaluation cell when calling pred.
kernel_type = "rbf"
sig = 0.1  # passed as sigma

# Kernel matrix of the training rows.
K_sample = kernel_matrix(
    X_sample,
    kernel_type=kernel_type,
    sigma=sig,
)

## Classifier choice

In [51]:
# One classifier per class: row `dig` of `alpha` holds the coefficients
# returned by find_f for the dig-th column of the dummy-coded targets.
classifier_type = "svm"
solver_options = dict(prob_type=classifier_type, lamb=10)

alpha = np.zeros((n_classes, n_sample))
for dig in range(n_classes):
    targets_dig = Y_sample[:, dig]
    alpha[dig] = find_f(K_sample, targets_dig, **solver_options)

# Evaluation

In [52]:
# Score every held-out row with each of the per-class classifiers; column
# `dig` of Y_pred holds the output of pred for class `dig`.
n_held_out = X_test.shape[0]
Y_pred = np.zeros((n_held_out, n_classes))
for dig in range(n_classes):
    class_coefs = alpha[dig, :]
    Y_pred[:, dig] = pred(
        X_sample, X_test, class_coefs, kernel_type=kernel_type, sigma=sig
    )

# Predicted label = index of the column with the largest score.
Y_labels_pred = np.argmax(Y_pred, axis=1)

# Fraction of held-out rows whose predicted label equals the true label.
is_correct = Y_labels_pred == Y_labels_train[test_idx]
prec = np.mean(is_correct)
print("The precision on the test set is of {}".format(prec))

The precision on the test set is of 0.11


# Prediction

In [12]:
# The raw test images are only needed here for their count.
images_eval = load_images(type="test")
n_eval = images_eval.shape[0]

# Visual features for submission.
# Uses the same settings as for the training pins. The keyword is
# `filter_sigma`, as in the training call (it was misspelled `filter_ssigma`).
pins_dict_eval = pins_generation(
    data_type="test",
    ratio_pins_per_image=ratio,
    patch_size=patch,
    filter_sigma=filter_s,
    resized=resized,
)
pins_eval = pins_dict_eval["pins"]
pin_to_im_eval = pins_dict_eval["pin_to_im"]

# Data processing
X_eval = process_images(n_eval, visual_features, pins_eval, pin_to_im_eval)
n_eval, n_var = X_eval.shape

# Normalization: apply the same transform as for the training features
# (element-wise square root, then L2 normalization of each row). Without it
# the kernel would compare normalized training rows with raw test rows.
X_eval = np.sqrt(X_eval)
eval_norms = np.linalg.norm(X_eval, axis=1)[:, None]
eval_norms[eval_norms == 0] = 1.0  # keep all-zero rows at zero instead of NaN
X_eval = X_eval / eval_norms

# Score every test row with each per-class classifier. `sigma=sig` is passed
# explicitly so the kernel matches the one used to train `alpha`.
Y_eval = np.zeros((n_eval, n_classes))
for dig in range(n_classes):
    Y_eval[:, dig] = pred(X_sample, X_eval, alpha[dig, :],
                          kernel_type=kernel_type, sigma=sig)

# Predicted label = index of the column with the largest score.
Y_labels_eval = np.argmax(Y_eval, axis=1)


# Submission
labels_to_csv(Y_labels_eval, kernel=kernel_type, algo=classifier_type, user='PdT')