In [1]:
import numpy as np

from tools.data_loading import load_images, load_labels, dummy_code
from tools.feature_learning import pins_generation
from tools.kernels import kernel_matrix
from tools.optimization import find_f
from tools.prediction import pred
from tools.process_images import process_images
from tools.quantization import kmeans, em_full
from tools.submission import labels_to_csv
from tools.visualization import imshow, dump_as_png

# Data loading

In [2]:
# Read the training images and their labels through the project loaders.
X_train = load_images(type="train")
Y_labels_train = load_labels()
n_train = X_train.shape[0]

# Dummy-coded targets: the number of columns is taken as the number of
# classes (presumably one indicator column per class — TODO confirm in
# tools.data_loading.dummy_code).
Y_train = dummy_code(Y_labels_train)
n_classes = Y_train.shape[1]

# Data separation

In [3]:
# Hold out part of the labelled images to estimate generalisation.
# The shuffle uses its own seeded generator so that the split (and every
# number derived from it) is identical after "Restart & Run All", without
# touching NumPy's global random state used by later cells.
SPLIT_SEED = 0          # change to draw a different, still reproducible, split
TRAIN_FRACTION = 0.9    # share of the labelled images used for fitting

n_fit = int(TRAIN_FRACTION * n_train)
indices = np.random.RandomState(SPLIT_SEED).permutation(n_train)
training_idx, test_idx = indices[:n_fit], indices[n_fit:]

# Visual features

In [4]:
# Generate the "pins" for the training split ("pins" is the project's own
# term — presumably local descriptors sampled from each image; TODO confirm
# in tools.feature_learning).
pins_dict_train = pins_generation(ratio_pins_per_image=33, training_idx=training_idx)
pins_train, train_pins, pin_to_im_train = (
    pins_dict_train[key] for key in ("pins", "train_pins", "pin_to_im")
)

# Stack the training pins into one matrix and quantise them with k-means.
# The second argument (100) is presumably the number of clusters, i.e. the
# size of the visual vocabulary. Only the first returned value is kept.
# Alternative kept from the original notebook (full-covariance EM instead
# of k-means):
#pi, mu, sigma, labels = em_full(pins_mat, 100)
pins_mat = np.vstack(train_pins)
visual_features, _ = kmeans(pins_mat, 100)

Wrongly clusterized pins: 119610
Wrongly clusterized pins: 43998
Wrongly clusterized pins: 21652
Wrongly clusterized pins: 14790
Wrongly clusterized pins: 11306
Wrongly clusterized pins: 9077
Wrongly clusterized pins: 7598
Wrongly clusterized pins: 6377
Wrongly clusterized pins: 5491
Wrongly clusterized pins: 4867
Wrongly clusterized pins: 4397
Wrongly clusterized pins: 4122
Wrongly clusterized pins: 3740
Wrongly clusterized pins: 3523
Wrongly clusterized pins: 3229
Wrongly clusterized pins: 3059
Wrongly clusterized pins: 2929
Wrongly clusterized pins: 2730
Wrongly clusterized pins: 2563
Wrongly clusterized pins: 2450
Wrongly clusterized pins: 2247
Wrongly clusterized pins: 2067
Wrongly clusterized pins: 2003
Wrongly clusterized pins: 1875
Wrongly clusterized pins: 1748
Wrongly clusterized pins: 1648
Wrongly clusterized pins: 1598
Wrongly clusterized pins: 1537
Wrongly clusterized pins: 1499
Wrongly clusterized pins: 1440
Wrongly clusterized pins: 1397
Wrongly clusterized pins: 1340
Wr

# Data processing

In [5]:
# Encode the n_train training images against the learned visual features.
# gaussian_mixture=False goes with the k-means (not EM) codebook used in
# this notebook — exact effect to be confirmed in tools.process_images.
X_proc = process_images(
    n_train,
    visual_features,
    pins_train,
    pin_to_im_train,
    gaussian_mixture=False,
)


# Color

In [6]:
# Size of the global colour palette and number of colours fitted per image.
color_number = 20
color_perimage = 5

# Draw 1000 image indices (np.random.choice samples with replacement by
# default) and flatten the chosen images into one long list of 3-channel
# pixels; the reshape fixes 32*32 pixels per image.
sampled_rows = np.random.choice(X_train.shape[0], 1000)
X_color = X_train[sampled_rows].reshape((1000*32*32, 3))

# Fit the palette with EM; `mu` is used later as the palette colours
# (presumably the component means — TODO confirm in tools.quantization).
pi, mu, sigma, q = em_full(X_color, color_number)

Wrongly clusterized pins: 970812
Wrongly clusterized pins: 239573
Wrongly clusterized pins: 166839
Wrongly clusterized pins: 127994
Wrongly clusterized pins: 92771
Wrongly clusterized pins: 64650
Wrongly clusterized pins: 45265
Wrongly clusterized pins: 33502
Wrongly clusterized pins: 27799
after 2 iterations


In [9]:
# Bag-of-colours descriptor: one row per training image, one column per
# palette colour (`mu`).
X_boC = np.zeros((X_train.shape[0], color_number))
for row, img in enumerate(X_train):
    # Fit `color_perimage` colours on the pixels of this single image and
    # keep only the second returned value.
    _, img_colors, _, _ = em_full(img.reshape((32*32, 3)), color_perimage, verbose=False)

    # dist[p, c]: Euclidean distance between palette colour p and image colour c.
    dist = np.linalg.norm(mu[:, None, :] - img_colors, axis=2)
    nearest = np.argmin(dist, axis=0)

    # Score the nearest palette colour of every image colour with
    # 1 / (1 + distance).
    # NOTE(review): when two image colours share the same nearest palette
    # entry, NumPy keeps only the last value written for that index.
    # NOTE(review): the recorded run shows numerical warnings coming from
    # em_full here; X_boC may contain non-finite values — worth checking with
    # np.isfinite(X_boC).all() before using these features.
    X_boC[row, nearest] = 1 / (np.min(dist, axis=0) + 1)

  c *= 1. / np.float64(fact)
  c *= 1. / np.float64(fact)
  r = _umath_linalg.det(a, signature=signature)
  det = np.sqrt(np.abs(np.linalg.det(sig)))


# Feature merging and train/test separation

In [15]:
# Known issue (original author's remark): merging the two feature sets
# "doesn't work" yet.
# NOTE(review): one thing to rule out is non-finite values in X_boC (the
# colour cell emitted numerical warnings) — unconfirmed.

# Put the visual-feature columns and the colour columns side by side, then
# select the rows of the fitting split and of the held-out split.
X_merged = np.concatenate((X_proc, X_boC), axis=1)
X_sample, X_test = X_merged[training_idx], X_merged[test_idx]
n_sample, n_test = X_sample.shape[0], X_test.shape[0]
Y_sample = Y_train[training_idx]

# Optional rescalings tried by the author, currently disabled.
# Element-wise square root:
#X_sample = np.sqrt(X_sample)
#X_test = np.sqrt(X_test)

# Row-wise L2 normalisation:
#X_sample = X_sample / np.linalg.norm(X_sample, axis=1)[:, None]
#X_test = X_test / np.linalg.norm(X_test, axis=1)[:, None]


# Training

## Kernel choice

In [19]:
# Kernel (Gram) matrix of the fitting split. `sigma` is passed through
# for every kernel type; presumably it is ignored by the linear kernel —
# TODO confirm in tools.kernels.
kernel_type = "linear"
K_sample = kernel_matrix(X_sample, sigma=1, kernel_type=kernel_type)

## Classifier choice

In [20]:
# One-vs-rest fit: one coefficient vector per class, learned against that
# class's column of the dummy-coded targets. Row `dig` of `alpha` holds the
# n_sample coefficients returned by find_f for class `dig`.
classifier_type = "linear regression"
alpha = np.zeros((n_classes, n_sample))
for dig in range(n_classes):
    class_targets = Y_sample[:, dig]
    alpha[dig] = find_f(K_sample, class_targets,
                        lamb=1., prob_type=classifier_type)

# Evaluation

In [21]:
# Score every held-out image against each per-class model; column `dig`
# of Y_pred holds the output of the class-`dig` model.
Y_pred = np.zeros((X_test.shape[0], n_classes))
for dig in range(n_classes):
    class_alpha = alpha[dig, :]
    Y_pred[:, dig] = pred(X_sample, X_test, class_alpha,
                          sigma=1, kernel_type=kernel_type)

# Predicted label = index of the highest-scoring class.
Y_labels_pred = Y_pred.argmax(axis=1)

# Fraction of held-out images whose predicted label equals the true one
# (i.e. accuracy, although the message below calls it "precision").
is_correct = Y_labels_pred == Y_labels_train[test_idx]
prec = np.mean(is_correct)
print("The precision on the test set is of {}".format(prec))

The precision on the test set is of 0.096


# Prediction

In [None]:
# Build the submission: the evaluation images must go through exactly the
# same feature pipeline as the training images, because `alpha` was fitted
# on X_sample = [visual features | colour features].

# Raw evaluation images, kept under their own name so the colour features
# can still be computed from them after the visual features are built.
X_eval_raw = load_images(type="test")
n_eval = X_eval_raw.shape[0]

# Visual features for submission
pins_dict_eval = pins_generation(data_type="test")
pins_eval = pins_dict_eval["pins"]
pin_to_im_eval = pins_dict_eval["pin_to_im"]

# Data processing — same explicit setting as for the training images.
X_proc_eval = process_images(n_eval, visual_features, pins_eval, pin_to_im_eval,
                             gaussian_mixture=False)

# Colour features, computed exactly as for X_boC on the training images:
# fit `color_perimage` colours per image and score the nearest palette
# colour (`mu`) of each with 1 / (1 + distance).
X_boC_eval = np.zeros((n_eval, color_number))
for i, image in enumerate(X_eval_raw):
    _, colors, _, _ = em_full(image.reshape((32*32, 3)), color_perimage, verbose=False)
    color_dist = np.linalg.norm(mu[:, None, :] - colors, axis=2)
    X_boC_eval[i, np.argmin(color_dist, axis=0)] = 1 / (np.min(color_dist, axis=0) + 1)

# Same column layout as X_merged / X_sample.
X_eval = np.hstack((X_proc_eval, X_boC_eval))
n_eval, n_var = X_eval.shape
assert n_var == X_sample.shape[1], (
    "evaluation features have {} columns but the model was fitted on {}"
    .format(n_var, X_sample.shape[1])
)

# Per-class scores, with the same kernel parameters as in the evaluation cell.
Y_eval = np.zeros((n_eval, n_classes))
for dig in range(n_classes):
    Y_eval[:, dig] = pred(X_sample, X_eval, alpha[dig, :],
                          kernel_type=kernel_type, sigma=1)

# Predicted label = index of the highest-scoring class.
Y_labels_eval = np.argmax(Y_eval, axis=1)


# Submission
labels_to_csv(Y_labels_eval, kernel=kernel_type, algo=classifier_type, user='PdT')