In [1]:
import numpy as np
import sklearn as 

from tools.data_loading import load_images, load_labels, dummy_code
from tools.feature_learning import pins_generation
from tools.kernels import kernel_matrix
from tools.optimization import find_f
from tools.prediction import pred
from tools.process_images import process_images
from tools.quantization import kmeans
from tools.submission import labels_to_csv
from tools.visualization import imshow, dump_as_png

# Data loading

In [2]:
X_train = load_images(type="train")
n_train = X_train.shape[0]
Y_labels_train = load_labels()
Y_train = dummy_code(Y_labels_train)
n_classes = Y_train.shape[1]

# Data separation

In [3]:
indices = np.random.permutation(X_train.shape[0])
training_idx, test_idx = indices[:int(0.9*n_train)], indices[int(0.9*n_train):]

# Visual features

In [4]:
pins_dict_train = pins_generation(training_idx=training_idx)
pins_train = pins_dict_train["pins"]
train_pins = pins_dict_train["train_pins"]
pin_to_im_train = pins_dict_train["pin_to_im"]
pins_mat = np.vstack(train_pins)

In [8]:
from sklearn.cluster import KMeans
km = KMeans(n_clusters=280, n_jobs=-1,verbose=1)
km.fit(pins_mat)
visual_features = km.cluster_centers_
# visual_features = kmeans(pins_mat, 1000)

Initialization complete
Initialization complete
Initialization complete
Initialization complete
start iteration
done sorting
start iteration
done sorting
start iteration
done sorting
start iteration
done sorting
end inner loop
end inner loop
end inner loop
end inner loop
Iteration 0, inertia 1172902.64522
start iteration
done sorting
Iteration 0, inertia 1175888.13903
start iteration
done sorting
Iteration 0, inertia 1173588.54208
start iteration
done sorting
Iteration 0, inertia 1174934.61725
start iteration
done sorting
end inner loop
end inner loop
end inner loop
end inner loop
Iteration 1, inertia 1120378.8081
start iteration
done sorting
Iteration 1, inertia 1118671.62877
Iteration 1, inertia 1119334.31569
start iteration
start iteration
done sorting
done sorting
Iteration 1, inertia 1120715.50255
start iteration
done sorting
end inner loop
end inner loop
end inner loop
end inner loop
Iteration 2, inertia 1103472.61216
start iteration
done sorting
Iteration 2, inertia 1102094.1028

In [9]:
visual_features.shape

(280, 16)

# Data processing

In [10]:
X_train = process_images(n_train, visual_features, pins_train, pin_to_im_train)
n_train, n_var = X_train.shape

# Data separation (bis)

In [11]:
X_sample = X_train[training_idx, :]
n_sample = X_sample.shape[0]
X_test = X_train[test_idx, :]
n_test = X_test.shape[0]
Y_sample = Y_train[training_idx, :]


# Training

## Kernel choice

In [12]:
kernel_type = "hellinger"
K_sample = kernel_matrix(X_sample, kernel_type=kernel_type)

## Classifier choice

In [30]:
classifier_type = "linear regression"
alpha = np.zeros((n_classes, n_sample))
for dig in range(n_classes):
    alpha[dig, :] = find_f(K_sample, Y_sample[:, dig],
                           prob_type=classifier_type, lamb=100000000.0)

# Evaluation

In [31]:
X_sample.shape

(4500, 280)

In [32]:
Y_pred_test = np.zeros((X_sample[:1000].shape[0], n_classes))
for dig in range(n_classes):
    Y_pred_test[:, dig] = pred(X_sample, X_sample[:1000], alpha[dig, :],
                          kernel_type=kernel_type)


Y_labels_pred_test = np.argmax(Y_pred_test, axis=1)
prec = np.mean(Y_labels_pred_test == Y_labels_train[training_idx][:1000])
print("The precision on the train set is of {}".format(prec))

The precision on the train set is of 0.346


In [33]:
Y_pred = np.zeros((X_test.shape[0], n_classes))
for dig in range(n_classes):
    Y_pred[:, dig] = pred(X_sample, X_test, alpha[dig, :],
                          kernel_type=kernel_type)


Y_labels_pred = np.argmax(Y_pred, axis=1)
prec = np.mean(Y_labels_pred == Y_labels_train[test_idx])
print("The precision on the test set is of {}".format(prec))

The precision on the test set is of 0.226


# Prediction

In [34]:
X_eval = load_images(type="test")
n_eval = X_eval.shape[0]

# Visual features for submission
pins_dict_eval = pins_generation(data_type="test")
pins_eval = pins_dict_eval["pins"]
pin_to_im_eval = pins_dict_eval["pin_to_im"]


# Data processing
X_eval = process_images(n_eval, visual_features, pins_eval, pin_to_im_eval)
n_eval, n_var = X_eval.shape

Y_eval = np.zeros((n_eval, n_classes))
for dig in range(n_classes):
    Y_eval[:, dig] = pred(X_sample, X_eval, alpha[dig, :],
                          kernel_type=kernel_type)


Y_labels_eval = np.argmax(Y_eval, axis=1)


# Submission
labels_to_csv(Y_labels_eval, kernel=kernel_type, algo=classifier_type)