In [None]:
import argparse
import numpy as np
from get_data import main
from knn.knn import KNN
from utils.display.results_display import plot_neighbors, ConfusionMatrix_display
from matplotlib import cm 

In [None]:
# Command-line interface whose parsed namespace is handed to get_data.main().
# Every path option defaults to a location relative to the working directory.
parser = argparse.ArgumentParser()
parser.add_argument('--config', help='Path to the config file', default='./default_config.yml')
parser.add_argument('--log-dir', help='Path to the log directory', default='../log/')
parser.add_argument('--posters', help='Path to the posters', default='../data/posters/')
parser.add_argument('--models-dir', help='Path to the saved models', default='../data/models/')
parser.add_argument('--sets-dir', help='Path to the training and testing sets', default='../data/sets/')
parser.add_argument('--database', help='Path to the database csv', default='../data/poster_data.csv')
parser.add_argument('--csv', help='Path to the clean csv', default='../data/')
parser.add_argument('-s', '--save', help='Save model', action='store_true')
parser.add_argument('-v', '--verbose', help='Verbose', action='store_true')

# There is no real command line inside a notebook, so an explicit argument
# list is parsed instead: verbose output (-v) and model saving (-s).
str_args = '-v -s'.split()
# parse_known_args returns (namespace, unrecognised arguments); the second
# element is discarded.
args, _ = parser.parse_known_args(str_args)

In [None]:
# Unpack, in order, the ten values returned by get_data.main(args).
(
    clean_movies,
    train_posters, train_genres, train_ids,
    test_posters, test_genres, test_ids,
    model_name, save_model, verbose,
) = main(args)

# Histogramme de couleurs

In [None]:
# Training sets: one saved array per RGB channel (r, g, b per the file names)
train_features_rgb_r = np.load('../data/features/histo_rgb_r_train.npy')
train_features_rgb_g = np.load('../data/features/histo_rgb_g_train.npy')
train_features_rgb_b = np.load('../data/features/histo_rgb_b_train.npy')
# Report the shape of each channel array, in r, g, b order
for label, channel_features in (
    ('shape train rgb r', train_features_rgb_r),
    ('shape train rgb g', train_features_rgb_g),
    ('shape train rgb b', train_features_rgb_b),
):
    print(label, channel_features.shape)

# Concatenate the three channel arrays along axis 1
train_features_rgb = np.concatenate(
    [train_features_rgb_r, train_features_rgb_g, train_features_rgb_b],
    axis=1,
)
print('Shape train rgb', train_features_rgb.shape)

# Testing sets: same layout as the training files
test_features_rgb_r = np.load('../data/features/histo_rgb_r_test.npy')
test_features_rgb_g = np.load('../data/features/histo_rgb_g_test.npy')
test_features_rgb_b = np.load('../data/features/histo_rgb_b_test.npy')
for label, channel_features in (
    ('shape test rgb r', test_features_rgb_r),
    ('shape test rgb g', test_features_rgb_g),
    ('shape test rgb b', test_features_rgb_b),
):
    print(label, channel_features.shape)

test_features_rgb = np.concatenate(
    [test_features_rgb_r, test_features_rgb_g, test_features_rgb_b],
    axis=1,
)
print('Shape test rgb', test_features_rgb.shape)

In [None]:
# Training sets: one saved array per LAB channel (l, a, b per the file names)
train_features_lab_l = np.load('../data/features/histo_lab_l_train.npy')
train_features_lab_a = np.load('../data/features/histo_lab_a_train.npy')
train_features_lab_b = np.load('../data/features/histo_lab_b_train.npy')
print('shape train lab l', train_features_lab_l.shape)
print('shape train lab a', train_features_lab_a.shape)
print('shape train lab b', train_features_lab_b.shape)

# Concatenate the three channel arrays along axis 1 and report the combined
# shape: this is the array the classifier is fitted on.
train_features_lab = np.concatenate((train_features_lab_l, train_features_lab_a, train_features_lab_b), axis=1)
print('Shape train lab', train_features_lab.shape)

# Testing sets: same layout as the training files
test_features_lab_l = np.load('../data/features/histo_lab_l_test.npy')
test_features_lab_a = np.load('../data/features/histo_lab_a_test.npy')
test_features_lab_b = np.load('../data/features/histo_lab_b_test.npy')
print('shape test lab l', test_features_lab_l.shape)
print('shape test lab a', test_features_lab_a.shape)
print('shape test lab b', test_features_lab_b.shape)

# Combined testing features; their shape is printed so it can be compared
# with the training one before running the classifier.
test_features_lab = np.concatenate((test_features_lab_l, test_features_lab_a, test_features_lab_b), axis=1)
print('Shape test lab', test_features_lab.shape)

# KNN

In [None]:
# Value handed to the KNN constructor; also reused further down to build the
# plot titles ('... kNN, k=') and passed to histogram().
k = 3

# Build the project's KNN object and fit it on the concatenated LAB features
# with the training genres as labels.
knn = KNN(k)
knn.fit(train_features_lab, train_genres)

In [None]:
from utils.accuracy import mono_label
# Make prediction for each poster in the testing set
# NOTE(review): predict() receives train_genres again although fit() was
# already given them — confirm this is what the KNN API expects.
predicted_genres = knn.predict(test_features_lab, train_genres)
# Calculate the accuracy of the predictions compared to the ground truth
# (being the last expression of the cell, the returned value is displayed)
mono_label(test_genres, predicted_genres)

# Display results

In [None]:
import yaml

# Parse the YAML configuration inside a context manager so the file handle
# is closed as soon as loading is done instead of being left open.
with open(args.config) as config_file:
    config = yaml.safe_load(config_file)

# Inverted view of config['genres']: each value of the mapping becomes a key
# pointing back to its original key.
class_names = {value: key for key, value in config['genres'].items()}

Display neighbors for a few members of the testing set

In [None]:
from utils.display.results_display import plot_neighbors
%matplotlib inline

starting_index = np.random.randint(0, 700)
num_images = 5
title = 'Histogramme LAB+kNN, k='+str(k) 
plot_neighbors(test_posters, test_genres, class_names, predicted_genres, starting_index, num_images, train_posters, train_genres, knn.neighbors, title)

Display confusion matrix

In [None]:
# Figure title, built from the current value of k.
title = 'Histogramme LAB+kNN, k='+str(k) 
# Call the project's confusion-matrix display with the ground-truth genres,
# the predicted genres, the genres mapping from the config and the title.
ConfusionMatrix_display(test_genres, predicted_genres, config['genres'], title)

In [None]:
from utils.display.results_display import histogram
# Call the project's histogram display helper with the ground-truth genres,
# the predicted genres, k and the genres mapping from the config.
histogram(test_genres, predicted_genres, k, config['genres'])

# PARTIE SUIVANTE PAS ADAPTÉE

In [None]:
# FOR THE RESNET METHOD:
RESNET = False  # still to be done

# FOR THE HISTOGRAMS:
# Each pair gives, first, whether the histogram of that channel is wanted
# and, second, its number of bins.
# Caution: the histograms with the matching number of bins must already
# have been computed.
RGB_R, RGB_R_bins = False, 256
RGB_G, RGB_G_bins = False, 256
RGB_B, RGB_B_bins = False, 256

LAB_L, LAB_L_bins = True, 16
LAB_A, LAB_A_bins = True, 16
LAB_B, LAB_B_bins = True, 16

In [None]:
# Build one Observations container for the training set and one for the
# testing set, then append the saved histogram of every enabled channel.
# NOTE(review): Observations is not imported in any cell visible here —
# confirm where it comes from before running this cell on a fresh kernel.
obs_train = Observations(1) # 4 for the distance
obs_test = Observations(1)

# Each branch loads the train/test .npy files whose name ends with the bin
# count configured for that channel.
if RGB_R:
    obs_train.add_histo_feature(np.load('../data/features/histo_rgb_r_train_' + str(RGB_R_bins) + '.npy'))
    obs_test.add_histo_feature(np.load('../data/features/histo_rgb_r_test_' + str(RGB_R_bins) + '.npy'))

if RGB_G:
    obs_train.add_histo_feature(np.load('../data/features/histo_rgb_g_train_' + str(RGB_G_bins) + '.npy'))
    obs_test.add_histo_feature(np.load('../data/features/histo_rgb_g_test_' + str(RGB_G_bins) + '.npy'))

if RGB_B:
    obs_train.add_histo_feature(np.load('../data/features/histo_rgb_b_train_' + str(RGB_B_bins) + '.npy'))
    obs_test.add_histo_feature(np.load('../data/features/histo_rgb_b_test_' + str(RGB_B_bins) + '.npy'))

if LAB_L:
    obs_train.add_histo_feature(np.load('../data/features/histo_lab_l_train_' + str(LAB_L_bins) + '.npy'))
    obs_test.add_histo_feature(np.load('../data/features/histo_lab_l_test_' + str(LAB_L_bins) + '.npy'))

if LAB_A:
    obs_train.add_histo_feature(np.load('../data/features/histo_lab_a_train_' + str(LAB_A_bins) + '.npy'))
    obs_test.add_histo_feature(np.load('../data/features/histo_lab_a_test_' + str(LAB_A_bins) + '.npy'))

if LAB_B:
    obs_train.add_histo_feature(np.load('../data/features/histo_lab_b_train_' + str(LAB_B_bins) + '.npy'))
    obs_test.add_histo_feature(np.load('../data/features/histo_lab_b_test_' + str(LAB_B_bins) + '.npy'))

In [None]:
# WARNING: NO LONGER SUITABLE
# NOTE(review): KNN is called here with keyword arguments and its return
# value is used directly as the predictions, whereas the cells above use
# KNN(k) followed by fit()/predict() — this call presumably targets an older
# API; confirm before running.

# Rebinds k and, below, predicted_genres, both already set by earlier cells.
k=7
obs_train.compute_distance()

predicted_genres = KNN(
    dataset=clean_movies,
    Xtr=train_posters,
    tr_features=obs_train.observations,
    Ytr=train_genres,
    training_ids=train_ids,
    Xtest=test_posters,
    test_features=obs_test.observations,
    testing_ids=test_ids,
    ind=-1,
    k=k,
    metric=obs_train.distance,
    print_results=False
)

In [None]:
# Same histogram display call as earlier in the notebook, using whatever
# predicted_genres and k currently hold.
histogram(test_genres, predicted_genres, k, config['genres'])