In [None]:
import argparse
import numpy as np
from get_data import main
from cnn.training import get_trained_model
from knn.knn import KNN

In [None]:
# Options forwarded to `main(args)`. Every option has a default, so the
# notebook runs without any external command-line input.
parser = argparse.ArgumentParser()
parser.add_argument('--config', help='Path to the config file', default='./default_config.yml')
parser.add_argument('--log-dir', help='Path to the log directory', default='../log/')
parser.add_argument('--posters', help='Path to the posters', default='../data/posters/')
parser.add_argument('--models-dir', help='Path to the saved models', default='../data/models/')
parser.add_argument('--sets-dir', help='Path to the training and testing sets', default='../data/sets/')
# Help text previously read "databse"; corrected so `--help` output is accurate.
parser.add_argument('--database', help='Path to the database csv', default='../data/poster_data.csv')
parser.add_argument('--csv', help='Path to the clean csv', default='../data/')
parser.add_argument('-s', '--save', help='Save model', action='store_true')
parser.add_argument('-v', '--verbose', help='Verbose', action='store_true')

# Flags enabled for this run: verbose output and model saving.
str_args = ['-v', '-s']
# Parse the explicit list above instead of sys.argv; arguments the parser does
# not recognise are returned separately (and discarded here) rather than
# raising an error.
args, _ = parser.parse_known_args(str_args)

In [None]:
# Unpack the ten values returned by `main(args)`, in the order it returns them.
(
    clean_movies,
    train_posters, train_genres, train_ids,
    test_posters, test_genres, test_ids,
    model_name, save_model, verbose,
) = main(args)

In [None]:
# Load the trained model identified by `model_name`; `get_trained_model`
# returns a pair and only its first element is used in this notebook.
resnet, _ = get_trained_model(model_name)

In [None]:
# Run each poster set through the network, then flatten every sample's output
# into a single row so the result is a 2-D (n_samples, n_features) array.
train_outputs = resnet.predict(train_posters)
test_outputs = resnet.predict(test_posters)

train_features_resnet = train_outputs.reshape((len(train_posters), -1))
test_features_resnet = test_outputs.reshape((len(test_posters), -1))

# XGBoost

In [None]:
import xgboost as xgb
from xgboost import XGBClassifier

max_depth = 2  # also tried 5: no better, 0.336

# Turn every genre row into the index of its non-zero entry.
# Assumes exactly one non-zero entry per row (mono-label targets) — TODO confirm.
train_genres_flat = np.array([np.nonzero(x)[0] for x in train_genres]).squeeze()

# The former `num_classes=7` keyword was dropped: XGBoost's parameter is named
# `num_class`, so the misspelled keyword was never applied, and the
# scikit-learn wrapper derives the number of classes from the labels in `fit`.
boost = XGBClassifier(max_depth=max_depth, objective='multi:softmax')
boost.fit(train_features_resnet, train_genres_flat)

In [None]:
# Index of the non-zero entry of every test genre row, built the same way as
# the training labels given to the booster.
test_label_indices = [np.nonzero(row)[0] for row in test_genres]
test_genres_flat = np.array(test_label_indices).squeeze()

# Class predicted by the boosted model for every test sample.
predicted_genres = boost.predict(test_features_resnet)

In [None]:
# Write the fitted booster to disk.
boost.save_model('first.model')

# Fraction of test samples whose predicted class equals the true class;
# left as the last expression so the notebook displays it.
correct_predictions = test_genres_flat == predicted_genres
np.mean(correct_predictions)

# SVM

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA


# Fit the scaler on the training features only, then apply the same
# transformation to both the training and the test features.
SC = StandardScaler().fit(train_features_resnet)
X, X2 = SC.transform(train_features_resnet), SC.transform(test_features_resnet)

# Fit a 400-component PCA on the scaled training features and project both sets.
pca = PCA(n_components=400).fit(X)
X_train, X_test = pca.transform(X), pca.transform(X2)

In [None]:
from sklearn.svm import SVC
from sklearn.multiclass import OneVsRestClassifier  # TODO: look at the other available multiclass strategies
# Quickly tried OneVsOne: dimension problem, it expects a different input format

# One SVC per genre column, wrapped in a one-vs-rest classifier.
base_svc = SVC(gamma='auto')
svc = OneVsRestClassifier(base_svc)
svc.fit(X_train, train_genres)

In [None]:
from utils.accuracy import mono_label

# Predict genres for the PCA-projected test features and score them with the
# project's `mono_label` metric.
predicted_genres = svc.predict(X_test)
mono_label(test_genres, predicted_genres) #0.14 with both 100 and 400 retained components...

# KNN 

In [None]:
# Number of neighbours; `k` is reused further down for the histogram and for
# the confusion-matrix title.
k = 7

# Fit the project's KNN on the raw (unscaled, un-projected) ResNet features.
knn = KNN(k)
knn.fit(train_features_resnet, train_genres)

In [None]:
from utils.accuracy import mono_label

# NOTE(review): `predict` is handed the training labels again — presumably to
# vote among the neighbours' genres; confirm against knn.knn.KNN.
predicted_genres = knn.predict(test_features_resnet, train_genres)
# Score the KNN predictions with the project's `mono_label` metric.
mono_label(test_genres, predicted_genres)

# Analyse des résultats

In [None]:
import yaml

# Read the YAML configuration; the context manager guarantees the file handle
# is closed once parsing is done (the bare `open(...)` call left it open).
with open(args.config) as config_file:
    config = yaml.safe_load(config_file)

# Invert the `genres` mapping (value -> key). Iterating over items avoids
# reusing the notebook-level name `k` as a loop variable.
class_names = {value: key for key, value in config['genres'].items()}

In [None]:
from utils.display.results_display import plot_neighbors
%matplotlib inline

starting_index = np.random.randint(0, 700)
num_images = 5
plot_neighbors(test_posters, test_genres, class_names, predicted_genres, starting_index, num_images, train_posters, train_genres, knn.neighbors)

In [None]:
from utils.display.results_display import histogram

# Plot the project's histogram of true vs. predicted genres; `k` is the
# neighbour count set in the KNN cell and `config['genres']` the genre mapping
# read from the YAML config.
histogram(test_genres, predicted_genres, k, config['genres'])

Affichage de la matrice de confusion

In [None]:
from utils.display.results_display import ConfusionMatrix_display

# Title records the model pipeline and the neighbour count used for the predictions.
title = 'ResNet+kNN, k={}'.format(k)
ConfusionMatrix_display(
    test_genres,
    predicted_genres,
    config['genres'],
    title,
)