In [1]:
import argparse
from get_data import main
from cnn.training import get_trained_model

In [2]:
# Build the options object that is handed to `main(args)` further down.
# Every path option has a default, so only the flags below need to be set.
parser = argparse.ArgumentParser()
parser.add_argument('--config', help='Path to the config file', default='./default_config.yml')
parser.add_argument('--log-dir', help='Path to the log directory', default='../log/')
parser.add_argument('--posters', help='Path to the posters', default='../data/posters/')
parser.add_argument('--models-dir', help='Path to the saved models', default='../data/models/')
parser.add_argument('--sets-dir', help='Path to the training and testing sets', default='../data/sets/')
parser.add_argument('--database', help='Path to the database csv', default='../data/poster_data.csv')
parser.add_argument('--csv', help='Path to the clean csv', default='../data/')
parser.add_argument('-s', '--save', help='Save model', action='store_true')
parser.add_argument('-v', '--verbose', help='Verbose', action='store_true')

# A notebook has no real command line, so a hard-coded argument string is
# parsed instead. parse_known_args returns (namespace, leftover arguments);
# the leftovers are discarded.
str_args = '-v -s --config ./transfer_learning.yml'.split()
args, _ = parser.parse_known_args(str_args)

In [3]:
# Run the project's `main` with the parsed options and unpack its ten results:
# the movie table, the train/test posters, genres and ids, and the run settings.
(
    clean_movies,
    train_posters, train_genres, train_ids,
    test_posters, test_genres, test_ids,
    model_name, save_model, verbose,
) = main(args)

# Index the movie table by 'allocine_id' (in place) so that later cells can
# look rows up with clean_movies.loc[<id>, ...].
clean_movies.set_index('allocine_id', inplace=True)

  5%|▍         | 541/11935 [00:00<00:02, 5407.36it/s]

Database already cleaned
Posters database downloading


 86%|████████▋ | 10319/11935 [00:07<00:03, 466.72it/s]

Error HTTP Error 404: Not Found with film 9532


100%|██████████| 11935/11935 [00:08<00:00, 1474.32it/s]


Database downloaded
Training and testing sets already made


In [4]:
# Get the model for `model_name` together with its training history.
# NOTE(review): the recorded output ("Model already trained" / "No training
# history") suggests a previously saved model can be reused, in which case the
# history is presumably None; the plotting cell checks for that.
custom_resnet, training_history = get_trained_model(model_name, train_posters, train_genres)

Model already trained
No training history


In [None]:
# Run the model on the test posters. The result is later reduced with
# np.argmax(..., axis=1), i.e. it is consumed as one row of scores per poster.
predicted_genres = custom_resnet.predict(test_posters)

In [None]:
from utils.display.training_display import plot_loss, plot_accuracy
from utils.accuracy import mono_label
%matplotlib inline

print(mono_label(test_genres, predicted_genres))
if training_history is not None:
    plot_loss(training_history)
    plot_accuracy(training_history)

In [None]:
import yaml

# Load the run configuration. The context manager closes the file handle
# deterministically instead of leaving it open until garbage collection.
with open(args.config) as config_file:
    config = yaml.safe_load(config_file)

# config['genres'] maps genre name -> class index; invert it so a predicted
# class index can be turned back into a genre name.
genres_inv = {index: name for name, index in config['genres'].items()}
print(genres_inv)

In [None]:
import numpy as np

# Multi-label variant, kept for reference (disabled):
# predictions = []
# for pred in predicted_genres:
#     first3_index = np.argsort(pred)[-3:]
#     predictions.append([genres_inv[k] for k in first3_index])
# ground_truth = []
# for truth in test_genres:
#     first3_index = np.nonzero(truth)[0]
#     ground_truth.append([genres_inv[k] for k in first3_index])


def _to_genre_names(scores):
    """Map each row of `scores` to the genre name of its highest-valued column."""
    return np.array([genres_inv[idx] for idx in np.argmax(scores, axis=1)])


# Mono-label decoding: one genre name per test sample, for the model output
# and for the reference labels.
predictions = _to_genre_names(predicted_genres)
ground_truth = _to_genre_names(test_genres)

In [None]:
# Spot-check ten random movies drawn from the first 100 test samples.
# The candidate pool is clamped to the size of the test set so that a test set
# with fewer than 100 entries does not produce out-of-range indices.
# Assumes test_ids is aligned with ground_truth / predictions — TODO confirm.
pool_size = min(100, len(test_ids))
r = np.random.permutation(pool_size)[:10]
for i in r:
    print('{}: truth: {}, pred: {}'.format(clean_movies.loc[test_ids[i], 'title'], ground_truth[i], predictions[i]))
# Disabled debugging aid; note that `movies` is not defined in the visible cells.
#     print('Ground truth: {}, real: {}'.format(ground_truth[i], movies.loc[test_ids[i], 'genres']))

In [None]:
# Per-genre breakdown of the mono-label predictions (a confusion matrix printed
# as text): for every true genre, the share of the test set it represents and
# the fraction of its samples assigned to each predicted genre.
genres = genres_inv.values()

# results_per_genre[true][pred] counts samples of genre `true` predicted as `pred`.
results_per_genre = {
    genre_true: {genre_pred: 0 for genre_pred in genres}
    for genre_true in genres
}

# total_per_genre[true] counts the samples whose true genre is `true`.
total_per_genre = {genre: 0 for genre in genres}

n = len(predictions)
for truth, pred in zip(ground_truth, predictions):
    results_per_genre[truth][pred] += 1
    total_per_genre[truth] += 1

undefined = float('nan')  # reported when a ratio has a zero denominator

for genre_true in genres:
    total = total_per_genre[genre_true]
    # Guard against an empty prediction set.
    share = total / n if n else undefined
    print(
        genre_true + " (" + str(share) + "):"
    )
    for genre_pred in genres:
        # A configured genre may have no sample in the test set; report nan
        # instead of raising ZeroDivisionError.
        rate = results_per_genre[genre_true][genre_pred] / total if total else undefined
        print(
            "    " + genre_pred + ": " + str(rate)
        )
    print()

In [None]:
%%javascript
// Replace the notebook output area's scroll check with a function that always
// returns false.
// NOTE(review): presumably this keeps long outputs (such as the poster grid
// plotted in the next cell) fully expanded instead of in a scroll box — confirm.
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

In [None]:
from utils.display.results_display import plot_test_results

# Display settings passed to the project's plotting helper.
# NOTE(review): presumably the index of the first test poster to show and the
# shape of the poster grid — confirm against plot_test_results.
starting_index = 10
num_row = 10
num_cols = 4

plot_test_results(
    test_posters,
    test_genres,
    genres_inv,
    predicted_genres,
    starting_index,
    num_row,
    num_cols,
)