## Optimizing the `max_distance` and `hash_function` parameters of `ImageHashPageDetector`

In [1]:
from itertools import product

from video699.configuration import get_configuration
from video699.video.annotated import (
    AnnotatedSampledVideoScreenDetector,
    evaluate_event_detector,
    get_videos,
)
from video699.page.imagehash import ImageHashPageDetector
from video699.quadrangle.rtree import RTreeDequeConvexQuadrangleTracker
from video699.event.screen import ScreenEventDetector


# The 'ImageHashPageDetector' section of the object returned by get_configuration().
# NOTE: accuracy() overwrites the 'max_distance' and 'hash_function' entries of this
# section on every call and does not restore them afterwards.
# Presumably ImageHashPageDetector reads its parameters from this same section at
# construction time -- TODO confirm against video699.page.imagehash.
CONFIGURATION = get_configuration()['ImageHashPageDetector']

In [2]:
def accuracy(max_distance, hash_function):
    """Evaluate one (max_distance, hash_function) setting on all annotated videos.

    The two parameters are written into the module-level ``CONFIGURATION``
    section (``max_distance`` is stored as a string) and are left there after
    the call returns. For every video returned by ``get_videos()`` a fresh
    ``ScreenEventDetector`` is assembled and scored with
    ``evaluate_event_detector``; the per-video success and trial counts are
    summed over all videos.

    Parameters
    ----------
    max_distance
        Value stored (after ``str()``) under ``CONFIGURATION['max_distance']``.
    hash_function
        Value stored unchanged under ``CONFIGURATION['hash_function']``.

    Returns
    -------
    float
        Total number of successes divided by the total number of trials.

    Raises
    ------
    ZeroDivisionError
        If the total number of trials over all videos is zero.
    """
    CONFIGURATION['hash_function'] = hash_function
    CONFIGURATION['max_distance'] = str(max_distance)

    successes = 0
    trials = 0
    for video in get_videos().values():
        # The detector's components are built anew for every video, in the
        # order: quadrangle tracker, screen detector, page detector.
        detector = ScreenEventDetector(
            video,
            RTreeDequeConvexQuadrangleTracker(2),
            AnnotatedSampledVideoScreenDetector(),
            ImageHashPageDetector(video.documents.values()),
        )
        video_successes, video_trials = evaluate_event_detector(video, detector)
        successes += video_successes
        trials += video_trials

    # The leading 1.0 forces floating-point division.
    return 1.0 * successes / trials

In [3]:
%%time
max_distances = range(0, 64 + 1, 1)
hash_functions = ['average_hash', 'phash', 'dhash', 'whash']
parameters = list(product(max_distances, hash_functions))
accuracies = [accuracy(*parameter) for parameter in parameters]

best_accuracy, (best_max_distance, best_hash_function) = max(zip(accuracies, parameters))
print('Optimal parameters (accuracy {}):'.format(best_accuracy))
print('- max_distance:  {}'.format(best_max_distance))
print('- hash_function: {}'.format(best_hash_function))

Optimal parameters (accuracy 0.37163814180929094):
- max_distance:  22
- hash_function: phash
CPU times: user 1d 18h 46min 51s, sys: 9h 12min 36s, total: 2d 3h 59min 28s
Wall time: 2d 1h 32min 36s
