## Optimizing `num_features`, `min_feature_percentage`, `num_nearest_features`, and `min_page_vote_percentage` parameters of `LocalFeatureKNNPageDetector`

In [1]:
from itertools import product

from video699.configuration import get_configuration
from video699.video.annotated import (
    AnnotatedSampledVideoScreenDetector,
    evaluate_event_detector,
    get_videos,
)
from video699.page.feature import LocalFeatureKNNPageDetector
from video699.quadrangle.rtree import RTreeDequeConvexQuadrangleTracker
from video699.event.screen import ScreenEventDetector

import numpy as np


# Configuration section named after the page detector being tuned. The
# `accuracy` function below overwrites entries of this object in place.
# NOTE(review): presumably `LocalFeatureKNNPageDetector` reads its parameters
# from this same section object -- confirm against `video699.page.feature`.
CONFIGURATION = get_configuration()['LocalFeatureKNNPageDetector']

In [2]:
def accuracy(num_features, min_feature_percentage, num_nearest_features, min_page_vote_percentage):
    """Evaluate one parameter combination over all annotated videos.

    The four arguments are written, converted with ``str``, into the
    module-level ``CONFIGURATION`` object under keys equal to the parameter
    names. The previous values are not restored afterwards. A fresh
    ``ScreenEventDetector`` is then built for every video returned by
    ``get_videos()`` and scored with ``evaluate_event_detector``.

    Parameters
    ----------
    num_features
        Value stored under the ``num_features`` key.
    min_feature_percentage
        Value stored under the ``min_feature_percentage`` key.
    num_nearest_features
        Value stored under the ``num_nearest_features`` key.
    min_page_vote_percentage
        Value stored under the ``min_page_vote_percentage`` key.

    Returns
    -------
    float
        The successes summed over all videos divided by the trials summed
        over all videos.

    Notes
    -----
    If the summed number of trials is zero, the final division fails
    (``ZeroDivisionError`` for plain Python integers).
    """
    # Keep the write order identical to the parameter order.
    overrides = (
        ('num_features', num_features),
        ('min_feature_percentage', min_feature_percentage),
        ('num_nearest_features', num_nearest_features),
        ('min_page_vote_percentage', min_page_vote_percentage),
    )
    for key, value in overrides:
        CONFIGURATION[key] = str(value)

    successes = 0
    trials = 0
    for video in get_videos().values():
        # Every video gets its own tracker, screen detector and page detector;
        # the page detector is given only the documents of that video.
        event_detector = ScreenEventDetector(
            video,
            RTreeDequeConvexQuadrangleTracker(2),
            AnnotatedSampledVideoScreenDetector(),
            LocalFeatureKNNPageDetector(video.documents.values()),
        )
        video_successes, video_trials = evaluate_event_detector(video, event_detector)
        successes += video_successes
        trials += video_trials

    # The 1.0 factor forces floating-point division.
    return 1.0 * successes / trials

In [None]:
%%time
# Exhaustive grid search over the four detector parameters:
# 9 * 50 * 4 * 50 = 90,000 combinations, each scored by one `accuracy` call.
nums_features = [5, 10, 20, 40, 80, 160, 320, 640, 1280]
nums_nearest_features = [5, 10, 20, 40]
min_feature_percentages = np.linspace(0.0, 1.0, num=50)
min_page_vote_percentages = np.linspace(0.0, 1.0, num=50)

# The order of the axes matches the positional parameters of `accuracy`.
parameters = list(
    product(
        nums_features,
        min_feature_percentages,
        nums_nearest_features,
        min_page_vote_percentages,
    )
)
accuracies = [accuracy(*parameter) for parameter in parameters]

# Pairs are compared as tuples, so equal accuracies are ordered by the
# parameter tuples themselves and the largest such tuple wins.
best_accuracy, best_parameters = max(zip(accuracies, parameters))
(
    best_num_features,
    best_min_feature_percentage,
    best_num_nearest_features,
    best_min_page_vote_percentage,
) = best_parameters

report_lines = [
    'Optimal parameters (accuracy {}):'.format(best_accuracy),
    '- num_features:             {}'.format(best_num_features),
    '- min_feature_percentage:   {}'.format(best_min_feature_percentage),
    '- num_nearest_features:     {}'.format(best_num_nearest_features),
    '- min_page_vote_percentage: {}'.format(best_min_page_vote_percentage),
]
print('\n'.join(report_lines))