## Optimizing the `significance_level`, `correlation_threshold`, `use_homography`, `num_features`, and `good_match_percentage` parameters of `RollingPearsonPageDetector`

In [None]:
from itertools import product

from video699.configuration import get_configuration
from video699.video.annotated import (
    AnnotatedSampledVideoScreenDetector,
    evaluate_event_detector,
    get_videos,
)
from video699.page.pearson import RollingPearsonPageDetector
from video699.quadrangle.rtree import RTreeDequeConvexQuadrangleTracker
from video699.event.screen import ScreenEventDetector

import numpy as np


# The 'RollingPearsonPageDetector' entry of the project configuration.
# `accuracy` overwrites values in this object in place (as strings) before it
# builds each detector, so the last evaluated combination stays in effect.
# NOTE(review): presumably RollingPearsonPageDetector reads these values when
# it is constructed -- confirm against the detector's implementation.
CONFIGURATION = get_configuration()['RollingPearsonPageDetector']

In [None]:
def accuracy(significance_level, correlation_threshold, use_homography, num_features, good_match_percentage):
    """Score one parameter combination over every annotated video.

    The five arguments are stored, converted with ``str``, under their own
    names in the module-level ``CONFIGURATION`` mapping before any detector is
    built. The previous values are not restored afterwards.

    For each video returned by ``get_videos()`` a fresh tracker, screen
    detector, page detector and ``ScreenEventDetector`` are created and passed
    to ``evaluate_event_detector``, which yields a (successes, trials) pair.

    Returns:
        The sum of successes divided by the sum of trials across all videos.

    Raises:
        ZeroDivisionError: with plain Python numbers, when the total number
            of trials is zero (for example, when there are no videos).
    """
    overrides = (
        ('significance_level', significance_level),
        ('correlation_threshold', correlation_threshold),
        ('use_homography', use_homography),
        ('num_features', num_features),
        ('good_match_percentage', good_match_percentage),
    )
    for option, value in overrides:
        # Values are written as strings, matching how the mapping is filled.
        CONFIGURATION[option] = str(value)

    def evaluate(video):
        """Return the (successes, trials) pair for a single video."""
        tracker = RTreeDequeConvexQuadrangleTracker(2)
        screens = AnnotatedSampledVideoScreenDetector()
        pages = RollingPearsonPageDetector(video.documents.values())
        detector = ScreenEventDetector(video, tracker, screens, pages)
        return evaluate_event_detector(video, detector)

    outcomes = [evaluate(video) for video in get_videos().values()]
    successes_total = sum(successes for successes, _ in outcomes)
    trials_total = sum(trials for _, trials in outcomes)
    return 1.0 * successes_total / trials_total

In [None]:
%%time
# Candidate values for each tuned parameter.
significance_levels = [0.01, 0.05, 0.1]
correlation_thresholds = np.linspace(0.0, 1.0, num=10)
uses_homography = [True, False]
nums_features = [5, 10, 20, 40, 80, 160, 320, 640, 1280]
good_match_percentages = np.linspace(0.0, 1.0, num=10)

# Enumerate the grid. `product` varies its last argument fastest, which
# yields the combinations in the same order as equivalent nested loops.
parameters = []
for significance_level, correlation_threshold, use_homography in product(
    significance_levels, correlation_thresholds, uses_homography
):
    if use_homography:
        homography_grid = product(nums_features, good_match_percentages)
    else:
        # Without homography only the first value of each homography-specific
        # list is used, so this branch contributes a single combination.
        homography_grid = [(nums_features[0], good_match_percentages[0])]
    for num_features, good_match_percentage in homography_grid:
        parameter = (
            significance_level,
            correlation_threshold,
            use_homography,
            num_features,
            good_match_percentage,
        )
        parameters.append(parameter)

# Evaluate every combination; this is the expensive step.
accuracies = [accuracy(*candidate) for candidate in parameters]

# `max` over (accuracy, parameters) pairs: combinations with equal accuracy
# are ordered by comparing their parameter tuples element by element.
best_accuracy, best_parameters = max(zip(accuracies, parameters))
(
    best_significance_level,
    best_correlation_threshold,
    best_use_homography,
    best_num_features,
    best_good_match_percentage,
) = best_parameters

report_lines = [
    ('Optimal parameters (accuracy {}):', best_accuracy),
    ('- significance_level:    {}', best_significance_level),
    ('- correlation_threshold: {}', best_correlation_threshold),
    ('- use_homography:        {}', best_use_homography),
]
if best_use_homography:
    # The homography-specific values are only reported when homography won.
    report_lines.append(('- num_features:          {}', best_num_features))
    report_lines.append(('- good_match_percentage: {}', best_good_match_percentage))
for template, shown in report_lines:
    print(template.format(shown))