# Evaluation

In [83]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [84]:
import pandas as pd
import numpy as np
from src import preprocess as prep
import random
from src.Video import Video
from src.Segment import Segment
from src import search
import cv2

## Parameters

In [95]:
# Tunable settings for this evaluation run; all three are forwarded to
# prep.load_training_set.
NUM_VIDEOS = 200   # videos numbered 1..NUM_VIDEOS are loaded
GRID_SIZE = 2      # presumably the frame is split into a GRID_SIZE x GRID_SIZE grid - TODO confirm in src.preprocess
BINS = [180] * 2   # presumably histogram bin counts per channel - TODO confirm in src.preprocess

## Load training set

In [96]:
# Request the videos for this run from the preprocessing module.
training_set_generator = prep.load_training_set(
    range(1, NUM_VIDEOS + 1),  # video ids 1..NUM_VIDEOS inclusive
    GRID_SIZE,
    BINS,
)

Loading / processing dataset...
Done processing!

In [97]:
# Materialise the loader output so the videos can be iterated repeatedly.
training_set = list(training_set_generator)

# If training_set_generator is a one-shot iterator (as its name suggests), it
# is empty once consumed, so re-running this cell on its own would silently
# replace training_set with []. Fail loudly instead of letting later cells
# break with an unrelated error.
assert len(training_set) > 0, (
    "training_set is empty - re-run the cell that creates "
    "training_set_generator before this one"
)

In [98]:
# Summarise the loaded training set: video count, segment count, total duration.

# The built-in sum keeps the count a Python int even for an empty training
# set; np.sum([]) is the float 0.0, which the "{:d}" format below rejects.
num_segments = sum(len(video.segments) for video in training_set)

# Per-video durations are summed first, then the per-video totals.
total_duration = np.sum([
    np.sum([segment.duration() for segment in video.segments])
    for video in training_set
])

print("TRAINING SET:")
print("Num. videos:   {:d}".format(len(training_set)))
print("Num. segments: {:d}".format(num_segments))
print("Duration:      {:,.1f} s".format(total_duration))

TRAINING SET:
Num. videos:   200
Num. segments: 30070
Duration:      97,332.7 s


## Select random test set

In [99]:
test_n_segments = 1000
TEST_SEED = 42  # arbitrary value; fixed so every run samples the same test set

# A dedicated generator makes the sample reproducible and independent of
# whatever else has touched the global `random` state in this kernel.
rng = random.Random(TEST_SEED)

# choice() raises IndexError on an empty sequence, so only videos that
# actually contain segments are eligible.
candidate_videos = [video for video in training_set if len(video.segments) > 0]

# NOTE: queries are drawn (with replacement) from the training set itself, so
# every query has an exact counterpart among the searched segments.
test_set = []  # query histograms handed to search.find
labels = []    # the segment each query was taken from (ground truth)

for _ in range(test_n_segments):

    # Pick a random video, then a random segment inside it
    video = rng.choice(candidate_videos)
    segment = rng.choice(video.segments)

    test_set.append(segment.histograms)
    labels.append(segment)

In [100]:
# Report how many query histograms were sampled for the test set.
summary_lines = [
    "TEST SET:",
    "Num. histograms: {:d}".format(len(test_set)),
]
print(*summary_lines, sep="\n")

TEST SET:
Num. histograms: 1000


<br><br>

## Run model on test set

In [101]:
%timeit search.find(test_set[0], training_set, cv2.HISTCMP_CORREL)
%timeit search.find(test_set[0], training_set, cv2.HISTCMP_CHISQR_ALT)
%timeit search.find(test_set[0], training_set, cv2.HISTCMP_INTERSECT)
%timeit search.find(test_set[0], training_set, cv2.HISTCMP_BHATTACHARYYA)
%timeit search.find(test_set[0], training_set, cv2.HISTCMP_CHISQR_ALT)
%timeit search.find(test_set[0], training_set, cv2.HISTCMP_KL_DIV)

126 ms ± 827 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
153 ms ± 781 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
121 ms ± 1.06 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
134 ms ± 500 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
159 ms ± 10.8 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


SyntaxError: invalid syntax (<unknown>, line 1)

In [None]:
# Run one search per test histogram and collect whatever search.find returns;
# the evaluation cell compares these results against `labels`.
SEARCH_METHOD = cv2.HISTCMP_INTERSECT
num_queries = len(test_set)
results = []

# Count from 1 and report after each search, so the progress line shows
# completed searches and finishes at N/N rather than stopping at (N-1)/N.
for i, histogram in enumerate(test_set, start=1):
    results.append(search.find(histogram, training_set, SEARCH_METHOD))
    print('{}/{} searches'.format(i, num_queries), end='\r')

163/1000

## Evaluate performance

In [103]:
# Score the search results against the ground-truth segments.
# A result counts as correct when it compares equal to the segment the query
# histogram was taken from (equality is whatever the result type defines).
# zip() stops at the shorter input, so an interrupted search run is scored
# only on the queries that actually finished.
movie_correct = sum(1 for found, expected in zip(results, labels) if found == expected)
total = min(len(results), len(labels))
movie_wrong = total - movie_correct

# Guard against an empty result list instead of dividing by zero.
fraction = movie_correct / total if total > 0 else 0

print("Correct: {:d}".format(movie_correct))
print("Wrong:   {:d}".format(movie_wrong))
print("Total:   {:d}".format(total))
# Use the guarded fraction; recomputing movie_correct / total here raised
# ZeroDivisionError when there were no results.
print("TPR:     {:.1f}%".format(fraction * 100))

Correct: 960
Wrong:   40
Total:   1000
TPR:     96.0%
