# Evaluation

In [189]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [190]:
import pandas as pd
import numpy as np
from src import preprocess as prep
from src.evaluation import pick_test_segments, generate_test_segments, evaluate_segments
from src.Video import Video
from src.Segment import Segment
from src import search
import random
import cv2

## Parameters

In [252]:
# Dataset / feature-extraction settings; all of them are passed on to
# prep.load_training_set when the training set is loaded.
NUM_VIDEOS = 100        # videos with ids 1..NUM_VIDEOS are loaded
GRID_SIZE = 3           # presumably frames are split into a GRID_SIZE x GRID_SIZE grid -- TODO confirm
BINS = [180, 256]       # histogram bin counts -- presumably one entry per colour channel, TODO confirm
HIST_FRAME_SKIP = 20    # presumably one histogram per HIST_FRAME_SKIP frames -- TODO confirm
REFRESH = False         # passed as force_refresh= to prep.load_training_set


def printParams():
    """Print the current parameter values on a single line.

    It is easy to forget which settings produced a given output, so print
    them every now and then next to the results.
    """
    template = 'Num. Vid {} - Grid {} - Bins {} - Skip {}'
    values = (NUM_VIDEOS, GRID_SIZE, BINS, HIST_FRAME_SKIP)
    print(template.format(*values))

## Load training set / generate test set

In [253]:
# Log the active settings next to the output, then load the training set.
printParams()
training_set = prep.load_training_set(
    range(1, NUM_VIDEOS + 1),  # video ids 1..NUM_VIDEOS (inclusive)
    GRID_SIZE,
    BINS,
    HIST_FRAME_SKIP,
    force_refresh=REFRESH,
)

Num. Vid 100 - Grid 3 - Bins [180, 256] - Skip 20
Loading / processing dataset...
Done processing!

In [273]:
# Build the evaluation data: 100 custom fragments of 20 s each, taken from the
# training set, together with their expected labels.
test_set, labels = generate_test_segments(
    training_set,
    n=100,
    duration=20,
)

no histststs
no histststs
no histststs
no histststs
no histststs


In [271]:
# Print summary statistics for the training and test sets.
#
# The segments are flattened once so every total walks the same list, and the
# built-in sum()/len() are used instead of nested np.sum() calls:
# np.sum([]) returns the float 0.0, so an empty training set -- or a single
# video without segments -- used to turn the integer totals into floats and
# made the '{:d}' format specifier raise a ValueError.
all_segments = [segment for video in training_set for segment in video.segments]

print("TRAINING SET:")
print("Num. videos:   {:d}".format(len(training_set)))
print("Num. segments: {:d}".format(len(all_segments)))
print("Duration:      {:,.1f} s".format(sum(segment.duration() for segment in all_segments)))
print("Num frames:      {:d}".format(sum(segment.num_frames() for segment in all_segments)))
print("Num histograms:      {:d}".format(sum(len(segment.histograms) for segment in all_segments)))

print("TEST SET:")
print("Size: {:d}".format(len(test_set)))

TRAINING SET:
Num. videos:   100
Num. segments: 12694
Duration:      48,647.6 s
Num frames:      1347680
Num histograms:      73522
TEST SET:
Size: 100


## Small manual test

In [264]:
# Spot-check: search for the first few test segments and compare the result
# with the expected label by eye.
pr = False  # passed as both prints= and warnings= to search.findFrame

# Bound the loop by the test-set size so the cell does not raise an IndexError
# when fewer than 10 test segments were generated.
for i in range(min(10, len(test_set))):
    # The first histogram of the segment is used as the query; a random one
    # (random.choice over the segment's histograms) could be picked instead.
    found = search.findFrame(test_set[i][0], training_set, cv2.HISTCMP_CHISQR, 2, prints=pr, warnings=pr)
    print('Found {} - Expected {}'.format(found, labels[i]))

Found ('00090.mp4', 2402, 2964) - Expected ('00090.mp4', 2391, 2870)
Found ('00031.mp4', 6088, 6639) - Expected ('00031.mp4', 6087, 6566)
Found ('00086.mp4', 474, 1113) - Expected ('00086.mp4', 468, 968)
Found ('00050.mp4', 9334, 9907) - Expected ('00050.mp4', 9292, 9792)
Found ('00090.mp4', 4146, 4732) - Expected ('00090.mp4', 4034, 4513)
Found ('00066.mp4', 6797, 7405) - Expected ('00066.mp4', 6739, 7338)
Found ('00034.mp4', 1194, 1737) - Expected ('00034.mp4', 1183, 1662)
Found ('00070.mp4', 12716, 13280) - Expected ('00070.mp4', 12477, 12977)
Found ('00091.mp4', 3626, 4254) - Expected ('00091.mp4', 3563, 4042)
Found ('00078.mp4', 1721, 2994) - Expected ('00078.mp4', 1755, 2255)


## Run model on test set

In [261]:
for method in [cv2.HISTCMP_CORREL, cv2.HISTCMP_CHISQR, cv2.cv2.HISTCMP_INTERSECT,
               cv2.HISTCMP_BHATTACHARYYA, cv2.HISTCMP_CHISQR_ALT, cv2.HISTCMP_KL_DIV]:
    %timeit -n 10 search.findFrame(test_set[0][0], training_set, method, warnings = False)

# for ch in [[0], [1], [0, 1]]:
#     print('{}'.format(ch))
#     %timeit -n 10 search.findFrame(test_set[0], training_set, cv2.HISTCMP_CORREL, channels=ch)


In [274]:
# Run the search for every test segment and collect the results in order,
# so results[i] lines up with labels[i].
results = []

for i, segment_histograms in enumerate(test_set):
    # Progress line rewritten in place: '\r' returns to the start of the line
    # and end='' suppresses the newline. end= and flush= are arguments of
    # print(); they were previously passed to str.format(), which silently
    # ignored them, and the histogram count had no placeholder.
    print('\rSearching segment {}/{} - Histograms {}'.format(i + 1, len(test_set), len(segment_histograms)),
          end='', flush=True)

    # Only the first histogram of the segment is used as the query.
    results.append(search.findFrame(segment_histograms[0], training_set, cv2.HISTCMP_CHISQR_ALT, 2, warnings=False))

# Terminate the in-place progress line so later output starts on a fresh line.
print()

Searching segment 1/100 - Histograms 37
Searching segment 2/100 - Histograms 26
Searching segment 3/100 - Histograms 49
Searching segment 4/100 - Histograms 36
Searching segment 5/100 - Histograms 31
Searching segment 6/100 - Histograms 26
Searching segment 7/100 - Histograms 37
Searching segment 8/100 - Histograms 33
Searching segment 9/100 - Histograms 30
Searching segment 10/100 - Histograms 37
Searching segment 11/100 - Histograms 38
Searching segment 12/100 - Histograms 43
Searching segment 13/100 - Histograms 25
Searching segment 14/100 - Histograms 27
Searching segment 15/100 - Histograms 41
Searching segment 16/100 - Histograms 33
Searching segment 17/100 - Histograms 36
Searching segment 18/100 - Histograms 17
Searching segment 19/100 - Histograms 37
Searching segment 20/100 - Histograms 32
Searching segment 21/100 - Histograms 31
Searching segment 22/100 - Histograms 37
Searching segment 23/100 - Histograms 37
Searching segment 24/100 - Histograms 11
Searching segment 25/100 

## Evaluate performance

In [275]:
# Score the search results against the expected labels; the summary is printed below.
evaluate_segments(results, labels)

Segment evaluation:
Correct movies: 91
Wrong movies:   9
Total:   100
Start frame distance (correct movies only):   9366
Avg Start frame distance (correct movies only):   93.660000
TPR:     91.0%
