In [1]:
import cv2
import os
import random
import numpy as np
import time
import pickle
import pandas as pd

In [2]:
# hyper-parameters
ROOT_FOLDER = "..\.."
DATA_FOLDER = os.path.join(ROOT_FOLDER, "data")
TRAIN_FOLDER = os.path.join(DATA_FOLDER, "train_set")
TEST_FOLDER = os.path.join(DATA_FOLDER, "test_set")
PROCESSED_DATA = os.path.join(ROOT_FOLDER, 'processed_data')
TEMPLATE_FOLDER = os.path.join(PROCESSED_DATA, 'templates')

In [55]:
def match_template(
    input_path,
    template_path,
    category,
    path=True,
    method="pixel_count",
    debug_mode=False,
):
    """
    function that matches the image to a template.

    :input_path: str path to image
    :template_path: str path to template
    :method: str identifier of what method to use for matching
    :debug_mode: decides whether to return additional debugging data
    :return: an error variable dependent on the chosen method and debugging info or None depending on mode.
    """

    # sometimes the category is given as as

    category_converter = {
        "00": 1,
        "01": 2,
        "02": 2,
        "03": 3,
        "04": 3,
        "05": 4,
        "06": 5,
        "07": 6,
        "08": 6,
        "09": 6,
        "10": 6,
    }
    if category in list(category_converter.keys()):
        category = category_converter[category]

        stored_parameters = {1: 50, 2: 50, 3: 50, 4: 50, 5: 50, 6: 25}

    else:
        category = int(category)

        stored_parameters = {1: 35, 2: 25, 3: 50, 4: 75, 5: 75, 6: 100}

    if path:
        # load in image and template
        sample_image = cv2.imread(input_path, cv2.IMREAD_GRAYSCALE)
    else:
        sample_image = np.array(input_path)

    template_image = cv2.imread(template_path, cv2.IMREAD_GRAYSCALE)

    if method == "pixel_count":
        # important for this method is that the oprder of subtracting does matter.
        # because a 2 will have holes in the same spots as a 5 and will register a false positive

        thresh = stored_parameters[category]

        diff = template_image - sample_image
        errors = (diff > thresh).sum()

        if debug_mode:
            return errors, sample_image, template_image

        return errors, None, None

    elif method == "MSE":
        # iThe next method uses MSE to calculate the distances between to images

        mse = np.square(np.subtract(template_image, sample_image)).mean()

        if debug_mode:
            return mse, sample_image, template_image

        return mse, None, None


In [56]:
def numpy_model(image, path=True, thresholds=None):
    """
    predicts whether a dice is an anomaly or not

    :image: str path to image file
    :thresholds: dict containg custom thresholds foe each category, load it in from thresholds.pickle
    :return: return a predictions overall, and a prediction list as to what class it may belong
    """

    all_templates = ["1.png", "2.png", "3.png", "4.png", "5.png", "6.png"]

    predicted = 0
    predictions = []

    if not thresholds:
        thresholds = {
            1: 62.02780473883087,
            2: 60.430025740950214,
            3: 66.30725609998866,
            4: 69.93280870737878,
            5: 74.30941474250847,
            6: 82.75002536741725,
        }

    for idx, template in enumerate(all_templates):
        # this should link to a folder containg the templates.
        template_path = os.path.join(TEMPLATE_FOLDER, template)

        errors, _, _ = match_template(
            image, template_path, path=path, category=1, method="MSE"
        )
        thresh = thresholds[idx + 1]

        print(errors, thresh)

        if errors > thresh:
            predictions.append(1)
        else:
            predictions.append(0)

    if sum(predictions) >= 6:
        predicted = 1

    return predicted, predictions


In [57]:
new_data = os.path.join(PROCESSED_DATA, "train_set")
folder = os.path.join(new_data, "1")
for x in os.listdir(folder)[:10]:
    path = os.path.join(folder, x)
    print(numpy_model(path))

62.1368408203125
35.5228271484375 62.02780473883087
81.18719482421875
52.44366455078125 60.430025740950214
89.55743408203125
50.23321533203125 66.30725609998866
88.81103515625
56.977294921875 69.93280870737878
95.5618896484375
54.762451171875 74.30941474250847
94.6248779296875
62.4407958984375 82.75002536741725
(0, [0, 0, 0, 0, 0, 0])
78.36871337890625
48.64520263671875 62.02780473883087
82.2784423828125
58.8927001953125 60.430025740950214
87.320556640625
58.71630859375 66.30725609998866
91.93353271484375
61.30950927734375 69.93280870737878
96.21563720703125
61.97613525390625 74.30941474250847
89.57550048828125
65.86077880859375 82.75002536741725
(0, [0, 0, 0, 0, 0, 0])
27.27667236328125
36.89691162109375 62.02780473883087
59.2489013671875
45.8560791015625 60.430025740950214
55.837890625
37.24365234375 66.30725609998866
80.65399169921875
54.30267333984375 69.93280870737878
81.60797119140625
49.84185791015625 74.30941474250847
93.17095947265625
53.51910400390625 82.75002536741725
(0, [0

In [24]:
def gather_samples(processed_data=True):
    """
    gather all files from all folders

    :type: str representing which files we want
    :return: list containg a list for each sample with the path and the category
    """

    if processed_data:
        data_folder = PROCESSED_DATA
        templates = [1,2,3,4,5,6]
    else:
        data_folder = DATA_FOLDER
        template_folder = os.path.join(DATA_FOLDER, "train_set")
        templates = os.listdir(template_folder)

    all_files = []

    for template in templates:

        # get the folder name, all filenames inside it, and make a list of all the image files inside
        train_folder = os.path.join(data_folder, 'train_set')
        folder = os.path.join(train_folder, str(template))
        filenames = os.listdir(folder)
        files = [file for file in filenames if ".png" in file]

        for file in files:
            random_file_path = os.path.join(folder,file)

            all_files.append([random_file_path, template])

    return all_files


In [19]:
all_files = gather_samples()

df = pd.DataFrame(all_files)
df = df.set_axis(['path', 'category'], axis=1)

In [20]:
# code below calculates the errors for each of the categories.

def train_model(processed_data = True, matching_method = "pixel_count"):
    """
    A very basic model where each sample n has x features that represent the error per category
    calculated by the match template - pixel count method.

    :returns: a list containing a list for each sample containing x errors, 1 for each category. 
    """

    if processed_data:
        template_folder = TEMPLATE_FOLDER
        all_templates = os.listdir(TEMPLATE_FOLDER)
    else:
        template_folder = os.path.join(DATA_FOLDER, "templates")
        all_templates = os.listdir(template_folder)
        all_templates.remove('ano.png')
    
    all_errors = []
    # for each sample
    for idx, row in df.iterrows():

        sample_path = row['path']
        errors_per_row = []

        for idx, template in enumerate(all_templates):
            category = template
            category = category.replace(".png","")
            template_file = template
            correct_template_path = os.path.join(template_folder, template_file)
            errors, _, _ = match_template(sample_path, correct_template_path, category, method = matching_method)
            errors_per_row.append(errors)
        all_errors.append(errors_per_row)

    return all_errors


In [21]:
# trying with adaptive threshold per category

In [None]:
# gather processed data
all_files = gather_samples(processed_data=True)

df = pd.DataFrame(all_files)
df = df.set_axis(['path', 'category'], axis=1)

# gather all anomaly files 
anom_files = []

train_folder = os.path.join(PROCESSED_DATA, 'train_set')
folder = os.path.join(train_folder, "ano")
filenames = os.listdir(folder)
files = [file for file in filenames if ".png" in file]

for file in files:
    random_file_path = os.path.join(folder,file)

    anom_files.append([random_file_path,None])

# gather some normal files 
normal_files = []

train_folder = os.path.join(PROCESSED_DATA, 'train_set')
for cat in [1,2,3,4,5,6]:
    folder = os.path.join(train_folder, str(cat))
    filenames = os.listdir(folder)
    files = [file for file in filenames if ".png" in file]
    files = random.sample(files, 10)

    for file in files:
        random_file_path = os.path.join(folder,file)

        normal_files.append([random_file_path,None])

all_errors = train_model(matching_method="MSE")

results = pd.DataFrame(all_errors)
results = results.transpose().reset_index(drop=True).transpose()
results = results.set_axis([1,2,3,4,5,6], axis=1)

df = pd.concat([df,results], axis=1)
df.to_csv('results.csv')

df.head()

In [25]:
def model(image_path, processed_data = True, thresholds = [50 for x in range(11)]):
    """
    
    """
    if processed_data:
        template_folder = os.path.join(PROCESSED_DATA, "templates")
        all_templates = os.listdir(template_folder)
    else:
        template_folder = os.path.join(DATA_FOLDER, "templates")
        all_templates = os.listdir(template_folder)        

    predicted = 0
    predictions = []

    for idx, template in enumerate(all_templates):
        correct_template_path = os.path.join(TEMPLATE_FOLDER, template)
        errors, _, _ = match_template(image_path, correct_template_path, category=idx+1, method="MSE")
        thresh = thresholds[idx+1]

        if errors > thresh:
            predictions.append(1)
            predicted = 1
        else:
            predictions.append(0)

    return predicted, predictions

In [26]:
coeff = 0.75

thresholds = {}

for x in range(6):
    col = x+1

    correct = df.loc[df.category == col]
    false = df.loc[df.category != col]

    correct_mean = correct[col].mean()
    false_mean = false[col].mean()

    thresholds[col] = (correct_mean + false_mean)*coeff

template_folder = os.path.join(PROCESSED_DATA, "templates")
all_templates = os.listdir(template_folder)

y_pred = []

y_true = []

for anom in anom_files:
    prediction = 0
    for idx, template in enumerate(all_templates):
        category = idx + 1
        template_file = template
        correct_template_path = os.path.join(TEMPLATE_FOLDER, template_file)
        pred, _ = model(anom[0], correct_template_path, thresholds)
        y_pred.append(pred)
        y_true.append(1)

for norm in normal_files:
    prediction = 0
    for idx, template in enumerate(all_templates):
        category = idx + 1
        template_file = template
        correct_template_path = os.path.join(TEMPLATE_FOLDER, template_file)
        pred, _ = model(norm[0], correct_template_path, thresholds)
        y_pred.append(pred)
        y_true.append(0)

from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import rand_score
print(coeff, 'f1 : ', f1_score(y_true, y_pred, average='macro'),"acc : ",accuracy_score(y_true, y_pred),"roc : ", roc_auc_score(y_true, y_pred),"rand : ", rand_score(y_true, y_pred))

0.75 f1 :  0.8706031085000372 acc :  0.8706896551724138 roc :  0.8726190476190476 rand :  0.7744976432646986


In [27]:
import pickle

with open('thresholds.pickle', 'wb') as handle:
    pickle.dump(thresholds, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [28]:
thresholds

{1: 62.02780473883087,
 2: 60.430025740950214,
 3: 66.30725609998866,
 4: 69.93280870737878,
 5: 74.30941474250847,
 6: 82.75002536741725}

In [None]:
template_folder = os.path.join(PROCESSED_DATA, "templates")
os.listdir(template_folder)

['1.png', '2.png', '3.png', '4.png', '5.png', '6.png']

In [40]:
def numpy_model(image, path=True, thresholds=None):
    """
    predicts whether a dice is an anomaly or not

    :image: str path to image file
    :thresholds: dict containg custom thresholds foe each category, load it in from thresholds.pickle
    :return: return a predictions overall, and a prediction list as to what class it may belong
    """

    all_templates = ["1.png", "2.png", "3.png", "4.png", "5.png", "6.png"]

    predicted = 0
    predictions = []

    if not thresholds:
        thresholds = {
            1: 62.02780473883087,
            2: 60.430025740950214,
            3: 66.30725609998866,
            4: 69.93280870737878,
            5: 74.30941474250847,
            6: 82.75002536741725,
        }

    for idx, template in enumerate(all_templates):
        # this should link to a folder containg the templates.
        template_path = os.path.join(TEMPLATE_FOLDER, template)

        errors, _, _ = match_template(
            image, template_path, path=path, category=1, method="MSE"
        )
        thresh = thresholds[idx + 1]

        print(errors, thresh)

        if errors > thresh:
            predictions.append(1)
        else:
            predictions.append(0)

    if sum(predictions) >= 6:
        predicted = 1

    return predicted, predictions


In [42]:
new_data = os.path.join(PROCESSED_DATA, "train_set")
folder = os.path.join(new_data, "1")
for x in os.listdir(folder)[:10]:
    path = os.path.join(folder, x)
    print(numpy_model(path))

mse
35.5228271484375 62.02780473883087
mse
52.44366455078125 60.430025740950214
mse
50.23321533203125 66.30725609998866
mse
56.977294921875 69.93280870737878
mse
54.762451171875 74.30941474250847
mse
62.4407958984375 82.75002536741725
(0, [0, 0, 0, 0, 0, 0])
mse
48.64520263671875 62.02780473883087
mse
58.8927001953125 60.430025740950214
mse
58.71630859375 66.30725609998866
mse
61.30950927734375 69.93280870737878
mse
61.97613525390625 74.30941474250847
mse
65.86077880859375 82.75002536741725
(0, [0, 0, 0, 0, 0, 0])
mse
36.89691162109375 62.02780473883087
mse
45.8560791015625 60.430025740950214
mse
37.24365234375 66.30725609998866
mse
54.30267333984375 69.93280870737878
mse
49.84185791015625 74.30941474250847
mse
53.51910400390625 82.75002536741725
(0, [0, 0, 0, 0, 0, 0])
mse
44.870361328125 62.02780473883087
mse
48.71258544921875 60.430025740950214
mse
47.27459716796875 66.30725609998866
mse
54.0482177734375 69.93280870737878
mse
53.3214111328125 74.30941474250847
mse
53.9534912109375 8

In [16]:


new_data = os.path.join(PROCESSED_DATA, ('alber test'))
proc_data = os.path.join(new_data, ('proc'))
for x in os.listdir(proc_data):
    path = os.path.join(proc_data, x)
    print(numpy_model(path))

(128, 128)
(128, 128)
(128, 128)
(128, 128)
(128, 128)
(128, 128)
(0, [1, 1, 1, 1, 1, 0])
(128, 128)
(128, 128)
(128, 128)
(128, 128)
(128, 128)
(128, 128)
(0, [1, 1, 1, 1, 1, 0])
(128, 128)
(128, 128)
(128, 128)
(128, 128)
(128, 128)
(128, 128)
(0, [1, 1, 1, 1, 1, 0])
(128, 128)
(128, 128)
(128, 128)
(128, 128)
(128, 128)
(128, 128)
(0, [1, 1, 1, 1, 1, 0])


In [62]:
y_pred = []
y_true = []

for x in normal_files:
    res = numpy_model(x[0])
    y_pred.append(res[0])
    y_true.append(0)

for x in anom_files:
    res = numpy_model(x[0])
    y_pred.append(res[0])
    y_true.append(1)

In [63]:
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import rand_score
print(coeff, 'f1 : ', f1_score(y_true, y_pred, average='macro'),"acc : ",accuracy_score(y_true, y_pred),"roc : ", roc_auc_score(y_true, y_pred),"rand : ", rand_score(y_true, y_pred))

0.75 f1 :  0.9051089462333606 acc :  0.9051724137931034 roc :  0.9053571428571427 rand :  0.8268365817091454


In [None]:
# trying with mean squared error

In [256]:
# gather processed data
all_files = gather_samples(processed_data=True)

df = pd.DataFrame(all_files)
df = df.set_axis(['path', 'category'], axis=1)

In [257]:
# gather all anomaly files 
anom_files = []

train_folder = os.path.join(PROCESSED_DATA, 'train_set')
folder = os.path.join(train_folder, "ano")
filenames = os.listdir(folder)
files = [file for file in filenames if ".png" in file]

for file in files:
    random_file_path = os.path.join(folder,file)

    anom_files.append([random_file_path,None])

In [258]:
# gather all anomaly files 
normal_files = []

train_folder = os.path.join(PROCESSED_DATA, 'train_set')
for cat in [1,2,3,4,5,6]:
    folder = os.path.join(train_folder, str(cat))
    filenames = os.listdir(folder)
    files = [file for file in filenames if ".png" in file]
    files = random.sample(files, 10)

    for file in files:
        random_file_path = os.path.join(folder,file)

        normal_files.append([random_file_path,None])

In [259]:
all_errors = train_model(matching_method="MSE")

results = pd.DataFrame(all_errors)
results = results.transpose().reset_index(drop=True).transpose()
results = results.set_axis([1,2,3,4,5,6], axis=1)

df = pd.concat([df,results], axis=1)
df.to_csv('results.csv')

df.head()

Unnamed: 0,path,category,1,2,3,4,5,6
0,..\..\processed_data\train_set\1\16_09_21_00_0...,1,35.522827,52.443665,50.233215,56.977295,54.762451,62.440796
1,..\..\processed_data\train_set\1\16_09_21_00_0...,1,48.645203,58.8927,58.716309,61.309509,61.976135,65.860779
2,..\..\processed_data\train_set\1\16_09_21_00_0...,1,36.896912,45.856079,37.243652,54.302673,49.841858,53.519104
3,..\..\processed_data\train_set\1\16_09_21_00_0...,1,44.870361,48.712585,47.274597,54.048218,53.321411,53.953491
4,..\..\processed_data\train_set\1\16_09_21_00_0...,1,40.584656,54.987915,52.628052,57.314392,54.350647,63.11322


In [260]:
thresholds = {}

for x in range(6):
    col = x+1

    correct = df.loc[df.category == col]
    false = df.loc[df.category != col]
    
    print(correct[col].mean(), false[col].mean())

    thresholds[col] = (correct[col].mean() + false[col].mean())/2

29.618622778552197 53.085116873222304
31.468391180038452 49.10497647456184
38.14536787823933 50.26430692174555
39.7240070785064 53.5197378646653
44.042650235409766 55.036569421268204
50.06285226605105 60.27051489050528


In [265]:
average = 0
for key, val in thresholds.items():
    average += val

average = average
average = average / 6
average

46.195259488563806

In [270]:
template_folder = os.path.join(PROCESSED_DATA, "templates")
all_templates = os.listdir(template_folder)

coeff = 0.7

for x in range(20):

    coeff += 0.01

    thresholds = {}

    for x in range(6):
        col = x+1

        correct = df.loc[df.category == col]
        false = df.loc[df.category != col]

        thresholds[col] = (correct[col].mean() + false[col].mean())*coeff

    y_pred = []

    y_true = []

    for anom in anom_files:
        prediction = 0
        for idx, template in enumerate(all_templates):
            category = idx + 1
            template_file = template
            correct_template_path = os.path.join(TEMPLATE_FOLDER, template_file)
            pred, _ = model(anom[0], correct_template_path, thresholds)
            y_pred.append(pred)
            y_true.append(1)



    for anom in normal_files:
        prediction = 0
        for idx, template in enumerate(all_templates):
            category = idx + 1
            template_file = template
            correct_template_path = os.path.join(TEMPLATE_FOLDER, template_file)
            pred, _ = model(anom[0], correct_template_path, thresholds)
            y_pred.append(pred)
            y_true.append(0)

    from sklearn.metrics import f1_score
    from sklearn.metrics import accuracy_score
    from sklearn.metrics import roc_auc_score
    from sklearn.metrics import rand_score
    print(coeff, 'f1 : ', f1_score(y_true, y_pred, average='macro'),"acc : ",accuracy_score(y_true, y_pred),"roc : ", roc_auc_score(y_true, y_pred),"rand : ", rand_score(y_true, y_pred))

0.71 f1 :  0.8619047619047618 acc :  0.8620689655172413 roc :  0.8642857142857143 rand :  0.7618456958571074
0.72 f1 :  0.896551724137931 acc :  0.896551724137931 roc :  0.8976190476190476 rand :  0.8142396427685438
0.73 f1 :  0.9137674695212608 acc :  0.9137931034482759 roc :  0.9142857142857144 rand :  0.8422227735053337
0.74 f1 :  0.9137674695212608 acc :  0.9137931034482759 roc :  0.9142857142857144 rand :  0.8422227735053337
0.75 f1 :  0.9223618651000223 acc :  0.9224137931034483 roc :  0.9226190476190476 rand :  0.8566608781939965
0.76 f1 :  0.9222693768148315 acc :  0.9224137931034483 roc :  0.9220238095238095 rand :  0.8566608781939965
0.77 f1 :  0.9048258372491982 acc :  0.9051724137931034 roc :  0.9041666666666667 rand :  0.8280823616968495
0.78 f1 :  0.8960573476702509 acc :  0.896551724137931 roc :  0.8952380952380953 rand :  0.8142396427685438
0.79 f1 :  0.8695163104611923 acc :  0.8706896551724138 roc :  0.868452380952381 rand :  0.7744976432646986
0.8 f1 :  0.85158425528

In [None]:
all_errors = train_model()

results = pd.DataFrame(all_errors)
results = results.transpose().reset_index(drop=True).transpose()
results = results.set_axis([1,2,3,4,5,6], axis=1)

df = pd.concat([df,results], axis=1)
df.to_csv('results.csv')

df.head()

In [263]:
thresholds = {}

for x in range(6):
    col = x+1

    correct = df.loc[df.category == col]
    false = df.loc[df.category != col]
    
    print(correct[col].mean(), false[col].mean())

    thresholds[col] = (correct[col].mean() + false[col].mean())/2

5077.420624151968 5529.879499485773
5736.462890625 6387.752478817379
5769.956910569105 6368.983710915559
5832.717811158798 6556.790920375953
5771.823399558499 6686.0946160635485
5733.020665901263 7280.811141022986


In [264]:
thresholds

{1: 5303.65006181887,
 2: 6062.10768472119,
 3: 6069.470310742332,
 4: 6194.754365767376,
 5: 6228.959007811023,
 6: 6506.915903462124}

In [322]:
average = 0
for key, val in thresholds.items():
    average += val

average = average - 500
average = average / 6

In [323]:
# gather all anomaly files 
anom_files = []

train_folder = os.path.join(PROCESSED_DATA, 'train_set')
folder = os.path.join(train_folder, "ano")
filenames = os.listdir(folder)
files = [file for file in filenames if ".png" in file]

for file in files:
    random_file_path = os.path.join(folder,file)

    anom_files.append([random_file_path, template])



In [324]:
predictions = []
total_detection = 0

for anom in anom_files:
    prediction = 0
    for idx, template in enumerate(all_templates):
        category = idx + 1
        template_file = template
        correct_template_path = os.path.join(TEMPLATE_FOLDER, template_file)
        errors, _, _ = match_template(anom[0], correct_template_path, category)
        if errors > average:
            prediction = 1
    predictions.append(prediction)

correct_predictions = [1 for x in predictions]

from sklearn.metrics import f1_score
f1_score(correct_predictions, predictions, average='macro')

In [325]:
correct_predictions = [1 for x in predictions]

In [326]:
from sklearn.metrics import f1_score
f1_score(correct_predictions, predictions, average='macro')

0.44

In [445]:
# now try with the old images
all_files = gather_samples(processed_data=False)

df = pd.DataFrame(all_files)
df = df.set_axis(['path', 'category'], axis=1)

In [446]:
all_errors = train_model(processed_data=False)

In [447]:
results = pd.DataFrame(all_errors)
results = results.transpose().reset_index(drop=True).transpose()
results = results.set_axis([1,2,3,4,5,6,7,8,9,10,11], axis=1)

df = pd.concat([df,results], axis=1)
df.to_csv('results.csv')
df = df[df.category != "ano"]

df.head()

Unnamed: 0,path,category,1,2,3,4,5,6,7,8,9,10,11
0,..\..\data\train_set\00\16_09_21_00_000.png,0,5212,6575,6435,7241,6962,7876,8134,10128,9965,10064,10176
1,..\..\data\train_set\00\16_09_21_00_001.png,0,7718,7810,8564,8488,9142,9239,9321,12242,12369,11866,12491
2,..\..\data\train_set\00\16_09_21_00_002.png,0,2547,5038,4951,5853,4696,7310,7227,8043,8479,8138,8663
3,..\..\data\train_set\00\16_09_21_00_003.png,0,2329,4782,4542,5059,4336,6893,6905,7885,8254,8400,8276
4,..\..\data\train_set\00\16_09_21_00_004.png,0,3795,5476,5726,6524,5837,7495,7561,9945,9539,9579,9929


In [448]:
df['category'] = pd.to_numeric(df.category)

In [468]:
thresholds = {}

for x in range(10):
    col = x+1

    correct = df.loc[df.category == col]
    false = df.loc[df.category != col]
    
    print(correct[col].mean(), false[col].mean())

    thresholds[col] = (correct[col].mean() + false[col].mean())/2

8503.953703703704 8180.433758912286
9335.028925619834 9155.987021521274
9064.86303630363 9183.330427493713
9822.004807692309 9802.97948545485
9311.492489270386 9663.397942897676
8987.030905077263 10396.141394527802
10920.125907990314 10235.96346216304
10825.793969849246 12040.647011177709
10877.939334637966 11959.446864686468
10932.016666666666 11982.497317509347


In [477]:
thresholds

{1: 8342.193731307994,
 2: 9245.507973570555,
 3: 9124.096731898671,
 4: 9812.492146573579,
 5: 9487.44521608403,
 6: 9691.586149802533,
 7: 10578.044685076677,
 8: 11433.220490513479,
 9: 11418.693099662218,
 10: 11457.256992088007}

In [473]:
average = 0
for key, val in thresholds.items():
    average += val

average = average - 500
average = average / 10

In [474]:
average

10009.053721657776

In [475]:
# gather all anomaly files 
anom_files = []

train_folder = os.path.join(DATA_FOLDER, 'train_set')
folder = os.path.join(train_folder, "ano")
filenames = os.listdir(folder)
files = [file for file in filenames if ".png" in file]

for file in files:
    random_file_path = os.path.join(folder,file)

    anom_files.append([random_file_path, template])


In [476]:
template_folder = os.path.join(DATA_FOLDER, "templates")
all_templates = os.listdir(template_folder)
all_templates.remove('ano.png')

predictions = []
total_detection = 0

for anom in anom_files:
    prediction = 0
    for idx, template in enumerate(all_templates):
        category = template
        category = category.replace('.png', '')
        template_file = template
        correct_template_path = os.path.join(template_folder, template_file)
        errors, _, _ = match_template(anom[0], correct_template_path, category)
        if errors > average:
            prediction = 1
    predictions.append(prediction)

correct_predictions = [1 for x in predictions]

from sklearn.metrics import f1_score
f1_score(correct_predictions, predictions, average='macro')

0.4042553191489362