In [1]:
import os
from shutil import rmtree

import numpy as np
import pandas as pd
from PIL import Image
from tqdm.auto import tqdm

import sys
sys.path.insert(0, '../input/sefa-utils')
import utils

import random
import shutil

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tensorflow.keras.preprocessing import image
import matplotlib.pyplot as plt
import torch
from torchvision import transforms
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import IsolationForest
from sklearn.covariance import EllipticEnvelope
from sklearn.neighbors import LocalOutlierFactor

from tqdm.auto import tqdm

from sklearn import svm
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from sklearn import metrics

In [2]:
# Stage the '19-01 synth' augmented images under ./aug-data, the directory
# that load_data() reads as its augmented dataset.
!mkdir -p './aug-data/19-01 synth'
!cp '../input/sefa-shaver-aug/sefa_augmented/19-01 synth/'* './aug-data/19-01 synth'

In [3]:
# Classifiers to benchmark; the experiment loop fits each one once per
# embedding/seed combination.
models = [MLPClassifier(max_iter=10000, random_state=744)]

# Feature extractors (from the project's `utils` module) to compare.
embeddings = [
    utils.get_resnet50_embeddings,
    utils.get_vgg16_embeddings,
    utils.get_inception_embeddings,
]

# Seeds used for the train/test split; reported metrics are averaged over them.
seeds = range(5)

In [4]:
def predict_image(class_label, show = True, in_set = None):
    """Classify one randomly chosen image of ``class_label`` with the current model.

    The class folder is looked up under the novel-defect dataset first and,
    if it does not exist there, under the augmented dataset. The chosen file
    is copied into a scratch ``./tmp/<class_label>/`` folder so it can be
    loaded through ``utils.DefectDataset`` and embedded.

    NOTE: this function reads the module-level globals ``get_embeddings`` and
    ``model`` (bound by the experiment loop) instead of taking them as
    arguments, so it must only be called while both are defined.

    Args:
        class_label: Name of the class sub-folder to draw an image from.
        show: When true, print the file name, true label and prediction and
            display the resized image.
        in_set: Optional collection of file names. When given and non-empty,
            only files whose name is contained in it are eligible.

    Returns:
        Whatever ``model.predict`` returns for the single embedded image, or
        ``None`` if the image file could not be opened.

    Raises:
        ValueError: If the class folder holds no eligible image (previously
            this case looped forever when ``in_set`` had no overlap).
    """
    path = os.path.join('../input/custom-defects-4cats/shaver-shell-novel', class_label)
    if not os.path.exists(path):
        path = os.path.join('../input/sefa-shaver-aug/sefa_augmented', class_label)
    class_files = os.listdir(path)

    # Filter first and draw once: same uniform distribution as rejection
    # sampling, but guaranteed to terminate.
    if in_set:
        allowed = set(in_set)
        candidates = [name for name in class_files if name in allowed]
    else:
        candidates = class_files
    if not candidates:
        raise ValueError(
            "No eligible images for class '" + str(class_label) + "' in " + path
        )

    image_file = random.choice(candidates)
    image_path = os.path.join(path, image_file)

    if show:
        print('Image file: ' + image_path)

    # Best-effort: unreadable / non-image files yield None. Only I/O and
    # decoding failures are swallowed (PIL's UnidentifiedImageError is an
    # OSError), never KeyboardInterrupt or programming errors.
    try:
        raw_img = Image.open(image_path)
    except OSError:
        return None

    # resize() returns an independent image, so the file handle can be
    # released as soon as the resized copy exists.
    with raw_img:
        test_img = raw_img.resize((256, 256), Image.Resampling.LANCZOS)

    if show:
        plt.imshow(test_img, cmap='gray')
        print("True label: ", class_label)

    scratch_root = "./tmp"
    scratch_class_dir = os.path.join(scratch_root, class_label)
    # Start from a clean scratch folder so leftovers from an interrupted
    # earlier call cannot end up in the single-image dataset.
    shutil.rmtree(scratch_root, ignore_errors=True)
    try:
        os.makedirs(scratch_class_dir, exist_ok = True)
        shutil.copy(image_path, scratch_class_dir)
        single_image_ds = utils.DefectDataset('./tmp/',
                         normalize_transform=utils.normalize)
        pre_processed_image = get_embeddings(single_image_ds, tqdm_suppress = True)
        pred = model.predict(pre_processed_image)
    finally:
        # Always remove the scratch folder, even if embedding/prediction fails.
        shutil.rmtree(scratch_root, ignore_errors=True)

    if show:
        print("Raw Prediction: ", pred)

    return pred

def calc_statistics(class_label, samples = 50, good = False, in_set = None):
    """Estimate the model's accuracy on randomly drawn images of one class.

    Calls ``predict_image`` ``samples`` times (images are drawn independently,
    so the same file may be picked more than once) and compares every
    prediction with the expected label: ``1`` when ``good`` is true, ``-1``
    otherwise.

    Args:
        class_label: Class sub-folder to sample images from.
        samples: Number of ``predict_image`` calls to make.
        good: Whether the class is the non-defective one (expected label 1).
        in_set: Optional collection of file names forwarded to
            ``predict_image`` to restrict the eligible files.

    Returns:
        Accuracy over all images that could be read, or ``nan`` if none of
        the draws produced a prediction.
    """
    expected_label = 1 if good else -1

    collected = []
    for _ in range(samples):
        pred = predict_image(class_label, show = False, in_set = in_set)
        if pred is None:
            # predict_image signals an unreadable image with None; such a
            # draw cannot be scored, so it is left out of the accuracy.
            continue
        collected.append(np.ravel(pred))

    if not collected:
        return float('nan')

    # Flatten to 1-D so y_true / y_pred are plain label vectors.
    y_pred = np.concatenate(collected)
    y_true = np.full(y_pred.shape, expected_label)

    return accuracy_score(y_true, y_pred)

In [5]:
def load_data():
    """Load the three image datasets and embed them with the current extractor.

    NOTE: reads the module-level global ``get_embeddings`` (bound by the
    experiment loop) rather than taking the extractor as an argument.

    Labels use the convention ``1`` = good, ``-1`` = defect:
      * novel ("synth") defects and augmented images are all labelled ``-1``;
      * real shaver images keep label ``1`` where the dataset target is 1 and
        get ``-1`` otherwise (assumes target index 1 is the good class --
        TODO confirm against ``utils.DefectDataset``).

    Only a random subset of ``NOVEL_DEFECT_NO`` novel-defect samples is kept.
    The draw uses NumPy's global RNG and is therefore not controlled by the
    per-experiment seed.

    Returns:
        Tuple ``(Xr, yr, Xaug, yaug, Xs, ys, shavers)``: embeddings/labels of
        the real, augmented and (sub-sampled) novel data, plus the real-image
        dataset object itself.
    """
    synth = utils.DefectDataset('../input/custom-defects-4cats/shaver-shell-novel/',
                      normalize_transform=utils.normalize)
    Xs = get_embeddings(synth)
    ys = np.array(synth.targets)
    ys[:] = -1

    aug = utils.DefectDataset('./aug-data',
                     normalize_transform=utils.normalize)
    Xaug = get_embeddings(aug)
    yaug = np.array(aug.targets)
    yaug[:] = -1

    shavers = utils.DefectDataset('../input/shaver-shell-full-all-classes-v2/shaver-shell-full/',
                        normalize_transform=utils.normalize)
    Xr = get_embeddings(shavers)
    yr = np.array(shavers.targets)
    yr[yr!=1] = -1

    NOVEL_DEFECT_NO = 250
    n_available = ys.shape[0]
    # Draw distinct samples whenever enough exist: np.random.choice defaults
    # to replace=True, which would put duplicate images into the test set.
    # Fall back to sampling with replacement only when fewer than
    # NOVEL_DEFECT_NO samples are available, so the subset size is unchanged.
    sample_inds = np.random.choice(
        n_available,
        NOVEL_DEFECT_NO,
        replace=n_available < NOVEL_DEFECT_NO,
    )
    Xs = Xs[sample_inds]
    ys = ys[sample_inds]

    return Xr, yr, Xaug, yaug, Xs, ys, shavers

def split_data(Xr, yr, Xaug, yaug, Xs, ys, shavers, RANDOM_SEED):
    """Build the train and test sets for one experiment seed.

    The real data is split 70/30. The augmented samples are appended to the
    training part and the novel-defect samples to the test part. The file
    names of the real images are split with the same ``test_size`` and
    ``random_state`` (presumably so they line up with the rows of the real
    split -- this requires ``shavers.samples`` to be in the same order as
    ``Xr``; verify against the embedding helper).

    Returns:
        ``(X_train, y_train, X_test, y_test, files_train, files_test)``.
    """
    real_parts = train_test_split(Xr, yr, test_size=0.3, random_state=RANDOM_SEED)
    X_real_train, X_real_test, y_real_train, y_real_test = real_parts

    # Augmented data is used for training only.
    X_train = np.vstack((X_real_train, Xaug))
    y_train = np.hstack((y_real_train, yaug))

    # Novel defects are used for testing only.
    X_test = np.vstack((X_real_test, Xs))
    y_test = np.hstack((y_real_test, ys))

    # Keep only the last path component of every real image.
    shavers_files = []
    for sample in shavers.samples:
        shavers_files.append(sample[0].split("/")[-1])

    file_parts = train_test_split(shavers_files, test_size=0.3, random_state=RANDOM_SEED)
    files_train, files_test = file_parts

    return X_train, y_train, X_test, y_test, files_train, files_test

In [6]:
def run_experiment(model, Xr, yr, Xaug, yaug, Xs, ys, shavers, RANDOM_SEED):
    """Fit ``model`` on one seeded split and collect its evaluation metrics.

    Besides AUROC and F1 on the test set, per-class accuracies are estimated
    with ``calc_statistics``: the real defect classes are sampled from the
    held-out test files ("closed"), the remaining defect classes are sampled
    without a file restriction ("open").

    Returns:
        Dict with keys ``'AUROC'``, ``'F1'``, ``'R_CLOSED'`` and ``'R_OPEN'``;
        the last two are means over the respective class accuracies.
    """
    split = split_data(Xr, yr, Xaug, yaug, Xs, ys, shavers, RANDOM_SEED)
    X_train, y_train, X_test, y_test, files_train, files_test = split

    model.fit(X_train, y_train)

    predicted = model.predict(X_test)
    f1 = f1_score(y_test, predicted)

    # Column 1 of predict_proba is used as the score for the AUROC.
    scores = model.predict_proba(X_test)[:, 1]
    auroc = roc_auc_score(y_test, scores, multi_class='ovr', average='weighted')

    # The good class is evaluated as well, but its accuracy is not reported.
    calc_statistics("19-01 goed", good = True, in_set = files_test)

    closed_labels = ("19-01 onderbroken", "19-01 dubbeldruk")
    closed_r = [calc_statistics(label, in_set = files_test) for label in closed_labels]

    open_labels = ("lines", "missing_letter", "color_stains", "vert_flip", "horz_flip")
    open_r = [calc_statistics(label) for label in open_labels]

    summary = {}
    summary['AUROC'] = auroc
    summary['F1'] = f1
    summary['R_CLOSED'] = np.mean(closed_r)
    summary['R_OPEN'] = np.mean(open_r)
    return summary

In [7]:
def key_mean(l_dic, key, n):
    """Return the sum of ``d[key]`` over all dicts in ``l_dic``, divided by ``n``.

    ``n`` is supplied by the caller and is not checked against ``len(l_dic)``.
    """
    total = 0
    for record in l_dic:
        total = total + record[key]
    return total / n

In [8]:
# Nested results: results[<embedding function name>][<model class name>] -> metric dict.
results = {}

# NOTE: the loop variables `get_embeddings` and `model` double as the
# module-level globals that load_data() / predict_image() read, so they must
# keep exactly these names.
for get_embeddings in embeddings:
    results[get_embeddings.__name__] = {}
    
    # Embeddings are computed once per extractor and shared by all models/seeds.
    Xr, yr, Xaug, yaug, Xs, ys, shavers = load_data()
    for model in models:
        individual_results = []
        # One run per seed; the seed is passed on as the train/test split seed.
        for GLOBAL_SEED in seeds:
            r = run_experiment(model, Xr, yr, Xaug, yaug, Xs, ys, shavers, GLOBAL_SEED)
            individual_results.append(r)
        # Average every metric over the seeds.
        results[get_embeddings.__name__][ model.__class__.__name__] = {
            'AUROC': key_mean(individual_results, 'AUROC', len(seeds)),
            'F1': key_mean(individual_results, 'F1', len(seeds)),
            'R_CLOSED': key_mean(individual_results, 'R_CLOSED', len(seeds)),
            'R_OPEN': key_mean(individual_results, 'R_OPEN', len(seeds)),
        }

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth


  0%|          | 0.00/97.8M [00:00<?, ?B/s]

  0%|          | 0/2500 [00:00<?, ?it/s]

  0%|          | 0/2432 [00:00<?, ?it/s]

  0%|          | 0/3518 [00:00<?, ?it/s]

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth


  0%|          | 0.00/528M [00:00<?, ?B/s]

  0%|          | 0/2500 [00:00<?, ?it/s]

  0%|          | 0/2432 [00:00<?, ?it/s]

  0%|          | 0/3518 [00:00<?, ?it/s]

Downloading: "https://download.pytorch.org/models/inception_v3_google-0cc3c7bd.pth" to /root/.cache/torch/hub/checkpoints/inception_v3_google-0cc3c7bd.pth


  0%|          | 0.00/104M [00:00<?, ?B/s]

  0%|          | 0/2500 [00:00<?, ?it/s]

  0%|          | 0/2432 [00:00<?, ?it/s]

  0%|          | 0/3518 [00:00<?, ?it/s]

In [9]:
# Dump the seed-averaged metrics per embedding and model.
print(results)

{'get_resnet50_embeddings': {'MLPClassifier': {'AUROC': 0.9928443107919533, 'F1': 0.9686260558140415, 'R_CLOSED': 0.9083333333333332, 'R_OPEN': 0.9813333333333333}}, 'get_vgg16_embeddings': {'MLPClassifier': {'AUROC': 0.9949237256247265, 'F1': 0.9806183989667469, 'R_CLOSED': 0.9149999999999999, 'R_OPEN': 0.9966666666666667}}, 'get_inception_embeddings': {'MLPClassifier': {'AUROC': 0.9962496732295142, 'F1': 0.9783244370521769, 'R_CLOSED': 0.94, 'R_OPEN': 0.9860000000000001}}}


In [10]:
import csv

def dict_to_csv(r, path="test_output.csv"):
    """Write the nested results dict as a wide CSV table.

    Layout: the first row carries one title group per embedding (the name sits
    above the second metric column of its group), the second row holds the
    column names (``Model`` followed by the metric names once per embedding),
    and every following row holds one model with its metrics for each
    embedding.

    Works for any number of embeddings (the previous version required exactly
    three). Model names are taken from the first embedding; a model or metric
    missing for some embedding produces empty cells.

    Args:
        r: Mapping ``{embedding_name: {model_name: {metric_name: value}}}``.
        path: Destination file; defaults to the original ``test_output.csv``.
    """
    metric_names = ['AUROC', 'F1', 'R_CLOSED', 'R_OPEN']
    embedding_names = list(r.keys())
    model_names = list(r[embedding_names[0]].keys()) if embedding_names else []

    # newline="" is what the csv module requires; without it, platforms that
    # translate line endings produce blank lines between rows.
    with open(path, "w", newline="") as csv_file:
        csvwriter = csv.writer(csv_file)

        # One empty cell above the 'Model' column, then one group per embedding.
        title_row = ['']
        for embedding_name in embedding_names:
            title_row += ['', embedding_name] + [''] * (len(metric_names) - 2)
        csvwriter.writerow(title_row)

        csvwriter.writerow(['Model'] + metric_names * len(embedding_names))

        for model_name in model_names:
            row = [model_name]
            for embedding_name in embedding_names:
                scores = r[embedding_name].get(model_name, {})
                # Look values up by metric name so cells always line up with
                # the header, independent of the dict's insertion order.
                row += [scores.get(metric, '') for metric in metric_names]
            csvwriter.writerow(row)

In [12]:
dict_to_csv(results)