# Data Metrics
### Imports

In [1]:
import numpy as np
import pandas as pd
import torch
import torch.optim as optim
import torch.nn as nn
import matplotlib.pyplot as plt
import matplotlib
import tqdm.auto as tqdm

import os
from PIL import Image
from sklearn.metrics import accuracy_score
import torchvision
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score, roc_curve, auc

import torchvision.models as models

plt.style.use('seaborn')

import DiagnosisFunctions.tools as tools

import albumentations as A
import torchvision.transforms.functional as TF

### Read files

In [2]:
ground_truths = pd.read_csv('GT.csv')
predictions = pd.read_csv('pred.csv')

## Classify predictions

In [10]:
pred_class = tools.classify_probability_predictions(predictions)
pred_class

Unnamed: 0,scale,plaque,pustule,patch,papule,dermatoglyph_disruption,open_comedo,diagnosis_acne,diagnosis_actinic_keratosis,diagnosis_psoriasis,diagnosis_seborrheic_dermatitis,diagnosis_viral_warts,diagnosis_vitiligo,area_acral_distribution,area_exposed_areas,area_extensor_sites,area_seborrheic_region
0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,1
1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0
2,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0
3,1,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,1
4,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
259,1,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1
260,1,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1
261,1,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1
262,1,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0


## Measure scores

* By only supplying targets and predictions, f1 scores are calculated for diagnosis_variables, area_variables and characteristics_variables. These are returned as single values
* If 'type' is supplied (f1:default/recall/precission), said method will be used for measurements
* If 'average' is supplied (samples:default/micro/macro), said average filter will be applied to measurement method
* If 'single_char_scores' is set to True, individual characteristic scores will be calculated with default=f1 or supplied score type
* If 'single_acc' is set to True, accuracies will be calculated for all variables

In [4]:
print('Metric Scores overall')
print(tools.compute_metrics_scores(ground_truths, pred_class))
print('Metric Scores with single characteristics')
print(tools.compute_metrics_scores(ground_truths, pred_class, single_char_scores=True))
print('Metric Scores with single characteristics and accuracy')
print(tools.compute_metrics_scores(ground_truths, pred_class, single_char_scores=True, single_acc=True))

Metric Scores overall
(0.557783189033189, 0.1893939393939394, 0.38257575757575757)
Metric Scores with single characteristics
({'total': 0.557783189033189, 'singles': {'scale': 0.49255952380952384, 'plaque': 0.4270702455739577, 'pustule': 0.4648945007476325, 'patch': 0.5596963423050381, 'papule': 0.5214035087719298, 'dermatoglyph_disruption': 0.47514910536779326, 'open_comedo': 0.463841059602649}}, 0.1893939393939394, 0.38257575757575757)
Metric Scores with single characteristics and accuracy
({'total': 0.557783189033189, 'singles': {'scale': {'score': 0.49255952380952384, 'accuracy': 0.5303030303030303}, 'plaque': {'score': 0.4270702455739577, 'accuracy': 0.6401515151515151}, 'pustule': {'score': 0.4648945007476325, 'accuracy': 0.7689393939393939}, 'patch': {'score': 0.5596963423050381, 'accuracy': 0.5606060606060606}, 'papule': {'score': 0.5214035087719298, 'accuracy': 0.5303030303030303}, 'dermatoglyph_disruption': {'score': 0.47514910536779326, 'accuracy': 0.9053030303030303}, 'open

In [11]:
print('Metric Scores with single characteristics and accuracy')
print(tools.compute_metrics_scores(ground_truths, pred_class))

Metric Scores with single characteristics and accuracy
(0.5964646464646466, 0.1893939393939394, 0.38257575757575757)


## Test of variable f1 scores

In [9]:
diagnosis_variables, area_variables, characteristics_variables = tools.getVariableGroups()

thresholds = np.arange(0, 1, 0.01)
def predict(y_est_prob, threshold):
	return (y_est_prob >= threshold).astype('int')
    
# Basic characteristics
basic_chars_thresholds = []
basic_chars_scores = []
for char in characteristics_variables:
    scores = [f1_score(ground_truths[char], predict(predictions[char], t)) for t in thresholds]
    i_opt = np.argmax(scores)
    basic_chars_thresholds.append(thresholds[i_opt])
    basic_chars_scores.append(scores[i_opt])
thresholds_scores_opt = pd.DataFrame(
    [basic_chars_thresholds, basic_chars_scores],
    index=['threshold','score'],
    columns=characteristics_variables)

thresholds_final = thresholds_scores_opt.loc['threshold',:]
thresholds_scores_opt

Unnamed: 0,scale,plaque,pustule,patch,papule,dermatoglyph_disruption,open_comedo
threshold,0.0,0.0,0.0,0.0,0.0,0.0,0.0
score,0.727711,0.818792,0.339623,0.649616,0.693069,0.17301,0.37037


In [6]:
# General issue with f1
gt1 = ground_truths.iloc[:,0]
pred1 = predictions.iloc[:,0]
idx = np.argmax(scores)
print('Best Threshold=%f, F-Score=%.3f' % (thresholds[idx], scores[idx]))
for i in range(0,len(scores)):
    print(f'Threshold={thresholds[i]:.2f}, f1-Score={scores[i]:.3f}')

NameError: name 'scores' is not defined