# Benchmark of various outlier detection models

### The models are evaluated on ROC, Precision @ n and execution time on 17 benchmark datasets. All datasets are split 60% for training and 40% for testing.

**[PyOD](https://github.com/yzhao062/Pyod)** is a comprehensive **Python toolkit** to **identify outlying objects** in 
multivariate data with both unsupervised and supervised approaches.


  1. Linear Models for Outlier Detection:
     1. **PCA: Principal Component Analysis** (uses the sum of
       weighted projected distances to the eigenvector hyperplane 
       as the outlier scores) [10]
     2. **MCD: Minimum Covariance Determinant** (uses the Mahalanobis distances 
       as the outlier scores) [11, 12]
     3. **One-Class Support Vector Machines** [3]
     
  2. Proximity-Based Outlier Detection Models:
     1. **LOF: Local Outlier Factor** [1]
     2. **CBLOF: Clustering-Based Local Outlier Factor** [15]
     3. **kNN: k Nearest Neighbors** (use the distance to the kth nearest 
     neighbor as the outlier score)
     4. **Average kNN** Outlier Detection (use the average distance to k 
     nearest neighbors as the outlier score)
     5. **Median kNN** Outlier Detection (use the median distance to k nearest 
     neighbors as the outlier score)
     6. **HBOS: Histogram-based Outlier Score** [5]
     
  3. Probabilistic Models for Outlier Detection:
     1. **ABOD: Angle-Based Outlier Detection** [7]
     2. **FastABOD: Fast Angle-Based Outlier Detection using approximation** [7]
  
  4. Outlier Ensembles and Combination Frameworks
     1. **Isolation Forest** [2]
     2. **Feature Bagging** [9]

In [1]:
from __future__ import division
from __future__ import print_function

import os
import sys
from time import time

# temporary solution for relative imports in case pyod is not installed
# if pyod is installed, no need to use the following line
sys.path.append(
    os.path.abspath(os.path.join(os.path.dirname("__file__"), '..')))
# suppress warnings for clean output
import warnings

warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from scipy.io import loadmat

from pyod.models.abod import ABOD
from pyod.models.cblof import CBLOF
from pyod.models.feature_bagging import FeatureBagging
from pyod.models.hbos import HBOS
from pyod.models.iforest import IForest
from pyod.models.knn import KNN
from pyod.models.lof import LOF
from pyod.models.mcd import MCD
from pyod.models.ocsvm import OCSVM
from pyod.models.pca import PCA

from pyod.utils.utility import standardizer
from pyod.utils.utility import precision_n_scores
from sklearn.metrics import roc_auc_score

In [2]:
# Benchmark every detector on every dataset and collect three result tables
# (ROC AUC, precision @ rank n, execution time), one row per dataset.

# Data files to read X and y from; each is expected under the 'data' folder
mat_file_list = ['arrhythmia.mat',
                 'cardio.mat',
                 'glass.mat',
                 'ionosphere.mat',
                 'letter.mat',
                 'lympho.mat',
                 'mnist.mat',
                 'musk.mat',
                 'optdigits.mat',
                 'pendigits.mat',
                 'pima.mat',
                 'satellite.mat',
                 'satimage-2.mat',
                 'shuttle.mat',
                 'vertebral.mat',
                 'vowels.mat',
                 'wbc.mat']

# Define ten outlier detection tools to be compared.
# NOTE: the same RandomState instance is handed to every split and to every
# detector that accepts one, so reordering datasets or detectors may change
# the reported numbers.
random_state = np.random.RandomState(42)

# Four dataset-description columns followed by one column per detector.
# The detector columns MUST stay in the same order as the `classifiers`
# list built inside the loop, because results are appended positionally.
df_columns = ['Data', '#Samples', '# Dimensions', 'Outlier Perc',
              'ABOD', 'CBLOF', 'FB', 'HBOS', 'IForest', 'KNN', 'LOF', 'MCD',
              'OCSVM', 'PCA']

# Rows accumulated so far; the DataFrames are rebuilt from these after every
# dataset so partial results survive an interrupted run.
roc_rows = []
prn_rows = []
time_rows = []
roc_df = pd.DataFrame(columns=df_columns)
prn_df = pd.DataFrame(columns=df_columns)
time_df = pd.DataFrame(columns=df_columns)

for mat_file in mat_file_list:
    print("\n... Processing", mat_file, '...')
    mat = loadmat(os.path.join('data', mat_file))

    X = mat['X']
    y = mat['y'].ravel()
    # non-zero labels are counted as outliers
    outliers_fraction = np.count_nonzero(y) / len(y)
    outliers_percentage = round(outliers_fraction * 100, ndigits=4)

    # dataset description shared by the three result rows
    dataset_info = [mat_file[:-4], X.shape[0], X.shape[1],
                    outliers_percentage]

    # construct containers for saving results (independent copies)
    roc_list = list(dataset_info)
    prn_list = list(dataset_info)
    time_list = list(dataset_info)

    # 60% data for training and 40% for testing
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.4, random_state=random_state)

    # standardizing data for processing
    X_train_norm, X_test_norm = standardizer(X_train, X_test)

    # An explicit list of (name, detector) pairs keeps the evaluation order
    # fixed on every Python version, so scores always land in the matching
    # df_columns entry (a plain dict only guarantees this on Python 3.7+).
    classifiers = [
        ('Angle-based Outlier Detector (ABOD)',
         ABOD(contamination=outliers_fraction)),
        ('Cluster-based Local Outlier Factor',
         CBLOF(contamination=outliers_fraction,
               random_state=random_state)),
        ('Feature Bagging',
         FeatureBagging(contamination=outliers_fraction,
                        random_state=random_state)),
        ('Histogram-base Outlier Detection (HBOS)',
         HBOS(contamination=outliers_fraction)),
        ('Isolation Forest',
         IForest(contamination=outliers_fraction,
                 random_state=random_state)),
        ('K Nearest Neighbors (KNN)',
         KNN(contamination=outliers_fraction)),
        ('Local Outlier Factor (LOF)',
         LOF(contamination=outliers_fraction)),
        ('Minimum Covariance Determinant (MCD)',
         MCD(contamination=outliers_fraction)),
        ('One-class SVM (OCSVM)',
         OCSVM(contamination=outliers_fraction)),
        ('Principal Component Analysis (PCA)',
         PCA(contamination=outliers_fraction)),
    ]

    for clf_name, clf in classifiers:
        # execution time covers fitting on the training split plus scoring
        # the test split
        t0 = time()
        clf.fit(X_train_norm)
        test_scores = clf.decision_function(X_test_norm)
        t1 = time()
        duration = round(t1 - t0, ndigits=4)
        time_list.append(duration)

        roc = round(roc_auc_score(y_test, test_scores), ndigits=4)
        prn = round(precision_n_scores(y_test, test_scores), ndigits=4)

        print('{clf_name} ROC:{roc}, precision @ rank n:{prn}, '
              'execution time: {duration}s'.format(
            clf_name=clf_name, roc=roc, prn=prn, duration=duration))

        roc_list.append(roc)
        prn_list.append(prn)

    # Rebuild the result tables from all rows gathered so far. Building from
    # a list of rows gives a unique 0..n-1 index and lets pandas infer numeric
    # column dtypes; a row whose length differs from df_columns makes the
    # DataFrame constructor raise instead of silently misaligning.
    time_rows.append(time_list)
    time_df = pd.DataFrame(time_rows, columns=df_columns)

    roc_rows.append(roc_list)
    roc_df = pd.DataFrame(roc_rows, columns=df_columns)

    prn_rows.append(prn_list)
    prn_df = pd.DataFrame(prn_rows, columns=df_columns)


... Processing arrhythmia.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.7687, precision @ rank n:0.3571, execution time: 0.1935s
Cluster-based Local Outlier Factor ROC:0.778, precision @ rank n:0.5, execution time: 0.753s
Feature Bagging ROC:0.7736, precision @ rank n:0.5, execution time: 0.5565s
Histogram-base Outlier Detection (HBOS) ROC:0.8511, precision @ rank n:0.5714, execution time: 0.1925s
Isolation Forest ROC:0.8217, precision @ rank n:0.5, execution time: 0.2065s
K Nearest Neighbors (KNN) ROC:0.782, precision @ rank n:0.5, execution time: 0.0772s
Local Outlier Factor (LOF) ROC:0.7787, precision @ rank n:0.4643, execution time: 0.0702s




Minimum Covariance Determinant (MCD) ROC:0.8228, precision @ rank n:0.4286, execution time: 0.4933s
One-class SVM (OCSVM) ROC:0.7986, precision @ rank n:0.5, execution time: 0.0441s
Principal Component Analysis (PCA) ROC:0.7997, precision @ rank n:0.5, execution time: 0.0501s

... Processing cardio.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.6199, precision @ rank n:0.2603, execution time: 0.5134s
Cluster-based Local Outlier Factor ROC:0.95, precision @ rank n:0.6575, execution time: 0.7389s
Feature Bagging ROC:0.5718, precision @ rank n:0.2192, execution time: 0.729s
Histogram-base Outlier Detection (HBOS) ROC:0.8417, precision @ rank n:0.4932, execution time: 0.0481s
Isolation Forest ROC:0.9358, precision @ rank n:0.5342, execution time: 0.2216s
K Nearest Neighbors (KNN) ROC:0.7666, precision @ rank n:0.3562, execution time: 0.1273s
Local Outlier Factor (LOF) ROC:0.5744, precision @ rank n:0.2055, execution time: 0.0902s




Minimum Covariance Determinant (MCD) ROC:0.8198, precision @ rank n:0.4795, execution time: 0.4161s
One-class SVM (OCSVM) ROC:0.935, precision @ rank n:0.5342, execution time: 0.0702s
Principal Component Analysis (PCA) ROC:0.9449, precision @ rank n:0.6027, execution time: 0.004s

... Processing glass.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.8099, precision @ rank n:0.2, execution time: 0.0672s
Cluster-based Local Outlier Factor ROC:0.7704, precision @ rank n:0.2, execution time: 0.752s
Feature Bagging ROC:0.7827, precision @ rank n:0.2, execution time: 0.0251s
Histogram-base Outlier Detection (HBOS) ROC:0.7012, precision @ rank n:0.0, execution time: 0.005s
Isolation Forest ROC:0.7086, precision @ rank n:0.2, execution time: 0.1334s
K Nearest Neighbors (KNN) ROC:0.8519, precision @ rank n:0.2, execution time: 0.006s
Local Outlier Factor (LOF) ROC:0.7704, precision @ rank n:0.0, execution time: 0.002s




Minimum Covariance Determinant (MCD) ROC:0.8099, precision @ rank n:0.0, execution time: 0.0351s
One-class SVM (OCSVM) ROC:0.5951, precision @ rank n:0.2, execution time: 0.001s
Principal Component Analysis (PCA) ROC:0.6568, precision @ rank n:0.2, execution time: 0.001s

... Processing ionosphere.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.9089, precision @ rank n:0.8148, execution time: 0.0943s
Cluster-based Local Outlier Factor ROC:0.8365, precision @ rank n:0.6852, execution time: 0.775s
Feature Bagging ROC:0.9083, precision @ rank n:0.7407, execution time: 0.0551s
Histogram-base Outlier Detection (HBOS) ROC:0.5324, precision @ rank n:0.3519, execution time: 0.0201s
Isolation Forest ROC:0.8457, precision @ rank n:0.6481, execution time: 0.1524s
K Nearest Neighbors (KNN) ROC:0.9304, precision @ rank n:0.8519, execution time: 0.014s
Local Outlier Factor (LOF) ROC:0.9089, precision @ rank n:0.7407, execution time: 0.006s
Minimum Covariance Determinant (MCD) ROC:0.9606, precision



Angle-based Outlier Detector (ABOD) ROC:0.8695, precision @ rank n:0.3953, execution time: 0.4803s
Cluster-based Local Outlier Factor ROC:0.4743, precision @ rank n:0.0465, execution time: 0.779s
Feature Bagging ROC:0.8371, precision @ rank n:0.2326, execution time: 0.6668s
Histogram-base Outlier Detection (HBOS) ROC:0.6166, precision @ rank n:0.1395, execution time: 0.0612s
Isolation Forest ROC:0.6154, precision @ rank n:0.093, execution time: 0.2276s
K Nearest Neighbors (KNN) ROC:0.8447, precision @ rank n:0.3023, execution time: 0.1224s
Local Outlier Factor (LOF) ROC:0.822, precision @ rank n:0.2791, execution time: 0.0882s
Minimum Covariance Determinant (MCD) ROC:0.7936, precision @ rank n:0.1395, execution time: 0.8422s
One-class SVM (OCSVM) ROC:0.6032, precision @ rank n:0.1163, execution time: 0.0852s
Principal Component Analysis (PCA) ROC:0.5339, precision @ rank n:0.0698, execution time: 0.004s

... Processing lympho.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.9153, prec



Minimum Covariance Determinant (MCD) ROC:0.9492, precision @ rank n:0.0, execution time: 0.0401s
One-class SVM (OCSVM) ROC:1.0, precision @ rank n:1.0, execution time: 0.002s
Principal Component Analysis (PCA) ROC:0.9831, precision @ rank n:0.0, execution time: 0.001s

... Processing mnist.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.7783, precision @ rank n:0.3614, execution time: 7.3676s
Cluster-based Local Outlier Factor ROC:0.8882, precision @ rank n:0.4246, execution time: 0.8161s
Feature Bagging ROC:0.735, precision @ rank n:0.3544, execution time: 46.2038s
Histogram-base Outlier Detection (HBOS) ROC:0.5717, precision @ rank n:0.1088, execution time: 1.1511s
Isolation Forest ROC:0.7995, precision @ rank n:0.2842, execution time: 1.6785s
K Nearest Neighbors (KNN) ROC:0.8503, precision @ rank n:0.4281, execution time: 7.6172s
Local Outlier Factor (LOF) ROC:0.7381, precision @ rank n:0.3544, execution time: 8.0045s




Minimum Covariance Determinant (MCD) ROC:0.8335, precision @ rank n:0.2526, execution time: 2.2795s
One-class SVM (OCSVM) ROC:0.8579, precision @ rank n:0.4035, execution time: 4.7961s
Principal Component Analysis (PCA) ROC:0.8596, precision @ rank n:0.4035, execution time: 0.1233s

... Processing musk.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.1574, precision @ rank n:0.1224, execution time: 2.7311s
Cluster-based Local Outlier Factor ROC:1.0, precision @ rank n:1.0, execution time: 0.7992s
Feature Bagging ROC:0.6067, precision @ rank n:0.3878, execution time: 13.2708s
Histogram-base Outlier Detection (HBOS) ROC:1.0, precision @ rank n:1.0, execution time: 0.6006s
Isolation Forest ROC:1.0, precision @ rank n:0.9796, execution time: 0.8081s
K Nearest Neighbors (KNN) ROC:0.8691, precision @ rank n:0.3673, execution time: 1.6925s
Local Outlier Factor (LOF) ROC:0.6182, precision @ rank n:0.3878, execution time: 1.5571s
Minimum Covariance Determinant (MCD) ROC:1.0, precision @ rank n



Minimum Covariance Determinant (MCD) ROC:0.4005, precision @ rank n:0.0, execution time: 1.0097s
One-class SVM (OCSVM) ROC:0.5176, precision @ rank n:0.0, execution time: 1.3767s
Principal Component Analysis (PCA) ROC:0.5239, precision @ rank n:0.0, execution time: 0.0461s

... Processing pendigits.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.6494, precision @ rank n:0.0784, execution time: 1.9542s
Cluster-based Local Outlier Factor ROC:0.9631, precision @ rank n:0.3725, execution time: 0.8121s
Feature Bagging ROC:0.427, precision @ rank n:0.098, execution time: 4.4919s
Histogram-base Outlier Detection (HBOS) ROC:0.9324, precision @ rank n:0.3333, execution time: 0.1264s
Isolation Forest ROC:0.945, precision @ rank n:0.3725, execution time: 0.5003s
K Nearest Neighbors (KNN) ROC:0.7279, precision @ rank n:0.098, execution time: 0.5214s
Local Outlier Factor (LOF) ROC:0.4257, precision @ rank n:0.098, execution time: 0.5334s
Minimum Covariance Determinant (MCD) ROC:0.8296, precision 



Angle-based Outlier Detector (ABOD) ROC:0.583, precision @ rank n:0.3942, execution time: 2.1738s
Cluster-based Local Outlier Factor ROC:0.7217, precision @ rank n:0.4512, execution time: 0.764s
Feature Bagging ROC:0.5649, precision @ rank n:0.4043, execution time: 6.8743s
Histogram-base Outlier Detection (HBOS) ROC:0.7591, precision @ rank n:0.5665, execution time: 0.2968s
Isolation Forest ROC:0.6735, precision @ rank n:0.5311, execution time: 0.6687s
K Nearest Neighbors (KNN) ROC:0.6767, precision @ rank n:0.493, execution time: 1.0558s
Local Outlier Factor (LOF) ROC:0.5618, precision @ rank n:0.3916, execution time: 1.0377s
Minimum Covariance Determinant (MCD) ROC:0.7978, precision @ rank n:0.6793, execution time: 1.9011s
One-class SVM (OCSVM) ROC:0.6654, precision @ rank n:0.526, execution time: 1.3436s
Principal Component Analysis (PCA) ROC:0.5984, precision @ rank n:0.4689, execution time: 0.021s

... Processing satimage-2.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.859, pr



Angle-based Outlier Detector (ABOD) ROC:0.6037, precision @ rank n:0.1732, execution time: 18.5023s
Cluster-based Local Outlier Factor ROC:0.992, precision @ rank n:0.9549, execution time: 0.8713s
Feature Bagging ROC:0.474, precision @ rank n:0.0451, execution time: 69.5151s
Histogram-base Outlier Detection (HBOS) ROC:0.9829, precision @ rank n:0.9992, execution time: 0.5174s
Isolation Forest ROC:0.9979, precision @ rank n:0.9753, execution time: 3.0391s
K Nearest Neighbors (KNN) ROC:0.6404, precision @ rank n:0.2023, execution time: 8.0203s
Local Outlier Factor (LOF) ROC:0.5229, precision @ rank n:0.1164, execution time: 11.1998s






Minimum Covariance Determinant (MCD) ROC:0.99, precision @ rank n:0.7358, execution time: 10.0648s
One-class SVM (OCSVM) ROC:0.9909, precision @ rank n:0.9592, execution time: 47.05s
Principal Component Analysis (PCA) ROC:0.9892, precision @ rank n:0.9556, execution time: 0.0361s

... Processing vertebral.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.4171, precision @ rank n:0.0909, execution time: 0.0732s
Cluster-based Local Outlier Factor ROC:0.3176, precision @ rank n:0.0, execution time: 0.8824s
Feature Bagging ROC:0.4193, precision @ rank n:0.0, execution time: 0.0271s
Histogram-base Outlier Detection (HBOS) ROC:0.2947, precision @ rank n:0.0, execution time: 0.004s
Isolation Forest ROC:0.3497, precision @ rank n:0.0909, execution time: 0.1595s
K Nearest Neighbors (KNN) ROC:0.3658, precision @ rank n:0.0, execution time: 0.0088s
Local Outlier Factor (LOF) ROC:0.4075, precision @ rank n:0.0, execution time: 0.003s
Minimum Covariance Determinant (MCD) ROC:0.3497, precision @ ran



Angle-based Outlier Detector (ABOD) ROC:0.9517, precision @ rank n:0.4118, execution time: 0.4392s
Cluster-based Local Outlier Factor ROC:0.5871, precision @ rank n:0.0588, execution time: 0.8893s
Feature Bagging ROC:0.9412, precision @ rank n:0.2941, execution time: 0.2848s
Histogram-base Outlier Detection (HBOS) ROC:0.6968, precision @ rank n:0.0588, execution time: 0.0241s
Isolation Forest ROC:0.7647, precision @ rank n:0.1765, execution time: 0.2357s
K Nearest Neighbors (KNN) ROC:0.9591, precision @ rank n:0.5294, execution time: 0.0681s
Local Outlier Factor (LOF) ROC:0.94, precision @ rank n:0.3529, execution time: 0.0351s
Minimum Covariance Determinant (MCD) ROC:0.7168, precision @ rank n:0.0588, execution time: 0.6507s
One-class SVM (OCSVM) ROC:0.7714, precision @ rank n:0.3529, execution time: 0.0381s
Principal Component Analysis (PCA) ROC:0.6421, precision @ rank n:0.0588, execution time: 0.002s

... Processing wbc.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.8782, precis

In [11]:
# Display the timing table: one row per dataset, one column per detector.
# Values are the measured durations (fit on the training split plus
# decision_function on the test split), rounded to 4 digits.
# NOTE(review): the printed label says "complexity", but the table holds
# measured execution times.
print('Time complexity')
time_df

Time complexity


Unnamed: 0,Data,#Samples,# Dimensions,Outlier Perc,ABOD,CBLOF,FB,HBOS,IForest,KNN,LOF,MCD,OCSVM,PCA
0,arrhythmia,452,274,14.6018,0.1935,0.753,0.5565,0.1925,0.2065,0.0772,0.0702,0.4933,0.0441,0.0501
0,cardio,1831,21,9.6122,0.5134,0.7389,0.729,0.0481,0.2216,0.1273,0.0902,0.4161,0.0702,0.004
0,glass,214,9,4.2056,0.0672,0.752,0.0251,0.005,0.1334,0.006,0.002,0.0351,0.001,0.001
0,ionosphere,351,33,35.8974,0.0943,0.775,0.0551,0.0201,0.1524,0.014,0.006,0.0541,0.004,0.002
0,letter,1600,32,6.25,0.4803,0.779,0.6668,0.0612,0.2276,0.1224,0.0882,0.8422,0.0852,0.004
0,lympho,148,18,4.0541,0.0351,0.746,0.0211,0.006,0.1524,0.005,0.001,0.0401,0.002,0.001
0,mnist,7603,100,9.2069,7.3676,0.8161,46.2038,1.1511,1.6785,7.6172,8.0045,2.2795,4.7961,0.1233
0,musk,3062,166,3.1679,2.7311,0.7992,13.2708,0.6006,0.8081,1.6925,1.5571,8.1717,1.1812,0.1303
0,optdigits,5216,64,2.8758,2.7172,0.746,10.892,0.4241,0.6217,1.6153,1.517,1.0097,1.3767,0.0461
0,pendigits,6870,16,2.2707,1.9542,0.8121,4.4919,0.1264,0.5003,0.5214,0.5334,1.4679,0.8543,0.006


Analyze the performance of ROC and Precision @ n

In [12]:
# Display the ROC table: one row per dataset, one column per detector,
# holding roc_auc_score on the test split rounded to 4 digits.
print('ROC Performance')
roc_df

ROC Performance


Unnamed: 0,Data,#Samples,# Dimensions,Outlier Perc,ABOD,CBLOF,FB,HBOS,IForest,KNN,LOF,MCD,OCSVM,PCA
0,arrhythmia,452,274,14.6018,0.7687,0.778,0.7736,0.8511,0.8217,0.782,0.7787,0.8228,0.7986,0.7997
0,cardio,1831,21,9.6122,0.6199,0.95,0.5718,0.8417,0.9358,0.7666,0.5744,0.8198,0.935,0.9449
0,glass,214,9,4.2056,0.8099,0.7704,0.7827,0.7012,0.7086,0.8519,0.7704,0.8099,0.5951,0.6568
0,ionosphere,351,33,35.8974,0.9089,0.8365,0.9083,0.5324,0.8457,0.9304,0.9089,0.9606,0.841,0.7993
0,letter,1600,32,6.25,0.8695,0.4743,0.8371,0.6166,0.6154,0.8447,0.822,0.7936,0.6032,0.5339
0,lympho,148,18,4.0541,0.9153,1.0,0.9661,0.9831,1.0,0.9661,0.9661,0.9492,1.0,0.9831
0,mnist,7603,100,9.2069,0.7783,0.8882,0.735,0.5717,0.7995,0.8503,0.7381,0.8335,0.8579,0.8596
0,musk,3062,166,3.1679,0.1574,1.0,0.6067,1.0,1.0,0.8691,0.6182,1.0,1.0,1.0
0,optdigits,5216,64,2.8758,0.5442,0.3684,0.5656,0.8583,0.68,0.3917,0.6153,0.4005,0.5176,0.5239
0,pendigits,6870,16,2.2707,0.6494,0.9631,0.427,0.9324,0.945,0.7279,0.4257,0.8296,0.9227,0.9375


In [13]:
# Display the precision @ rank n table: one row per dataset, one column per
# detector, holding precision_n_scores on the test split rounded to 4 digits.
print('Precision @ n Performance')
prn_df

Precision @ n Performance


Unnamed: 0,Data,#Samples,# Dimensions,Outlier Perc,ABOD,CBLOF,FB,HBOS,IForest,KNN,LOF,MCD,OCSVM,PCA
0,arrhythmia,452,274,14.6018,0.3571,0.5,0.5,0.5714,0.5,0.5,0.4643,0.4286,0.5,0.5
0,cardio,1831,21,9.6122,0.2603,0.6575,0.2192,0.4932,0.5342,0.3562,0.2055,0.4795,0.5342,0.6027
0,glass,214,9,4.2056,0.2,0.2,0.2,0.0,0.2,0.2,0.0,0.0,0.2,0.2
0,ionosphere,351,33,35.8974,0.8148,0.6852,0.7407,0.3519,0.6481,0.8519,0.7407,0.8704,0.7037,0.6111
0,letter,1600,32,6.25,0.3953,0.0465,0.2326,0.1395,0.093,0.3023,0.2791,0.1395,0.1163,0.0698
0,lympho,148,18,4.0541,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
0,mnist,7603,100,9.2069,0.3614,0.4246,0.3544,0.1088,0.2842,0.4281,0.3544,0.2526,0.4035,0.4035
0,musk,3062,166,3.1679,0.1224,1.0,0.3878,1.0,0.9796,0.3673,0.3878,1.0,1.0,1.0
0,optdigits,5216,64,2.8758,0.0172,0.0,0.0172,0.2241,0.0172,0.0,0.0172,0.0,0.0,0.0
0,pendigits,6870,16,2.2707,0.0784,0.3725,0.098,0.3333,0.3725,0.098,0.098,0.098,0.3137,0.2745
