In [6]:
import csv
import datetime
import math
from itertools import repeat
from statistics import mean

import numpy as np
import pandas as pd
from pandas import DataFrame

class HBOS:
    """Histogram-based outlier scoring for the columns of a DataFrame.

    ``fit`` builds one histogram per column and scores every bin from its
    density; ``predict`` looks up, for each row, the bin score of every
    column value and combines them into one score per row.

    Parameters
    ----------
    log_scale : bool
        When true, bin scores are ``log10`` of the inverse normalised density
        and the per-column scores of a row are summed.
    ranked : bool
        When true, ``fit`` switches ``log_scale`` off; per-column scores are
        still summed (not multiplied) in ``predict``.
    bin_info_array : sequence of int, optional
        Number of bins per column. ``-1`` entries are replaced in ``fit`` by
        ``round(sqrt(number_of_rows))``. Empty/``None`` means ``-1`` for all.
    mode_array : sequence of str, optional
        Per-column mode. ``'dynamic binwidth'`` selects the dynamic histogram;
        any other value selects the fixed-width histogram. Empty/``None``
        means ``'dynamic binwidth'`` for every column.
    nominal_array : sequence of bool, optional
        Per-column flag marking nominal attributes. Empty/``None`` means
        ``False`` for every column.

    Notes
    -----
    The bins are ``HistogramBin`` objects; that class is defined in a separate
    cell of this notebook and must exist before ``fit`` is called.
    """

    def __init__(self, log_scale=True, ranked=False, bin_info_array=None, mode_array=None, nominal_array=None):
        self.log_scale = log_scale
        self.ranked = ranked
        # Copy the configuration sequences: a literal ``[]`` default would be
        # shared by every instance, and ``fit`` writes into ``bin_info_array``,
        # which must not leak back into the caller's list.
        self.bin_info_array = [] if bin_info_array is None else list(bin_info_array)
        self.mode_array = [] if mode_array is None else list(mode_array)
        self.nominal_array = [] if nominal_array is None else list(nominal_array)
        # One list of bins per column; populated by ``fit``.
        self.histogram_list = []

    def fit(self, data):
        """Build and score one histogram per column of ``data``.

        Parameters
        ----------
        data : pandas.DataFrame
            Numeric observations, one column per attribute.

        Side effects
        ------------
        Fills in ``bin_info_array``, ``mode_array`` and ``nominal_array`` when
        they are empty, forces ``log_scale`` to ``False`` when ``ranked`` is
        set, and replaces ``histogram_list``.
        """
        attr_size = len(data.columns)
        total_data_size = len(data)

        # init params if needed
        if len(self.bin_info_array) == 0:
            self.bin_info_array = list(repeat(-1, attr_size))

        if len(self.mode_array) == 0:
            self.mode_array = list(repeat('dynamic binwidth', attr_size))

        if len(self.nominal_array) == 0:
            self.nominal_array = list(repeat(False, attr_size))

        if self.ranked:
            self.log_scale = False

        normal = 1.0

        # calculate standard bin count if needed
        for i in range(len(self.bin_info_array)):
            if self.bin_info_array[i] == -1:
                self.bin_info_array[i] = round(math.sqrt(len(data)))

        # initialize histogram: one independent list per column
        self.histogram_list = [[] for _ in range(attr_size)]

        # save maximum value for every attribute (needed to normalize bin width)
        maximum_value_of_rows = data.apply(max).values

        # sort every column independently
        sorted_data = data.apply(sorted)

        # create histograms
        for attrIndex in range(len(sorted_data.columns)):
            attr = sorted_data.columns[attrIndex]
            last = 0
            bin_start = sorted_data[attr][0]
            if self.mode_array[attrIndex] == 'dynamic binwidth':
                if self.nominal_array[attrIndex]:
                    # nominal attribute: one distinct value per bin
                    while last < len(sorted_data) - 1:
                        last = self.create_dynamic_histogram(self.histogram_list, sorted_data, last, 1, attrIndex, True)
                else:
                    while last < len(sorted_data) - 1:
                        values_per_bin = math.floor(len(sorted_data) / self.bin_info_array[attrIndex])
                        last = self.create_dynamic_histogram(self.histogram_list, sorted_data, last, values_per_bin, attrIndex, False)
            else:
                count_bins = 0
                binwidth = (sorted_data[attr][len(sorted_data) - 1] - sorted_data[attr][0]) / self.bin_info_array[attrIndex]
                if self.nominal_array[attrIndex] or binwidth == 0:
                    binwidth = 1
                while last < len(sorted_data):
                    # the final bin is stretched to the column maximum
                    is_last_bin = count_bins == self.bin_info_array[attrIndex] - 1
                    last = self.create_static_histogram(self.histogram_list, sorted_data, last, binwidth, attrIndex, bin_start, is_last_bin)
                    bin_start = bin_start + binwidth
                    count_bins = count_bins + 1

        # calculate score using normalized bin width
        # bin width is normalized to the number of datapoints
        # save maximum score for every attr (needed to normalize score)
        max_score = []
        for i, histogram in enumerate(self.histogram_list):
            max_score.append(0)
            for _bin in histogram:
                _bin.total_data_size = total_data_size
                _bin.calc_score(maximum_value_of_rows[i])
                if max_score[i] < _bin.score:
                    max_score[i] = _bin.score

        for i, histogram in enumerate(self.histogram_list):
            for _bin in histogram:
                _bin.normalize_score(normal, max_score[i], self.log_scale)

    def predict(self, data):
        """Return one combined score per row of ``data`` as a list.

        Per-column bin scores are summed when ``log_scale`` or ``ranked`` is
        set and multiplied otherwise. Values that fall outside every bin
        contribute a score of ``0``.
        """
        additive = self.log_scale or self.ranked
        # Materialise the value matrix once instead of once per row.
        values = data.values
        n_attrs = len(data.columns)

        score_array = []
        for i in range(len(data)):
            each_data = values[i]
            value = 0 if additive else 1
            for attr in range(n_attrs):
                score = self.get_score(self.histogram_list[attr], each_data[attr])
                if additive:
                    value = value + score
                else:
                    value = value * score
            score_array.append(value)
        return score_array

    def fit_predict(self, data):
        """Fit on ``data`` and return the scores of the same rows."""
        self.fit(data)
        return self.predict(data)

    def get_score(self, histogram, value):
        """Return the score of the bin containing ``value``.

        Every bin but the last covers ``[range_from, range_to)``; the last bin
        also includes its upper bound. Returns ``0`` when no bin matches or
        when ``histogram`` is empty.
        """
        if len(histogram) == 0:
            return 0

        for i in range(len(histogram) - 1):
            _bin = histogram[i]
            if _bin.range_from <= value and value < _bin.range_to:
                return _bin.score

        _bin = histogram[-1]
        if _bin.range_from <= value and value <= _bin.range_to:
            return _bin.score
        return 0

    @staticmethod
    def check_amount(sortedData, first_occurrence, values_per_bin, attr):
        """Tell whether a value repeats for more than ``values_per_bin`` rows.

        Returns ``True`` when the row ``values_per_bin`` positions after
        ``first_occurrence`` exists and holds the same value in column
        ``attr``; ``False`` otherwise.
        """
        if first_occurrence + values_per_bin < len(sortedData):
            return bool(sortedData[attr][first_occurrence] == sortedData[attr][first_occurrence + values_per_bin])
        return False

    @staticmethod
    def create_dynamic_histogram(histogram_list, sortedData, first_index, values_per_bin, attrIndex, isNominal):
        """Create the next dynamic-width bin starting at row ``first_index``.

        The bin is appended to ``histogram_list[attrIndex]`` (or merged into
        the previous bin when it would have zero width).

        Returns
        -------
        int
            Index of the first row that was not placed into this bin.
        """
        attr = sortedData.columns[attrIndex]
        column = sortedData[attr]
        n_rows = len(sortedData)

        # create new bin; its upper bound is decided further down
        _bin = HistogramBin(column[first_index], 0, 0)

        # clamp the nominal end of the bin to the end of the data
        last_index = min(first_index + values_per_bin, n_rows)

        # the first value always goes to the bin
        _bin.add_quantitiy(1)

        # For every other value up to last_index: keep it in this bin unless
        # it is a new value that itself occurs more than values_per_bin times,
        # in which case it gets a bin of its own and this bin ends here.
        cursor = first_index
        for i in range(first_index + 1, last_index):
            same_value = column[i] == column[cursor]
            if not same_value and HBOS.check_amount(sortedData, i, values_per_bin, attr):
                break
            _bin.add_quantitiy(1)
            cursor = cursor + 1

        # continue to put values in the bin until a new value arrives, so that
        # equal values are never split across bins
        for i in range(cursor + 1, n_rows):
            if column[i] == column[cursor]:
                _bin.add_quantitiy(1)
                cursor = cursor + 1
            else:
                break

        # adjust range of the bin
        if cursor + 1 < n_rows:
            _bin.range_to = column[cursor + 1]
        elif isNominal:
            # last data: give the final nominal value a width of one
            _bin.range_to = column[n_rows - 1] + 1
        else:
            _bin.range_to = column[n_rows - 1]

        # save bin
        if _bin.range_to - _bin.range_from > 0:
            histogram_list[attrIndex].append(_bin)
        elif len(histogram_list[attrIndex]) == 0:
            # first bin of a constant column: force a non-zero width
            _bin.range_to = _bin.range_to + 1
            histogram_list[attrIndex].append(_bin)
        else:
            # if the bin would have length of zero
            # we merge it with previous bin
            # this can happen at the end of the histogram
            lastBin = histogram_list[attrIndex][-1]
            lastBin.add_quantitiy(_bin.quantity)
            lastBin.range_to = _bin.range_to

        return cursor + 1

    @staticmethod
    def create_static_histogram(histogram_list, sorted_data, first_index, binwidth, attrIndex, bin_start, last_bin):
        """Create the next fixed-width bin and append it to the histogram.

        The bin covers ``bin_start`` to ``bin_start + binwidth``; when
        ``last_bin`` is set its upper bound is the column maximum instead.
        Rows from ``first_index`` onwards are counted while their value does
        not exceed the upper bound.

        Returns
        -------
        int
            Index of the first row that was not counted into this bin.
        """
        attr = sorted_data.columns[attrIndex]
        column = sorted_data[attr]
        n_rows = len(sorted_data)

        if last_bin:
            _bin = HistogramBin(bin_start, column[n_rows - 1], 0)
        else:
            _bin = HistogramBin(bin_start, bin_start + binwidth, 0)

        last = first_index - 1
        cursor = first_index

        while cursor < n_rows:
            if column[cursor] > _bin.range_to:
                break
            _bin.quantity = _bin.quantity + 1
            last = cursor
            cursor = cursor + 1

        histogram_list[attrIndex].append(_bin)
        return last + 1

In [2]:
class HistogramBin:
    """A single histogram bucket: a value interval and how many points it holds."""

    def __init__(self, range_from, range_to, quantity):
        # Interval bounds and number of observations assigned to the bucket.
        self.range_from, self.range_to = range_from, range_to
        self.quantity = quantity
        # Both are filled in later: the score by calc_score/normalize_score,
        # the data size by whoever owns the histogram.
        self.score = 0
        self.total_data_size = 0

    def get_height(self):
        """Return the bucket density: quantity divided by interval width."""
        return self.quantity / (self.range_to - self.range_from)

    def add_quantitiy(self, anz):
        """Add ``anz`` observations to the bucket."""
        self.quantity += anz

    def calc_score(self, max_score):
        """Set ``score`` to the bucket's quantity over its normalised area.

        The interval width is scaled by ``total_data_size / abs(max_score)``;
        a ``max_score`` of zero is treated as one. Empty buckets keep their
        current score.
        """
        if max_score == 0:
            max_score = 1

        if self.quantity <= 0:
            return

        span = self.range_to - self.range_from
        self.score = self.quantity / (span * self.total_data_size / abs(max_score))

    def normalize_score(self, normal, max_score, log_scale):
        """Rescale ``score`` by ``normal / max_score`` and invert it.

        A rescaled score of zero is left as is. Otherwise the score becomes
        its reciprocal, and additionally its base-10 logarithm when
        ``log_scale`` is true.
        """
        self.score = self.score * normal / max_score
        if self.score != 0:
            self.score = 1 / self.score
            if log_scale:
                self.score = math.log10(self.score)

In [51]:
from sklearn.ensemble import IsolationForest

In [4]:
def csv_to_data(directory, data_n):
    """Read ``s3.csv`` through ``s<data_n>.csv`` from ``directory``.

    Each file is parsed without a header row; the per-file value arrays are
    stacked, in file-number order, into one NumPy array.
    """
    per_file_values = [
        pd.read_csv(directory + '/s' + str(file_no) + '.csv', header=None).values
        for file_no in range(3, data_n + 1)
    ]
    return np.array(per_file_values)


def csv_to_aged_data(directory, aged_data_n):
    """Read ``s1_aged.csv`` through ``s<aged_data_n>_aged.csv`` from ``directory``.

    Each file is parsed without a header row; the per-file value arrays are
    stacked, in file-number order, into one NumPy array.
    """
    per_file_values = [
        pd.read_csv(directory + '/s' + str(file_no) + '_aged.csv', header=None).values
        for file_no in range(1, aged_data_n + 1)
    ]
    return np.array(per_file_values)


def generate_data(directory, data_n, aged_data_n):
    """Load both sample sets from ``directory``.

    Returns a ``(data, aged_data)`` tuple: the result of ``csv_to_data`` for
    ``data_n`` followed by the result of ``csv_to_aged_data`` for
    ``aged_data_n``.
    """
    return csv_to_data(directory, data_n), csv_to_aged_data(directory, aged_data_n)


def generate_residual_data(data_n, data):
    """Compute residuals: measured value minus the mean of its neighbours.

    For each of the first ``data_n`` samples in ``data`` and each grid cell,
    the estimate is the mean of the up/right/down/left neighbours that lie
    inside the grid, and the residual is the cell value minus that estimate.

    Parameters
    ----------
    data_n : int
        Number of leading samples to process; later samples stay all-zero in
        the result.
    data : numpy.ndarray
        Array indexed as ``data[sample, row, column]``.

    Returns
    -------
    numpy.ndarray
        Array with the shape of ``data`` holding the residuals. Floating-point
        input keeps its dtype; any other dtype is promoted to ``float64`` so
        residuals are not truncated.

    Raises
    ------
    statistics.StatisticsError
        If a cell has no in-grid neighbour (a 1x1 grid).
    """
    # (row offset, column offset) of the four direct neighbours
    neighbour_offsets = ((-1, 0), (0, 1), (1, 0), (0, -1))

    out_dtype = data.dtype if np.issubdtype(data.dtype, np.floating) else np.float64
    residual_data = np.zeros_like(data, dtype=out_dtype)

    for i in range(data_n):
        # Bounds come from the sample itself rather than a fixed 148x33 grid.
        n_rows, n_cols = data[i].shape
        for j in range(n_rows):
            for k in range(n_cols):
                neighbours = [
                    data[i, j + dy, k + dx]
                    for dy, dx in neighbour_offsets
                    if 0 <= j + dy < n_rows and 0 <= k + dx < n_cols
                ]
                residual_data[i, j, k] = data[i, j, k] - mean(neighbours)

    return residual_data


def generate_nnr(data_n=50, aged_data_n=2):
    """Generate the residual data sets.

    Loads the samples from the ``fresh_aged_ieice`` directory and returns a
    ``(residual_data, aged_residual_data)`` tuple, computing residuals for
    ``data_n - 2`` regular samples and ``aged_data_n`` aged samples.
    """
    loaded, loaded_aged = generate_data('fresh_aged_ieice', data_n, aged_data_n)
    regular_count = data_n - 2

    return (
        generate_residual_data(regular_count, loaded),
        generate_residual_data(aged_data_n, loaded_aged),
    )

In [18]:
residual_data, aged_residual_data = generate_nnr()

# One row per sample: every residual grid is flattened to a 1-D feature
# vector, regular samples first and aged samples last. Iterating over the
# arrays avoids hard-coding the sample counts (48 and 2 with the defaults).
data = np.array(
    [sample.flatten() for sample in residual_data]
    + [sample.flatten() for sample in aged_residual_data]
)

# Write the feature matrix to new.csv (an existing file of that name is
# overwritten). newline='' is what the csv module expects for writer files,
# and the with-block closes the file even if writing fails.
with open('new.csv', 'w', newline='') as file:
    csv.writer(file).writerows(data)

In [23]:
# Load the feature matrix from new.csv; the file has no header row, so the
# columns get integer labels.
dataset = pd.read_csv("./new.csv", header=None)
# Keep a separate copy so result columns can be added later without touching
# the frame used for fitting.
orig = dataset.copy()
dataset

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,4874,4875,4876,4877,4878,4879,4880,4881,4882,4883
0,2.48775,-2.279433,1.000967,1.4769,1.874133,-0.688033,-0.436433,39.998233,-38.753767,0.0,...,1.116833,0.281233,0.8139,-0.6626,38.373933,-38.3024,0.0,0.0,0.0,0.0
1,1.7678,-1.852233,0.907767,1.061767,1.8017,0.4512,-0.556533,41.515533,-40.9928,0.0,...,0.792267,1.103433,0.470733,-0.791733,40.1131,-39.741767,0.0,0.0,0.0,0.0
2,2.7298,-3.1781,1.0412,1.7057,1.934533,-0.177067,-0.194433,41.583867,-41.2845,0.0,...,0.695167,0.254133,0.8565,-0.9051,41.089233,-40.769633,0.0,0.0,0.0,0.0
3,2.05035,-2.0777,1.0933,1.207767,1.7112,-0.326367,0.855867,41.642467,-41.111167,0.0,...,0.851633,0.9651,-0.296233,-0.008233,40.1906,-40.1376,0.0,0.0,0.0,0.0
4,2.4741,-2.858067,1.2018,1.825367,0.946067,0.558767,-0.3139,40.102167,-39.803533,0.0,...,0.710367,0.497367,1.3157,-1.2069,38.6519,-38.844967,0.0,0.0,0.0,0.0
5,1.9491,-2.672767,0.540733,1.6046,1.0587,0.546367,0.1041,41.160467,-40.400567,0.0,...,0.113367,0.972233,0.340067,-0.315167,40.714967,-40.276667,0.0,0.0,0.0,0.0
6,2.05115,-2.647867,1.028033,1.819867,0.940633,-0.602833,1.095267,41.167667,-40.393567,0.0,...,0.980067,0.644433,0.588333,-0.6806,39.7294,-39.423933,0.0,0.0,0.0,0.0
7,1.92485,-3.477433,1.3499,1.650067,1.081167,-0.278867,0.710567,40.5503,-40.063567,0.0,...,2.073833,0.043633,1.409867,-1.5563,40.0436,-39.8195,0.0,0.0,0.0,0.0
8,3.07685,-3.6063,2.017067,0.612733,1.4275,0.0392,0.848433,42.117433,-41.815267,0.0,...,0.945567,0.62,0.901867,-1.173767,41.296833,-41.053133,0.0,0.0,0.0,0.0
9,1.6182,-1.199567,0.1319,1.164067,1.8041,-0.1241,-0.138267,38.7844,-38.057433,0.0,...,1.6713,-0.498433,0.7215,-0.5959,37.2603,-37.2764,0.0,0.0,0.0,0.0


In [20]:
# Fit HBOS with its default settings and score the same rows it was fitted on.
hbos = HBOS()
hbos_result = hbos.fit_predict(dataset)

In [21]:
# Show the HBOS scores: a plain list with one value per row of `dataset`.
hbos_result

[1490.6371744546436,
 1432.6520349270002,
 1550.548919353154,
 1455.3441875286653,
 1370.9408483836883,
 1338.9852021211082,
 1413.5842282462806,
 1407.261705362412,
 1603.9555433055618,
 1495.380979528325,
 1464.8341401062241,
 1460.7508035058843,
 1488.9434033521097,
 1339.6005346735794,
 1631.9906501003359,
 1532.9935870575064,
 1399.7007406060013,
 1731.0885986582555,
 1665.4610146450148,
 1515.9532643863795,
 1432.2788702575522,
 1411.8445597430632,
 1624.8752459736672,
 1502.1659485543337,
 1584.6492089086787,
 1776.2173073454592,
 1392.7999592794283,
 1483.9357371923807,
 1343.6005569118704,
 1360.6283325079485,
 1467.0771029559487,
 1391.2685814509898,
 1359.756697352075,
 1558.9719024312365,
 1481.7662637760332,
 1844.5627031990837,
 1509.4202438744042,
 1417.9665962616796,
 1302.575004779871,
 1468.3112017689957,
 1588.567404888451,
 1434.1753776122132,
 1319.3686061743178,
 1523.2980186820566,
 1422.283163212668,
 1480.9973611288265,
 1605.9269704485355,
 1498.7111730662257,

In [61]:
# Attach the HBOS score of every row to a copy of the original features.
hbos_orig = orig.copy()
hbos_orig['hbos'] = hbos_result
# Ground-truth labels: the last two rows are the aged samples (they were
# appended last when new.csv was written); every other row is labelled 0.
# The vector is sized from the frame instead of a hard-coded 50.
x = np.zeros(len(hbos_orig))
x[-2:] = 1
hbos_orig['class'] = x
hbos_orig

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,4876,4877,4878,4879,4880,4881,4882,4883,hbos,class
0,2.48775,-2.279433,1.000967,1.4769,1.874133,-0.688033,-0.436433,39.998233,-38.753767,0.0,...,0.8139,-0.6626,38.373933,-38.3024,0.0,0.0,0.0,0.0,1490.637174,0.0
1,1.7678,-1.852233,0.907767,1.061767,1.8017,0.4512,-0.556533,41.515533,-40.9928,0.0,...,0.470733,-0.791733,40.1131,-39.741767,0.0,0.0,0.0,0.0,1432.652035,0.0
2,2.7298,-3.1781,1.0412,1.7057,1.934533,-0.177067,-0.194433,41.583867,-41.2845,0.0,...,0.8565,-0.9051,41.089233,-40.769633,0.0,0.0,0.0,0.0,1550.548919,0.0
3,2.05035,-2.0777,1.0933,1.207767,1.7112,-0.326367,0.855867,41.642467,-41.111167,0.0,...,-0.296233,-0.008233,40.1906,-40.1376,0.0,0.0,0.0,0.0,1455.344188,0.0
4,2.4741,-2.858067,1.2018,1.825367,0.946067,0.558767,-0.3139,40.102167,-39.803533,0.0,...,1.3157,-1.2069,38.6519,-38.844967,0.0,0.0,0.0,0.0,1370.940848,0.0
5,1.9491,-2.672767,0.540733,1.6046,1.0587,0.546367,0.1041,41.160467,-40.400567,0.0,...,0.340067,-0.315167,40.714967,-40.276667,0.0,0.0,0.0,0.0,1338.985202,0.0
6,2.05115,-2.647867,1.028033,1.819867,0.940633,-0.602833,1.095267,41.167667,-40.393567,0.0,...,0.588333,-0.6806,39.7294,-39.423933,0.0,0.0,0.0,0.0,1413.584228,0.0
7,1.92485,-3.477433,1.3499,1.650067,1.081167,-0.278867,0.710567,40.5503,-40.063567,0.0,...,1.409867,-1.5563,40.0436,-39.8195,0.0,0.0,0.0,0.0,1407.261705,0.0
8,3.07685,-3.6063,2.017067,0.612733,1.4275,0.0392,0.848433,42.117433,-41.815267,0.0,...,0.901867,-1.173767,41.296833,-41.053133,0.0,0.0,0.0,0.0,1603.955543,0.0
9,1.6182,-1.199567,0.1319,1.164067,1.8041,-0.1241,-0.138267,38.7844,-38.057433,0.0,...,0.7215,-0.5959,37.2603,-37.2764,0.0,0.0,0.0,0.0,1495.38098,0.0


In [62]:
# The five rows with the largest HBOS score.
hbos_top5_data = hbos_orig.sort_values(by='hbos', ascending=False).head(5)
hbos_top5_data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,4876,4877,4878,4879,4880,4881,4882,4883,hbos,class
48,0.9939,-1.215267,-0.099033,1.8056,0.568867,0.379567,-0.027667,35.704933,-35.528967,0.0,...,-0.4752,0.6657,34.700167,-34.7951,0.0,0.0,0.0,0.0,1908.2922,1.0
35,1.3136,-1.756367,0.181367,2.235533,0.535567,-0.020867,0.3971,36.3062,-36.2232,0.0,...,0.524833,-0.9138,35.882933,-35.760067,0.0,0.0,0.0,0.0,1844.562703,0.0
49,1.4739,-2.339467,1.256933,1.066133,0.84,0.793833,-0.733867,36.33,-35.965567,0.0,...,0.8381,-0.301367,35.695367,-35.931133,0.0,0.0,0.0,0.0,1840.546098,1.0
25,2.7093,-2.7346,0.876633,1.856867,0.4452,0.395833,-0.474767,38.422633,-37.313767,0.0,...,1.285233,-0.842167,35.7533,-35.883567,0.0,0.0,0.0,0.0,1776.217307,0.0
17,0.62275,-1.2865,0.6921,0.7453,1.715633,0.033433,-0.188533,37.577767,-36.732733,0.0,...,0.025167,-0.083233,35.073067,-35.889267,0.0,0.0,0.0,0.0,1731.088599,0.0


In [63]:
# How many of the five top-scored rows carry the class label 1.
print(len(hbos_top5_data.loc[hbos_top5_data['class'] == 1]))

2


In [74]:
# Isolation Forest baseline on the same feature matrix.
# The `behaviour` argument is no longer passed: it was a deprecated no-op and
# current scikit-learn releases reject it. A fixed random_state makes the
# (randomised) forest reproducible across kernel restarts.
iforest = IsolationForest(
    n_estimators=100,
    max_samples='auto',
    contamination='auto',
    max_features=1.0,
    bootstrap=False,
    n_jobs=None,
    random_state=0,
    verbose=0,
    warm_start=False,
)
iforest.fit(dataset)
# decision_function: the lower the value, the more abnormal the row.
iforest_result = iforest.decision_function(dataset)
iforest_result

array([ 0.04666654,  0.03348292,  0.02460887,  0.04186663,  0.06149703,
        0.04894394,  0.04940245,  0.05076921, -0.00343711,  0.0463479 ,
        0.06670871,  0.03404388,  0.07165866,  0.07054567,  0.04644275,
        0.02689688,  0.02455471,  0.01126629,  0.04398484,  0.04980541,
        0.05461508,  0.05898749,  0.00852689,  0.04846566,  0.00997581,
        0.01426023,  0.04475306,  0.0343848 ,  0.05738068,  0.06274353,
        0.04063896,  0.05324865,  0.07092692,  0.06291605,  0.05236172,
       -0.00543904,  0.02534878,  0.02473438,  0.05503646,  0.05133021,
        0.03440179,  0.04782517,  0.05041914,  0.04223679,  0.04688216,
        0.03762728,  0.03898628,  0.02191608, -0.0099141 ,  0.00185834])

In [75]:
# Attach the Isolation Forest score of every row to a copy of the features.
iforest_orig = dataset.copy()
iforest_orig['if'] = iforest_result
# Ground-truth labels: the last two rows are the aged samples (they were
# appended last when new.csv was written); every other row is labelled 0.
# The vector is sized from the frame instead of a hard-coded 50.
x = np.zeros(len(iforest_orig))
x[-2:] = 1
iforest_orig['class'] = x
iforest_orig

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,4876,4877,4878,4879,4880,4881,4882,4883,if,class
0,2.48775,-2.279433,1.000967,1.4769,1.874133,-0.688033,-0.436433,39.998233,-38.753767,0.0,...,0.8139,-0.6626,38.373933,-38.3024,0.0,0.0,0.0,0.0,0.046667,0.0
1,1.7678,-1.852233,0.907767,1.061767,1.8017,0.4512,-0.556533,41.515533,-40.9928,0.0,...,0.470733,-0.791733,40.1131,-39.741767,0.0,0.0,0.0,0.0,0.033483,0.0
2,2.7298,-3.1781,1.0412,1.7057,1.934533,-0.177067,-0.194433,41.583867,-41.2845,0.0,...,0.8565,-0.9051,41.089233,-40.769633,0.0,0.0,0.0,0.0,0.024609,0.0
3,2.05035,-2.0777,1.0933,1.207767,1.7112,-0.326367,0.855867,41.642467,-41.111167,0.0,...,-0.296233,-0.008233,40.1906,-40.1376,0.0,0.0,0.0,0.0,0.041867,0.0
4,2.4741,-2.858067,1.2018,1.825367,0.946067,0.558767,-0.3139,40.102167,-39.803533,0.0,...,1.3157,-1.2069,38.6519,-38.844967,0.0,0.0,0.0,0.0,0.061497,0.0
5,1.9491,-2.672767,0.540733,1.6046,1.0587,0.546367,0.1041,41.160467,-40.400567,0.0,...,0.340067,-0.315167,40.714967,-40.276667,0.0,0.0,0.0,0.0,0.048944,0.0
6,2.05115,-2.647867,1.028033,1.819867,0.940633,-0.602833,1.095267,41.167667,-40.393567,0.0,...,0.588333,-0.6806,39.7294,-39.423933,0.0,0.0,0.0,0.0,0.049402,0.0
7,1.92485,-3.477433,1.3499,1.650067,1.081167,-0.278867,0.710567,40.5503,-40.063567,0.0,...,1.409867,-1.5563,40.0436,-39.8195,0.0,0.0,0.0,0.0,0.050769,0.0
8,3.07685,-3.6063,2.017067,0.612733,1.4275,0.0392,0.848433,42.117433,-41.815267,0.0,...,0.901867,-1.173767,41.296833,-41.053133,0.0,0.0,0.0,0.0,-0.003437,0.0
9,1.6182,-1.199567,0.1319,1.164067,1.8041,-0.1241,-0.138267,38.7844,-38.057433,0.0,...,0.7215,-0.5959,37.2603,-37.2764,0.0,0.0,0.0,0.0,0.046348,0.0


In [76]:
# The five rows with the smallest Isolation Forest score.
iforest_top5_data = iforest_orig.sort_values(by='if', ascending=True).head(5)
iforest_top5_data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,4876,4877,4878,4879,4880,4881,4882,4883,if,class
48,0.9939,-1.215267,-0.099033,1.8056,0.568867,0.379567,-0.027667,35.704933,-35.528967,0.0,...,-0.4752,0.6657,34.700167,-34.7951,0.0,0.0,0.0,0.0,-0.009914,1.0
35,1.3136,-1.756367,0.181367,2.235533,0.535567,-0.020867,0.3971,36.3062,-36.2232,0.0,...,0.524833,-0.9138,35.882933,-35.760067,0.0,0.0,0.0,0.0,-0.005439,0.0
8,3.07685,-3.6063,2.017067,0.612733,1.4275,0.0392,0.848433,42.117433,-41.815267,0.0,...,0.901867,-1.173767,41.296833,-41.053133,0.0,0.0,0.0,0.0,-0.003437,0.0
49,1.4739,-2.339467,1.256933,1.066133,0.84,0.793833,-0.733867,36.33,-35.965567,0.0,...,0.8381,-0.301367,35.695367,-35.931133,0.0,0.0,0.0,0.0,0.001858,1.0
22,2.4221,-2.9723,0.8785,1.4563,1.504833,-0.1113,-0.177233,42.3625,-41.540033,0.0,...,0.3414,-0.519733,40.7577,-40.711367,0.0,0.0,0.0,0.0,0.008527,0.0


In [77]:
# How many of the five lowest-scored rows carry the class label 1.
len(iforest_top5_data.loc[iforest_top5_data['class'] == 1])

2