# IsolationForest

- 異常検知アルゴリズムの一つで，データ点をランダムに分割していく
- データセット内の異常なデータ点は，ランダムに分割した際，分割されやすいのではないかという考えに基づいたアルゴリズム


## 流れ
    - データ点(特徴量)をランダムにサンプルし，最小値と最大値の分割値をランダムに選別

- 再帰的にパーティションを分割することで，データをツリー構造で捉える
- パーティション数をツリーの深さで表現する（距離が短いほど異常度が高い）
- 葉にはデータ点が一つになるように分割する
    - ただし，同一の値の点や，二部探索の最大の平均深さで打ち切る場合は，葉が一つのデータにならない


- IsolationForestは教師あり学習ではないけれど，訓練データと評価データが存在する
- RandomForestと同様に，データをサンプリングして木を作り，その木を複数作ることで，ロバスト性や精度を高めている
- 木の深さ(ルートノードからの距離)は,その木で二部探索した際の平均深さで正規化している

## 異常度
     - E(h(x)) 木の平均深さ
     - C(n) 二部探索深さ
$$
2^{-\dfrac {E\left( h\left(x\right) \right) }{c\left( n\right) }}
$$

- 値が[0-1]の間におさまる
- 異常だと指数が0に近くなる
- 正常だと指数が-1に近くなる

--------------------------------------------------------------------------------------------

## 今回の実験

- 使用するデータ:[元の測定値, 四方向残差データ, 八方向残差データ]
- 木の数:[100, 1000]
- 木を形成する際に使用するサンプルデータ数:[10, 20, 30, 40, 50]

In [2]:
import numpy as np 
import matplotlib.pyplot as plt
import pandas as pd
import treelib
np.random.seed(100)

class IsolateTree:
    class __data:
        def __init__(self):
            self.feature_split = None
            self.threshold_split = None
            self.n_samples = None

    def __init__(self):
        self.__tree = treelib.Tree()

    def fit(self, X):
        n_samples = X.shape[0]
        self.__c = self.__get_normalization_factor(n_samples)
        self.__max_height = np.round(np.log2(n_samples))
        root = self.__tree.create_node('root')
        self.__create_tree(root, X)

    def __get_normalization_factor(self, n):
        return 2 * (np.log(n - 1) + 0.5772156649) - 2 * (n - 1) / n

    def __create_tree(self, parent, X):
        n_samples, n_features = X.shape
        data = self.__data()

        if n_samples == 0:
            data.n_samples = n_samples
            self.__tree.update_node(parent.identifier, data=data)
            return
        if self.__tree.depth() > self.__max_height or (X == X[0]).all():
            data.n_samples = n_samples
            self.__tree.update_node(parent.identifier, data=data)
            return

        data.feature_split = np.random.choice(n_features, 1)
        data.threshold_split = (max(X[:, data.feature_split]) - min(X[:, data.feature_split])) * np.random.random() + min(X[:, data.feature_split])
        self.__tree.update_node(parent.identifier, data=data)

        less_items = np.flatnonzero(X[:, data.feature_split] < data.threshold_split)
        greater_items = np.flatnonzero(X[:, data.feature_split] >= data.threshold_split)
        node = self.__tree.create_node('less ' + str(data.threshold_split), parent=parent)
        self.__create_tree(node, X[less_items])
        node = self.__tree.create_node('greater ' + str(data.threshold_split), parent=parent)
        self.__create_tree(node, X[greater_items])

    def get_abnormal_score(self, X):
        return 2 ** (-np.apply_along_axis(self.__get_path_length, 1, X, self.__tree.get_node(self.__tree.root)) / self.__c)

    def __get_path_length(self, x, node):
        if node.is_leaf():
            return self.__tree.depth(node.identifier) + (self.__get_normalization_factor(node.data.n_samples) if node.data.n_samples > 1 else 0)

        for child in self.__tree.children(node.identifier):
            if x[node.data.feature_split] < node.data.threshold_split and child.tag == 'less ' + str(node.data.threshold_split):
                return self.__get_path_length(x, child)
            elif x[node.data.feature_split] >= node.data.threshold_split and child.tag == 'greater ' + str(node.data.threshold_split):
                return self.__get_path_length(x, child)


class IsolateForest:
    def __init__(self):
        self.__trees = []

    def fit(self, X, n_trees, n_subsamples):
        n_samples = X.shape[0]
        for _ in range(n_trees):
            sub_items = np.random.choice(n_samples, n_subsamples, replace=False)
            tree = IsolateTree()
            tree.fit(X[sub_items])
            self.__trees.append(tree)

    def predict(self, X):
        n_samples = X.shape[0]
        abnormal_scores = np.zeros((n_samples, len(self.__trees)))
        for i in range(len(self.__trees)):
            abnormal_scores[:, i] = self.__trees[i].get_abnormal_score(X)
        return np.mean(abnormal_scores, axis=1)

def main():
    data = []
    for i in range(1, 51):
        tmp_data = pd.read_csv('fresh_aged_ieice/s'+str(i)+'.csv', header=None).values
        data.append(tmp_data)
    for i in range(1, 3):
        tmp_data = pd.read_csv('fresh_aged_ieice/s'+str(i)+'_aged.csv', header=None).values
        data.append(tmp_data)
    data = np.array(data)

    check = []
    for i in range(148):
        for j in range(33):
            if data[0, i, j] == 0:
                check.append([i,j])
    for i in range(52):
        for j in range(148):
            for k in range(33):
                if [j,k] in check:
                    data[i, j, k] = 0

    tmp_1 = []
    for i in range(52):
        tmp_2 = data[i].flatten()
        tmp_1.append(tmp_2[tmp_2 != 0])
    data = np.array(tmp_1)

    model = IsolateForest()
    tree_list = [100, 1000]
    sample_list = [10, 20, 30, 40, 50]
    for tree in tree_list:
        rank = []
        for sample in sample_list:
            model.fit(data, tree, sample) # ここをいじる
            result = model.predict(data)
            tmp = []
            for i in range(52):
                tmp.append([result[i], i])
            tmp.sort()
            tmp = np.array(tmp)[:,1]
            rank.append(tmp[47:52])
        rank = np.array(rank).T
    
        print(f'木:[{tree}]')
        df = pd.DataFrame(rank, columns=sample_list, index=np.arange(1,6))
        print(df)

if __name__ == "__main__":
    print('[元の測定値]')
    main()
    pass

[元の測定値]
木:[100]
     10    20    30    40    50
1  51.0  37.0  26.0  26.0  26.0
2  26.0   0.0   0.0   0.0  51.0
3  24.0  51.0  51.0  51.0   0.0
4  50.0  10.0  10.0  10.0  10.0
5  10.0  50.0  50.0  50.0  50.0
木:[1000]
     10    20    30    40    50
1   4.0   0.0  51.0  51.0  51.0
2  24.0  51.0   0.0   0.0   0.0
3  26.0  26.0  26.0  26.0  26.0
4  10.0  10.0  10.0  10.0  10.0
5  50.0  50.0  50.0  50.0  50.0


In [5]:
np.random.seed(100)

class IsolateTree:
    class __data:
        def __init__(self):
            self.feature_split = None
            self.threshold_split = None
            self.n_samples = None

    def __init__(self):
        self.__tree = treelib.Tree()

    def fit(self, X):
        n_samples = X.shape[0]
        self.__c = self.__get_normalization_factor(n_samples)
        self.__max_height = np.round(np.log2(n_samples))
        root = self.__tree.create_node('root')
        self.__create_tree(root, X)

    def __get_normalization_factor(self, n):
        return 2 * (np.log(n - 1) + 0.5772156649) - 2 * (n - 1) / n

    def __create_tree(self, parent, X):
        n_samples, n_features = X.shape
        data = self.__data()

        if n_samples == 0:
            data.n_samples = n_samples
            self.__tree.update_node(parent.identifier, data=data)
            return
        if self.__tree.depth() > self.__max_height or (X == X[0]).all():
            data.n_samples = n_samples
            self.__tree.update_node(parent.identifier, data=data)
            return

        data.feature_split = np.random.choice(n_features, 1)
        data.threshold_split = (max(X[:, data.feature_split]) - min(X[:, data.feature_split])) * np.random.random() + min(X[:, data.feature_split])
        self.__tree.update_node(parent.identifier, data=data)

        less_items = np.flatnonzero(X[:, data.feature_split] < data.threshold_split)
        greater_items = np.flatnonzero(X[:, data.feature_split] >= data.threshold_split)
        node = self.__tree.create_node('less ' + str(data.threshold_split), parent=parent)
        self.__create_tree(node, X[less_items])
        node = self.__tree.create_node('greater ' + str(data.threshold_split), parent=parent)
        self.__create_tree(node, X[greater_items])

    def get_abnormal_score(self, X):
        return 2 ** (-np.apply_along_axis(self.__get_path_length, 1, X, self.__tree.get_node(self.__tree.root)) / self.__c)

    def __get_path_length(self, x, node):
        if node.is_leaf():
            return self.__tree.depth(node.identifier) + (self.__get_normalization_factor(node.data.n_samples) if node.data.n_samples > 1 else 0)

        for child in self.__tree.children(node.identifier):
            if x[node.data.feature_split] < node.data.threshold_split and child.tag == 'less ' + str(node.data.threshold_split):
                return self.__get_path_length(x, child)
            elif x[node.data.feature_split] >= node.data.threshold_split and child.tag == 'greater ' + str(node.data.threshold_split):
                return self.__get_path_length(x, child)

class IsolateForest:
    def __init__(self):
        self.__trees = []

    def fit(self, X, n_trees, n_subsamples):
        n_samples = X.shape[0]
        for _ in range(n_trees):
            sub_items = np.random.choice(n_samples, n_subsamples, replace=False)
            tree = IsolateTree()
            tree.fit(X[sub_items])
            self.__trees.append(tree)

    def predict(self, X):
        n_samples = X.shape[0]
        abnormal_scores = np.zeros((n_samples, len(self.__trees)))
        for i in range(len(self.__trees)):
            abnormal_scores[:, i] = self.__trees[i].get_abnormal_score(X)
        return np.mean(abnormal_scores, axis=1)

def main():
    data = []
    for i in range(1, 51):
        tmp_data = pd.read_csv('fresh_aged_ieice/s'+str(i)+'.csv', header=None).values
        data.append(tmp_data)
    for i in range(1, 3):
        tmp_data = pd.read_csv('fresh_aged_ieice/s'+str(i)+'_aged.csv', header=None).values
        data.append(tmp_data)
    data = np.array(data)

    check = []
    for i in range(148):
        for j in range(33):
            if data[0, i, j] == 0:
                check.append([i,j])
    for i in range(52):
        for j in range(148):
            for k in range(33):
                if [j,k] in check:
                    data[i, j, k] = 0

    tmp_x = [0, 1, 0, -1]
    tmp_y = [-1, 0, 1, 0]
    residual_data = np.zeros_like(data)
    for i in range(52):
        for j in range(148):
            for k in range(33):
                if data[i, j, k] != 0:
                    data_list = []
                    for l in range(4):
                        next_y = j + tmp_y[l]
                        next_x = k + tmp_x[l]
                        if 0 <= next_y < 148 and 0 <= next_x < 33 and data[i, next_y, next_x] != 0:
                            data_list.append(data[i, next_y, next_x])
    
                    data_mean = np.mean(np.array(data_list))
                    residual_data[i, j, k] = abs(data[i, j, k] - data_mean)

    tmp_1 = []
    for i in range(52):
        tmp_2 = []
        for j in range(148):
            for k in range(33):
                if [j,k] in check:
                    continue
                else:
                    tmp_2.append(residual_data[i, j, k])
        tmp_1.append(tmp_2)
    data = np.array(tmp_1)
    
    model = IsolateForest()
    tree_list = [100, 1000]
    sample_list = [10, 20, 30, 40, 50]
    for tree in tree_list:
        rank = []
        for sample in sample_list:
            model.fit(data, tree, sample) 
            result = model.predict(data)
            tmp = []
            for i in range(52):
                tmp.append([result[i], i])
            tmp.sort()
            tmp = np.array(tmp)[:,1]
            rank.append(tmp[47:52])
        rank = np.array(rank).T
    
        print(f'木:[{tree}]')
        df = pd.DataFrame(rank, columns=sample_list, index=np.arange(1,6))
        print(df)

if __name__ == "__main__":
    print('[四方向残差データ]')
    main()
    pass

[四方向残差データ]
木:[100]
     10    20    30    40    50
1  50.0   0.0  37.0  27.0  43.0
2   3.0  20.0  21.0  43.0  21.0
3  49.0  43.0   0.0  21.0  27.0
4   8.0  21.0  43.0  50.0  50.0
5  43.0  50.0  50.0   0.0   0.0
木:[1000]
     10    20    30    40    50
1  50.0  37.0  43.0  27.0  43.0
2  21.0  43.0  37.0  21.0  21.0
3  27.0  11.0  21.0  37.0  37.0
4  43.0  50.0  50.0  50.0  50.0
5   0.0   0.0   0.0   0.0   0.0


In [6]:
import numpy as np 
import matplotlib.pyplot as plt
import pandas as pd
import treelib
np.random.seed(100)

class IsolateTree:
    class __data:
        def __init__(self):
            self.feature_split = None
            self.threshold_split = None
            self.n_samples = None

    def __init__(self):
        self.__tree = treelib.Tree()

    def fit(self, X):
        n_samples = X.shape[0]
        self.__c = self.__get_normalization_factor(n_samples)
        self.__max_height = np.round(np.log2(n_samples))
        root = self.__tree.create_node('root')
        self.__create_tree(root, X)

    def __get_normalization_factor(self, n):
        return 2 * (np.log(n - 1) + 0.5772156649) - 2 * (n - 1) / n

    def __create_tree(self, parent, X):
        n_samples, n_features = X.shape
        data = self.__data()

        if n_samples == 0:
            data.n_samples = n_samples
            self.__tree.update_node(parent.identifier, data=data)
            return
        if self.__tree.depth() > self.__max_height or (X == X[0]).all():
            data.n_samples = n_samples
            self.__tree.update_node(parent.identifier, data=data)
            return

        data.feature_split = np.random.choice(n_features, 1)
        data.threshold_split = (max(X[:, data.feature_split]) - min(X[:, data.feature_split])) * np.random.random() + min(X[:, data.feature_split])
        self.__tree.update_node(parent.identifier, data=data)

        less_items = np.flatnonzero(X[:, data.feature_split] < data.threshold_split)
        greater_items = np.flatnonzero(X[:, data.feature_split] >= data.threshold_split)
        node = self.__tree.create_node('less ' + str(data.threshold_split), parent=parent)
        self.__create_tree(node, X[less_items])
        node = self.__tree.create_node('greater ' + str(data.threshold_split), parent=parent)
        self.__create_tree(node, X[greater_items])

    def get_abnormal_score(self, X):
        return 2 ** (-np.apply_along_axis(self.__get_path_length, 1, X, self.__tree.get_node(self.__tree.root)) / self.__c)

    def __get_path_length(self, x, node):
        if node.is_leaf():
            return self.__tree.depth(node.identifier) + (self.__get_normalization_factor(node.data.n_samples) if node.data.n_samples > 1 else 0)

        for child in self.__tree.children(node.identifier):
            if x[node.data.feature_split] < node.data.threshold_split and child.tag == 'less ' + str(node.data.threshold_split):
                return self.__get_path_length(x, child)
            elif x[node.data.feature_split] >= node.data.threshold_split and child.tag == 'greater ' + str(node.data.threshold_split):
                return self.__get_path_length(x, child)

class IsolateForest:
    def __init__(self):
        self.__trees = []

    def fit(self, X, n_trees, n_subsamples):
        n_samples = X.shape[0]
        for _ in range(n_trees):
            sub_items = np.random.choice(n_samples, n_subsamples, replace=False)
            tree = IsolateTree()
            tree.fit(X[sub_items])
            self.__trees.append(tree)

    def predict(self, X):
        n_samples = X.shape[0]
        abnormal_scores = np.zeros((n_samples, len(self.__trees)))
        for i in range(len(self.__trees)):
            abnormal_scores[:, i] = self.__trees[i].get_abnormal_score(X)
        return np.mean(abnormal_scores, axis=1)

def main():
    data = []
    for i in range(1, 51):
        tmp_data = pd.read_csv('fresh_aged_ieice/s'+str(i)+'.csv', header=None).values
        data.append(tmp_data)
    for i in range(1, 3):
        tmp_data = pd.read_csv('fresh_aged_ieice/s'+str(i)+'_aged.csv', header=None).values
        data.append(tmp_data)
    data = np.array(data)

    check = []
    for i in range(148):
        for j in range(33):
            if data[0, i, j] == 0:
                check.append([i,j])
    for i in range(52):
        for j in range(148):
            for k in range(33):
                if [j,k] in check:
                    data[i, j, k] = 0

    tmp_x = [-1, 0, 1, 1, 1, 0, -1, -1]
    tmp_y = [-1, -1, -1, 0, 1, 1, 1, 0]
    residual_data = np.zeros_like(data)
    for i in range(52):
        for j in range(148):
            for k in range(33):
                if data[i, j, k] != 0:
                    data_list = []
                    for l in range(8):
                        next_y = j + tmp_y[l]
                        next_x = k + tmp_x[l]
                        if 0 <= next_y < 148 and 0 <= next_x < 33 and data[i, next_y, next_x] != 0:
                            data_list.append(data[i, next_y, next_x])
                    data_mean = np.mean(np.array(data_list))
                    residual_data[i, j, k] = abs(data[i, j, k] - data_mean)

    tmp_1 = []
    for i in range(52):
        tmp_2 = []
        for j in range(148):
            for k in range(33):
                if [j,k] in check:
                    continue
                else:
                    tmp_2.append(residual_data[i, j, k])
        tmp_1.append(tmp_2)
    data = np.array(tmp_1)

    model = IsolateForest()
    tree_list = [100, 1000]
    sample_list = [10, 20, 30, 40, 50]
    for tree in tree_list:
        rank = []
        for sample in sample_list:
            model.fit(data, tree, sample) 
            result = model.predict(data)
            tmp = []
            for i in range(52):
                tmp.append([result[i], i])
            tmp.sort()
            tmp = np.array(tmp)[:,1]
            rank.append(tmp[47:52])
        rank = np.array(rank).T

        print(f'木:[{tree}]')
        df = pd.DataFrame(rank, columns=sample_list, index=np.arange(1,6))
        print(df)

if __name__ == "__main__":
    print('[八方向残差データ]')
    main()
    pass

[八方向残差データ]
木:[100]
     10    20    30    40    50
1  21.0  14.0  37.0  37.0  35.0
2   0.0   9.0  24.0  24.0  37.0
3   9.0   0.0  35.0  35.0  24.0
4   8.0  35.0  27.0  27.0  27.0
5  27.0  27.0   0.0   0.0   0.0
木:[1000]
     10    20    30    40    50
1  37.0  24.0  37.0  27.0  27.0
2  27.0  21.0  21.0  37.0  37.0
3  24.0  50.0  50.0  21.0  21.0
4  21.0  27.0  27.0  50.0  50.0
5   0.0   0.0   0.0   0.0   0.0


## 実験結果

- 残差データ+IsolationForestでは異常を検出することができなかった
- 次元削除で特徴量を減らすことで精度が良くなるかも？
- 新しい特徴を作ってみたらどう？（元の測定値の合計値，平均値，残差データの合計値，平均値など）

## ちょっとやってみる

In [None]:
np.random.seed(100)

class IsolateTree:
    class __data:
        def __init__(self):
            self.feature_split = None
            self.threshold_split = None
            self.n_samples = None

    def __init__(self):
        self.__tree = treelib.Tree()

    def fit(self, X):
        n_samples = X.shape[0]
        self.__c = self.__get_normalization_factor(n_samples)
        self.__max_height = np.round(np.log2(n_samples))
        root = self.__tree.create_node('root')
        self.__create_tree(root, X)

    def __get_normalization_factor(self, n):
        return 2 * (np.log(n - 1) + 0.5772156649) - 2 * (n - 1) / n

    def __create_tree(self, parent, X):
        n_samples, n_features = X.shape
        data = self.__data()

        if n_samples == 0:
            data.n_samples = n_samples
            self.__tree.update_node(parent.identifier, data=data)
            return
        if self.__tree.depth() > self.__max_height or (X == X[0]).all():
            data.n_samples = n_samples
            self.__tree.update_node(parent.identifier, data=data)
            return

        data.feature_split = np.random.choice(n_features, 1)
        data.threshold_split = (max(X[:, data.feature_split]) - min(X[:, data.feature_split])) * np.random.random() + min(X[:, data.feature_split])
        self.__tree.update_node(parent.identifier, data=data)

        less_items = np.flatnonzero(X[:, data.feature_split] < data.threshold_split)
        greater_items = np.flatnonzero(X[:, data.feature_split] >= data.threshold_split)
        node = self.__tree.create_node('less ' + str(data.threshold_split), parent=parent)
        self.__create_tree(node, X[less_items])
        node = self.__tree.create_node('greater ' + str(data.threshold_split), parent=parent)
        self.__create_tree(node, X[greater_items])

    def get_abnormal_score(self, X):
        return 2 ** (-np.apply_along_axis(self.__get_path_length, 1, X, self.__tree.get_node(self.__tree.root)) / self.__c)

    def __get_path_length(self, x, node):
        if node.is_leaf():
            return self.__tree.depth(node.identifier) + (self.__get_normalization_factor(node.data.n_samples) if node.data.n_samples > 1 else 0)

        for child in self.__tree.children(node.identifier):
            if x[node.data.feature_split] < node.data.threshold_split and child.tag == 'less ' + str(node.data.threshold_split):
                return self.__get_path_length(x, child)
            elif x[node.data.feature_split] >= node.data.threshold_split and child.tag == 'greater ' + str(node.data.threshold_split):
                return self.__get_path_length(x, child)

class IsolateForest:
    def __init__(self):
        self.__trees = []

    def fit(self, X, n_trees, n_subsamples):
        n_samples = X.shape[0]
        for _ in range(n_trees):
            sub_items = np.random.choice(n_samples, n_subsamples, replace=False)
            tree = IsolateTree()
            tree.fit(X[sub_items])
            self.__trees.append(tree)

    def predict(self, X):
        n_samples = X.shape[0]
        abnormal_scores = np.zeros((n_samples, len(self.__trees)))
        for i in range(len(self.__trees)):
            abnormal_scores[:, i] = self.__trees[i].get_abnormal_score(X)
        return np.mean(abnormal_scores, axis=1)

def main():
    data = []
    for i in range(1, 51):
        tmp_data = pd.read_csv('fresh_aged_ieice/s'+str(i)+'.csv', header=None).values
        data.append(tmp_data)
    for i in range(1, 3):
        tmp_data = pd.read_csv('fresh_aged_ieice/s'+str(i)+'_aged.csv', header=None).values
        data.append(tmp_data)
    data = np.array(data)

    check = []
    for i in range(148):
        for j in range(33):
            if data[0, i, j] == 0:
                check.append([i,j])
    for i in range(52):
        for j in range(148):
            for k in range(33):
                if [j,k] in check:
                    data[i, j, k] = 0

    tmp_1 = []
    for i in range(52):
        tmp_2 = data[i].flatten()
        tmp_1.append(tmp_2[tmp_2 != 0])
    data = np.array(tmp_1)
    
    tmp = []
    for i in data:
        tmptmp = []
        tmptmp.append(np.sum(i))
        tmptmp.append(np.mean(i))
        tmp.append(tmptmp)
        
    data = []
    for i in range(1, 51):
        tmp_data = pd.read_csv('fresh_aged_ieice/s'+str(i)+'.csv', header=None).values
        data.append(tmp_data)
    for i in range(1, 3):
        tmp_data = pd.read_csv('fresh_aged_ieice/s'+str(i)+'_aged.csv', header=None).values
        data.append(tmp_data)
    data = np.array(data)

    check = []
    for i in range(148):
        for j in range(33):
            if data[0, i, j] == 0:
                check.append([i,j])
    for i in range(52):
        for j in range(148):
            for k in range(33):
                if [j,k] in check:
                    data[i, j, k] = 0

    tmp_x = [-1, 0, 1, 1, 1, 0, -1, -1]
    tmp_y = [-1, -1, -1, 0, 1, 1, 1, 0]
    residual_data = np.zeros_like(data)
    for i in range(52):
        for j in range(148):
            for k in range(33):
                if data[i, j, k] != 0:
                    data_list = []
                    for l in range(8):
                        next_y = j + tmp_y[l]
                        next_x = k + tmp_x[l]
                        if 0 <= next_y < 148 and 0 <= next_x < 33 and data[i, next_y, next_x] != 0:
                            data_list.append(data[i, next_y, next_x])
                    data_mean = np.mean(np.array(data_list))
                    residual_data[i, j, k] = abs(data[i, j, k] - data_mean)

    tmp_1 = []
    for i in range(52):
        tmp_2 = []
        for j in range(148):
            for k in range(33):
                if [j,k] in check:
                    continue
                else:
                    tmp_2.append(residual_data[i, j, k])
        tmp_1.append(tmp_2)
    data = np.array(tmp_1)
    
    for i in range(len(data)):
        tmp[i].append(np.sum(data[i]))
        tmp[i].append(np.mean(data[i]))
    data = np.array(tmp)
    
    model = IsolateForest()
    tree_list = [100, 1000]
    sample_list = [10, 20, 30, 40, 50]
    for tree in tree_list:
        rank = []
        for sample in sample_list:
            model.fit(data, tree, sample) 
            result = model.predict(data)
            tmp = []
            for i in range(52):
                tmp.append([result[i], i])
            tmp.sort()
            tmp = np.array(tmp)[:,1]
            rank.append(tmp[47:52])
        rank = np.array(rank).T
        
        print(f'木:[{tree}]')
        df = pd.DataFrame(rank, columns=sample_list, index=np.arange(1,6))
        print(df)

if __name__ == "__main__":
    print('[自作特徴データ（元の測定値の合計値，平均値，残差データの合計値，平均値]')
    main()
    pass

[自作特徴データ（元の測定値の合計値，平均値，残差データの合計値，平均値]
