In [489]:
from scipy.optimize import minimize
import numpy as np
from math import exp
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.svm import OneClassSVM
from sklearn.datasets import load_boston
from scipy.spatial import distance
from scipy.spatial import cKDTree
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
import PP4
from sklearn.metrics import f1_score
import copy
# Silence NumPy's floating-point warnings for division by zero and invalid
# operations in the cells that follow. seterr returns the previous settings,
# which the notebook echoes as this cell's output.
np.seterr(divide='ignore', invalid='ignore')

{'divide': 'ignore', 'over': 'warn', 'under': 'ignore', 'invalid': 'ignore'}

# Experiments

## 1. Letter 

In [490]:
# Letter experiment. Two fixed groups of letters (`normal` and `out`) take
# turns as the target class of a One-Class SVM while the other group supplies
# the outliers; the F1 scores of both directions are averaged per repetition.

# read_csv(sep=',') parses the file exactly as the former read_table(sep=',')
# call did. It matches the loader used by the other dataset cells and avoids
# the read_table deprecation warning that some pandas releases emit.
df = pd.read_csv('letter-recognition.data', sep=',', header=None)
X_data = df.drop(0, axis=1)  # column 0 is the letter label, the rest are features
scaler = MinMaxScaler(feature_range =(-1,1))
# NOTE: the scaler is fitted on every row, including rows later used for testing.
scaler.fit(X_data)
scaled = pd.DataFrame(scaler.transform(X_data))
scaled['class'] = df[0]
df = scaled
classes = df['class'].unique()

normal = ['T', 'I', 'D', 'N', 'G', 'S', 'B', 'A', 'J', 'M', 'X', 'O', 'R']
out = ['F', 'C', 'H', 'W', 'L', 'P', 'E', 'V', 'Y', 'Q', 'U', 'K', 'Z']
# Collect the row labels of each group. `df` was rebuilt from an array above,
# so it carries a default RangeIndex and labels coincide with the positions
# that `.iloc` expects further down.
indice = []
outIndice = []
for letter in normal:
    indice += df['class'].index[df['class'] == letter].tolist()

for letter in out:
    outIndice += df['class'].index[df['class'] == letter].tolist()
indice = np.sort(indice)
outIndice = np.sort(outIndice)

# One averaged F1 value per repetition, for each averaging mode.
scores_Letter_macro = []
scores_Letter_micro = []
scores_Letter_weighted = []
for iter_ in range(10):
    print('iter : ', iter_)

    # Direction 1: `normal` letters are the target, `out` letters the outliers.
    target1 = df.iloc[indice]
    target_train1 = target1.sample(frac = 0.7)  # unseeded 70% training sample
    target_val1 = target1.drop(target_train1.index)  # remaining 30% is held out
    target_train1 = target_train1.reset_index(drop=True)
    target_val1 = target_val1.reset_index(drop=True)
    outliers1 = df.iloc[outIndice].reset_index(drop=True)
    testset1 = pd.concat([target_val1, outliers1]).reset_index(drop=True)

    # +1 marks target rows and -1 marks outliers, matching OneClassSVM.predict.
    y_true1 =  [1 if  i in normal else -1 for i in testset1['class']]
    target_train1 = target_train1.drop('class', axis=1)
    testset1 = testset1.drop('class', axis=1)

    # Hyper-parameter search from the project-local PP4 module; element 0 of
    # the result is used as nu and element 1 as gamma.
    model1 = PP4.PseudoPoints(target_train1, 10)
    opt_comb1 = model1.search_optimal_hyperparameters()

    clf1 = OneClassSVM(nu = opt_comb1[0], gamma = opt_comb1[1]).fit(target_train1)
    y_pred1 = clf1.predict(testset1)
    score1_macro = f1_score(y_true1, y_pred1, average='macro')
    score1_micro = f1_score(y_true1, y_pred1, average='micro')
    score1_weighted = f1_score(y_true1, y_pred1, average='weighted')

    # Direction 2: roles swapped, `out` letters are the target.
    target2 = df.iloc[outIndice]
    target_train2 = target2.sample(frac = 0.7)
    target_val2 = target2.drop(target_train2.index)
    target_train2 = target_train2.reset_index(drop=True)
    target_val2 = target_val2.reset_index(drop=True)
    outliers2 = df.iloc[indice].reset_index(drop=True)
    testset2 = pd.concat([target_val2, outliers2]).reset_index(drop=True)

    y_true2 =  [1 if i in out else -1 for i in testset2['class']]
    target_train2 = target_train2.drop('class', axis=1)
    testset2 = testset2.drop('class', axis=1)

    model2 = PP4.PseudoPoints(target_train2, 10)
    opt_comb2 = model2.search_optimal_hyperparameters()

    clf2 = OneClassSVM(nu = opt_comb2[0], gamma = opt_comb2[1]).fit(target_train2)
    y_pred2 = clf2.predict(testset2)
    score2_macro = f1_score(y_true2, y_pred2, average='macro')
    score2_micro = f1_score(y_true2, y_pred2, average='micro')
    score2_weighted = f1_score(y_true2, y_pred2, average='weighted')

    # Average the two directions and record one value per averaging mode.
    mean_score_macro = np.mean([score1_macro, score2_macro])
    mean_score_micro = np.mean([score1_micro, score2_micro])
    mean_score_weighted = np.mean([score1_weighted, score2_weighted])
    scores_Letter_macro.append(mean_score_macro)
    scores_Letter_micro.append(mean_score_micro)
    scores_Letter_weighted.append(mean_score_weighted)

  """Entry point for launching an IPython kernel.


iter :  0
iter :  1
iter :  2
iter :  3
iter :  4
iter :  5
iter :  6
iter :  7
iter :  8
iter :  9


## 2. Abalone

In [492]:
# Abalone experiment: every distinct value of column 0 takes a turn as the
# target class. For each class and each of 10 repetitions a One-Class SVM is
# evaluated in both directions (class vs. rest, rest vs. class) and the two
# F1 scores are averaged.
df = pd.read_csv('abalone.data', sep=',', header=None)
X_data = df.drop(0, axis=1)
scaler = MinMaxScaler(feature_range=(-1, 1), copy=False)
scaled = pd.DataFrame(scaler.fit(X_data).transform(X_data))
scaled['class'] = df[0]
df = scaled
classes = df['class'].unique()
scores_Abalone_macro, scores_Abalone_micro, scores_Abalone_weighted = [], [], []

for c in classes:
    for iter_ in range(10):
        print('iter : ',iter_,', target class = ', c)

        # Direction 1: rows of class c are the target, all other rows are outliers.
        target1 = df.loc[df['class'] == c]
        target_train1 = target1.sample(frac=0.7)
        target_val1 = target1.drop(target_train1.index).reset_index(drop=True)
        target_train1 = target_train1.reset_index(drop=True)
        outliers1 = df.loc[df['class'] != c].reset_index(drop=True)
        testset1 = pd.concat([target_val1, outliers1], ignore_index=True)

        # +1 for target rows, -1 for outliers (the labels OneClassSVM.predict emits).
        y_true1 = [1 if label == c else -1 for label in testset1['class']]
        target_train1 = target_train1.drop(columns='class')
        testset1 = testset1.drop(columns='class')

        # PP4 search result: element 0 is used as nu, element 1 as gamma.
        model1 = PP4.PseudoPoints(target_train1, 10)
        opt_comb1 = model1.search_optimal_hyperparameters()

        clf1 = OneClassSVM(nu=opt_comb1[0], gamma=opt_comb1[1])
        clf1.fit(target_train1)
        y_pred1 = clf1.predict(testset1)
        score1_macro, score1_micro, score1_weighted = (
            f1_score(y_true1, y_pred1, average=avg)
            for avg in ('macro', 'micro', 'weighted')
        )

        # Direction 2: roles swapped, everything except class c is the target.
        target2 = df.loc[df['class'] != c]
        target_train2 = target2.sample(frac=0.7)
        target_val2 = target2.drop(target_train2.index).reset_index(drop=True)
        target_train2 = target_train2.reset_index(drop=True)
        outliers2 = df.loc[df['class'] == c].reset_index(drop=True)
        testset2 = pd.concat([target_val2, outliers2], ignore_index=True)

        y_true2 = [1 if label != c else -1 for label in testset2['class']]
        target_train2 = target_train2.drop(columns='class')
        testset2 = testset2.drop(columns='class')

        model2 = PP4.PseudoPoints(target_train2, 10)
        opt_comb2 = model2.search_optimal_hyperparameters()

        clf2 = OneClassSVM(nu=opt_comb2[0], gamma=opt_comb2[1])
        clf2.fit(target_train2)
        y_pred2 = clf2.predict(testset2)
        score2_macro, score2_micro, score2_weighted = (
            f1_score(y_true2, y_pred2, average=avg)
            for avg in ('macro', 'micro', 'weighted')
        )

        # Mean of the two directions, stored per averaging mode.
        mean_score_macro = np.mean([score1_macro, score2_macro])
        mean_score_micro = np.mean([score1_micro, score2_micro])
        mean_score_weighted = np.mean([score1_weighted, score2_weighted])

        scores_Abalone_macro.append(mean_score_macro)
        scores_Abalone_micro.append(mean_score_micro)
        scores_Abalone_weighted.append(mean_score_weighted)

iter :  0 , target class =  M
iter :  1 , target class =  M
iter :  2 , target class =  M
iter :  3 , target class =  M
iter :  4 , target class =  M
iter :  5 , target class =  M
iter :  6 , target class =  M
iter :  7 , target class =  M
iter :  8 , target class =  M
iter :  9 , target class =  M
iter :  0 , target class =  F
iter :  1 , target class =  F
iter :  2 , target class =  F
iter :  3 , target class =  F
iter :  4 , target class =  F
iter :  5 , target class =  F
iter :  6 , target class =  F
iter :  7 , target class =  F
iter :  8 , target class =  F
iter :  9 , target class =  F
iter :  0 , target class =  I
iter :  1 , target class =  I
iter :  2 , target class =  I
iter :  3 , target class =  I
iter :  4 , target class =  I
iter :  5 , target class =  I
iter :  6 , target class =  I
iter :  7 , target class =  I
iter :  8 , target class =  I
iter :  9 , target class =  I


## 3. Balance

In [494]:
# Balance-scale experiment: each distinct value of column 0 is used as the
# target class in turn. Both directions (class vs. rest, rest vs. class) are
# scored with a One-Class SVM and averaged, 10 repetitions per class.
df = pd.read_csv('balance-scale.data', sep=',', header=None)
X_data = df.drop(0, axis=1)
scaler = MinMaxScaler(feature_range=(-1, 1))
scaled = pd.DataFrame(scaler.fit(X_data).transform(X_data))
scaled['class'] = df[0]
df = scaled
classes = df['class'].unique()

scores_Balance_macro, scores_Balance_micro, scores_Balance_weighted = [], [], []

for c in classes:
    for iter_ in range(10):
        print('iter : ', iter_,', target class = ', c)

        # Direction 1: class c is the target, the remaining rows are outliers.
        target1 = df.loc[df['class'] == c]
        target_train1 = target1.sample(frac=0.7)
        target_val1 = target1.drop(target_train1.index).reset_index(drop=True)
        target_train1 = target_train1.reset_index(drop=True)
        outliers1 = df.loc[df['class'] != c].reset_index(drop=True)
        testset1 = pd.concat([target_val1, outliers1], ignore_index=True)

        # +1 for target rows, -1 for outliers.
        y_true1 = [1 if label == c else -1 for label in testset1['class']]
        target_train1 = target_train1.drop(columns='class')
        testset1 = testset1.drop(columns='class')

        # PP4 search result: element 0 is used as nu, element 1 as gamma.
        model1 = PP4.PseudoPoints(target_train1, 10)
        opt_comb1 = model1.search_optimal_hyperparameters()

        clf1 = OneClassSVM(nu=opt_comb1[0], gamma=opt_comb1[1])
        clf1.fit(target_train1)
        y_pred1 = clf1.predict(testset1)

        score1_macro, score1_micro, score1_weighted = (
            f1_score(y_true1, y_pred1, average=avg)
            for avg in ('macro', 'micro', 'weighted')
        )

        # Direction 2: roles swapped, class c supplies the outliers.
        target2 = df.loc[df['class'] != c]
        target_train2 = target2.sample(frac=0.7)
        target_val2 = target2.drop(target_train2.index).reset_index(drop=True)
        target_train2 = target_train2.reset_index(drop=True)
        outliers2 = df.loc[df['class'] == c].reset_index(drop=True)
        testset2 = pd.concat([target_val2, outliers2], ignore_index=True)

        y_true2 = [1 if label != c else -1 for label in testset2['class']]
        target_train2 = target_train2.drop(columns='class')
        testset2 = testset2.drop(columns='class')

        model2 = PP4.PseudoPoints(target_train2, 10)
        opt_comb2 = model2.search_optimal_hyperparameters()

        clf2 = OneClassSVM(nu=opt_comb2[0], gamma=opt_comb2[1])
        clf2.fit(target_train2)
        y_pred2 = clf2.predict(testset2)
        score2_macro, score2_micro, score2_weighted = (
            f1_score(y_true2, y_pred2, average=avg)
            for avg in ('macro', 'micro', 'weighted')
        )

        # Mean of the two directions, stored per averaging mode.
        mean_score_macro = np.mean([score1_macro, score2_macro])
        mean_score_micro = np.mean([score1_micro, score2_micro])
        mean_score_weighted = np.mean([score1_weighted, score2_weighted])

        scores_Balance_macro.append(mean_score_macro)
        scores_Balance_micro.append(mean_score_micro)
        scores_Balance_weighted.append(mean_score_weighted)

iter :  0 , target class =  B
iter :  1 , target class =  B
iter :  2 , target class =  B
iter :  3 , target class =  B
iter :  4 , target class =  B
iter :  5 , target class =  B
iter :  6 , target class =  B
iter :  7 , target class =  B
iter :  8 , target class =  B
iter :  9 , target class =  B
iter :  0 , target class =  R
iter :  1 , target class =  R
iter :  2 , target class =  R
iter :  3 , target class =  R
iter :  4 , target class =  R
iter :  5 , target class =  R
iter :  6 , target class =  R
iter :  7 , target class =  R
iter :  8 , target class =  R
iter :  9 , target class =  R
iter :  0 , target class =  L
iter :  1 , target class =  L
iter :  2 , target class =  L
iter :  3 , target class =  L
iter :  4 , target class =  L
iter :  5 , target class =  L
iter :  6 , target class =  L
iter :  7 , target class =  L
iter :  8 , target class =  L
iter :  9 , target class =  L


## 4. Heart

In [496]:
# Heart experiment: column 13 holds the class. Each class value is the target
# in turn; both directions (class vs. rest, rest vs. class) are scored with a
# One-Class SVM and averaged, 10 repetitions per class.
df = pd.read_csv('heart.dat', sep = ' ',  header=None)
X_data = df.drop(13, axis=1)
scaler = MinMaxScaler(feature_range=(-1, 1))
scaled = pd.DataFrame(scaler.fit(X_data).transform(X_data))
scaled['class'] = df[13]
df = scaled
classes = df['class'].unique()
scores_Heart_macro, scores_Heart_micro, scores_Heart_weighted = [], [], []

for c in classes:
    for iter_ in range(10):
        print('iter : ', iter_,', target class = ', c)

        # Direction 1: class c is the target, the remaining rows are outliers.
        target1 = df.loc[df['class'] == c]
        target_train1 = target1.sample(frac=0.7)
        target_val1 = target1.drop(target_train1.index).reset_index(drop=True)
        target_train1 = target_train1.reset_index(drop=True)
        outliers1 = df.loc[df['class'] != c].reset_index(drop=True)
        testset1 = pd.concat([target_val1, outliers1], ignore_index=True)

        # +1 for target rows, -1 for outliers.
        y_true1 = [1 if label == c else -1 for label in testset1['class']]
        target_train1 = target_train1.drop(columns='class')
        testset1 = testset1.drop(columns='class')

        # PP4 search result: element 0 is used as nu, element 1 as gamma.
        model1 = PP4.PseudoPoints(target_train1, 10)
        opt_comb1 = model1.search_optimal_hyperparameters()

        clf1 = OneClassSVM(nu=opt_comb1[0], gamma=opt_comb1[1])
        clf1.fit(target_train1)
        y_pred1 = clf1.predict(testset1)
        score1_macro, score1_micro, score1_weighted = (
            f1_score(y_true1, y_pred1, average=avg)
            for avg in ('macro', 'micro', 'weighted')
        )

        # Direction 2: roles swapped, class c supplies the outliers.
        target2 = df.loc[df['class'] != c]
        target_train2 = target2.sample(frac=0.7)
        target_val2 = target2.drop(target_train2.index).reset_index(drop=True)
        target_train2 = target_train2.reset_index(drop=True)
        outliers2 = df.loc[df['class'] == c].reset_index(drop=True)
        testset2 = pd.concat([target_val2, outliers2], ignore_index=True)

        y_true2 = [1 if label != c else -1 for label in testset2['class']]
        target_train2 = target_train2.drop(columns='class')
        testset2 = testset2.drop(columns='class')

        model2 = PP4.PseudoPoints(target_train2, 10)
        opt_comb2 = model2.search_optimal_hyperparameters()

        clf2 = OneClassSVM(nu=opt_comb2[0], gamma=opt_comb2[1])
        clf2.fit(target_train2)
        y_pred2 = clf2.predict(testset2)
        score2_macro, score2_micro, score2_weighted = (
            f1_score(y_true2, y_pred2, average=avg)
            for avg in ('macro', 'micro', 'weighted')
        )

        # Mean of the two directions, stored per averaging mode.
        mean_score_macro = np.mean([score1_macro, score2_macro])
        mean_score_micro = np.mean([score1_micro, score2_micro])
        mean_score_weighted = np.mean([score1_weighted, score2_weighted])

        scores_Heart_macro.append(mean_score_macro)
        scores_Heart_micro.append(mean_score_micro)
        scores_Heart_weighted.append(mean_score_weighted)

iter :  0 , target class =  2
iter :  1 , target class =  2
iter :  2 , target class =  2
iter :  3 , target class =  2
iter :  4 , target class =  2
iter :  5 , target class =  2
iter :  6 , target class =  2
iter :  7 , target class =  2
iter :  8 , target class =  2
iter :  9 , target class =  2
iter :  0 , target class =  1
iter :  1 , target class =  1
iter :  2 , target class =  1
iter :  3 , target class =  1
iter :  4 , target class =  1
iter :  5 , target class =  1
iter :  6 , target class =  1
iter :  7 , target class =  1
iter :  8 , target class =  1
iter :  9 , target class =  1


## 5. Australian 

In [498]:
# Australian experiment: column 14 holds the class. Each class value is the
# target in turn; both directions (class vs. rest, rest vs. class) are scored
# with a One-Class SVM and averaged, 10 repetitions per class.
df = pd.read_csv('australian.dat', sep = ' ',  header=None)
X_data = df.drop(14, axis=1)
scaler = MinMaxScaler(feature_range=(-1, 1))
scaled = pd.DataFrame(scaler.fit(X_data).transform(X_data))
scaled['class'] = df[14]
df = scaled
classes = df['class'].unique()
scores_Australian_macro, scores_Australian_micro, scores_Australian_weighted = [], [], []

for c in classes:
    for iter_ in range(10):
        print('iter : ', iter_,', target class = ', c)

        # Direction 1: class c is the target, the remaining rows are outliers.
        target1 = df.loc[df['class'] == c]
        target_train1 = target1.sample(frac=0.7)
        target_val1 = target1.drop(target_train1.index).reset_index(drop=True)
        target_train1 = target_train1.reset_index(drop=True)
        outliers1 = df.loc[df['class'] != c].reset_index(drop=True)
        testset1 = pd.concat([target_val1, outliers1], ignore_index=True)

        # +1 for target rows, -1 for outliers.
        y_true1 = [1 if label == c else -1 for label in testset1['class']]
        target_train1 = target_train1.drop(columns='class')
        testset1 = testset1.drop(columns='class')

        # PP4 search result: element 0 is used as nu, element 1 as gamma.
        model1 = PP4.PseudoPoints(target_train1, 10)
        opt_comb1 = model1.search_optimal_hyperparameters()

        clf1 = OneClassSVM(nu=opt_comb1[0], gamma=opt_comb1[1])
        clf1.fit(target_train1)
        y_pred1 = clf1.predict(testset1)
        score1_macro, score1_micro, score1_weighted = (
            f1_score(y_true1, y_pred1, average=avg)
            for avg in ('macro', 'micro', 'weighted')
        )

        # Direction 2: roles swapped, class c supplies the outliers.
        target2 = df.loc[df['class'] != c]
        target_train2 = target2.sample(frac=0.7)
        target_val2 = target2.drop(target_train2.index).reset_index(drop=True)
        target_train2 = target_train2.reset_index(drop=True)
        outliers2 = df.loc[df['class'] == c].reset_index(drop=True)
        testset2 = pd.concat([target_val2, outliers2], ignore_index=True)

        y_true2 = [1 if label != c else -1 for label in testset2['class']]
        target_train2 = target_train2.drop(columns='class')
        testset2 = testset2.drop(columns='class')

        model2 = PP4.PseudoPoints(target_train2, 10)
        opt_comb2 = model2.search_optimal_hyperparameters()

        clf2 = OneClassSVM(nu=opt_comb2[0], gamma=opt_comb2[1])
        clf2.fit(target_train2)
        y_pred2 = clf2.predict(testset2)
        score2_macro, score2_micro, score2_weighted = (
            f1_score(y_true2, y_pred2, average=avg)
            for avg in ('macro', 'micro', 'weighted')
        )

        # Mean of the two directions, stored per averaging mode.
        mean_score_macro = np.mean([score1_macro, score2_macro])
        mean_score_micro = np.mean([score1_micro, score2_micro])
        mean_score_weighted = np.mean([score1_weighted, score2_weighted])
        scores_Australian_macro.append(mean_score_macro)
        scores_Australian_micro.append(mean_score_micro)
        scores_Australian_weighted.append(mean_score_weighted)

iter :  0 , target class =  0
iter :  1 , target class =  0
iter :  2 , target class =  0
iter :  3 , target class =  0
iter :  4 , target class =  0
iter :  5 , target class =  0
iter :  6 , target class =  0
iter :  7 , target class =  0
iter :  8 , target class =  0
iter :  9 , target class =  0
iter :  0 , target class =  1
iter :  1 , target class =  1
iter :  2 , target class =  1
iter :  3 , target class =  1
iter :  4 , target class =  1
iter :  5 , target class =  1
iter :  6 , target class =  1
iter :  7 , target class =  1
iter :  8 , target class =  1
iter :  9 , target class =  1


## 6. Glass

In [500]:
# Glass experiment: column 0 is dropped and column 10 holds the class. Classes
# are split into two fixed groups (`normal`, `out`) that take turns as the
# target of a One-Class SVM, 10 repetitions, both directions averaged.
df = pd.read_csv('glass.data', sep = ',',  header=None)
df = df.drop(0, axis=1)
X_data = df.drop(10, axis=1)
scaler = MinMaxScaler(feature_range=(-1, 1))
scaled = pd.DataFrame(scaler.fit(X_data).transform(X_data))
scaled['class'] = df[10]
df = scaled
classes = df['class'].unique()  # replaced by [normal, out] a few lines below
normal = [1,2,3]
out = [5,6,7]

# Row labels of each group; the rebuilt frame has a default RangeIndex, so
# these labels are also the positions that `.iloc` uses in the loop.
indexes, outIndexes = [], []
for c in normal:
    indexes.extend(df.index[df['class'] == c].tolist())

for c in out:
    outIndexes.extend(df.index[df['class'] == c].tolist())

indexes = np.sort(indexes)
outIndexes = np.sort(outIndexes)
classes = [normal, out]

scores_Glass_macro, scores_Glass_micro, scores_Glass_weighted = [], [], []

for iter_ in range(10):
    print('iter : ', iter_)

    # Direction 1: the `normal` group is the target, the `out` group the outliers.
    target1 = df.iloc[indexes]
    target_train1 = target1.sample(frac=0.7)
    target_val1 = target1.drop(target_train1.index).reset_index(drop=True)
    target_train1 = target_train1.reset_index(drop=True)
    outliers1 = df.iloc[outIndexes].reset_index(drop=True)
    testset1 = pd.concat([target_val1, outliers1], ignore_index=True)

    # +1 for target rows, -1 for outliers.
    y_true1 = [1 if label in normal else -1 for label in testset1['class']]
    target_train1 = target_train1.drop(columns='class')
    testset1 = testset1.drop(columns='class')

    # PP4 search result: element 0 is used as nu, element 1 as gamma.
    model1 = PP4.PseudoPoints(target_train1, 10)
    opt_comb1 = model1.search_optimal_hyperparameters()

    clf1 = OneClassSVM(nu=opt_comb1[0], gamma=opt_comb1[1])
    clf1.fit(target_train1)
    y_pred1 = clf1.predict(testset1)
    score1_macro, score1_micro, score1_weighted = (
        f1_score(y_true1, y_pred1, average=avg)
        for avg in ('macro', 'micro', 'weighted')
    )

    # Direction 2: roles swapped, the `out` group is the target.
    target2 = df.iloc[outIndexes]
    target_train2 = target2.sample(frac=0.7)
    target_val2 = target2.drop(target_train2.index).reset_index(drop=True)
    target_train2 = target_train2.reset_index(drop=True)
    outliers2 = df.iloc[indexes].reset_index(drop=True)
    testset2 = pd.concat([target_val2, outliers2], ignore_index=True)

    y_true2 = [1 if label in out else -1 for label in testset2['class']]
    target_train2 = target_train2.drop(columns='class')
    testset2 = testset2.drop(columns='class')

    model2 = PP4.PseudoPoints(target_train2, 10)
    opt_comb2 = model2.search_optimal_hyperparameters()

    clf2 = OneClassSVM(nu=opt_comb2[0], gamma=opt_comb2[1])
    clf2.fit(target_train2)
    y_pred2 = clf2.predict(testset2)
    score2_macro, score2_micro, score2_weighted = (
        f1_score(y_true2, y_pred2, average=avg)
        for avg in ('macro', 'micro', 'weighted')
    )

    # Mean of the two directions, stored per averaging mode.
    mean_score_macro = np.mean([score1_macro, score2_macro])
    mean_score_micro = np.mean([score1_micro, score2_micro])
    mean_score_weighted = np.mean([score1_weighted, score2_weighted])

    scores_Glass_macro.append(mean_score_macro)
    scores_Glass_micro.append(mean_score_micro)
    scores_Glass_weighted.append(mean_score_weighted)

iter :  0
iter :  1
iter :  2
iter :  3
iter :  4
iter :  5
iter :  6
iter :  7
iter :  8
iter :  9


## 7. Landsat

In [502]:
# Landsat experiment. Column 36 holds the class; classes are split into two
# fixed groups (`normal`, `out`) that take turns as the target of a One-Class
# SVM, with the F1 scores of both directions averaged per repetition.

# Forward slash as the path separator: the earlier backslash form relied on
# "\s" not being a recognised escape sequence (which triggers an
# invalid-escape warning) and only resolved on Windows. This spelling matches
# the other 'bmdatasets/...' loaders and works on every platform.
df = pd.read_csv('bmdatasets/sat.tst', sep = ' ', header=None)
X_data = df.drop(36, axis=1)
scaler = MinMaxScaler(feature_range =(-1,1))
# NOTE: the scaler is fitted on every row, including rows later used for testing.
scaler.fit(X_data)
scaled = pd.DataFrame(scaler.transform(X_data))
scaled['class'] = df[36]
df = scaled
classes = df['class'].unique()
normal = [1,2,3]
out = [4,5,7]
# Collect the row labels of each group. `df` was rebuilt from an array above,
# so it carries a default RangeIndex and labels coincide with the positions
# that `.iloc` expects further down.
indice = []
outIndice = []
for c in normal:
    indice += df['class'].index[df['class'] == c].tolist()

for c in out:
    outIndice += df['class'].index[df['class'] == c].tolist()

indice = np.sort(indice)
outIndice = np.sort(outIndice)

# One averaged F1 value per repetition, for each averaging mode.
scores_Landsat_macro = []
scores_Landsat_micro = []
scores_Landsat_weighted = []

for iter_ in range(10):
    print('iter : ', iter_)

    # Direction 1: the `normal` group is the target, the `out` group the outliers.
    target1 = df.iloc[indice]
    target_train1 = target1.sample(frac = 0.7)  # unseeded 70% training sample
    target_val1 = target1.drop(target_train1.index)  # remaining 30% is held out
    target_train1 = target_train1.reset_index(drop=True)
    target_val1 = target_val1.reset_index(drop=True)
    outliers1 = df.iloc[outIndice].reset_index(drop=True)
    testset1 = pd.concat([target_val1, outliers1]).reset_index(drop=True)

    # +1 marks target rows and -1 marks outliers, matching OneClassSVM.predict.
    y_true1 =  [1 if  i in normal else -1 for i in testset1['class']]
    target_train1 = target_train1.drop('class', axis=1)
    testset1 = testset1.drop('class', axis=1)

    # Hyper-parameter search from the project-local PP4 module; element 0 of
    # the result is used as nu and element 1 as gamma.
    model1 = PP4.PseudoPoints(target_train1, 10)
    opt_comb1 = model1.search_optimal_hyperparameters()

    clf1 = OneClassSVM(nu = opt_comb1[0], gamma = opt_comb1[1]).fit(target_train1)
    y_pred1 = clf1.predict(testset1)
    score1_macro = f1_score(y_true1, y_pred1, average='macro')
    score1_micro = f1_score(y_true1, y_pred1, average='micro')
    score1_weighted = f1_score(y_true1, y_pred1, average='weighted')

    # Direction 2: roles swapped, the `out` group is the target.
    target2 = df.iloc[outIndice]
    target_train2 = target2.sample(frac = 0.7)
    target_val2 = target2.drop(target_train2.index)
    target_train2 = target_train2.reset_index(drop=True)
    target_val2 = target_val2.reset_index(drop=True)
    outliers2 = df.iloc[indice].reset_index(drop=True)
    testset2 = pd.concat([target_val2, outliers2]).reset_index(drop=True)

    y_true2 =  [1 if i in out else -1 for i in testset2['class']]
    target_train2 = target_train2.drop('class', axis=1)
    testset2 = testset2.drop('class', axis=1)

    model2 = PP4.PseudoPoints(target_train2, 10)
    opt_comb2 = model2.search_optimal_hyperparameters()

    clf2 = OneClassSVM(nu = opt_comb2[0], gamma = opt_comb2[1]).fit(target_train2)
    y_pred2 = clf2.predict(testset2)
    score2_macro = f1_score(y_true2, y_pred2, average='macro')
    score2_micro = f1_score(y_true2, y_pred2, average='micro')
    score2_weighted = f1_score(y_true2, y_pred2, average='weighted')

    # Average the two directions and record one value per averaging mode.
    mean_score_macro = np.mean([score1_macro, score2_macro])
    mean_score_micro = np.mean([score1_micro, score2_micro])
    mean_score_weighted = np.mean([score1_weighted, score2_weighted])

    scores_Landsat_macro.append(mean_score_macro)
    scores_Landsat_micro.append(mean_score_micro)
    scores_Landsat_weighted.append(mean_score_weighted)

iter :  0
iter :  1
iter :  2
iter :  3
iter :  4
iter :  5
iter :  6
iter :  7
iter :  8
iter :  9


## 8. Segment

In [507]:
# Segment experiment: column 19 holds the class. Classes are split into two
# fixed groups (`normal`, `out`) that take turns as the target of a One-Class
# SVM, 10 repetitions, both directions averaged.
df = pd.read_csv('bmdatasets/segment.dat', sep=' ', header=None)
X_data = df.drop(19, axis=1)
scaler = MinMaxScaler(feature_range=(-1, 1))
scaled = pd.DataFrame(scaler.fit(X_data).transform(X_data))
scaled['class'] = df[19]
df = scaled
classes = df['class'].unique()  # replaced by [normal, out] a few lines below
normal = [1,2,3,4]
out = [5,6,7]

# Row labels of each group; the rebuilt frame has a default RangeIndex, so
# these labels are also the positions that `.iloc` uses in the loop.
indice, outIndice = [], []
for c in normal:
    indice.extend(df.index[df['class'] == c].tolist())

for c in out:
    outIndice.extend(df.index[df['class'] == c].tolist())

indice = np.sort(indice)
outIndice = np.sort(outIndice)
classes = [normal, out]
scores_Segment_macro, scores_Segment_micro, scores_Segment_weighted = [], [], []

for iter_ in range(10):
    print('iter : ', iter_)

    # Direction 1: the `normal` group is the target, the `out` group the outliers.
    target1 = df.iloc[indice]
    target_train1 = target1.sample(frac=0.7)
    target_val1 = target1.drop(target_train1.index).reset_index(drop=True)
    target_train1 = target_train1.reset_index(drop=True)
    outliers1 = df.iloc[outIndice].reset_index(drop=True)
    testset1 = pd.concat([target_val1, outliers1], ignore_index=True)

    # +1 for target rows, -1 for outliers.
    y_true1 = [1 if label in normal else -1 for label in testset1['class']]
    target_train1 = target_train1.drop(columns='class')
    testset1 = testset1.drop(columns='class')

    # PP4 search result: element 0 is used as nu, element 1 as gamma.
    model1 = PP4.PseudoPoints(target_train1, 10)
    opt_comb1 = model1.search_optimal_hyperparameters()

    clf1 = OneClassSVM(nu=opt_comb1[0], gamma=opt_comb1[1])
    clf1.fit(target_train1)
    y_pred1 = clf1.predict(testset1)

    # Direction 2: roles swapped, the `out` group is the target.
    target2 = df.iloc[outIndice]
    target_train2 = target2.sample(frac=0.7)
    target_val2 = target2.drop(target_train2.index).reset_index(drop=True)
    target_train2 = target_train2.reset_index(drop=True)
    outliers2 = df.iloc[indice].reset_index(drop=True)
    testset2 = pd.concat([target_val2, outliers2], ignore_index=True)

    y_true2 = [1 if label in out else -1 for label in testset2['class']]
    target_train2 = target_train2.drop(columns='class')
    testset2 = testset2.drop(columns='class')

    model2 = PP4.PseudoPoints(target_train2, 10)
    opt_comb2 = model2.search_optimal_hyperparameters()

    clf2 = OneClassSVM(nu=opt_comb2[0], gamma=opt_comb2[1])
    clf2.fit(target_train2)
    y_pred2 = clf2.predict(testset2)

    # Score both directions once both predictions are available.
    score1_macro, score1_micro, score1_weighted = (
        f1_score(y_true1, y_pred1, average=avg)
        for avg in ('macro', 'micro', 'weighted')
    )
    score2_macro, score2_micro, score2_weighted = (
        f1_score(y_true2, y_pred2, average=avg)
        for avg in ('macro', 'micro', 'weighted')
    )

    # Mean of the two directions, stored per averaging mode.
    mean_score_macro = np.mean([score1_macro, score2_macro])
    mean_score_micro = np.mean([score1_micro, score2_micro])
    mean_score_weighted = np.mean([score1_weighted, score2_weighted])

    scores_Segment_macro.append(mean_score_macro)
    scores_Segment_micro.append(mean_score_micro)
    scores_Segment_weighted.append(mean_score_weighted)

iter :  0
iter :  1
iter :  2
iter :  3
iter :  4
iter :  5
iter :  6
iter :  7
iter :  8
iter :  9


## 9. Sonar

In [508]:
# Sonar experiment: column 60 holds the class. Each class value is the target
# in turn; both directions (class vs. rest, rest vs. class) are scored with a
# One-Class SVM and averaged, 10 repetitions per class.
df = pd.read_csv('bmdatasets/sonar.all-data', header=None)
X_data = df.drop(60, axis=1)
scaler = MinMaxScaler(feature_range=(-1, 1))
scaled = pd.DataFrame(scaler.fit(X_data).transform(X_data))
scaled['class'] = df[60]
df = scaled
classes = df['class'].unique()
scores_Sonar_macro, scores_Sonar_micro, scores_Sonar_weighted = [], [], []

for c in classes:
    for iter_ in range(10):
        print('iter : ', iter_,', target class = ', c)

        # Direction 1: class c is the target, the remaining rows are outliers.
        target1 = df.loc[df['class'] == c]
        target_train1 = target1.sample(frac=0.7)
        target_val1 = target1.drop(target_train1.index).reset_index(drop=True)
        target_train1 = target_train1.reset_index(drop=True)
        outliers1 = df.loc[df['class'] != c].reset_index(drop=True)
        testset1 = pd.concat([target_val1, outliers1], ignore_index=True)

        # +1 for target rows, -1 for outliers.
        y_true1 = [1 if label == c else -1 for label in testset1['class']]
        target_train1 = target_train1.drop(columns='class')
        testset1 = testset1.drop(columns='class')

        # PP4 search result: element 0 is used as nu, element 1 as gamma.
        model1 = PP4.PseudoPoints(target_train1, 10)
        opt_comb1 = model1.search_optimal_hyperparameters()

        clf1 = OneClassSVM(nu=opt_comb1[0], gamma=opt_comb1[1])
        clf1.fit(target_train1)
        y_pred1 = clf1.predict(testset1)

        # Direction 2: roles swapped, class c supplies the outliers.
        target2 = df.loc[df['class'] != c]
        target_train2 = target2.sample(frac=0.7)
        target_val2 = target2.drop(target_train2.index).reset_index(drop=True)
        target_train2 = target_train2.reset_index(drop=True)
        outliers2 = df.loc[df['class'] == c].reset_index(drop=True)
        testset2 = pd.concat([target_val2, outliers2], ignore_index=True)

        y_true2 = [1 if label != c else -1 for label in testset2['class']]
        target_train2 = target_train2.drop(columns='class')
        testset2 = testset2.drop(columns='class')

        model2 = PP4.PseudoPoints(target_train2, 10)
        opt_comb2 = model2.search_optimal_hyperparameters()

        clf2 = OneClassSVM(nu=opt_comb2[0], gamma=opt_comb2[1])
        clf2.fit(target_train2)
        y_pred2 = clf2.predict(testset2)

        # Score both directions once both predictions are available.
        score1_macro, score1_micro, score1_weighted = (
            f1_score(y_true1, y_pred1, average=avg)
            for avg in ('macro', 'micro', 'weighted')
        )
        score2_macro, score2_micro, score2_weighted = (
            f1_score(y_true2, y_pred2, average=avg)
            for avg in ('macro', 'micro', 'weighted')
        )

        # Mean of the two directions, stored per averaging mode.
        mean_score_macro = np.mean([score1_macro, score2_macro])
        mean_score_micro = np.mean([score1_micro, score2_micro])
        mean_score_weighted = np.mean([score1_weighted, score2_weighted])

        scores_Sonar_macro.append(mean_score_macro)
        scores_Sonar_micro.append(mean_score_micro)
        scores_Sonar_weighted.append(mean_score_weighted)

iter :  0 , target class =  R
iter :  1 , target class =  R
iter :  2 , target class =  R
iter :  3 , target class =  R
iter :  4 , target class =  R
iter :  5 , target class =  R
iter :  6 , target class =  R
iter :  7 , target class =  R
iter :  8 , target class =  R
iter :  9 , target class =  R
iter :  0 , target class =  M
iter :  1 , target class =  M
iter :  2 , target class =  M
iter :  3 , target class =  M
iter :  4 , target class =  M
iter :  5 , target class =  M
iter :  6 , target class =  M
iter :  7 , target class =  M
iter :  8 , target class =  M
iter :  9 , target class =  M


## 10. Vehicle

In [509]:
# Disabled loader, kept as a bare string literal so none of it executes; the
# cell merely evaluates to the string, which the notebook echoes as output.
# The quoted code reads the nine 'bmdatasets/xa?.dat' parts, keeps the first
# 19 columns of each and concatenates them into `df`.
'''
files = ['xaa.dat', 'xab.dat','xac.dat','xad.dat','xae.dat','xaf.dat','xag.dat','xah.dat','xai.dat']
df = pd.DataFrame()
for file in files:
    print(file)
    d = pd.read_csv('bmdatasets/'+file, header=None, sep=' ', error_bad_lines=False)
    d = d.iloc[:, :19]
    df = pd.concat([df, d]).reset_index(drop=True)
'''

"\nfiles = ['xaa.dat', 'xab.dat','xac.dat','xad.dat','xae.dat','xaf.dat','xag.dat','xah.dat','xai.dat']\ndf = pd.DataFrame()\nfor file in files:\n    print(file)\n    d = pd.read_csv('bmdatasets/'+file, header=None, sep=' ', error_bad_lines=False)\n    d = d.iloc[:, :19]\n    df = pd.concat([df, d]).reset_index(drop=True)\n"

In [510]:
# Load the Vehicle data set; the next cell treats column '18' as the class label.
vehicle_path = 'vehicle.csv'
df = pd.read_csv(vehicle_path)

In [511]:
# Vehicle: scale the features to [-1, 1] and score the one-class SVM in both
# directions (`normal` classes as target, then `out` classes as target).
# NOTE: this cell rebinds `df` to the scaled frame, which no longer has column
# '18'; re-run the loading cell above before re-running this one.
X_data = df.drop('18', axis=1)
scaler = MinMaxScaler(feature_range=(-1, 1))
scaler.fit(X_data)
scaled = pd.DataFrame(scaler.transform(X_data))
scaled['class'] = df['18']
df = scaled
classes = df['class'].unique()

normal = ['van', 'saab']
out = ['bus', 'opel']
# Sorted row labels of each class group. `df` was just rebuilt from an array,
# so it carries a default 0..n-1 index and labels coincide with positions.
indice = np.sort(df.index[df['class'].isin(normal)])
outIndice = np.sort(df.index[df['class'].isin(out)])

scores_Vehicle_macro = []
scores_Vehicle_micro = []
scores_Vehicle_weighted = []

# (target rows, outlier rows, labels counted as target) for each direction.
directions = [(indice, outIndice, normal), (outIndice, indice, out)]

for iter_ in range(10):
    print('iter : ', iter_)

    # F1 of each direction for this repetition, keyed by averaging mode.
    direction_f1 = {'macro': [], 'micro': [], 'weighted': []}
    for target_rows, outlier_rows, target_labels in directions:
        target = df.iloc[target_rows]
        # 70 % of the target rows train the model; the remaining 30 % plus
        # every outlier row form the test set. No random_state is passed, so
        # the split depends on NumPy's global RNG state.
        target_train = target.sample(frac=0.7)
        target_val = target.drop(target_train.index).reset_index(drop=True)
        target_train = target_train.reset_index(drop=True)
        outliers = df.iloc[outlier_rows].reset_index(drop=True)
        testset = pd.concat([target_val, outliers]).reset_index(drop=True)

        y_true = [1 if label in target_labels else -1 for label in testset['class']]
        target_train = target_train.drop('class', axis=1)
        testset = testset.drop('class', axis=1)

        # PP4 only sees the training targets; element 0 of its result is used
        # as nu and element 1 as gamma.
        # NOTE(review): the meaning of the second argument (10) is defined in PP4.
        model = PP4.PseudoPoints(target_train, 10)
        opt_comb = model.search_optimal_hyperparameters()

        clf = OneClassSVM(nu=opt_comb[0], gamma=opt_comb[1]).fit(target_train)
        y_pred = clf.predict(testset)

        for average in direction_f1:
            direction_f1[average].append(f1_score(y_true, y_pred, average=average))

    # One value per repetition: the mean over the two directions.
    scores_Vehicle_macro.append(np.mean(direction_f1['macro']))
    scores_Vehicle_micro.append(np.mean(direction_f1['micro']))
    scores_Vehicle_weighted.append(np.mean(direction_f1['weighted']))

iter :  0
iter :  1
iter :  2
iter :  3
iter :  4
iter :  5
iter :  6
iter :  7
iter :  8
iter :  9


## 11. Waveform3

In [512]:
# Waveform3: every class in turn is the target (direction 1) and then the only
# outlier class (direction 2); both directions are scored on each repetition.
df = pd.read_csv('bmdatasets/waveform.data', header=None, sep=',')
X_data = df.drop(21, axis=1)  # column 21 is the class label
scaler = MinMaxScaler(feature_range=(-1, 1), copy=False)
scaler.fit(X_data)
scaled = pd.DataFrame(scaler.transform(X_data))
scaled['class'] = df[21]
df = scaled
classes = df['class'].unique()

scores_Waveform3_macro = []
scores_Waveform3_micro = []
scores_Waveform3_weighted = []

for c in classes:
    # Loop-invariant per class: the remaining classes and the row mask of c.
    other_classes = np.delete(classes, np.argwhere(classes == c))
    is_c = df['class'] == c
    # (mask of target rows, target is class c?, printed target, printed outlier)
    directions = [
        (is_c, True, c, other_classes),
        (~is_c, False, other_classes, c),
    ]

    for iter_ in range(10):
        # F1 of each direction for this repetition, keyed by averaging mode.
        direction_f1 = {'macro': [], 'micro': [], 'weighted': []}
        for target_mask, target_is_c, target_name, outlier_name in directions:
            print('iter : ',iter_,', target class = ', target_name, ', outlier class = ' , outlier_name)
            target = df[target_mask]
            # 70 % of the target rows train the model; the remaining 30 % plus
            # every outlier row form the test set. No random_state is passed,
            # so the split depends on NumPy's global RNG state.
            target_train = target.sample(frac=0.7)
            target_val = target.drop(target_train.index).reset_index(drop=True)
            target_train = target_train.reset_index(drop=True)
            outliers = df[~target_mask].reset_index(drop=True)
            testset = pd.concat([target_val, outliers]).reset_index(drop=True)

            # +1 for rows of the target side, -1 for rows of the outlier side.
            y_true = [1 if (label == c) == target_is_c else -1 for label in testset['class']]
            target_train = target_train.drop('class', axis=1)
            testset = testset.drop('class', axis=1)

            # PP4 only sees the training targets; element 0 of its result is
            # used as nu and element 1 as gamma.
            model = PP4.PseudoPoints(target_train, 10)
            opt_comb = model.search_optimal_hyperparameters()

            clf = OneClassSVM(nu=opt_comb[0], gamma=opt_comb[1]).fit(target_train)
            y_pred = clf.predict(testset)

            for average in direction_f1:
                direction_f1[average].append(f1_score(y_true, y_pred, average=average))

        # One value per (class, repetition): the mean over the two directions.
        scores_Waveform3_macro.append(np.mean(direction_f1['macro']))
        scores_Waveform3_micro.append(np.mean(direction_f1['micro']))
        scores_Waveform3_weighted.append(np.mean(direction_f1['weighted']))

iter :  0 , target class =  2 , outlier class =  [1 0]
iter :  0 , target class =  [1 0] , outlier class =  2
iter :  1 , target class =  2 , outlier class =  [1 0]
iter :  1 , target class =  [1 0] , outlier class =  2
iter :  2 , target class =  2 , outlier class =  [1 0]
iter :  2 , target class =  [1 0] , outlier class =  2
iter :  3 , target class =  2 , outlier class =  [1 0]
iter :  3 , target class =  [1 0] , outlier class =  2
iter :  4 , target class =  2 , outlier class =  [1 0]
iter :  4 , target class =  [1 0] , outlier class =  2
iter :  5 , target class =  2 , outlier class =  [1 0]
iter :  5 , target class =  [1 0] , outlier class =  2
iter :  6 , target class =  2 , outlier class =  [1 0]
iter :  6 , target class =  [1 0] , outlier class =  2
iter :  7 , target class =  2 , outlier class =  [1 0]
iter :  7 , target class =  [1 0] , outlier class =  2
iter :  8 , target class =  2 , outlier class =  [1 0]
iter :  8 , target class =  [1 0] , outlier class =  2
iter :  9 

## 12. Winequality

In [513]:
# Winequality (red): quality grades 3-5 form one group and 6-8 the other; each
# group is used once as the target and once as the outlier set.
df = pd.read_csv('bmdatasets/winequality-red.csv', sep=';') #red wine
X_data = df.drop('quality', axis=1)
scaler = MinMaxScaler(feature_range=(-1, 1))
scaler.fit(X_data)
scaled = pd.DataFrame(scaler.transform(X_data))
scaled['class'] = df['quality']
df = scaled
classes = df['class'].unique()

normal = [3,4,5]
out = [6,7,8]
# Sorted row labels of each group. `df` was just rebuilt from an array, so it
# carries a default 0..n-1 index and labels coincide with positions.
indice = np.sort(df.index[df['class'].isin(normal)])
outIndice = np.sort(df.index[df['class'].isin(out)])

scores_Winequality_macro = []
scores_Winequality_micro = []
scores_Winequality_weighted = []

# (target rows, outlier rows, labels counted as target) for each direction.
directions = [(indice, outIndice, normal), (outIndice, indice, out)]

for iter_ in range(10):
    print('iter : ', iter_)

    # F1 of each direction for this repetition, keyed by averaging mode.
    direction_f1 = {'macro': [], 'micro': [], 'weighted': []}
    for target_rows, outlier_rows, target_labels in directions:
        target = df.iloc[target_rows]
        # 70 % of the target rows train the model; the remaining 30 % plus
        # every outlier row form the test set. No random_state is passed, so
        # the split depends on NumPy's global RNG state.
        target_train = target.sample(frac=0.7)
        target_val = target.drop(target_train.index).reset_index(drop=True)
        target_train = target_train.reset_index(drop=True)
        outliers = df.iloc[outlier_rows].reset_index(drop=True)
        testset = pd.concat([target_val, outliers]).reset_index(drop=True)

        y_true = [1 if label in target_labels else -1 for label in testset['class']]
        target_train = target_train.drop('class', axis=1)
        testset = testset.drop('class', axis=1)

        # PP4 only sees the training targets; element 0 of its result is used
        # as nu and element 1 as gamma.
        model = PP4.PseudoPoints(target_train, 10)
        opt_comb = model.search_optimal_hyperparameters()

        clf = OneClassSVM(nu=opt_comb[0], gamma=opt_comb[1]).fit(target_train)
        y_pred = clf.predict(testset)

        for average in direction_f1:
            direction_f1[average].append(f1_score(y_true, y_pred, average=average))

    # One value per repetition: the mean over the two directions.
    scores_Winequality_macro.append(np.mean(direction_f1['macro']))
    scores_Winequality_micro.append(np.mean(direction_f1['micro']))
    scores_Winequality_weighted.append(np.mean(direction_f1['weighted']))

iter :  0
iter :  1
iter :  2
iter :  3
iter :  4
iter :  5
iter :  6
iter :  7
iter :  8
iter :  9


## 13. SVMguide1

In [530]:
# Load svmguide1: column 0 is the class label, every other field is an
# "index:value" token of which only the value is kept.
df = pd.read_csv('svmguide1.txt', header = None, sep = ' ')
classes = df[0]
df = df.drop(0, axis=1)

# Split each field on ':' and keep the part after the first colon, column-wise
# instead of one Python-level lookup per element.
tokens = df.astype(str).apply(lambda col: col.str.split(':').str[1])
if tokens.isna().any().any():
    # A field without ':' has no value part; fail loudly rather than emit NaN.
    raise ValueError('svmguide1.txt: found a feature field that is not of the form "index:value"')

val = tokens.astype(float).to_numpy()
df = pd.DataFrame(val)  # feature columns are renumbered from 0
df['class'] = classes

In [545]:
# SVMguide1: class 1 is the target in direction 1 and the outlier class in
# direction 2; both directions are scored on each of the 20 repetitions.
X_data = df.drop('class', axis=1)
scaler = MinMaxScaler(feature_range=(-1, 1), copy=False)
scaler.fit(X_data)
scaled = pd.DataFrame(scaler.transform(X_data))
scaled['class'] = df['class']
df = scaled
classes = df['class'].unique()

scores_SVMguide1_macro = []
scores_SVMguide1_micro = []
scores_SVMguide1_weighted = []

is_one = df['class'] == 1
# (mask of target rows, target is class 1?) for each direction.
directions = [(is_one, True), (~is_one, False)]

for iter_ in range(20):
    print('iter : ',iter_)

    # F1 of each direction for this repetition, keyed by averaging mode.
    direction_f1 = {'macro': [], 'micro': [], 'weighted': []}
    for target_mask, target_is_one in directions:
        target = df[target_mask]
        # 70 % of the target rows train the model; the remaining 30 % plus
        # every outlier row form the test set. No random_state is passed, so
        # the split depends on NumPy's global RNG state.
        target_train = target.sample(frac=0.7)
        target_val = target.drop(target_train.index).reset_index(drop=True)
        target_train = target_train.reset_index(drop=True)
        outliers = df[~target_mask].reset_index(drop=True)
        testset = pd.concat([target_val, outliers]).reset_index(drop=True)

        # +1 for rows of the target side, -1 for rows of the outlier side.
        y_true = [1 if (label == 1) == target_is_one else -1 for label in testset['class']]
        target_train = target_train.drop('class', axis=1)
        testset = testset.drop('class', axis=1)

        # PP4 only sees the training targets; element 0 of its result is used
        # as nu and element 1 as gamma.
        model = PP4.PseudoPoints(target_train, 10)
        opt_comb = model.search_optimal_hyperparameters()

        clf = OneClassSVM(nu=opt_comb[0], gamma=opt_comb[1]).fit(target_train)
        y_pred = clf.predict(testset)

        for average in direction_f1:
            direction_f1[average].append(f1_score(y_true, y_pred, average=average))

    # One value per repetition: the mean over the two directions.
    scores_SVMguide1_macro.append(np.mean(direction_f1['macro']))
    scores_SVMguide1_micro.append(np.mean(direction_f1['micro']))
    scores_SVMguide1_weighted.append(np.mean(direction_f1['weighted']))

iter :  0
iter :  1
iter :  2
iter :  3
iter :  4
iter :  5
iter :  6
iter :  7
iter :  8
iter :  9
iter :  10
iter :  11
iter :  12
iter :  13
iter :  14
iter :  15
iter :  16
iter :  17
iter :  18
iter :  19


## 14. Diabetes

In [516]:
# Diabetes: parse the "index:value" fields, scale to [-1, 1], then score the
# one-class SVM with class 1 as target and with class 1 as outlier.
df = pd.read_csv('bmdatasets/diabetes.txt', header = None, sep = ' ')
# NOTE(review): column 1 is discarded before parsing -- presumably an empty
# field produced by a doubled separator; confirm against the raw file.
df = df.drop(1, axis=1)
df.columns = range(df.shape[1])
# Rows with any missing field are discarded.
df = df.dropna().reset_index(drop=True)
classes = df[0]
df = df.drop(0, axis=1)

# Split each field on ':' and keep the part after the first colon, column-wise
# instead of one Python-level lookup per element.
tokens = df.astype(str).apply(lambda col: col.str.split(':').str[1])
if tokens.isna().any().any():
    # A field without ':' has no value part; fail loudly rather than emit NaN.
    raise ValueError('diabetes.txt: found a feature field that is not of the form "index:value"')

val = tokens.astype(float).to_numpy()
df = pd.DataFrame(val)  # feature columns are renumbered from 0
df['class'] = classes

X_data = df.drop('class', axis=1)
scaler = MinMaxScaler(feature_range=(-1, 1))
scaler.fit(X_data)
scaled = pd.DataFrame(scaler.transform(X_data))
scaled['class'] = df['class']
df = scaled
classes = df['class'].unique()

scores_Diabetes_macro = []
scores_Diabetes_micro = []
scores_Diabetes_weighted = []

is_one = df['class'] == 1
# (mask of target rows, target is class 1?) for each direction.
directions = [(is_one, True), (~is_one, False)]

for iter_ in range(10):
    print('iter : ',iter_)

    # F1 of each direction for this repetition, keyed by averaging mode.
    direction_f1 = {'macro': [], 'micro': [], 'weighted': []}
    for target_mask, target_is_one in directions:
        target = df[target_mask]
        # 70 % of the target rows train the model; the remaining 30 % plus
        # every outlier row form the test set. No random_state is passed, so
        # the split depends on NumPy's global RNG state.
        target_train = target.sample(frac=0.7)
        target_val = target.drop(target_train.index).reset_index(drop=True)
        target_train = target_train.reset_index(drop=True)
        outliers = df[~target_mask].reset_index(drop=True)
        testset = pd.concat([target_val, outliers]).reset_index(drop=True)

        # +1 for rows of the target side, -1 for rows of the outlier side.
        y_true = [1 if (label == 1) == target_is_one else -1 for label in testset['class']]
        target_train = target_train.drop('class', axis=1)
        testset = testset.drop('class', axis=1)

        # PP4 only sees the training targets; element 0 of its result is used
        # as nu and element 1 as gamma.
        model = PP4.PseudoPoints(target_train, 10)
        opt_comb = model.search_optimal_hyperparameters()

        clf = OneClassSVM(nu=opt_comb[0], gamma=opt_comb[1]).fit(target_train)
        y_pred = clf.predict(testset)

        for average in direction_f1:
            direction_f1[average].append(f1_score(y_true, y_pred, average=average))

    # One value per repetition: the mean over the two directions.
    scores_Diabetes_macro.append(np.mean(direction_f1['macro']))
    scores_Diabetes_micro.append(np.mean(direction_f1['micro']))
    scores_Diabetes_weighted.append(np.mean(direction_f1['weighted']))

iter :  0
iter :  1
iter :  2
iter :  3
iter :  4
iter :  5
iter :  6
iter :  7
iter :  8
iter :  9


## 15. Vowel

In [517]:
# Vowel: parse the "index:value" fields, scale to [-1, 1], then score the
# one-class SVM with classes 0-5 as target and with classes 6-10 as target.
df = pd.read_csv('bmdatasets/vowel.txt', sep = ' ', header = None)
# Rows with any missing field are discarded.
df = df.dropna().reset_index(drop=True)
classes = df[0]
df = df.drop(0, axis=1)

# Split each field on ':' and keep the part after the first colon, column-wise
# instead of one Python-level lookup per element.
tokens = df.astype(str).apply(lambda col: col.str.split(':').str[1])
if tokens.isna().any().any():
    # A field without ':' has no value part; fail loudly rather than emit NaN.
    raise ValueError('vowel.txt: found a feature field that is not of the form "index:value"')

val = tokens.astype(float).to_numpy()
df = pd.DataFrame(val)  # feature columns are renumbered from 0
df['class'] = classes

X_data = df.drop('class', axis=1)
scaler = MinMaxScaler(feature_range=(-1, 1))
scaler.fit(X_data)
scaled = pd.DataFrame(scaler.transform(X_data))
scaled['class'] = df['class']
df = scaled

normal = [0,1,2,3,4,5]
out = [6,7,8,9,10]
# Sorted row labels of each group. `df` was just rebuilt from an array, so it
# carries a default 0..n-1 index and labels coincide with positions.
indice = np.sort(df.index[df['class'].isin(normal)])
outIndice = np.sort(df.index[df['class'].isin(out)])

scores_Vowel_macro = []
scores_Vowel_micro = []
scores_Vowel_weighted = []

# (target rows, outlier rows, labels counted as target) for each direction.
directions = [(indice, outIndice, normal), (outIndice, indice, out)]

for iter_ in range(10):
    print('iter : ', iter_)

    # F1 of each direction for this repetition, keyed by averaging mode.
    direction_f1 = {'macro': [], 'micro': [], 'weighted': []}
    for target_rows, outlier_rows, target_labels in directions:
        target = df.iloc[target_rows]
        # 70 % of the target rows train the model; the remaining 30 % plus
        # every outlier row form the test set. No random_state is passed, so
        # the split depends on NumPy's global RNG state.
        target_train = target.sample(frac=0.7)
        target_val = target.drop(target_train.index).reset_index(drop=True)
        target_train = target_train.reset_index(drop=True)
        outliers = df.iloc[outlier_rows].reset_index(drop=True)
        testset = pd.concat([target_val, outliers]).reset_index(drop=True)

        y_true = [1 if label in target_labels else -1 for label in testset['class']]
        target_train = target_train.drop('class', axis=1)
        testset = testset.drop('class', axis=1)

        # PP4 only sees the training targets; element 0 of its result is used
        # as nu and element 1 as gamma.
        model = PP4.PseudoPoints(target_train, 10)
        opt_comb = model.search_optimal_hyperparameters()

        clf = OneClassSVM(nu=opt_comb[0], gamma=opt_comb[1]).fit(target_train)
        y_pred = clf.predict(testset)

        for average in direction_f1:
            direction_f1[average].append(f1_score(y_true, y_pred, average=average))

    # One value per repetition: the mean over the two directions.
    scores_Vowel_macro.append(np.mean(direction_f1['macro']))
    scores_Vowel_micro.append(np.mean(direction_f1['micro']))
    scores_Vowel_weighted.append(np.mean(direction_f1['weighted']))

iter :  0
iter :  1
iter :  2
iter :  3
iter :  4
iter :  5
iter :  6
iter :  7
iter :  8
iter :  9


In [547]:
# Gather the macro-averaged F1 score lists of every data set, then reduce each
# list to its mean.
f1_macro = {
    'Abalone': scores_Abalone_macro,
    'Australian': scores_Australian_macro,
    'Balance': scores_Balance_macro,
    'Glass': scores_Glass_macro,
    'Heart': scores_Heart_macro,
    'Landsat': scores_Landsat_macro,
    'Letter': scores_Letter_macro,
    'Segment': scores_Segment_macro,
    'Sonar': scores_Sonar_macro,
    'Vehicle': scores_Vehicle_macro,
    'Waveform3': scores_Waveform3_macro,
    'Winequality': scores_Winequality_macro,
    'SVMguide1': scores_SVMguide1_macro,
    'Diabetes': scores_Diabetes_macro,
    'Vowel': scores_Vowel_macro,
}
f1_macro_mean = {dataset: np.mean(run_scores) for dataset, run_scores in f1_macro.items()}

In [548]:
# Gather the micro-averaged F1 score lists of every data set, then reduce each
# list to its mean.
f1_micro = {
    'Abalone': scores_Abalone_micro,
    'Australian': scores_Australian_micro,
    'Balance': scores_Balance_micro,
    'Glass': scores_Glass_micro,
    'Heart': scores_Heart_micro,
    'Landsat': scores_Landsat_micro,
    'Letter': scores_Letter_micro,
    'Segment': scores_Segment_micro,
    'Sonar': scores_Sonar_micro,
    'Vehicle': scores_Vehicle_micro,
    'Waveform3': scores_Waveform3_micro,
    'Winequality': scores_Winequality_micro,
    'SVMguide1': scores_SVMguide1_micro,
    'Diabetes': scores_Diabetes_micro,
    'Vowel': scores_Vowel_micro,
}
f1_micro_mean = {dataset: np.mean(run_scores) for dataset, run_scores in f1_micro.items()}

In [549]:
# Gather the weighted-average F1 score lists of every data set, then reduce
# each list to its mean.
f1_weighted = {
    'Abalone': scores_Abalone_weighted,
    'Australian': scores_Australian_weighted,
    'Balance': scores_Balance_weighted,
    'Glass': scores_Glass_weighted,
    'Heart': scores_Heart_weighted,
    'Landsat': scores_Landsat_weighted,
    'Letter': scores_Letter_weighted,
    'Segment': scores_Segment_weighted,
    'Sonar': scores_Sonar_weighted,
    'Vehicle': scores_Vehicle_weighted,
    'Waveform3': scores_Waveform3_weighted,
    'Winequality': scores_Winequality_weighted,
    'SVMguide1': scores_SVMguide1_weighted,
    'Diabetes': scores_Diabetes_weighted,
    'Vowel': scores_Vowel_weighted,
}
f1_weighted_mean = {dataset: np.mean(run_scores) for dataset, run_scores in f1_weighted.items()}

In [550]:
# Display the per-dataset mean of the macro-averaged F1 scores.
f1_macro_mean

{'Abalone': 0.42956020115846005,
 'Australian': 0.5825646256076714,
 'Balance': 0.7260307428564896,
 'Glass': 0.6275776284712566,
 'Heart': 0.6076398701776257,
 'Landsat': 0.8098854323380793,
 'Letter': 0.6971528174420796,
 'Segment': 0.7750995541796865,
 'Sonar': 0.5831122286663195,
 'Vehicle': 0.5666267932696067,
 'Waveform3': 0.6466377250450311,
 'Winequality': 0.4482553991235784,
 'SVMguide1': 0.8469852112809765,
 'Diabetes': 0.4591368007167511,
 'Vowel': 0.7748490752301298}

In [529]:
# Display the per-dataset mean of the weighted-average F1 scores.
f1_weighted_mean

{'Abalone': 0.45779301761920466,
 'Australian': 0.6356974049801933,
 'Balance': 0.8232677255129409,
 'Glass': 0.7098045426510994,
 'Heart': 0.6726825939068124,
 'Landsat': 0.8631813974769649,
 'Letter': 0.8091572815277898,
 'Segment': 0.8414636374035631,
 'Sonar': 0.6589960432953669,
 'Vehicle': 0.632105805341449,
 'Waveform3': 0.7124674083496176,
 'Winequality': 0.47447219734854695,
 'SVMguide1': 0.8701342885544914,
 'Diabetes': 0.503568297153782,
 'Vowel': 0.8358397024846921}

In [565]:
# Display the per-dataset mean of the micro-averaged F1 scores.
f1_micro_mean

{'Abalone': 0.46547940377568325,
 'Australian': 0.6168855076078822,
 'Balance': 0.8124085230170507,
 'Glass': 0.6857977528089887,
 'Heart': 0.6454594330400784,
 'Landsat': 0.8599991344306301,
 'Letter': 0.8399547478186579,
 'Segment': 0.8452741702741703,
 'Sonar': 0.6395467032967034,
 'Vehicle': 0.601234332983435,
 'Waveform3': 0.6989853949735807,
 'Winequality': 0.47203867561010415,
 'SVMguide1': 0.8969848825904772,
 'Diabetes': 0.48454957927734704,
 'Vowel': 0.830586162440911}

In [568]:
# Dump the raw score lists of all three averaging modes in a single print call.
raw_score_report = ('f1_micro : ', f1_micro, ' f1_macro : ', f1_macro, 'f1_weighted : ', f1_weighted)
print(*raw_score_report)

f1_micro :  {'Abalone': [0.3526222362374215, 0.36822411338123784, 0.34282987009046406, 0.35504618526951137, 0.38034995478389444, 0.364044030940646, 0.3519765194918339, 0.3781532986004552, 0.3691433989958658, 0.3703221905571702, 0.4002636589057491, 0.3829118882083977, 0.38930341786557043, 0.38805172386487813, 0.40325050169569754, 0.39756640822079586, 0.37755415146119575, 0.3904452422053344, 0.3972697459960498, 0.39787721068504367, 0.6410524305675844, 0.6416120752565789, 0.6608447564785139, 0.6372956712455009, 0.5457869345074887, 0.6626687418866307, 0.6362945353385906, 0.6612924440644006, 0.6741784953922962, 0.6461502810757007], 'Australian': [0.5662060364180594, 0.5410800698428535, 0.6406784734347717, 0.5084435021202295, 0.7448216512846096, 0.7588924918932403, 0.5659416313295086, 0.5100249438762784, 0.5022200049887753, 0.6526490396607634, 0.5814816662509354, 0.6577650286854577, 0.6168645547518083, 0.7292866051384386, 0.5749064604639561, 0.5676702419555999, 0.6767223746570217, 0.59435769