# OneClassSVM
https://scikit-learn.org/stable/modules/generated/sklearn.svm.OneClassSVM.html

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Read the dataset and discard any column whose name starts with "Unnamed"
# (presumably index columns written by an earlier CSV export -- confirm against the data)
df = pd.read_csv('data/selected_data.csv')
unnamed_cols = df.columns.str.contains('^Unnamed')
df = df.loc[:, ~unnamed_cols]

# The last column is the label; every column before it is a feature
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Standardize the features, then rebuild the frame with the label re-attached as 'target'.
# NOTE(review): the scaler is fitted on every row, before any train/test split happens.
scaled_features = StandardScaler().fit_transform(X)
df = pd.DataFrame(scaled_features, columns=X.columns.values)
df['target'] = y

# Partition the rows by label value
is_correct = df['target'] == 1
is_incorrect = df['target'] == 0
df_correct = df.loc[is_correct]
df_incorrect = df.loc[is_incorrect]

# Report the shape of each partition
print(f'Target 1 shape: {df_correct.shape}')
print(f'Target 0 shape: {df_incorrect.shape}')

Target 1 shape: (5055, 39)
Target 0 shape: (4959, 39)


In [2]:
from sklearn.model_selection import train_test_split

# Only target == 1 rows are used for fitting: 80% go to training and the
# remaining 20% are held out. Every target == 0 row is kept for testing.
df_train, df_test_correct = train_test_split(
    df_correct,
    test_size=0.2,
    random_state=0,
)
df_test_incorrect = df_incorrect

# Drop the last column (the label) from each frame to obtain the feature matrices
X_train, X_test_correct, X_test_incorrect = (
    frame.iloc[:, :-1]
    for frame in (df_train, df_test_correct, df_test_incorrect)
)

In [3]:
# -1 = outlier
#  1 = inlier
from sklearn.metrics import f1_score

def get_f1_score(y_pred, y_true=None):
    """Print and return the F1 score of -1/1 encoded predictions.

    Ground-truth labels equal to 0 are remapped to -1 so that they follow the
    same convention as the predictions (-1 = outlier, 1 = inlier).

    Parameters
    ----------
    y_pred : array-like
        Predicted labels, encoded as -1 / 1.
    y_true : array-like, optional
        Ground-truth labels, encoded as 0 / 1 (or already as -1 / 1). When
        omitted, the notebook-level ``y_test`` is used, which is what this
        function always did before; no visible cell defines ``y_test``, so
        callers should pass ``y_true`` explicitly.

    Returns
    -------
    float
        The value computed by ``f1_score(y_true, y_pred)``.

    Raises
    ------
    NameError
        If ``y_true`` is omitted and no global ``y_test`` exists.
    """
    if y_true is None:
        # Legacy fallback kept for backward compatibility with one-argument calls
        y_true = y_test

    # np.array copies its input, so the caller's labels are never modified
    labels = np.array(y_true)
    labels[labels == 0] = -1

    score = f1_score(labels, y_pred)
    print()
    print("f1_score: ", score)
    return score

### rbf

In [4]:
import collections
from sklearn.svm import OneClassSVM

# Fit a one-class SVM with an RBF kernel on the training split
rbf = OneClassSVM(kernel='rbf', gamma='scale')
rbf.fit(X_train)

# Predict both held-out groups (1 = inlier, -1 = outlier)
pred_correct, pred_incorrect = (
    rbf.predict(features) for features in (X_test_correct, X_test_incorrect)
)

# Tally the predicted labels per group
for heading, predictions in (
    ("PRED CORRECT", pred_correct),
    ("PRED INCORRECT ", pred_incorrect),
):
    print(heading, collections.Counter(predictions))

# TODO: F1 evaluation is not wired up -- the call `get_f1_score(pred_test)` was left
# commented out here, and `pred_test` is not defined in any visible cell.

PRED CORRECT Counter({-1: 537, 1: 474})
PRED INCORRECT  Counter({-1: 3832, 1: 1127})


### poly

In [5]:
import collections
from sklearn.svm import OneClassSVM

# Fit a one-class SVM with a polynomial kernel on the training split
poly_svm = OneClassSVM(kernel='poly', gamma='scale').fit(X_train)
# Backward-compatible alias: this cell used to bind its model to the misleading
# name `rbf`; keep the binding so any later cell reading `rbf` behaves the same.
rbf = poly_svm

# Predict both held-out groups (1 = inlier, -1 = outlier)
pred_correct = poly_svm.predict(X_test_correct)
pred_incorrect = poly_svm.predict(X_test_incorrect)

# Tally the predicted labels per group
print("PRED CORRECT", collections.Counter(pred_correct))
print("PRED INCORRECT ", collections.Counter(pred_incorrect))

# TODO: F1 evaluation is not wired up -- the call `get_f1_score(pred_test)` was left
# commented out here, and `pred_test` is not defined in any visible cell.

PRED CORRECT Counter({-1: 516, 1: 495})
PRED INCORRECT  Counter({-1: 3267, 1: 1692})


### linear

In [6]:
import collections
from sklearn.svm import OneClassSVM

# Fit a one-class SVM with a linear kernel on the training split.
# scikit-learn documents `gamma` as the coefficient for the 'rbf', 'poly' and
# 'sigmoid' kernels only, so it should have no effect here; it is left unchanged.
linear_svm = OneClassSVM(kernel='linear', gamma='scale').fit(X_train)
# Backward-compatible alias: this cell used to bind its model to the misleading
# name `rbf`; keep the binding so any later cell reading `rbf` behaves the same.
rbf = linear_svm

# Predict both held-out groups (1 = inlier, -1 = outlier)
pred_correct = linear_svm.predict(X_test_correct)
pred_incorrect = linear_svm.predict(X_test_incorrect)

# Tally the predicted labels per group
print("PRED CORRECT", collections.Counter(pred_correct))
print("PRED INCORRECT ", collections.Counter(pred_incorrect))

# TODO: F1 evaluation is not wired up -- the call `get_f1_score(pred_test)` was left
# commented out here, and `pred_test` is not defined in any visible cell.

PRED CORRECT Counter({1: 506, -1: 505})
PRED INCORRECT  Counter({-1: 4261, 1: 698})


### sigmoid

In [7]:
import collections
from sklearn.svm import OneClassSVM

# Fit a one-class SVM with a sigmoid kernel on the training split
sigmoid_svm = OneClassSVM(kernel='sigmoid', gamma='scale').fit(X_train)
# Backward-compatible alias: this cell used to bind its model to the misleading
# name `rbf`; keep the binding so any later cell reading `rbf` behaves the same.
rbf = sigmoid_svm

# Predict both held-out groups (1 = inlier, -1 = outlier)
pred_correct = sigmoid_svm.predict(X_test_correct)
pred_incorrect = sigmoid_svm.predict(X_test_incorrect)

# Tally the predicted labels per group
print("PRED CORRECT", collections.Counter(pred_correct))
print("PRED INCORRECT ", collections.Counter(pred_incorrect))

# TODO: F1 evaluation is not wired up -- the call `get_f1_score(pred_test)` was left
# commented out here, and `pred_test` is not defined in any visible cell.

PRED CORRECT Counter({-1: 513, 1: 498})
PRED INCORRECT  Counter({-1: 4067, 1: 892})
