# OneClassSVM
https://scikit-learn.org/stable/modules/generated/sklearn.svm.OneClassSVM.html

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Load the dataset and drop any column whose name starts with 'Unnamed'
# (typically a stray index column written out by an earlier to_csv call).
df = pd.read_csv('data/selected_data1.csv')
keep_columns = ~df.columns.str.contains('^Unnamed')
df = df.loc[:, keep_columns]

# The last column is the label; every other column is a feature.
X, y = df.iloc[:, :-1], df.iloc[:, -1]

# Standardize the features and rebuild the frame with the label appended last.
# NOTE(review): the scaler is fit on every row, including rows that are later
# held out for testing.
scaled_features = StandardScaler().fit_transform(X)
df = pd.DataFrame(scaled_features, columns=X.columns.values)
df['target'] = y

# Split the rows by label value.
df_correct = df.loc[df['target'] == 1]
df_incorrect = df.loc[df['target'] == 0]

# Report the size of each label group.
print(f'Target 1 shape: {df_correct.shape}')
print(f'Target 0 shape: {df_incorrect.shape}')

Target 1 shape: (5049, 25)
Target 0 shape: (4951, 25)


In [2]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the target == 1 rows for testing; the model is trained on
# the remaining target == 1 rows only. The seed is fixed for reproducibility.
df_train, df_test_correct = train_test_split(
    df_correct,
    test_size=0.2,
    random_state=0,
)
# Every target == 0 row is used for testing.
df_test_incorrect = df_incorrect

# Drop the trailing label column to obtain the feature matrices.
X_train, X_test_correct, X_test_incorrect = (
    frame.iloc[:, :-1]
    for frame in (df_train, df_test_correct, df_test_incorrect)
)

In [3]:
# -1 = outlier
#  1 = inlier
from sklearn.metrics import f1_score

def get_f1_score(y_pred, y_true=None):
    """Print and return the F1 score of one-class predictions.

    Predictions use the -1 (outlier) / 1 (inlier) convention, so any 0 label
    in the ground truth is mapped to -1 before scoring.

    Parameters
    ----------
    y_pred : array-like
        Predicted labels, -1 for outliers and 1 for inliers.
    y_true : array-like, optional
        Ground-truth labels using 0/1 or -1/1. When omitted, the
        notebook-level variable ``y_test`` is used, as in the original
        version of this helper.

    Returns
    -------
    The value returned by ``f1_score(y_true, y_pred)``.

    Raises
    ------
    NameError
        If ``y_true`` is omitted and no global ``y_test`` exists.
    """
    if y_true is None:
        try:
            y_true = y_test
        except NameError:
            raise NameError(
                "get_f1_score: pass y_true explicitly or define a global "
                "'y_test' before calling"
            ) from None

    # np.array copies its input, so the caller's labels are never mutated.
    y_true = np.array(y_true)
    y_true[y_true == 0] = -1

    score = f1_score(y_true, y_pred)
    print()
    print("f1_score: ", score)
    return score

### rbf

In [8]:
import collections
from sklearn.svm import OneClassSVM

# Fit a one-class SVM with an RBF kernel on the training features.
rbf = OneClassSVM(kernel='rbf', gamma='scale')
rbf.fit(X_train)

# Label each split: predict() yields 1 for inliers and -1 for outliers.
pred_correct = rbf.predict(X_test_correct)
pred_incorrect = rbf.predict(X_test_incorrect)
pred_train = rbf.predict(X_train)

# Tally the predicted labels per split.
for caption, labels in (
    ("PRED TRAIN", pred_train),
    ("PRED CORRECT", pred_correct),
    ("PRED INCORRECT ", pred_incorrect),
):
    print(caption, collections.Counter(labels))
# F1 scoring stays disabled here: the intended call, get_f1_score(pred_test),
# needs a `pred_test` array that no visible cell defines.

PRED TRAIN Counter({1: 2023, -1: 2016})
PRED CORRECT Counter({-1: 525, 1: 485})
PRED INCORRECT  Counter({-1: 4148, 1: 803})


### poly

In [9]:
import collections
from sklearn.svm import OneClassSVM

# Fit a one-class SVM with a polynomial kernel on the training features.
# NOTE(review): the model is still bound to the name `rbf` even though the
# kernel here is 'poly'; the name is kept so later references keep working.
rbf = OneClassSVM(kernel='poly', gamma='scale')
rbf.fit(X_train)

# Label each split: predict() yields 1 for inliers and -1 for outliers.
pred_correct = rbf.predict(X_test_correct)
pred_incorrect = rbf.predict(X_test_incorrect)
pred_train = rbf.predict(X_train)

# Tally the predicted labels per split.
for caption, labels in (
    ("PRED TRAIN", pred_train),
    ("PRED CORRECT", pred_correct),
    ("PRED INCORRECT ", pred_incorrect),
):
    print(caption, collections.Counter(labels))
# F1 scoring stays disabled here: the intended call, get_f1_score(pred_test),
# needs a `pred_test` array that no visible cell defines.

PRED TRAIN Counter({-1: 2033, 1: 2006})
PRED CORRECT Counter({-1: 538, 1: 472})
PRED INCORRECT  Counter({-1: 3671, 1: 1280})


### linear

In [10]:
import collections
from sklearn.svm import OneClassSVM

# Fit a one-class SVM with a linear kernel on the training features.
# NOTE(review): the model is still bound to the name `rbf` even though the
# kernel here is 'linear'; the name is kept so later references keep working.
# NOTE(review): scikit-learn documents `gamma` as a coefficient for the
# rbf/poly/sigmoid kernels, so it presumably has no effect here - confirm.
rbf = OneClassSVM(kernel='linear', gamma='scale')
rbf.fit(X_train)

# Label each split: predict() yields 1 for inliers and -1 for outliers.
pred_correct = rbf.predict(X_test_correct)
pred_incorrect = rbf.predict(X_test_incorrect)
pred_train = rbf.predict(X_train)

# Tally the predicted labels per split.
for caption, labels in (
    ("PRED TRAIN", pred_train),
    ("PRED CORRECT", pred_correct),
    ("PRED INCORRECT ", pred_incorrect),
):
    print(caption, collections.Counter(labels))
# F1 scoring stays disabled here: the intended call, get_f1_score(pred_test),
# needs a `pred_test` array that no visible cell defines.

PRED TRAIN Counter({-1: 2020, 1: 2019})
PRED CORRECT Counter({-1: 517, 1: 493})
PRED INCORRECT  Counter({-1: 4194, 1: 757})


### sigmoid

In [11]:
import collections
from sklearn.svm import OneClassSVM

# Fit a one-class SVM with a sigmoid kernel on the training features.
# NOTE(review): the model is still bound to the name `rbf` even though the
# kernel here is 'sigmoid'; the name is kept so later references keep working.
rbf = OneClassSVM(kernel='sigmoid', gamma='scale')
rbf.fit(X_train)

# Label each split: predict() yields 1 for inliers and -1 for outliers.
pred_correct = rbf.predict(X_test_correct)
pred_incorrect = rbf.predict(X_test_incorrect)
pred_train = rbf.predict(X_train)

# Tally the predicted labels per split.
for caption, labels in (
    ("PRED TRAIN", pred_train),
    ("PRED CORRECT", pred_correct),
    ("PRED INCORRECT ", pred_incorrect),
):
    print(caption, collections.Counter(labels))
# F1 scoring stays disabled here: the intended call, get_f1_score(pred_test),
# needs a `pred_test` array that no visible cell defines.

PRED TRAIN Counter({1: 2021, -1: 2018})
PRED CORRECT Counter({-1: 517, 1: 493})
PRED INCORRECT  Counter({-1: 4156, 1: 795})
