# OneClassSVM
https://scikit-learn.org/stable/modules/generated/sklearn.svm.OneClassSVM.html

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Read the dataset and drop every column whose name starts with 'Unnamed'
df = pd.read_csv('data/selected_data1.csv')
keep_mask = ~df.columns.str.contains('^Unnamed')
df = df.loc[:, keep_mask]

# Features are all columns but the last one; the last column is the label
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Standardize the features and re-attach the label under the name 'target'
# NOTE(review): the scaler is fitted on every row (both targets, before any
# train/test split) — confirm this is intended for the evaluation below.
scaler = StandardScaler()
df = pd.DataFrame(scaler.fit_transform(X), columns=X.columns.values)
df['target'] = y

# Split the standardized frame by label value
is_correct = df['target'] == 1
is_incorrect = df['target'] == 0
df_correct = df.loc[is_correct]
df_incorrect = df.loc[is_incorrect]

# Report the size of each group
print(f'Target 1 shape: {df_correct.shape}')
print(f'Target 0 shape: {df_incorrect.shape}')

Target 1 shape: (5049, 25)
Target 0 shape: (4951, 25)


In [2]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the target-1 rows for testing; the remaining 80% form the training set
df_train, df_test_correct = train_test_split(
    df_correct,
    test_size=0.2,
    random_state=0,
)
# Every target-0 row is used for testing only (same object, not a copy)
df_test_incorrect = df_incorrect

# Feature matrices: all columns except the trailing label column
X_train, X_test_correct, X_test_incorrect = (
    frame.iloc[:, :-1]
    for frame in (df_train, df_test_correct, df_test_incorrect)
)

In [3]:
from sklearn.decomposition import PCA

# Fit PCA on the training features only, keeping as many components as are
# needed to explain 97% of the variance, then project every split with it.
# The features are re-derived from the untouched df_* frames (all columns but
# the trailing label column) instead of being read back from X_train /
# X_test_*, which this cell overwrites: re-running the cell therefore never
# applies PCA to data that has already been projected.
pca = PCA(n_components=0.97).fit(df_train.iloc[:, :-1])
X_train = pca.transform(df_train.iloc[:, :-1])
X_test_correct = pca.transform(df_test_correct.iloc[:, :-1])
X_test_incorrect = pca.transform(df_test_incorrect.iloc[:, :-1])


**Relevant parameters for tuning**

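- **nu**: roughly the proportion of training samples you expect to be outliers. Formally, it is an upper bound on the fraction of training errors and a lower bound on the fraction of support vectors; it must lie in (0, 1].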

In [4]:
# -1 = outlier
#  1 = inlier
def MyOneClassSVM(kernel='rbf', nu=0.2):
    """Fit a One-Class SVM on ``X_train`` and print prediction counts per split.

    The model is fitted on the notebook-level ``X_train`` and then used to
    predict ``X_train``, ``X_test_correct`` and ``X_test_incorrect``. For each
    split, the number of samples predicted as inlier (1) and as outlier (-1)
    is printed. Nothing is returned.

    Parameters
    ----------
    kernel : str, default 'rbf'
        Kernel forwarded to ``sklearn.svm.OneClassSVM``.
    nu : float, default 0.2
        ``nu`` forwarded to ``sklearn.svm.OneClassSVM`` (an upper bound on the
        fraction of training errors; must lie in (0, 1]). The default keeps
        the value that was previously hard-coded.
    """
    import collections
    from sklearn.svm import OneClassSVM

    model = OneClassSVM(kernel=kernel, nu=nu).fit(X_train)

    splits = (
        ("PRED TRAIN", X_train),
        ("PRED CORRECT", X_test_correct),
        ("PRED INCORRECT", X_test_incorrect),
    )
    for label, X_split in splits:
        # .tolist() turns the NumPy integers into plain ints so the printed
        # keys read 1 / -1 however the installed NumPy formats its scalars.
        counts = collections.Counter(model.predict(X_split).tolist())
        print(label, counts)

### rbf

In [5]:
# Prediction counts with the RBF kernel
MyOneClassSVM('rbf')

PRED TRAIN Counter({1: 3229, -1: 810})
PRED CORRECT Counter({1: 802, -1: 208})
PRED INCORRECT Counter({-1: 3122, 1: 1829})


### poly

In [6]:
# Prediction counts with the polynomial kernel
MyOneClassSVM('poly')

PRED TRAIN Counter({1: 3204, -1: 835})
PRED CORRECT Counter({1: 782, -1: 228})
PRED INCORRECT Counter({1: 3378, -1: 1573})


### linear

In [7]:
# Prediction counts with the linear kernel
MyOneClassSVM('linear')

PRED TRAIN Counter({-1: 3499, 1: 540})
PRED CORRECT Counter({-1: 873, 1: 137})
PRED INCORRECT Counter({-1: 3613, 1: 1338})


### sigmoid

In [8]:
# Prediction counts with the sigmoid kernel
MyOneClassSVM('sigmoid')

PRED TRAIN Counter({1: 3231, -1: 808})
PRED CORRECT Counter({1: 822, -1: 188})
PRED INCORRECT Counter({1: 4145, -1: 806})
