In [1]:
import os

import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Read Data and Prune Columns

In [2]:
def prune_columns(dfm: pd.DataFrame, cols: list):
    """Return a new DataFrame that lacks the columns named in ``cols``.

    Parameters
    ----------
    dfm : pd.DataFrame
        Source table; it is not modified.
    cols : list
        Column labels to remove.

    Returns
    -------
    pd.DataFrame
        ``dfm`` without the listed columns.

    Raises
    ------
    KeyError
        If any label in ``cols`` is not a column of ``dfm``.
    """
    return dfm.drop(columns=cols)

In [3]:
def read_data(ddir: str):
    """Load the CSV at ``ddir`` and return it without the 'ImagesName' column.

    Parameters
    ----------
    ddir : str
        Location of the CSV file, passed straight to ``pd.read_csv``.

    Returns
    -------
    pd.DataFrame
        The parsed table with the 'ImagesName' column removed via
        ``prune_columns``.
    """
    return prune_columns(pd.read_csv(ddir), ['ImagesName'])

# Split and Scale Data (Standard Scaler)

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [5]:
def split_data(dataframe: pd.DataFrame, label_col: str = 'Labels',
               test_size: float = 0.2, random_state: int = 42,
               stratify: bool = False):
    """Split a feature table into train/test parts and standardize the features.

    Parameters
    ----------
    dataframe : pd.DataFrame
        Table holding the feature columns plus the target column.
    label_col : str, default 'Labels'
        Name of the target column; every other column is used as a feature.
    test_size : float, default 0.2
        Forwarded to ``train_test_split``.
    random_state : int, default 42
        Seed forwarded to ``train_test_split`` so the split is repeatable.
    stratify : bool, default False
        When True the split is stratified on the target column; the default
        keeps the plain random split.

    Returns
    -------
    tuple
        ``(X_train_scaled, X_test_scaled, y_train, y_test)``. The two feature
        parts are the outputs of ``StandardScaler``; the two target parts are
        the corresponding slices of ``dataframe[label_col]``.
    """
    X = dataframe.drop(columns=[label_col])
    y = dataframe[label_col]

    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=test_size,
        random_state=random_state,
        stratify=y if stratify else None,
    )

    # Fit the scaler on the training part only and reuse its statistics for
    # the test part, so no information from the test rows enters the scaling.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    return X_train_scaled, X_test_scaled, y_train, y_test

# Build Model and Evaluate

In [6]:
def evaluate(y_true, y_pred):
    """Score predictions against the reference labels.

    Parameters
    ----------
    y_true
        Reference labels.
    y_pred
        Predicted labels, aligned with ``y_true``.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall, f1)``, each computed by the matching
        scikit-learn scorer called with its default arguments.
    """
    scorers = (accuracy_score, precision_score, recall_score, f1_score)
    return tuple(scorer(y_true, y_pred) for scorer in scorers)

In [7]:
def build_model(X_train, y_train, X_test,
                y_test, kernel_list=('linear', 'poly', 'rbf')):
    """Train one SVC per kernel and tabulate its test-set scores.

    Parameters
    ----------
    X_train, y_train
        Training features and labels passed to ``SVC.fit``.
    X_test, y_test
        Held-out features and labels used for scoring.
    kernel_list : iterable of str, default ('linear', 'poly', 'rbf')
        Kernel names, each forwarded to ``SVC(kernel=...)``.

    Returns
    -------
    pd.DataFrame
        One row per kernel with the columns ``kernel``, ``test_accuracy``,
        ``test_precision``, ``test_recall`` and ``test_f1``. The columns are
        present even when ``kernel_list`` is empty.
    """
    columns = ['kernel', 'test_accuracy', 'test_precision', 'test_recall', 'test_f1']
    recorded_results = []
    for k in kernel_list:
        print(f'Processing {k} kernel...')
        clf = SVC(kernel=k)
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        test_accuracy, test_precision, test_recall, test_f1 = evaluate(y_test, y_pred)
        recorded_results.append({
            'kernel': k,
            'test_accuracy': test_accuracy,
            'test_precision': test_precision,
            'test_recall': test_recall,
            'test_f1': test_f1,
        })
    print('done!')

    # A list of per-kernel records goes straight to the DataFrame constructor;
    # from_dict is meant for dict input.
    return pd.DataFrame(recorded_results, columns=columns)

# Main

## Read

In [8]:
# Location of the features CSV that read_data loads.
# NOTE(review): this is an absolute path into one user's home directory, so the
# notebook cannot run on another machine without editing this line. Consider a
# path relative to the project or a configurable data directory.
features_data_dir = r'/Users/teguhsatya/Dev/segeralulus/exportedDataframe/a_features.csv'

In [9]:
# Load the features CSV; read_data drops the 'ImagesName' column after reading.
df = read_data(features_data_dir)

In [10]:
# Preview five randomly drawn rows. No random_state is passed, so the rows
# shown can differ from one run to the next.
df.sample(5)

Unnamed: 0,CDR,VCDR,RDR,I,S,N,T,Labels
14,0.444737,0.531447,0.139474,118,155,138,148,0
78,0.337621,0.373665,0.266881,107,113,123,115,0
118,0.529745,0.510929,0.186969,121,147,114,115,1
54,0.343537,0.338926,0.221088,131,125,103,132,0
66,0.441667,0.353333,0.177778,183,175,101,154,0


In [11]:
# Separate features from 'Labels', hold out a test part, and standardize the
# features with a scaler fitted on the training part only.
X_train_scaled, X_test_scaled, y_train, y_test = split_data(df)

In [12]:
# SVC kernels to compare; each name is passed to SVC(kernel=...) by build_model.
kernel_list = ['linear', 'poly', 'rbf']

In [13]:
# Fit one SVC per kernel and collect its test-set metrics into a DataFrame.
reports = build_model(X_train_scaled, y_train, X_test_scaled, y_test, kernel_list)

Processing linear kernel...
Processing poly kernel...
Processing rbf kernel...
done!


In [14]:
# Display the per-kernel metrics table.
reports

Unnamed: 0,kernel,test_accuracy,test_precision,test_recall,test_f1
0,linear,0.705882,0.611111,0.785714,0.6875
1,poly,0.529412,0.461538,0.857143,0.6
2,rbf,0.823529,0.722222,0.928571,0.8125


In [16]:
# Persist the per-kernel metrics. to_csv does not create missing folders, so
# make sure the target directory exists before writing.
os.makedirs('SVMResult', exist_ok=True)
reports.to_csv('SVMResult/svmresult.csv', index=False)