In [31]:
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn import preprocessing as prep
from sklearn.multiclass import OneVsRestClassifier as OVR

In [32]:
# hyper-parameter
my_C = 1

In [33]:
def transform_y(y_t, class_num, flag=0):
        # print(class_num)
        y = np.array(y_t)
        if flag == 0:
        # the target class we want to distinguish is set to -1 
            y[y == class_num] = -1
            y[y != -1] = 1
            return y
        else:
            y[y == class_num] = 10
            y[y != 10] = 0
            y[y == 10] = 1
            return y

In [34]:
# ------------- data preparation -------------
df_train = pd.read_table("train.txt")
df_test = pd.read_table("test.txt")

data_train = df_train.to_numpy()
data_test = df_test.to_numpy()

x_train = np.delete(data_train, [0], axis=1)
y_train = data_train[:, 0]

x_test = np.delete(data_test, [0], axis=1)
y_test = data_test[:, 0]

# scaling for x train dataset to standard distribution
scaler_1 = prep.StandardScaler().fit(x_train)
x_tr_scaled = scaler_1.transform(x_train)
scaler_2 = prep.StandardScaler().fit(x_test)
x_ts_scaled = scaler_2.transform(x_test)


# st for Setosa; vs for Versicolour; vg for Virginica
y_train_st = transform_y(y_train, 0)
y_train_vs = transform_y(y_train, 1)
y_train_vg = transform_y(y_train, 2)

sig_y_train_st = transform_y(y_train, 0, flag=1)
sig_y_train_vs = transform_y(y_train, 1, flag=1)
sig_y_train_vg = transform_y(y_train, 2, flag=1)


y_class = [y_train_st, y_train_vs, y_train_vg]
sig_y_class = [sig_y_train_st, sig_y_train_vs, sig_y_train_vg]

In [35]:
# ---------------- poly ----------------
namelist = ["SVM_poly2.txt", "SVM_poly3.txt"]
for i in range(2):
    with open(namelist[i], 'w') as f:
        poly_clf = OVR(SVC(C=my_C, kernel="poly", degree=i+2)).fit(x_train, y_train)
        poly_tr_err = 1 - poly_clf.score(x_train, y_train)
        poly_ts_err = 1 - poly_clf.score(x_test, y_test)
        print("Poly with degree", i+2)
        print("training error:", poly_tr_err)
        print("testing error:", poly_ts_err)
        
        

        f.write(str(poly_tr_err))
        f.write("\n")
        f.write(str(poly_ts_err))
        f.write("\n")

        for j in y_class:
            poly_sub = SVC(C=my_C, kernel="poly", degree=i+2).fit(x_train, j)
            b = poly_sub.intercept_[0]
            svc_num = poly_sub.n_support_[0]
            svid = poly_sub.support_[:]

            f.write(str(b))
            f.write("\n")
            id = ', '.join([str(k) for k in svid])
            f.write(id)
            f.write("\n")

f.close()

Poly with degree 2
training error: 0.033333333333333326
testing error: 0.033333333333333326
Poly with degree 3
training error: 0.025000000000000022
testing error: 0.0


In [36]:
# ---------------- RBF ----------------

with open("SVM_rbf.txt", 'w') as f:

    rbf_clf = OVR(SVC(C=my_C, kernel="rbf", gamma=0.5)).fit(x_train, y_train)
    rbf_tr_err = 1 - rbf_clf.score(x_train, y_train)
    rbf_ts_err = 1 - rbf_clf.score(x_test, y_test)
    
    print("RBF Kernel")
    print("training error:", rbf_tr_err)
    print("testing error:", rbf_ts_err)

    f.write(str(rbf_tr_err))
    f.write("\n")
    f.write(str(rbf_ts_err))
    f.write("\n")

    for rbf_y in y_class:
        rbf_sub = SVC(C=my_C, kernel="rbf", gamma=0.5).fit(x_train, rbf_y)
        b = rbf_sub.intercept_[0]
        svc_num = rbf_sub.n_support_[0]
        svid = rbf_sub.support_[:]

        f.write(str(b))
        f.write("\n")
        id = ', '.join([str(k) for k in svid])
        f.write(id)
        f.write("\n")

f.close()

RBF Kernel
training error: 0.033333333333333326
testing error: 0.033333333333333326


In [37]:
# ---------------- sigmoid ----------------

with open("SVM_sigmoid.txt", 'w') as f:

    model = SVC(C=my_C, kernel="sigmoid", gamma = 'auto')
    sig_clf = OVR(model).fit(x_tr_scaled, y_train)

    sig_tr_err = 1 - sig_clf.score(x_tr_scaled, y_train)
    sig_ts_err = 1 - sig_clf.score(x_ts_scaled, y_test)
    
    print("Sigmoid Kernel")
    print("training error:", sig_tr_err)
    print("testing error:", sig_ts_err)

    f.write(str(sig_tr_err))
    f.write("\n")
    f.write(str(sig_ts_err))
    f.write("\n")


    for sig_y in sig_y_class:
        # print(sig_y)
        # print("iter: ", iter)
        sig_sub = SVC(C=my_C, kernel="sigmoid", gamma= 'auto').fit(x_tr_scaled, sig_y)
        b = sig_sub.intercept_[0]
        svc_num = sig_sub.n_support_[0]
        svid = sig_sub.support_[:]

        f.write(str(b))
        f.write("\n")
        id = ', '.join([str(k) for k in svid])
        f.write(id)
        f.write("\n")


f.close()


Sigmoid Kernel
training error: 0.1166666666666667
testing error: 0.2666666666666667
