In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.cross_validation import train_test_split

In [7]:
# Locations of the two CSV files on the local disk (training set, test set).
trainDatapath, testDatapath = (
    r'C:\Users\Yanbo Huang\Desktop\Python exercises\SVM_Example_2.csv',
    r'C:\Users\Yanbo Huang\Desktop\Python exercises\SVM_Example_2_Test_data.csv',
)

In [62]:
# Load both CSV files into pandas DataFrames: training set first, then test set.
trainData=pd.read_csv(filepath_or_buffer=trainDatapath)
testData=pd.read_csv(filepath_or_buffer=testDatapath)

In [66]:
# Scatter plots of the training and testing sets, one figure each, with the
# points colored by class label.
def _scatter_by_class(data, figNum, title):
    """Draw one figure showing the rows of `data` split by class label.

    data   -- DataFrame with columns 'x', 'y' and 'label ' (the label column
              name carries a trailing space, as used throughout this notebook)
    figNum -- matplotlib figure number to draw into
    title  -- title shown above the axes
    """
    plt.figure(figNum)
    for classValue,classColor in ((0,'green'),(1,'blue')):
        # Build the mask from `data` itself so the selected rows always
        # belong to the frame that is being plotted.
        classRows=data[data['label ']==classValue]
        plt.scatter(classRows.x,classRows.y,color=classColor,label='Class {0}'.format(classValue))
    plt.title(title,fontsize=20)
    plt.xlabel('X',fontsize=18)
    plt.ylabel('Y',fontsize=18)
    plt.grid(True)
    plt.legend(loc='upper left')

_scatter_by_class(trainData,1,'training data')
# The test figure previously masked testData with trainData's labels, which
# colors test points by the wrong labels (and mis-aligns when the two frames
# differ in length); each frame is now masked with its own labels.
_scatter_by_class(testData,2,'testing data')
plt.show()

In [67]:
# Extract features and labels as NumPy arrays: the 'x' and 'y' columns form
# the feature matrix, the 'label ' column (trailing space included) the target.
features_train=trainData.loc[:,['x','y']].values
label_train=trainData.loc[:,'label '].values

features_test=testData.loc[:,['x','y']].values
label_test=testData.loc[:,'label '].values

In [68]:
# Gaussian (RBF) kernel: grid search over the penalty C and the kernel width sigma.
sigmas=[0.001,0.01,0.1,0.2,0.5,1.0,2.0,3.0,10.0,100]
# 100.0 used to be listed twice, which produced a duplicated grid row and a
# duplicated DataFrame index entry; each C value is now listed once.
Cs=[0.001,0.01,0.1,0.2,0.5,1.0,2.0,3.0,10.0,100.0]
# scikit-learn's RBF kernel is exp(-gamma*||u-v||^2), so gamma = 1/(2*sigma^2).
# A real list (not a lazy map object) is required here: it is iterated once
# per C value and indexed afterwards to recover the best gamma.
gammas=[1.0/(2.0*s**2) for s in sigmas]

# NOTE(review): C and sigma are selected by their error on the test set, so the
# reported minimum is an optimistic estimate; consider a separate validation split.
FCR=[]  # false classified ratio, appended row-major: one row per C, one column per sigma
for p in Cs:
    for g in gammas:
        svc=svm.SVC(kernel='rbf',C=p,gamma=g).fit(features_train,label_train)
        label_predict=svc.predict(features_test)
        # Fraction of test samples whose predicted label differs from the true label.
        falseRatio=float(np.mean(label_predict!=label_test))
        FCR.append(falseRatio)

FCR=np.array(FCR).reshape(len(Cs),len(sigmas))
FCR_min=FCR.min()
print('minimum false classified ratio in test dataset:{0:.4}% '.format(FCR_min*100))
# Row (C) and column (sigma/gamma) of the first minimum in row-major order.
bestRow,bestCol=np.unravel_index(FCR.argmin(),FCR.shape)
svm_C=Cs[bestRow]
svm_gamma=gammas[bestCol]
# FCR table with index in C and sigma
df=pd.DataFrame(FCR,index=Cs,columns=sigmas)
df.columns.name='C,S->'

minimum false classified ratio in test dataset:19.16% 


In [69]:
# Show the false-classified-ratio table for the RBF kernel
# (rows indexed by C, columns by sigma).
df

"C,S->",0.001,0.01,0.1,0.2,0.5,1.0,2.0,3.0,10.0,100.0
0.001,0.5,0.5,0.498004,0.499002,0.499002,0.49501,0.49501,0.492016,0.457086,0.191617
0.01,0.5,0.5,0.498004,0.499002,0.499002,0.49501,0.49501,0.492016,0.469062,0.232535
0.1,0.5,0.5,0.498004,0.499002,0.498004,0.494012,0.489022,0.486028,0.457086,0.232535
0.2,0.5,0.5,0.499002,0.496008,0.49501,0.493014,0.49002,0.486028,0.457086,0.225549
0.5,0.5,0.5,0.498004,0.498004,0.494012,0.49501,0.49002,0.487026,0.46008,0.267465
1.0,0.5,0.5,0.498004,0.497006,0.494012,0.494012,0.49002,0.486028,0.457086,0.263473
2.0,0.5,0.5,0.498004,0.497006,0.494012,0.494012,0.489022,0.486028,0.457086,0.235529
3.0,0.5,0.5,0.498004,0.497006,0.494012,0.494012,0.489022,0.486028,0.458084,0.260479
10.0,0.5,0.5,0.498004,0.497006,0.494012,0.493014,0.486028,0.483034,0.46008,0.221557
100.0,0.5,0.5,0.498004,0.497006,0.494012,0.493014,0.491018,0.486028,0.458084,0.224551


In [72]:
# Polynomial kernel: grid search over the penalty C and the polynomial degree.
degrees=[2,3,4,5]
# 100.0 used to be listed twice, which produced a duplicated grid row and a
# duplicated DataFrame index entry; each C value is now listed once.
Cs=[0.001,0.01,0.1,0.2,0.5,1.0,2.0,3.0,10.0,100.0]

# NOTE(review): C and degree are selected by their error on the test set, so the
# reported minimum is an optimistic estimate; consider a separate validation split.
FCR=[]  # false classified ratio, appended row-major: one row per C, one column per degree
# polynomial kernel model
for c in Cs:
    for d in degrees:
        svc=svm.SVC(kernel='poly',degree=d,C=c).fit(features_train,label_train)
        label_predict=svc.predict(features_test)
        # Fraction of test samples whose predicted label differs from the true label.
        falseRatio=float(np.mean(label_predict!=label_test))
        FCR.append(falseRatio)
FCR=np.array(FCR).reshape(len(Cs),len(degrees))
print('minimum false classified ratio in test dataset:{0:.4}% '.format(FCR.min()*100))
# Row (C) and column (degree) of the first minimum in row-major order.
bestRow,bestCol=np.unravel_index(FCR.argmin(),FCR.shape)
svm_C=Cs[bestRow]
svm_degree=degrees[bestCol]

# FCR table with index in C and degree
df=pd.DataFrame(FCR,index=Cs,columns=degrees)
df.columns.name='C,d->'

minimum false classified ratio in test dataset:0.0% 


In [73]:
# Show the false-classified-ratio table for the polynomial kernel
# (rows indexed by C, columns by degree).
df

"C,d->",2,3,4,5
0.001,0.429142,0,0.239521,0
0.01,0.0,0,0.0,0
0.1,0.46507,0,0.273453,0
0.2,0.366267,0,0.49501,0
0.5,0.358283,0,0.322355,0
1.0,0.406188,0,0.095808,0
2.0,0.222555,0,0.235529,0
3.0,0.270459,0,0.214571,0
10.0,0.49501,0,0.361277,0
100.0,0.49501,0,0.0,0
