## Simple Classification Using SVM

In [None]:
import pandas as pd
from sklearn import svm
import matplotlib.pyplot as plt

#Flags dataset used: https://archive.ics.uci.edu/ml/datasets/Flags
flags_raw = pd.read_csv("FlagsDataset.txt")

#Converting the string object columns to categorical integer codes as needed for the SVM solvers.
#The scikit-learn SVM estimators take a numerical matrix as input, so string columns are not allowed.
for column in ('NameOfCountry', 'Topleft', 'Botright', 'Mainhue'):
    flags_raw[column] = flags_raw[column].astype('category').cat.codes

#Using holdout set cross-validation method here
#Randomize data. The fixed seed makes the shuffle, and therefore every accuracy
#reported from this split, reproducible when the notebook is re-run.
flags_raw = flags_raw.sample(frac=1, random_state=42)

#First half of the shuffled rows is used for training, the remainder for testing
split_index = len(flags_raw) // 2
flags_training = flags_raw.iloc[:split_index]
flags_test = flags_raw.iloc[split_index:]

In [None]:
#Predicting which flag has the color red as part of the design

#The features are all the columns except Red (that's the one we want to predict).
#The list is defined once so the training and test matrices always use the same columns.
red_feature_columns = ['NameOfCountry', 'Landmass', 'Zone', 'Area', 'Population',
                       'Language', 'Religion', 'Bars', 'Stripes', 'Colours', 'Green',
                       'Blue', 'Gold', 'White', 'Black', 'Orange', 'Mainhue', 'Circles',
                       'Crosses', 'Saltires', 'Quarters', 'Sunstars', 'Crescent', 'Triangle',
                       'Icon', 'Animate', 'Text', 'Topleft', 'Botright']
features_train = flags_training[red_feature_columns]

#Training on the Red feature
y_train = flags_training["Red"].tolist()

#Creating and fitting the SVM model using the training x/y data
SVMModel = svm.SVC(kernel="linear")
SVMModel.fit(features_train, y_train)

#Predicting how well we did on the test data.
#Both the features and the labels must come from the held-out half (flags_test);
#otherwise predictions for training rows get compared against unrelated test labels.
features_test = flags_test[red_feature_columns]
y_true = flags_test["Red"].tolist()

y_predict = SVMModel.predict(features_test)
print(y_predict)
print(y_true)

#Count matches pair by pair instead of relying on list == array broadcasting
correct = sum(truth == guess for truth, guess in zip(y_true, y_predict))
print("Accuracy: ", 100*correct/len(y_predict), "%")


## Multi-Class Classification Using SVM

In [None]:
#Predicting which landmass the flag is contained in (i.e. 1=N.America, 2=S.America, 3=Europe, 4=Africa, 5=Asia, 6=Oceania)

#The features are all the columns except Landmass (that's the one we want to predict).
#The list is defined once so the training and test matrices always use the same columns.
landmass_feature_columns = ['NameOfCountry', 'Zone', 'Area', 'Population',
                            'Language', 'Religion', 'Bars', 'Stripes', 'Colours', 'Red', 'Green',
                            'Blue', 'Gold', 'White', 'Black', 'Orange', 'Mainhue', 'Circles',
                            'Crosses', 'Saltires', 'Quarters', 'Sunstars', 'Crescent', 'Triangle',
                            'Icon', 'Animate', 'Text', 'Topleft', 'Botright']
features_train = flags_training[landmass_feature_columns]

#Training on the Landmass feature
y_train = flags_training["Landmass"].tolist()

#Creating and fitting the SVM model using the training x/y data.
#No kernel is passed, so SVC uses its default (rbf - Radial Basis Function); it can be
#changed to linear, polynomial, and more through the kernel argument.
SVMModel = svm.SVC(C=1)
SVMModel.fit(features_train, y_train)

#Predicting how well we did on the test data: the features must come from the held-out half
features_test = flags_test[landmass_feature_columns]

y_true_test = flags_test["Landmass"].tolist()
y_predict_test = SVMModel.predict(features_test)

#Training accuracy is measured on the very rows the model was fitted on
y_true_training = y_train
y_predict_training = SVMModel.predict(features_train)

#Count matches pair by pair instead of relying on list == array broadcasting
correct_training = sum(truth == guess for truth, guess in zip(y_true_training, y_predict_training))
correct_test = sum(truth == guess for truth, guess in zip(y_true_test, y_predict_test))

print("Training Accuracy: ", 100*correct_training/len(y_predict_training), "%")
print("Test Accuracy: ", 100*correct_test/len(y_predict_test), "%")

## Intuitive SVM Visualization on the Iris Dataset

In [None]:
#Showcasing different SVM kernels and regularizers
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm, datasets

# import some data to play with
iris = datasets.load_iris()
iris_visualize = pd.DataFrame(iris.data, columns=iris.feature_names)
print(iris_visualize.head())

X = iris.data[:, :2]  # we only take the first two features so the decision
                      # regions can be drawn on a 2-D plane
y = iris.target

h = .02  # step size in the mesh

# we create an instance of each SVM and fit our data. We do not scale our
# data since we want to plot the support vectors
C = 0.1  # SVM regularization parameter
poly_degree = 5  # degree of the polynomial kernel; reused in the plot title so the two cannot drift apart
svc = svm.SVC(kernel='linear', C=C).fit(X, y)
rbf_svc = svm.SVC(kernel='rbf', gamma=20, C=C).fit(X, y)
poly_svc = svm.SVC(kernel='poly', degree=poly_degree, C=C).fit(X, y)
lin_svc = svm.LinearSVC(C=C).fit(X, y)

# create a mesh to plot in, padded by one unit around the data on each axis
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                     np.arange(y_min, y_max, h))

# title for the plots, in the same order as the classifiers plotted below
titles = ['SVC with linear kernel',
          'LinearSVC (linear kernel)',
          'SVC with RBF kernel',
          'SVC with polynomial (degree {}) kernel'.format(poly_degree)]

# one panel per classifier on a 2x2 grid; the spacing only needs to be set once
fig, axes = plt.subplots(2, 2)
fig.subplots_adjust(wspace=0.4, hspace=0.4)

for ax, clf, title in zip(axes.ravel(), (svc, lin_svc, rbf_svc, poly_svc), titles):
    # Plot the decision boundary. For that, we will assign a color to each
    # point in the mesh [x_min, x_max]x[y_min, y_max].
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])

    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    ax.contourf(xx, yy, Z, cmap=plt.cm.coolwarm, alpha=0.8)

    # Plot also the training points
    ax.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.coolwarm)
    ax.set_xlabel('Sepal length')
    ax.set_ylabel('Sepal width')
    ax.set_xlim(xx.min(), xx.max())
    ax.set_ylim(yy.min(), yy.max())
    ax.set_xticks(())
    ax.set_yticks(())
    ax.set_title(title)

plt.show()

#Reference: this block of code was obtained from: http://scikit-learn.org/stable/auto_examples/svm/plot_iris.html#sphx-glr-auto-examples-svm-plot-iris-py