In [1]:
import pandas as pd
import numpy as np
from random import randint

# Data Import, Split, and Preparation

* First we need to import our data, then split it so we can do the required binary classifications.
* We use the file 'bezdekIris.data' because, according to the comments on the dataset's site, it is the corrected version of the data.

In [2]:
# The file has no header row, so the column names are supplied explicitly:
# four feature columns ('1'-'4') followed by the class label ('name').
# The default comma delimiter is what the file uses, so no delimiter option is passed.
dataset = pd.read_csv('bezdekIris.data', header=None, names=['1', '2', '3', '4', 'name'])
dataset.head(10)

Unnamed: 0,1,2,3,4,name
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
5,5.4,3.9,1.7,0.4,Iris-setosa
6,4.6,3.4,1.4,0.3,Iris-setosa
7,5.0,3.4,1.5,0.2,Iris-setosa
8,4.4,2.9,1.4,0.2,Iris-setosa
9,4.9,3.1,1.5,0.1,Iris-setosa


* We use the pandas command groupby in order to access the data of each class separately.

In [3]:
# Group by the plain column label rather than a one-element list: with a list key,
# newer pandas versions treat every group name as a 1-tuple, so the scalar lookups
# used afterwards (e.g. get_group('Iris-setosa')) would warn or fail.
grouped_data = dataset.groupby('name')

In [4]:
# Preview only the first rows of the group instead of dumping the whole frame.
grouped_data.get_group('Iris-setosa').head(10)

Unnamed: 0,1,2,3,4,name
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
5,5.4,3.9,1.7,0.4,Iris-setosa
6,4.6,3.4,1.4,0.3,Iris-setosa
7,5.0,3.4,1.5,0.2,Iris-setosa
8,4.4,2.9,1.4,0.2,Iris-setosa
9,4.9,3.1,1.5,0.1,Iris-setosa


* We save the names of the classes in the list named class_names.

In [5]:
# Iterating the groups mapping yields its keys, i.e. the class labels.
class_names = [label for label in grouped_data.groups]
class_names

['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']

* We create individual matrices for each class.
* These are saved in the list named split_data_matrices.
* The order of the arrays is the same as the order of class_names. This means that the arrays are indexed as follows:
    * split_data_matrices[0] ==> Iris-setosa
    * split_data_matrices[1] ==> Iris-versicolor
    * split_data_matrices[2] ==> Iris-virginica

In [6]:
# One feature matrix per class, in the same order as class_names.
# The 'name' column is dropped so only the four feature columns remain.
# DataFrame.as_matrix() no longer exists in current pandas; to_numpy() is its replacement.
split_data_matrices = [
    grouped_data.get_group(class_name).drop(columns=['name']).to_numpy()
    for class_name in class_names
]

In [7]:
# Display the per-class feature matrices (list order follows class_names).
split_data_matrices

[array([[ 5.1,  3.5,  1.4,  0.2],
        [ 4.9,  3. ,  1.4,  0.2],
        [ 4.7,  3.2,  1.3,  0.2],
        [ 4.6,  3.1,  1.5,  0.2],
        [ 5. ,  3.6,  1.4,  0.2],
        [ 5.4,  3.9,  1.7,  0.4],
        [ 4.6,  3.4,  1.4,  0.3],
        [ 5. ,  3.4,  1.5,  0.2],
        [ 4.4,  2.9,  1.4,  0.2],
        [ 4.9,  3.1,  1.5,  0.1],
        [ 5.4,  3.7,  1.5,  0.2],
        [ 4.8,  3.4,  1.6,  0.2],
        [ 4.8,  3. ,  1.4,  0.1],
        [ 4.3,  3. ,  1.1,  0.1],
        [ 5.8,  4. ,  1.2,  0.2],
        [ 5.7,  4.4,  1.5,  0.4],
        [ 5.4,  3.9,  1.3,  0.4],
        [ 5.1,  3.5,  1.4,  0.3],
        [ 5.7,  3.8,  1.7,  0.3],
        [ 5.1,  3.8,  1.5,  0.3],
        [ 5.4,  3.4,  1.7,  0.2],
        [ 5.1,  3.7,  1.5,  0.4],
        [ 4.6,  3.6,  1. ,  0.2],
        [ 5.1,  3.3,  1.7,  0.5],
        [ 4.8,  3.4,  1.9,  0.2],
        [ 5. ,  3. ,  1.6,  0.2],
        [ 5. ,  3.4,  1.6,  0.4],
        [ 5.2,  3.5,  1.5,  0.2],
        [ 5.2,  3.4,  1.4,  0.2],
        [ 4.7,

* We then make the necessary combinations that will be used for the classification.
* All data points in the first matrix to be combined will be marked with a +1, while those in the second will be marked with a -1.
* The data will be combined and then shuffled, so there's no clear demarcation between the two classes.
* These are saved in the list combined_data_arrays.
* The combinations are indexed as follows (where the first listed class name is the "positive" case and the second the "negative" case):
    * combined_data_arrays[0] ==> Iris-setosa, Iris-versicolor
    * combined_data_arrays[1] ==> Iris-setosa, Iris-virginica
    * combined_data_arrays[2] ==> Iris-versicolor, Iris-virginica

In [8]:
from sklearn.utils import shuffle

# Build every unordered pair of classes (first of the pair = +1, second = -1),
# append the label as a trailing column, stack the two classes and shuffle the rows.
combined_data_arrays = []
class_count = len(split_data_matrices)
for pos_idx in range(class_count - 1):
    positive = split_data_matrices[pos_idx]
    for neg_idx in range(pos_idx + 1, class_count):
        negative = split_data_matrices[neg_idx]
        # label columns: +1 for every row of the first class, -1 for every row of the second
        plus_column = np.ones((positive.shape[0], 1))
        minus_column = np.full((negative.shape[0], 1), -1)
        labelled_positive = np.concatenate((positive, plus_column), axis=1)
        labelled_negative = np.concatenate((negative, minus_column), axis=1)
        stacked = np.concatenate((labelled_positive, labelled_negative), axis=0)
        # fixed random_state keeps the shuffled order the same on every run
        combined_data_arrays.append(shuffle(stacked, random_state=951982))

In [9]:
# Display the shuffled pairwise arrays; the last column of each row is the +1 / -1 label.
combined_data_arrays

[array([[ 5.4,  3.7,  1.5,  0.2,  1. ],
        [ 6.1,  2.8,  4. ,  1.3, -1. ],
        [ 6. ,  2.9,  4.5,  1.5, -1. ],
        [ 5.4,  3.9,  1.3,  0.4,  1. ],
        [ 6.8,  2.8,  4.8,  1.4, -1. ],
        [ 5. ,  3.6,  1.4,  0.2,  1. ],
        [ 5.8,  2.7,  3.9,  1.2, -1. ],
        [ 5.2,  2.7,  3.9,  1.4, -1. ],
        [ 6.3,  3.3,  4.7,  1.6, -1. ],
        [ 4.9,  3. ,  1.4,  0.2,  1. ],
        [ 4.7,  3.2,  1.6,  0.2,  1. ],
        [ 5.6,  2.7,  4.2,  1.3, -1. ],
        [ 4.8,  3. ,  1.4,  0.1,  1. ],
        [ 4.3,  3. ,  1.1,  0.1,  1. ],
        [ 6.3,  2.3,  4.4,  1.3, -1. ],
        [ 4.8,  3.1,  1.6,  0.2,  1. ],
        [ 6. ,  3.4,  4.5,  1.6, -1. ],
        [ 5.1,  3.5,  1.4,  0.2,  1. ],
        [ 5.7,  2.8,  4.5,  1.3, -1. ],
        [ 5. ,  2. ,  3.5,  1. , -1. ],
        [ 5.8,  2.7,  4.1,  1. , -1. ],
        [ 4.5,  2.3,  1.3,  0.3,  1. ],
        [ 5.9,  3.2,  4.8,  1.8, -1. ],
        [ 5.2,  3.4,  1.4,  0.2,  1. ],
        [ 5.7,  4.4,  1.5,  0.4,  1. ],


# Perceptron

In [10]:
class Perceptron:
    """Two-class perceptron with a built-in train/test split.

    ``data`` is a 2-D array: feature columns followed by one label column in
    which positive values mark one class and negative values the other.
    A column of ones is prepended to the features so that the first weight
    acts as the bias term.

    Decision rule used everywhere in this class: a score ``w . x > 0`` is a
    positive prediction, anything else (including exactly 0) is negative.
    """

    def __init__(self,data):
        """Split ``data`` into training and testing sets and add the bias column.

        The first ``(rows // 10) * 8`` rows form the training set and the
        remaining rows the testing set. Rows are taken in the order given, so
        ``data`` is expected to be shuffled already.
        """
        split_point = (data.shape[0] // 10) * 8
        train_part, test_part = np.vsplit(data,[split_point])
        self.train_label = train_part[:,-1:]
        self.train_rows = self.train_label.shape[0]
        self.trainset = self._with_bias(train_part[:,:-1])
        self.test_label = test_part[:,-1:]
        self.test_rows = self.test_label.shape[0]
        self.testset = self._with_bias(test_part[:,:-1])

    @staticmethod
    def _with_bias(features):
        """Return ``features`` with a leading column of ones (the bias input)."""
        ones = np.ones((features.shape[0],1))
        return np.concatenate((ones,features),axis=1)

    def test_w(self,w):
        """Return True if ``w`` classifies every training row correctly, else False.

        A row is correct when its score is > 0 and its label is positive, or
        its score is <= 0 and its label is negative.
        """
        scores = np.dot(self.trainset,w)
        labels = self.train_label[:,0]
        predicted_positive = scores > 0
        correct = (predicted_positive & (labels > 0)) | (~predicted_positive & (labels < 0))
        # bool() so callers get a plain Python bool rather than a numpy value
        return bool(correct.all())

    def train(self,max_iterations=None):
        """Run the Perceptron Learning Algorithm and return the weight vector.

        Starts from random weights in [0, 5), repeatedly picks a random
        training row and corrects the weights if that row is misclassified,
        until every training row is classified correctly.

        max_iterations: optional cap on the number of picks. With the default
        ``None`` the loop runs until convergence, which never happens if the
        training set is not linearly separable.

        Raises ValueError if the training set is empty and RuntimeError if
        ``max_iterations`` is reached without convergence.
        """
        if self.train_rows == 0:
            raise ValueError("cannot train: the training set is empty")
        self.w = np.random.rand(self.trainset.shape[1])*5
        picks = 0
        while True:
            random_point = randint(0,self.train_rows-1)
            x = self.trainset[random_point]
            x_label = self.train_label[random_point][0]
            score = np.dot(self.w,x)
            # score == 0 counts as a negative prediction (same rule as test_w / test),
            # so a positive row sitting exactly on the boundary must trigger an update;
            # otherwise it would stay misclassified forever.
            if score <= 0 and x_label > 0:
                self.w = self.w + x
            elif score > 0 and x_label < 0:
                self.w = self.w - x
            if self.test_w(self.w):
                return self.w
            picks += 1
            if max_iterations is not None and picks >= max_iterations:
                raise RuntimeError(
                    "perceptron did not converge within %d iterations" % max_iterations)

    def test(self):
        """Predict +1 / -1 for every testing row using the trained ``self.w``.

        Stores the predictions in ``self.predictions`` (list) and
        ``self.pred_array`` (numpy array); returns nothing.
        """
        scores = np.dot(self.testset,self.w)
        self.predictions = [1 if score > 0 else -1 for score in scores]
        self.pred_array = np.array(self.predictions)

    def error_rate(self):
        """Return the fraction of testing rows whose prediction differs from the label."""
        wrong = np.sum(self.pred_array != self.test_label.flatten())
        return wrong/self.test_rows

    def changelabels(self,arrtochange):
        """Map labels to confusion-matrix indices.

        Non-negative values (including 0) become 0 and negative values become 1.
        Returns a flat float array with one entry per input element.
        """
        flat = np.array(arrtochange).flatten()
        # ~(flat >= 0) rather than (flat < 0) so every value failing ">= 0" maps to 1
        return (~(flat >= 0)).astype(float)

    def confusion_matrix(self):
        """Return the 2x2 confusion matrix of the testing set.

        Rows index the true class and columns the predicted class; index 0 is
        the positive (+1) class and index 1 the negative (-1) class.
        """
        predicted = self.changelabels(self.pred_array)
        actual = self.changelabels(self.test_label)
        mtrx = np.zeros((2,2))
        for true_idx, pred_idx in zip(actual,predicted):
            mtrx[int(true_idx),int(pred_idx)] += 1
        return mtrx

### Classifying Iris-Setosa and Iris-Versicolor

In [11]:
# combined_data_arrays[0] is the shuffled setosa (+1) / versicolor (-1) array;
# the constructor splits it into training and testing rows.
setosa_versicolor = Perceptron(combined_data_arrays[0])

In [12]:
# Returns the learned weight vector; its first component multiplies the
# prepended column of ones, i.e. it is the bias term.
setosa_versicolor.train()

array([ 0.01664125, -0.83776395,  3.88307703, -2.16329674,  0.04970412])

In [13]:
# test() stores its predictions on the instance and returns nothing, so this cell has no output.
setosa_versicolor.test()

In [14]:
# Fraction of testing rows that were misclassified.
setosa_versicolor.error_rate()

0.0

In [15]:
# Rows are the true class, columns the predicted class;
# index 0 is the +1 class (setosa) and index 1 the -1 class (versicolor).
setosa_versicolor.confusion_matrix()

array([[  8.,   0.],
       [  0.,  12.]])

### Classifying Iris-Setosa and Iris-Virginica

In [16]:
# Same workflow as for setosa/versicolor, run in a single cell:
# combined_data_arrays[1] pairs setosa (+1) with virginica (-1).
setosa_virginica = Perceptron(combined_data_arrays[1])
setosa_virginica.train()
setosa_virginica.test()
# Last expression of the cell, so the error rate on the testing rows is what gets displayed.
setosa_virginica.error_rate()

0.0

In [17]:
# Rows are the true class, columns the predicted class;
# index 0 is the +1 class (setosa) and index 1 the -1 class (virginica).
setosa_virginica.confusion_matrix()

array([[  8.,   0.],
       [  0.,  12.]])

# Pocket Perceptron

In [18]:
class PocketPerceptron(Perceptron):
    """Perceptron variant that runs a fixed number of update steps.

    Instead of looping until every training row is classified correctly, it
    performs ``iterations`` random picks and keeps ("pockets") the weight
    vector with the best score seen so far. The score of a weight vector is
    the length of its leading run of correctly classified training rows,
    taken in training-set order (not the total number of correct rows).
    """

    def __init__(self,data,iterations=2000):
        """Call the Perceptron constructor on ``data`` and store the iteration budget."""
        super().__init__(data)
        self.iterations = iterations
        # Best score so far; also reset by train(). Defined here so that
        # test_w can be called before train() without an AttributeError.
        self.h_s = 0

    def test_w(self,w):
        """Override of Perceptron.test_w: score ``w`` and pocket it if it is the best so far.

        Counts how many training rows, starting from the first, are classified
        correctly before the first mistake. If that count is greater than the
        stored best ``self.h_s``, both ``self.h_s`` and ``self.w`` are updated.
        Returns nothing.
        """
        h = 0
        for row in range(self.train_rows):
            w_x = np.dot(w,self.trainset[row])
            label = self.train_label[row][0]
            # Same decision rule as the inherited test(): a score > 0 predicts +1,
            # anything else (including exactly 0) predicts -1.
            if (w_x > 0 and label > 0) or (w_x <= 0 and label < 0):
                h += 1
            else:
                break
        if h > self.h_s:
            self.h_s = h
            self.w = w

    def train(self):
        """Override of Perceptron.train: run the update rule for ``self.iterations`` picks.

        Starts from random weights in [0, 5). Each pick selects a random
        training row, corrects the current weights if that row is
        misclassified, then offers the current weights to ``test_w``.
        Returns the pocketed (best-scoring) weight vector ``self.w``.
        """
        curr_w = np.random.rand(self.trainset.shape[1])*5
        self.w = curr_w
        self.h_s = 0
        for _ in range(self.iterations):
            random_point = randint(0,self.train_rows-1)
            x = self.trainset[random_point]
            x_label = self.train_label[random_point][0]
            multiplied = np.dot(curr_w,x)
            # A score of exactly 0 is a negative prediction, so a positive row
            # on the boundary is misclassified and must be corrected.
            if multiplied <= 0 and x_label > 0:
                curr_w = curr_w + x
            elif multiplied > 0 and x_label < 0:
                curr_w = curr_w - x
            self.test_w(curr_w)
            if self.h_s == self.train_rows:
                # The pocketed weights already classify the whole training set;
                # no later candidate can score higher, so further picks cannot change self.w.
                break
        return self.w

In [19]:
# combined_data_arrays[2] pairs versicolor (+1) with virginica (-1).
# The pocket variant stops after a fixed number of picks (2000 here), whereas
# Perceptron.train loops until every training row is classified correctly.
# NOTE(review): presumably chosen because these two classes are not linearly separable - confirm.
versicolor_virginica = PocketPerceptron(combined_data_arrays[2],iterations=2000)

In [20]:
# Returns the pocketed weight vector (best-scoring one seen during the run); first component is the bias.
versicolor_virginica.train()

array([  8.69001349,  14.68968942,  13.09917675, -22.34242951, -16.29303433])

In [21]:
# Inherited from Perceptron: stores predictions on the instance and returns nothing.
versicolor_virginica.test()

In [22]:
# Fraction of testing rows that were misclassified.
versicolor_virginica.error_rate()

0.050000000000000003

In [23]:
# Rows are the true class, columns the predicted class;
# index 0 is the +1 class (versicolor) and index 1 the -1 class (virginica).
versicolor_virginica.confusion_matrix()

array([[  7.,   1.],
       [  0.,  12.]])