In [None]:
#import library
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Perceptron
from sklearn.metrics import confusion_matrix
def fit_perceptron(X_train, y_train, max_updates=5000):
  """Fit a linear classifier with the pocket variant of the perceptron.

  A bias column of ones is prepended to ``X_train`` and the weights start at
  zero. Samples are visited in order; every misclassified sample triggers the
  update ``w <- w + y * x``. After each update the in-sample error is
  re-evaluated, and the weights with the lowest error seen so far (the
  "pocket") are what is finally returned.

  The decision rule is the one used by ``pred``: a score strictly greater
  than 0 is class +1, anything else (including exactly 0) is class -1.

  Args:
    X_train: 2-D array-like of shape (n_samples, n_features).
    y_train: Array-like of n_samples labels; the updates assume +1 / -1.
    max_updates: Upper bound on the number of weight updates.

  Returns:
    1-D ndarray of length n_features + 1 with the bias weight first. It is
    all zeros when there are no samples, when ``max_updates`` is 0, or when
    the zero vector already classifies every sample correctly.
  """
  X_aug = np.hstack((np.ones((len(X_train), 1)), X_train))
  y = np.asarray(y_train)
  n_samples, n_weights = X_aug.shape

  # Only the current and the best weights are needed, so no per-update
  # history of weight vectors is kept.
  w = np.zeros(n_weights)
  w_best = w
  if n_samples == 0:
    # Nothing to learn from; avoids an undefined error rate on empty input.
    return w_best

  def misclassified(weights):
    # Boolean mask of the samples that `weights` gets wrong.
    return np.where(X_aug @ weights > 0, 1, -1) != y

  wrong = misclassified(w)
  best_error = wrong.mean()
  updates = 0
  # `wrong` always describes the current `w`, so while best_error > 0 every
  # pass over the data is guaranteed to perform at least one update.
  while best_error > 0 and updates < max_updates:
    for j in range(n_samples):
      if not wrong[j]:
        continue
      # `w + ...` builds a new array, so `w_best` never aliases later updates.
      w = w + y[j] * X_aug[j]
      updates += 1
      wrong = misclassified(w)
      error = wrong.mean()
      if error < best_error:
        best_error, w_best = error, w
      if best_error == 0 or updates >= max_updates:
        break
  return w_best

def errorPer(X_train,y_train,w):
  """Return the misclassification rate of weight vector ``w``.

  The rate is (false negatives + false positives) / number of samples, with
  the two counts read from the confusion matrix built by ``confMatrix``.

  Args:
    X_train: Samples, without the bias column (``confMatrix`` adds it).
    y_train: Labels for the samples.
    w: Weight vector including the bias weight.

  Returns:
    The error rate as a NumPy float. For an empty ``X_train`` the division
    is 0 / 0, which NumPy reports as ``nan`` with a RuntimeWarning.
  """
  counts = confMatrix(X_train, y_train, w)
  # Widen both cells to 64-bit before adding: if the matrix uses a narrow
  # integer dtype, adding two cells at that width can wrap around.
  mistakes = np.int64(counts[1][0]) + np.int64(counts[0][1])
  return mistakes / len(X_train)

def confMatrix(X_train,y_train,w):
  """Build the 2x2 confusion matrix of weight vector ``w`` on a data set.

  Rows are the actual class and columns the predicted class:
    [0][0] true negatives    [0][1] false positives
    [1][0] false negatives   [1][1] true positives
  A label equal to 1 is the positive class; every other label is counted as
  negative. Predictions come from ``pred`` applied to each sample after a
  bias column of ones has been prepended.

  Args:
    X_train: 2-D array-like of samples, without the bias column.
    y_train: Indexable sequence of labels, one per sample.
    w: Weight vector including the bias weight.

  Returns:
    2x2 ndarray of int64 counts.
  """
  X_aug = np.hstack((np.ones((len(X_train), 1)), X_train))
  # 64-bit counters: a 16-bit cell would wrap once it exceeds 32767 samples.
  counts = np.zeros((2, 2), dtype=np.int64)
  for i, sample in enumerate(X_aug):
    label = y_train[i]
    actual = 1 if label == 1 else 0
    # A wrong prediction lands in the opposite column of the actual class.
    if pred(sample, w) != label:
      predicted = 1 - actual
    else:
      predicted = actual
    counts[actual][predicted] += 1
  return counts

def pred(X_train,w):
  """Predict the class of a sample from its score against ``w``.

  The callers in this file pass a single sample row that already carries
  the leading bias entry.

  Returns:
    1 when the dot product of ``X_train`` and ``w`` is strictly positive,
    otherwise -1 (a score of exactly 0 is classified as -1).
  """
  score = np.dot(X_train, w)
  return 1 if score > 0 else -1

def test_SciKit(X_train, X_test, Y_train, Y_test):
  """Train scikit-learn's Perceptron and score it on the test split.

  The model is created with ``tol=1e-3`` and ``max_iter=5000``, fitted on
  the training data, and used to predict the test samples.

  Returns:
    The result of ``confusion_matrix(Y_test, predictions)``.
  """
  model = Perceptron(tol=1e-3, max_iter=5000)
  model.fit(X_train, Y_train)
  predictions = model.predict(X_test)
  return confusion_matrix(Y_test, predictions)

def test_Part1():
    """Compare the NumPy pocket perceptron with scikit-learn's Perceptron.

    Loads the iris data, keeps the rows from index 50 onward (presumably the
    two non-setosa classes -- confirm against the dataset), splits them 80/20
    into train and test sets, relabels the targets to +1 / -1, and prints the
    test-set confusion matrix produced by each implementation.
    """
    from sklearn.datasets import load_iris

    features, targets = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(
        features[50:], targets[50:], test_size=0.2
    )

    # Relabel in place: label 1 stays +1, every other label becomes -1.
    for labels in (y_train, y_test):
        labels[labels != 1] = -1

    # Part 1a: pocket algorithm implemented with NumPy.
    pocket_weights = fit_perceptron(X_train, y_train)
    pocket_matrix = confMatrix(X_test, y_test, pocket_weights)

    # Part 1b: the same task solved by scikit-learn.
    sklearn_matrix = test_SciKit(X_train, X_test, y_train, y_test)

    # Report both matrices.
    print ('--------------Test Result-------------------')
    print("Confusion Matrix is from Part 1a is: ",pocket_matrix)
    print("Confusion Matrix from Part 1b is:",sklearn_matrix)


# Run the comparison as soon as this cell/script is executed.
test_Part1()


--------------Test Result-------------------
Confusion Matrix is from Part 1a is:  [[ 8  0]
 [ 0 12]]
Confusion Matrix from Part 1b is: [[ 7  1]
 [ 0 12]]
