In [None]:
import numpy as np
import pandas as pd

In [None]:
# activation function
def stepFunction(y):
    """Sign-style step activation.

    Returns 1 when ``y`` is strictly greater than zero and -1 for every
    other value (zero included).
    """
    return 1 if y > 0 else -1


In [None]:
# Perceptron class
class Perceptron:
    """Single-layer perceptron for binary classification with labels -1 / +1.

    After ``fit`` has run:
      weights -- float64 numpy array holding one coefficient per feature
      bias    -- scalar offset added to the weighted sum
    """

    def __init__(self, epochs=1000, learningRate=1):
        """Store the training hyper-parameters.

        epochs       -- maximum number of passes over the training data
        learningRate -- step size applied to every corrective update
        """
        self.learningRate = learningRate
        self.epochs = epochs
        self.weights = []
        self.bias = 0

    def fit(self, X, y):
        """Learn ``weights`` and ``bias`` with the perceptron update rule.

        X -- 2-D table of numeric features, one row per sample
             (DataFrame, numpy array or nested list)
        y -- 1-D sequence of targets aligned with the rows of ``X``

        Training stops after ``epochs`` passes, or earlier as soon as one
        complete pass makes no update.
        """
        # Convert once up front instead of doing a pandas .iloc lookup for
        # every sample in every epoch; this also lets plain arrays and lists
        # be passed in.
        samples = np.asarray(X, dtype='float64')
        targets = np.asarray(y)
        dataPoints, features = samples.shape

        self.weights = np.zeros(features, dtype='float64')
        self.bias = 0

        for _ in range(self.epochs):
            updated = False
            for idx in range(dataPoints):
                x_i = samples[idx]
                y_actual = targets[idx]
                # A non-positive product means the sample is misclassified.
                if y_actual * self.predict(x_i) <= 0:
                    delta = self.learningRate * y_actual
                    self.weights += delta * x_i
                    self.bias += delta
                    updated = True
            if not updated:
                # A pass without updates leaves the model unchanged, so every
                # remaining epoch would repeat it exactly: stopping here gives
                # the same weights and bias as running all epochs.
                break

    def predict(self, x_i):
        """Classify one sample: return 1 or -1 from the step activation."""
        output = np.dot(x_i, self.weights) + self.bias
        return stepFunction(output)



In [None]:
# Original DataSet

# Load the raw table and discard the 'id' column so it is not used as a feature.
OriginalDataSet = pd.read_csv('./dataset.csv').drop(columns='id')

# replacing B to -1 and M to 1 in target "diagnosis"
# The encoded column is assigned back explicitly: an in-place replace on a
# column selection is chained assignment, which does not update the parent
# frame under pandas Copy-on-Write and would silently leave the string labels.
diagnosisCodes = {'B': -1, 'M': 1}
unexpectedLabels = set(
    OriginalDataSet['diagnosis'].dropna().unique()) - set(diagnosisCodes)
if unexpectedLabels:
    raise ValueError(
        "Unexpected diagnosis labels: " + repr(sorted(unexpectedLabels, key=str)))
OriginalDataSet['diagnosis'] = OriginalDataSet['diagnosis'].map(diagnosisCodes)



In [None]:
# Per-run metric histories, one group of four lists per perceptron variant.
# Every name is bound to its own, independent empty list.
PM1Precision, PM1Recall, PM1Accuracy, PM1AccuracyTrainSet = [], [], [], []

PM2Precision, PM2Recall, PM2Accuracy, PM2AccuracyTrainSet = [], [], [], []

PM3Precision, PM3Recall, PM3Accuracy, PM3AccuracyTrainSet = [], [], [], []

PM4Precision, PM4Recall, PM4Accuracy, PM4AccuracyTrainSet = [], [], [], []

In [None]:
# Experiment settings: number of repeated runs, epoch budget handed to each
# perceptron, and the learning rate handed to each perceptron.
iterations, ep, lr = 10, 10_000, 1

In [None]:
def evaluateAndRecord(model, trainX, trainy, testX, testy,
                      precisionLog, recallLog, accuracyLog, trainAccuracyLog):
    """Score a fitted model, print its metrics and append them to the logs.

    model            -- object exposing predict(sample) that returns 1 or -1
    trainX, trainy   -- training features (DataFrame) and targets (Series)
    testX, testy     -- testing features (DataFrame) and targets (Series)
    precisionLog     -- list receiving the test precision (skipped when no
                        sample was predicted positive)
    recallLog        -- list receiving the test recall (skipped when the test
                        set holds no positive sample)
    accuracyLog      -- list receiving the test accuracy
    trainAccuracyLog -- list receiving the training accuracy
    """
    # Convert once so each sample is a plain numpy row rather than a
    # per-sample pandas .iloc lookup.
    trainRows, trainLabels = trainX.to_numpy(), trainy.to_numpy()
    testRows, testLabels = testX.to_numpy(), testy.to_numpy()

    trainSetCorrect = sum(
        1 for row, label in zip(trainRows, trainLabels)
        if model.predict(row) == label)
    trainAccuracy = trainSetCorrect / len(trainLabels)
    trainAccuracyLog.append(trainAccuracy)
    print("Accuracy on train set:",  trainAccuracy)

    # Confusion-matrix counts on the test set; label 1 is the positive class
    # and any other label is treated as negative.
    truePositives = falsePositives = trueNegatives = falseNegatives = 0
    for row, label in zip(testRows, testLabels):
        y_pred = model.predict(row)
        if label == 1:
            if y_pred == 1:
                truePositives += 1
            else:
                falseNegatives += 1
        elif y_pred == -1:
            trueNegatives += 1
        else:
            falsePositives += 1

    predictedPositives = truePositives + falsePositives
    if predictedPositives == 0:
        print("Precision: N.A")
    else:
        precision = truePositives / predictedPositives
        precisionLog.append(precision)
        print("Precision:", precision)

    actualPositives = truePositives + falseNegatives
    if actualPositives == 0:
        print("Recall: N.A")
    else:
        recall = truePositives / actualPositives
        recallLog.append(recall)
        print("Recall:", recall)

    accuracy = (truePositives + trueNegatives) / len(testLabels)
    accuracyLog.append(accuracy)
    print("Accuracy:", accuracy)


# `run` (rather than `iter`) keeps the builtin iter() usable in later cells.
for run in range(iterations):

    print("\nRun", run+1, "=====================================================\n")

    # Shuffle the original dataset
    # (disabled: with this line commented out every run uses the same split)
    # OriginalDataSet = OriginalDataSet.sample(frac=1).reset_index(drop=True)

    # deriving the training and testing datasets (first 67% of rows / the rest)
    numOfDataPoints = np.shape(OriginalDataSet)[0]
    trainingDataPoints = int(67/100 * numOfDataPoints)

    TrainingDataSet = OriginalDataSet[:trainingDataPoints]
    TestingDataSet = OriginalDataSet[trainingDataPoints:]

    # filling null values with the training-set column means (the test set
    # is filled with training means too)
    TrainingDataSet = TrainingDataSet.fillna(TrainingDataSet.mean())
    TestingDataSet = TestingDataSet.fillna(TrainingDataSet.mean())

    TrainX = TrainingDataSet.drop('diagnosis', axis=1)
    Trainy = TrainingDataSet['diagnosis']

    TestX = TestingDataSet.drop('diagnosis', axis=1)
    Testy = TestingDataSet['diagnosis']

    # =========================PM1===========================
    # Baseline: training data in its original row order.
    print("\nPM1=============>\n")
    PM1 = Perceptron(ep, lr)
    PM1.fit(TrainX, Trainy)
    evaluateAndRecord(PM1, TrainX, Trainy, TestX, Testy,
                      PM1Precision, PM1Recall, PM1Accuracy, PM1AccuracyTrainSet)

    # =========================PM2===========================
    # Same data with the training rows presented in a random order.
    # NOTE: sample() is unseeded, so PM2 differs between notebook executions.
    print("\nPM2=============>\n")
    ShuffledTrainingDataSet = TrainingDataSet.sample(frac=1)
    ShuffledTrainX = ShuffledTrainingDataSet.drop('diagnosis', axis=1)
    ShuffledTrainy = ShuffledTrainingDataSet['diagnosis']

    PM2 = Perceptron(ep, lr)
    PM2.fit(ShuffledTrainX, ShuffledTrainy)
    evaluateAndRecord(PM2, ShuffledTrainX, ShuffledTrainy, TestX, Testy,
                      PM2Precision, PM2Recall, PM2Accuracy, PM2AccuracyTrainSet)

    # =========================PM3============================
    # Features standardised with the training-set mean and standard deviation.
    print("\nPM3=============>\n")

    NormalizedTrainX = (TrainX - TrainX.mean())/(TrainX.std())
    NormalizedTestX = (TestX - TrainX.mean())/(TrainX.std())

    PM3 = Perceptron(ep, lr)
    PM3.fit(NormalizedTrainX, Trainy)
    evaluateAndRecord(PM3, NormalizedTrainX, Trainy, NormalizedTestX, Testy,
                      PM3Precision, PM3Recall, PM3Accuracy, PM3AccuracyTrainSet)

    # ==========================PM4============================
    # Columns in a random order (same order for the train and test parts).
    # NOTE: sample() is unseeded, so the column order differs between executions.
    print("\nPM4=============>\n")
    FeaturesShuffledDataset = OriginalDataSet.sample(frac=1, axis=1)
    FeaturesShuffledTrainX = FeaturesShuffledDataset.drop(
        'diagnosis', axis=1)[:trainingDataPoints]

    FeaturesShuffledTestX = FeaturesShuffledDataset.drop(
        'diagnosis', axis=1)[trainingDataPoints:]

    # filling null values with mean
    FeaturesShuffledTrainX = FeaturesShuffledTrainX.fillna(
        FeaturesShuffledTrainX.mean())
    FeaturesShuffledTestX = FeaturesShuffledTestX.fillna(
        FeaturesShuffledTrainX.mean())

    PM4 = Perceptron(ep, lr)
    PM4.fit(FeaturesShuffledTrainX, Trainy)
    evaluateAndRecord(PM4, FeaturesShuffledTrainX, Trainy,
                      FeaturesShuffledTestX, Testy,
                      PM4Precision, PM4Recall, PM4Accuracy, PM4AccuracyTrainSet)


In [None]:
# Averages over all runs for PM1.
print("\nPM1: ")
for caption, values in (
        ("Average Accuracy on training set:", PM1AccuracyTrainSet),
        ("Average Precision on testing set:", PM1Precision),
        ("Average Recall on testing set:", PM1Recall),
        ("Average Accuracy on testing set:", PM1Accuracy)):
    # A metric that was "N.A" in every run leaves its list empty; np.mean of
    # an empty list warns and yields nan, so report N.A instead.
    print(caption, np.mean(values) if values else "N.A")

In [None]:
# Averages over all runs for PM2.
print("\nPM2: ")
for caption, values in (
        ("Average Accuracy on training set:", PM2AccuracyTrainSet),
        ("Average Precision on testing set:", PM2Precision),
        ("Average Recall on testing set:", PM2Recall),
        ("Average Accuracy on testing set:", PM2Accuracy)):
    # A metric that was "N.A" in every run leaves its list empty; np.mean of
    # an empty list warns and yields nan, so report N.A instead.
    print(caption, np.mean(values) if values else "N.A")

In [None]:
# Averages over all runs for PM3.
print("\nPM3: ")
for caption, values in (
        ("Average Accuracy on training set:", PM3AccuracyTrainSet),
        ("Average Precision on testing set:", PM3Precision),
        ("Average Recall on testing set:", PM3Recall),
        ("Average Accuracy on testing set:", PM3Accuracy)):
    # A metric that was "N.A" in every run leaves its list empty; np.mean of
    # an empty list warns and yields nan, so report N.A instead.
    print(caption, np.mean(values) if values else "N.A")

In [None]:
# Averages over all runs for PM4.
print("\nPM4: ")
for caption, values in (
        ("Average Accuracy on training set:", PM4AccuracyTrainSet),
        ("Average Precision on testing set:", PM4Precision),
        ("Average Recall on testing set:", PM4Recall),
        ("Average Accuracy on testing set:", PM4Accuracy)):
    # A metric that was "N.A" in every run leaves its list empty; np.mean of
    # an empty list warns and yields nan, so report N.A instead.
    print(caption, np.mean(values) if values else "N.A")