In [32]:
import math
class KNNClassifier():
    def __init__(
    self,
    k=5,
    n=2,
    x=[],
    y=[]
    ):
        self.k = k
        self.n = n
        self.x = list(x)
        self.y = list(y)
        self.normalizedDataset = self.normalizeDataset(x)
    
    def getMaxMinValues(self, dataset):
        n = []
        for i in range(0, len(dataset[0])):
            col_values = [row[i] for row in dataset]
            n.append([min(col_values), max(col_values)])
        return n

    def normalizeDataset(self, dataset): # normalization: (x - min) / (max - min)
        n = self.getMaxMinValues(dataset)
        normalizedData = []
        for row in dataset:
            newRow = []
            for i in range(len(row)):
                newItem = (row[i] - n[i][0]) / ( n[i][1] - n[i][0])
                newRow.append(newItem)
            normalizedData.append(newRow)
        return normalizedData

    def getMinkowskiDistance(self, v1, v2):
        #n=1 Manhattan distance
        #n=2 Euclidean distance
        #Minkowski distance: D(x, y) = (|x₁ - y₁|^n + |x₂ - y₂|^n + ... + |x_d - y_d|^n)^(1/n)
        dim = len(v1)
        distance = 0
        for d in range(dim):
            distance += abs(v1[d] - v2[d])**self.n
        distance = distance*(1/self.n)
        return distance

    def getNeighbors(self, test):
        #1. calculating distances between every observation and the values we want to find neighbors of
        distances = []
        for i in range(len(self.normalizedDataset)):
            row = self.normalizedDataset[i]
            distance = self.getMinkowskiDistance(row, test)
            distances.append((i, distance))
        #2. sorting them by the distance (2nd item of the touple with index 1)
        distances.sort(key=lambda t : t[1])
        #3. extracting the indexes of closest observations
        k_neighbors = []
        for i in range(self.k):
            k_neighbors.append(distances[i][0])
        return k_neighbors
    
    def getClassFromNeighbors(self, indexes):
        d = {}
        for i in indexes:
            if d.get(self.y[i]) is None:
                d[self.y[i]] = 1
            else:
                d[self.y[i]] += 1
        output = 0
        c = None
        for k,v in d.items():
            if v > output:
                output = v
                c = k
        return k
    
        
    
    def predict(self, row, appendToDataset=False):
        neighbors = self.getNeighbors(row)
        prediction = self.getClassFromNeighbors(neighbors)
        
        if appendToDataset:
            self.x.append(row)
            self.y.append(prediction)
            self.normalizedDataset = self.normalizeDataset(self.x)
        return prediction

    def evaluate(self, test_dataset, classes):
        # Normalization of test_dataset
        
        predictions = list()
        for row in test_dataset:
            d = self.x[:]
            d.append(row)
            n = self.normalizeDataset(d)
            normalized_row = n[-1]
            predictions.append(self.predict(normalized_row, appendToDataset=False))
        
        correct = 0
        
        for i in range(len(predictions)):
            if predictions[i] == classes[i]:
                correct +=1
        return correct / len(classes)
        

In [33]:
import csv

columns=[]
dataset=[]

with open("pokemon.csv") as raw:
    data = csv.reader(raw)
    columns = next(data)
    for row in data:
        dataset.append(row)
        
inputColumns = columns[5:11]
inputFeatures = [[int(col) for col in row[5:11]] for row in dataset]
output = [1 if row[12] == "True" else 0 for row in dataset]

In [42]:
from sklearn.model_selection import train_test_split
#splitting the dataset (70% train – 30% test)
x_train, x_test, y_train, y_test = train_test_split(inputFeatures,output,test_size=0.3,random_state=42, stratify=output)

k = 7;
pokemonClassifier = KNNClassifier(k=k, x=x_train, y=y_train)
print(f"Dataset records: {len(dataset)}");
print(f"Training dataset records: {len(x_train)}");
print(f"Testing dataset records: {len(x_test)}");
print(f"k={k}")
print(f"KNN accuracy: {pokemonClassifier.evaluate(x_test, y_test)*100}%");


Dataset records: 800
Training dataset records: 560
Testing dataset records: 240
k=7
KNN accuracy: 87.5%
