In [10]:
import pandas as pd
import math


class Node:
    """One flower sample described by its four measurements.

    Attributes:
        sepalLengthCm, sepalWidthCm, petalLengthCm, petalWidthCm: the
            numeric measurements supplied to the constructor.
        specie: the label of the sample, or ``None`` while it is unknown.
    """

    def __init__(self, sepalLengthCm, sepalWidthCm, petalLengthCm,
                 petalWidthCm, specie=None):
        """Store the four measurements and, optionally, the label.

        Args:
            sepalLengthCm: sepal length of the sample.
            sepalWidthCm: sepal width of the sample.
            petalLengthCm: petal length of the sample.
            petalWidthCm: petal width of the sample.
            specie: optional label; defaults to ``None`` (unlabelled).
        """
        self.sepalLengthCm = sepalLengthCm
        self.sepalWidthCm = sepalWidthCm
        self.petalLengthCm = petalLengthCm
        self.petalWidthCm = petalWidthCm
        # Always define the attribute so show() works on an unlabelled
        # node instead of raising AttributeError.
        self.specie = specie

    def set_specie(self, specie):
        """Assign (or overwrite) the label of this sample."""
        self.specie = specie

    def calculate_distance(self, other_node):
        """Return the Euclidean distance between this node and *other_node*.

        Args:
            other_node: any object exposing the same four measurement
                attributes as this class.

        Returns:
            The square root of the sum of the squared per-feature
            differences.
        """
        sl = self.sepalLengthCm - other_node.sepalLengthCm
        sw = self.sepalWidthCm - other_node.sepalWidthCm
        pl = self.petalLengthCm - other_node.petalLengthCm
        pw = self.petalWidthCm - other_node.petalWidthCm

        return math.sqrt(sl * sl + sw * sw + pl * pl + pw * pw)

    def show(self):
        """Print the four measurements and the label on a single line."""
        print("sepalLengthCm:", self.sepalLengthCm,
              "sepalWidthCm:", self.sepalWidthCm,
              "petalLengthCm:", self.petalLengthCm,
              "petalWidthCm:", self.petalWidthCm,
              "specie:", self.specie)
        

class KNN:
    """Scaffold of a nearest-neighbour experiment on an iris CSV file.

    The class loads the CSV, shuffles it, splits it into a training and a
    test population, turns the training rows into ``Node`` objects and
    precomputes the pairwise distances between them. The evaluation step
    (``test_algorithm``) is not implemented yet.
    """

    def __init__(self):
        # distance_matrix[i][j] is the distance between nodes[i] and nodes[j].
        self.distance_matrix = []
        # Node objects built by make_nodes().
        self.nodes = []
        # DataFrames produced by set_populations(); None until it is called.
        self.training_population = None
        self.test_population = None

    def main(self, training_population_size=70):
        """Run the whole pipeline: split the data, train, then test.

        Args:
            training_population_size: number of shuffled rows used for
                training; the remaining rows form the test population.
        """
        self.set_populations(training_population_size)
        self.training_algorithm()
        self.test_algorithm()

    def set_populations(self, training_population_size, csv_path="iris.csv"):
        """Load the CSV, shuffle its rows and split them in two.

        Args:
            training_population_size: number of rows placed in
                ``self.training_population``; every other row goes to
                ``self.test_population``.
            csv_path: location of the CSV file to read.
        """
        iris = pd.read_csv(csv_path)
        # sample(frac=1) returns every row in random order; the index is
        # rebuilt so positions and labels agree after the shuffle.
        iris = iris.sample(frac=1).reset_index(drop=True)
        self.training_population = iris.iloc[:training_population_size]
        self.test_population = iris.iloc[training_population_size:]

    def training_algorithm(self):
        """Build the nodes of the training population and their distances."""
        # Start from an empty list so that running the training step again
        # does not duplicate the nodes of the previous run.
        self.nodes = []
        self.make_nodes(self.training_population)
        self.make_distance_matrix(self.nodes)

    def make_nodes(self, df):
        """Append one labelled ``Node`` to ``self.nodes`` per row of *df*.

        Args:
            df: DataFrame providing the columns ``SepalLengthCm``,
                ``SepalWidthCm``, ``PetalLengthCm``, ``PetalWidthCm`` and
                ``Species``.
        """
        columns = (
            df["SepalLengthCm"],
            df["SepalWidthCm"],
            df["PetalLengthCm"],
            df["PetalWidthCm"],
            df["Species"],
        )
        # Walking the columns in lockstep avoids a label lookup per cell
        # and does not depend on the index labels being unique.
        for sepal_length, sepal_width, petal_length, petal_width, species in zip(*columns):
            new_node = Node(sepal_length, sepal_width, petal_length, petal_width)
            new_node.set_specie(species)
            self.nodes.append(new_node)

    def make_distance_matrix(self, nodes):
        """Store the symmetric pairwise distance matrix of *nodes*.

        ``self.distance_matrix`` becomes a ``len(nodes)`` x ``len(nodes)``
        list of lists with ``0`` on the diagonal and
        ``nodes[i].calculate_distance(nodes[j])`` everywhere else.

        Args:
            nodes: indexable sequence of objects exposing
                ``calculate_distance(other)``.
        """
        size = len(nodes)
        # Each row must be its own list: ``[[]] * size`` would repeat one
        # shared row, so every write would land in the same list.
        matrix = [[0] * size for _ in range(size)]

        for i in range(size):
            # Only the upper triangle is computed; the value is mirrored.
            for j in range(i + 1, size):
                new_distance = nodes[i].calculate_distance(nodes[j])
                matrix[i][j] = new_distance
                matrix[j][i] = new_distance

        self.distance_matrix = matrix

    def test_algorithm(self):
        """Placeholder for the evaluation step; currently does nothing."""
        pass
    
# Script entry point: create a KNN instance and run its pipeline through
# KNN.main() (population split, training step, then the test step).
if __name__ == "__main__":
    knn = KNN()
    knn.main()

70
70
70
70 [0, 2.2781571499789033, 5.274466797696237, 4.868264577855234, 2.1447610589527217, 0.7615773105863906, 1.3000000000000005, 1.9519221295943137, 0.8062257748298546, 2.3130067012440754, 1.679285562374667, 5.282991576748916, 1.7406895185529216, 5.250714237130031, 0.648074069840786, 5.2971690552596105, 5.453439281774393, 5.1903757089443925, 1.6822603841260726, 2.533771891863985, 5.1166395221864125, 1.7691806012954134, 0.4898979485566359, 5.845511098270193, 5.1254268114958, 1.449137674618944, 5.097057974949863, 2.485960578931211, 1.7549928774784245, 1.4730919862656238, 5.229722745997152, 0.22360679774997935, 0.7348469228349536, 2.760434748368452, 5.781003373117854, 0.62449979983984, 0.8366600265340756, 2.002498439450079, 0.9433981132056606, 3.5637059362410923, 1.349073756323204, 4.7812132351527685, 0.6480740698407862, 0.9643650760992956, 2.485960578931211, 5.3376024580330075, 5.591064299397746, 5.187484939737175, 5.349766350038103, 1.4798648586948742, 1.4071247279470291, 5.2297227