In [15]:
import math
import numpy as np
import pandas as pd
from statistics import mode
from collections import Counter

In [20]:
def importer_dataset(nom_fichier):
    dataset = []
    with open(nom_fichier, 'r') as file:
        for line in file:
            instance = line.strip().split()
            instance_values = [float(val) for val in instance[1:-1]]
            instance_class = instance[-1]
            dataset.append(instance_values + [instance_class])
    return dataset
def distance_manhattan(instance1, instance2):
    return sum(abs(a - b) for a, b in zip(instance1, instance2))

def distance_euclidienne(instance1, instance2):
    return math.sqrt(sum((a - b) ** 2 for a, b in zip(instance1, instance2)))

def distance_minkowski(instance1, instance2, p):
    return sum(abs(a - b) ** p for a, b in zip(instance1, instance2)) ** (1 / p)

def distance_cosine(instance1, instance2):
    dot_product = sum(a * b for a, b in zip(instance1, instance2))
    magnitude1 = math.sqrt(sum(a ** 2 for a in instance1))
    magnitude2 = math.sqrt(sum(b ** 2 for b in instance2))
    return 1 - (dot_product / (magnitude1 * magnitude2))

def distance_hamming(instance1, instance2):
    return sum(a != b for a, b in zip(instance1, instance2))


In [17]:
def trier_selon_distance(dataset, instance, distance_function):
    distances = [(data, distance_function(instance, data[:-1])) for data in dataset]
    distances.sort(key=lambda x: x[1])
    return distances

In [18]:
def classe_dominante(knn_instances):
    classes = [instance[-1] for instance, _ in knn_instances]
    count = Counter(classes)
    return count.most_common(1)[0][0]

In [19]:
def k_nn(dataset, k, instance, distance_function):
    distances = trier_selon_distance(dataset, instance, distance_function)
    knn = distances[:k]
    return classe_dominante(knn)

In [13]:
dataset = importer_dataset('Dataset-Exos.txt')
instance_a_classifier = [5.2, 3.5, 1.41, 0.25]

classe_k_3 = k_nn(dataset, 3, instance_a_classifier, distance_euclidienne) 
classe_k_5 = k_nn(dataset, 5, instance_a_classifier, distance_euclidienne)

print("Classe avec K = 3:", classe_k_3)
print("Classe avec K = 5:", classe_k_5)


Classe avec K = 3: 5.1,3.5,1.4,0.2,Iris-setosa
Classe avec K = 5: 5.1,3.5,1.4,0.2,Iris-setosa
