In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, precision_score, recall_score, f1_score
from sklearn.neighbors import KNeighborsClassifier
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter
import time
import tracemalloc

# 1. Load the Iris dataset and separate the feature matrix from the class labels.
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 30% of the samples for testing. The split is stratified on the labels
# and uses a fixed seed so it is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

print("Dados carregados e divididos com sucesso!")
print(f"Formato do conjunto de treino: {X_train.shape}")
print(f"Formato do conjunto de teste: {X_test.shape}")

# Neighbour counts to evaluate, and the per-k result stores filled by later cells.
k_values = [1, 3, 5, 7]
results_manual, results_sklearn = {}, {}

In [None]:
# 2. Implementação KNN manual
def euclidean_distance(a, b):
    """Return the Euclidean (L2) distance between two points.

    Parameters
    ----------
    a, b : array-like of real numbers
        Coordinates of the two points. Anything NumPy can convert to a float
        array is accepted (ndarray, list, tuple); the two must have the same
        shape or be broadcastable against each other.

    Returns
    -------
    numpy.float64
        The square root of the summed squared coordinate differences.
    """
    # Convert to float first: this lets plain Python sequences through and
    # prevents wrap-around when the inputs are unsigned-integer arrays, where
    # `a - b` and the squaring would otherwise overflow silently.
    a = np.asarray(a, dtype=float)
    b = np.asarray(b, dtype=float)
    return np.sqrt(np.sum((a - b) ** 2))

def knn_predict(X_train, y_train, x_test, k):
    """Classify one sample by majority vote among its k nearest training points.

    Parameters
    ----------
    X_train : array-like, one row per training sample
        Training feature vectors.
    y_train : sequence indexable by integer position
        Label of each training row, in the same order as ``X_train``.
    x_test : array-like
        Feature vector of the sample to classify; same length as a training row.
    k : int
        Number of neighbours that vote. If ``k`` exceeds the number of
        training rows, every training row votes.

    Returns
    -------
    The most frequent label among the k nearest rows, taken from ``y_train``.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1. A negative ``k`` would otherwise slice
        ``[:k]`` from the end and silently vote with the wrong neighbours.
    """
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k}")

    train = np.asarray(X_train, dtype=float)
    query = np.asarray(x_test, dtype=float)

    # Euclidean distance from the query to every training row in one
    # vectorised pass instead of a Python-level loop.
    distances = np.sqrt(np.sum((train - query) ** 2, axis=1))

    # Stable sort: equidistant rows keep their training-set order, so the
    # selected neighbours are reproducible.
    k_indices = np.argsort(distances, kind="stable")[:k]
    k_labels = [y_train[i] for i in k_indices]

    # Counter orders equal counts by first appearance, so a tied vote goes to
    # the tied label whose nearest representative is closest to the query.
    return Counter(k_labels).most_common(1)[0][0]

def evaluate_knn_manual(k):
    """Evaluate the hand-written KNN on the held-out test split for one k.

    Reads the module-level ``X_train``, ``y_train``, ``X_test``, ``y_test``
    and ``iris``. Prints the metrics and a classification report, and shows a
    confusion-matrix heatmap.

    Parameters
    ----------
    k : int
        Number of neighbours passed to ``knn_predict``.

    Returns
    -------
    tuple
        ``(accuracy, macro precision, macro recall, macro F1,
        elapsed seconds, peak traced memory in KiB)``.
    """
    # Timing pass. It runs with tracemalloc off, because tracing slows
    # Python-level allocation and would inflate the measurement.
    # perf_counter is the monotonic, high-resolution clock meant for intervals.
    start_time = time.perf_counter()
    y_pred = [knn_predict(X_train, y_train, x, k) for x in X_test]
    elapsed = time.perf_counter() - start_time

    # Memory pass: repeat the same work under tracemalloc. The finally clause
    # guarantees tracing is switched off even if prediction raises, so it
    # cannot stay enabled for the rest of the kernel session.
    tracemalloc.start()
    try:
        traced_pred = [knn_predict(X_train, y_train, x, k) for x in X_test]
        _, peak = tracemalloc.get_traced_memory()
    finally:
        tracemalloc.stop()

    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, average='macro')
    rec = recall_score(y_test, y_pred, average='macro')
    f1 = f1_score(y_test, y_pred, average='macro')

    # Full report
    print(f"\nKNN Manual k={k}")
    print("Acurácia:", acc)
    print("Precisão (macro):", prec)
    print("Revocação (macro):", rec)
    print("F1-score (macro):", f1)
    print("Relatório completo:\n", classification_report(y_test, y_pred, target_names=iris.target_names))

    # Confusion matrix
    cm = confusion_matrix(y_test, y_pred)
    plt.figure(figsize=(5,4))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=iris.target_names, yticklabels=iris.target_names)
    plt.title(f"Matriz de confusão - KNN Manual (k={k})")
    plt.ylabel("Verdadeiro")
    plt.xlabel("Previsto")
    plt.show()

    # tracemalloc reports bytes; divide by 1024 to return KiB.
    return acc, prec, rec, f1, elapsed, peak/1024

# Manual KNN evaluation: run every configured k and store its metrics tuple.
print("AVALIAÇÃO DO KNN MANUAL")
for k in k_values:
    results_manual.update({k: evaluate_knn_manual(k)})

In [None]:
# 3. KNN Sklearn
def evaluate_knn_sklearn(k):
    """Evaluate scikit-learn's KNeighborsClassifier on the test split for one k.

    Reads the module-level ``X_train``, ``y_train``, ``X_test``, ``y_test``
    and ``iris``. Prints the metrics and a classification report, and shows a
    confusion-matrix heatmap.

    Parameters
    ----------
    k : int
        Value passed as ``n_neighbors`` to ``KNeighborsClassifier``.

    Returns
    -------
    tuple
        ``(accuracy, macro precision, macro recall, macro F1,
        elapsed seconds, peak traced memory in KiB)``. The elapsed time
        covers model construction, ``fit`` and ``predict``.
    """
    # Timing pass. It runs with tracemalloc off, because tracing adds
    # overhead that would distort the measurement. perf_counter is the
    # monotonic, high-resolution clock meant for intervals.
    start_time = time.perf_counter()
    model = KNeighborsClassifier(n_neighbors=k)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    elapsed = time.perf_counter() - start_time

    # Memory pass: repeat fit + predict on a fresh model under tracemalloc.
    # The finally clause guarantees tracing is switched off even if the
    # model raises, so it cannot stay enabled for the rest of the session.
    tracemalloc.start()
    try:
        traced_model = KNeighborsClassifier(n_neighbors=k)
        traced_model.fit(X_train, y_train)
        traced_model.predict(X_test)
        _, peak = tracemalloc.get_traced_memory()
    finally:
        tracemalloc.stop()

    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, average='macro')
    rec = recall_score(y_test, y_pred, average='macro')
    f1 = f1_score(y_test, y_pred, average='macro')

    # Full report
    print(f"\nKNN Sklearn k={k}")
    print("Acurácia:", acc)
    print("Precisão (macro):", prec)
    print("Revocação (macro):", rec)
    print("F1-score (macro):", f1)
    print("Relatório completo:\n", classification_report(y_test, y_pred, target_names=iris.target_names))

    # Confusion matrix
    cm = confusion_matrix(y_test, y_pred)
    plt.figure(figsize=(5,4))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Greens", xticklabels=iris.target_names, yticklabels=iris.target_names)
    plt.title(f"Matriz de confusão - KNN Sklearn (k={k})")
    plt.ylabel("Verdadeiro")
    plt.xlabel("Previsto")
    plt.show()

    # tracemalloc reports bytes; divide by 1024 to return KiB.
    return acc, prec, rec, f1, elapsed, peak/1024

# scikit-learn KNN evaluation: run every configured k and store its metrics tuple.
print("AVALIAÇÃO DO KNN SKLEARN")
for k in k_values:
    results_sklearn.update({k: evaluate_knn_sklearn(k)})

In [None]:
# 5. Prepare the data for the charts.
# Each results_*[k] entry is a 6-tuple whose positions line up with `metrics`
# (accuracy, precision, recall, F1, time, memory). Regroup those tuples into
# one list per metric, ordered like k_values.
metrics = ['Acurácia', 'Precisão', 'Revocação', 'F1-score', 'Tempo(s)', 'Memória(KB)']

manual_vals = {
    name: [results_manual[k][pos] for k in k_values]
    for pos, name in enumerate(metrics)
}
sklearn_vals = {
    name: [results_sklearn[k][pos] for k in k_values]
    for pos, name in enumerate(metrics)
}

# 6. Função para plotar gráfico de barras
def plot_bar(metric_name, manual, sklearn):
    """Show a grouped bar chart comparing both KNN variants for one metric.

    Parameters
    ----------
    metric_name : str
        Used as the y-axis label and inside the chart title.
    manual, sklearn : sequence of numbers
        One value per entry of the module-level ``k_values``, for the manual
        and the scikit-learn implementation respectively.
    """
    positions = np.arange(len(k_values))
    bar_width = 0.35

    fig, ax = plt.subplots(figsize=(8, 5))
    # Two bars per k, centred on the tick: manual on the left, sklearn on the right.
    ax.bar(positions - bar_width/2, manual, bar_width, label='Manual', color='skyblue')
    ax.bar(positions + bar_width/2, sklearn, bar_width, label='Sklearn', color='salmon')

    ax.set_xticks(positions)
    ax.set_xticklabels(k_values)
    ax.set_ylabel(metric_name)
    ax.set_xlabel('Valor de k')
    ax.set_title(f'Comparação de {metric_name} - KNN Manual vs Sklearn')
    ax.legend()

    # Leave 15% headroom above the tallest bar; skipped when nothing is positive.
    top_manual, top_sklearn = max(manual), max(sklearn)
    if top_manual > 0 or top_sklearn > 0:
        ax.set_ylim(0, max(top_manual, top_sklearn) * 1.15)

    ax.grid(axis='y', linestyle='--', alpha=0.7)
    plt.show()

# 7. Render one comparison chart per metric (manual_vals is keyed in `metrics` order).
for metric, manual_series in manual_vals.items():
    plot_bar(metric, manual_series, sklearn_vals[metric])

In [None]:
# 8. Análise comparativa textual
def comparative_analysis(k_values, manual_vals, sklearn_vals):
    """Print a per-k, per-metric comparison of the two KNN implementations.

    Parameters
    ----------
    k_values : sequence
        The k values that were evaluated, in the order used to build the lists.
    manual_vals, sklearn_vals : dict[str, sequence of float]
        Metric name -> one value per entry of ``k_values``. The metrics
        reported are the keys of ``manual_vals``, in their insertion order;
        ``sklearn_vals`` must contain the same keys.

    Notes
    -----
    'Tempo(s)' and 'Memória(KB)' are costs, so the lower value wins and they
    are printed with six decimals. Every other metric is a score, so the
    higher value wins and it is printed with four decimals. Equal values are
    reported as "Empate" for both kinds: exact equality for costs,
    ``np.isclose`` for scores.
    """
    lower_is_better = ('Tempo(s)', 'Memória(KB)')

    print("\nAnálise Comparativa Detalhada: KNN Manual vs Sklearn\n")
    for i, k in enumerate(k_values):
        print(f"Para k={k}:")
        # Iterate the metrics actually passed in rather than a module global,
        # so the function depends only on its arguments.
        for metric in manual_vals:
            manual_metric = manual_vals[metric][i]
            sklearn_metric = sklearn_vals[metric][i]

            is_cost = metric in lower_is_better
            # Costs need more decimals because timings are often tiny.
            format_str = ".6f" if is_cost else ".4f"

            if is_cost:
                tie = manual_metric == sklearn_metric
                sklearn_wins = sklearn_metric < manual_metric
            else:
                tie = np.isclose(manual_metric, sklearn_metric)
                sklearn_wins = sklearn_metric > manual_metric

            if tie:
                better = "Empate"
            elif sklearn_wins:
                better = "Sklearn"
            else:
                better = "Manual"

            print(f"  - {metric:<12}: Manual={manual_metric:{format_str}}, Sklearn={sklearn_metric:{format_str}} -> Melhor: {better}")
        print("-" * 70)

# Print the textual comparison for every evaluated k.
comparative_analysis(
    k_values=k_values,
    manual_vals=manual_vals,
    sklearn_vals=sklearn_vals,
)