In [1]:
pip install scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [2]:
import numpy as np
from sklearn.metrics.pairwise import cosine_distances, manhattan_distances, euclidean_distances, pairwise_distances
from sklearn.metrics import accuracy_score

# Sample data: three training rows with four features each.
X_train = np.array([
    [1, 2, 3, 4],
    [4, 3, 2, 1],
    [2, 2, 2, 2],
])
y_train = np.array([0, 1, 0])  # class label of each training row

# Two test rows together with their true labels.
X_test = np.array([
    [1, 1, 3, 4],
    [4, 4, 2, 1],
])
y_test = np.array([0, 1])  # ground-truth labels for the test rows

def predict_class(X_train, y_train, X_test, distance_func, metric=None):
    """Label every test sample with the class of its closest training sample.

    Args:
        X_train: Training samples; passed as the second argument to
            ``distance_func``.
        y_train: Training labels; indexed with the position of the nearest
            training sample, so it must support array indexing.
        X_test: Samples to classify; passed as the first argument to
            ``distance_func``.
        distance_func: Callable invoked as ``distance_func(X_test, X_train)``.
            It is expected to return a 2-D distance matrix with one row per
            test sample and one column per training sample.
        metric: Optional metric name. When truthy it is forwarded to
            ``distance_func`` as the ``metric`` keyword; otherwise the
            keyword is not passed at all.

    Returns:
        ``y_train`` indexed by the per-row argmin of the distance matrix.
    """
    # Forward ``metric`` only when one was supplied, so callables that do not
    # accept that keyword can still be used.
    extra_kwargs = {"metric": metric} if metric else {}
    dist_matrix = distance_func(X_test, X_train, **extra_kwargs)

    # argmin along axis 1 picks, for each row, the column holding the
    # smallest distance (the first such column on ties).
    closest = np.argmin(dist_matrix, axis=1)
    return y_train[closest]

# Distance measures to compare, as (display name, callable, metric keyword).
# Only the generic pairwise_distances entry carries a metric name; the
# dedicated helpers are called without one.
distance_functions = [
    ("Hamming", pairwise_distances, "hamming"),
    ("Cosine", cosine_distances, None),
    ("Manhattan", manhattan_distances, None),
    ("Euclid", euclidean_distances, None),
]

for name, func, metric in distance_functions:
    # Classify each test sample by its nearest training sample, then score
    # the predictions against the true test labels.
    y_pred = predict_class(X_train, y_train, X_test, func, metric=metric)
    accuracy = accuracy_score(y_test, y_pred)

    # One print call; the empty final item yields the blank separator line.
    print(
        f"{name} Distance:",
        f"Predicted classes: {y_pred}",
        f"Accuracy: {accuracy:.2f}",
        "",
        sep="\n",
    )

# Compute and print the distance between X_test[0] and X_test[1] under every
# measure. Each sample is first reshaped into a single-row 2-D array.
X, Y = (X_test[i].reshape(1, -1) for i in (0, 1))

for name, func, metric in distance_functions:
    # Pass ``metric`` only for entries that define one.
    result = func(X, Y, metric=metric) if metric else func(X, Y)
    # Both inputs hold a single row, so the value of interest is entry [0][0].
    distance = result[0][0]
    print(f"{name} Distance between X_test[0] and X_test[1]: {distance:.4f}")

Hamming Distance:
Predicted classes: [0 1]
Accuracy: 1.00

Cosine Distance:
Predicted classes: [0 1]
Accuracy: 1.00

Manhattan Distance:
Predicted classes: [0 1]
Accuracy: 1.00

Euclid Distance:
Predicted classes: [0 1]
Accuracy: 1.00

Hamming Distance between X_test[0] and X_test[1]: 1.0000
Cosine Distance between X_test[0] and X_test[1]: 0.4305
Manhattan Distance between X_test[0] and X_test[1]: 10.0000
Euclid Distance between X_test[0] and X_test[1]: 5.2915
