In [2]:
# Function to calculate Euclidean distance between two vectors
def euclidean_distance(vector1, vector2):
    if len(vector1) != len(vector2):
        raise ValueError("Vectors must be of the same dimension")
    squared_distance = sum((x - y) ** 2 for x, y in zip(vector1, vector2))
    return squared_distance ** 0.5

# Function to calculate Manhattan distance between two vectors
def manhattan_distance(vector1, vector2):
    if len(vector1) != len(vector2):
        raise ValueError("Vectors must be of the same dimension")
    distance = sum(abs(x - y) for x, y in zip(vector1, vector2))
    return distance

# Function to implement k-NN classifier
def knn_classifier(training_data, test_instance, k):
    distances = [(euclidean_distance(test_instance, training_instance), label) for training_instance, label in training_data]
    sorted_distances = sorted(distances, key=lambda x: x[0])
    k_nearest_labels = [label for _, label in sorted_distances[:k]]
    # Assuming labels are integers for simplicity
    predicted_label = max(set(k_nearest_labels), key=k_nearest_labels.count)
    return predicted_label

# Function to convert categorical variables to numeric using label encoding
def label_encoding(categories):
    label_map = {category: index for index, category in enumerate(set(categories))}
    encoded_values = [label_map[category] for category in categories]
    return encoded_values

# Function to convert categorical variables to numeric using One-Hot encoding
def one_hot_encoding(categories):
    unique_categories = list(set(categories))
    encoded_matrix = []
    for category in categories:
        category_index = unique_categories.index(category)
        encoded_row = [1 if i == category_index else 0 for i in range(len(unique_categories))]
        encoded_matrix.append(encoded_row)
    return encoded_matrix

# Example usage in the main program
if __name__ == "__main__":
    # Example data for testing
    vector1 = [1, 2, 3]
    vector2 = [4, 5, 6]
    print(f"Euclidean Distance: {euclidean_distance(vector1, vector2)}")
    print(f"Manhattan Distance: {manhattan_distance(vector1, vector2)}")

    training_data = [([1, 2], 0), ([4, 5], 1), ([7, 8], 0), ([10, 11], 1)]
    test_instance = [3, 4]
    k_value = 3
    print(f"k-NN Classifier Prediction: {knn_classifier(training_data, test_instance, k_value)}")

    categorical_data = ["red", "green", "blue", "red", "green"]
    print(f"Label Encoded Data: {label_encoding(categorical_data)}")

    one_hot_encoded_data = one_hot_encoding(categorical_data)
    print(f"One-Hot Encoded Data:\n{one_hot_encoded_data}")


Euclidean Distance: 5.196152422706632
Manhattan Distance: 9
k-NN Classifier Prediction: 0
Label Encoded Data: [2, 1, 0, 2, 1]
One-Hot Encoded Data:
[[0, 0, 1], [0, 1, 0], [1, 0, 0], [0, 0, 1], [0, 1, 0]]
