In [None]:
import numpy as np
import matplotlib.pyplot as plt
%timeit

class KNNRegressor:
    """Brute-force k-nearest-neighbours regressor using Euclidean distance.

    A prediction is the mean of the targets of the ``k`` training samples
    closest to the query point.
    """

    def __init__(self, k=3):
        """Create a regressor that averages over ``k`` neighbours.

        Raises:
            ValueError: if ``k`` is not a positive integer. A non-positive
                ``k`` would make the ``[:k]`` slice in ``_predict`` select no
                rows (``k == 0``) or silently drop rows from the far end
                (``k < 0``).
        """
        if not isinstance(k, (int, np.integer)) or k < 1:
            raise ValueError(f"k must be a positive integer, got {k!r}")
        self.k = k

    def fit(self, X_train, y_train):
        """Store the training samples and their targets.

        Both arguments are converted with ``np.asarray`` so plain Python
        lists are accepted; arrays are stored without copying.

        Raises:
            ValueError: if the training set is empty or the number of
                samples and targets differ.
        """
        X_train = np.asarray(X_train)
        y_train = np.asarray(y_train)
        if len(X_train) == 0:
            raise ValueError("X_train must contain at least one sample")
        if len(X_train) != len(y_train):
            raise ValueError(
                f"X_train and y_train have different lengths: "
                f"{len(X_train)} != {len(y_train)}"
            )
        self.X_train = X_train
        self.y_train = y_train

    def euclidean_distance(self, x1, x2):
        """Return the Euclidean (L2) distance between two points."""
        return np.sqrt(np.sum((np.asarray(x1) - np.asarray(x2)) ** 2))

    def predict(self, X_test):
        """Predict a target for every sample in ``X_test``.

        Returns:
            A tuple ``(predictions, neighbor_indices)``: a NumPy array with one
            predicted value per test sample, and a list holding, for each test
            sample, the array of training-row indices that were averaged.
        """
        results = [self._predict(x) for x in np.asarray(X_test)]
        predicted_values = np.array([value for value, _ in results])
        neighbor_indices = [indices for _, indices in results]
        return predicted_values, neighbor_indices

    def _predict(self, x):
        """Return ``(mean target, indices)`` of the ``k`` nearest training rows to ``x``."""
        # One distance per training row; going through euclidean_distance keeps
        # the metric in a single place.
        distances = [self.euclidean_distance(x, row) for row in self.X_train]
        # If k exceeds the number of training rows the slice simply returns all of them.
        k_indices = np.argsort(distances)[:self.k]
        k_nearest_targets = np.asarray(self.y_train)[k_indices]
        return np.mean(k_nearest_targets), k_indices

def visualize_regression(X_train, y_train, X_test, y_test, y_pred, nearest_neighbor_indices):
    """Plot training data, test data, KNN predictions and the neighbours used.

    Intended for single-feature data: every X array is plotted against a 1-D
    target on one shared axis.

    Args:
        X_train, y_train: training samples and their targets.
        X_test, y_test: held-out samples and their true targets.
        y_pred: predicted target for each row of ``X_test``.
        nearest_neighbor_indices: iterable of index arrays into the training
            set (one per test sample); a flat sequence of indices also works.
    """
    X_train = np.asarray(X_train)
    y_train = np.asarray(y_train)

    plt.figure(figsize=(10, 6))
    plt.scatter(X_train, y_train, color='blue', label='Training Data')
    plt.scatter(X_test, y_test, color='red', label='Test Data')

    # plt.plot joins points in the order given, so sort by x first; otherwise
    # an unsorted test set produces a zig-zag instead of a curve.
    x_test_flat = np.ravel(X_test)
    order = np.argsort(x_test_flat)
    plt.plot(x_test_flat[order], np.ravel(y_pred)[order], color='green', label='KNN Regression Line')

    # Collapse the per-sample index arrays into one 1-D integer index so the
    # lookup does not depend on how NumPy interprets a list of arrays as an
    # index; np.unique also avoids drawing shared neighbours more than once.
    index_arrays = [np.atleast_1d(idx) for idx in nearest_neighbor_indices]
    if index_arrays:
        neighbor_idx = np.unique(np.concatenate(index_arrays)).astype(int)
    else:
        neighbor_idx = np.empty(0, dtype=int)
    plt.scatter(X_train[neighbor_idx], y_train[neighbor_idx], color='orange', label='Nearest Neighbors')

    plt.title('KNN Regression')
    plt.xlabel('X')
    plt.ylabel('y')
    plt.legend()
    plt.show()

# Synthetic 1-D regression problem: 20 sorted x-values drawn from [0, 5) with a
# noisy sine as the target. The fixed seed makes every draw below reproducible.
np.random.seed(0)
X = np.sort(np.random.rand(20, 1) * 5, axis=0)
y = np.sin(X).ravel() + np.random.normal(0, 0.1, size=len(X))

# Randomise the row order (features and targets together) before splitting
shuffle_idx = np.random.permutation(X.shape[0])
X, y = X[shuffle_idx], y[shuffle_idx]

# First 16 rows are used for training, the remaining 4 are held out
X_train, X_test = np.split(X, [16])
y_train, y_test = np.split(y, [16])

# Two-neighbour model fitted on the training rows
knn_regressor = KNNRegressor(k=2)
knn_regressor.fit(X_train, y_train)

# Predictions for the held-out rows, plus the training indices used for each one
y_pred, nearest_neighbor_indices = knn_regressor.predict(X_test)

# Show data, predictions and the neighbours that produced them
visualize_regression(
    X_train, y_train, X_test, y_test, y_pred, nearest_neighbor_indices
)


In [None]:
# Display the held-out feature rows that were passed to predict()
X_test

In [None]:
# Display the predicted target for each held-out row
y_pred

In [None]:
# Display, per held-out row, the training-set indices returned by predict()
nearest_neighbor_indices

In [None]:
import numpy as np

class Node:
    """A single node of a k-d tree.

    Attributes are set directly from the constructor arguments: the point
    stored at this node, the coordinate axis associated with it, and the
    optional left and right child nodes (``None`` when absent).
    """

    def __init__(self, point, axis, left=None, right=None):
        self.point, self.axis = point, axis
        self.left, self.right = left, right

def build_kdtree(points, depth=0):
    """Recursively build a k-d tree and return its root ``Node``.

    At each level the points are ordered along one coordinate axis
    (``depth % k``, where ``k`` is the dimensionality of the first point); the
    median point becomes the node, the points before it form the left subtree
    and the points after it form the right subtree.

    Args:
        points: sequence of equal-length points (lists, tuples or array rows).
            The sequence is not modified.
        depth: current tree depth; callers normally leave this at 0.

    Returns:
        The root ``Node``, or ``None`` when ``points`` is empty or ``None``.
    """
    # Test the length rather than truthiness so multi-element NumPy arrays,
    # whose truth value is ambiguous, are accepted as well.
    if points is None or len(points) == 0:
        return None

    k = len(points[0])
    axis = depth % k

    # sorted() returns a new list, so the caller's sequence keeps its order
    # (an in-place sort here would reorder the caller's data as a side effect).
    ordered = sorted(points, key=lambda p: p[axis])

    median = len(ordered) // 2
    return Node(
        ordered[median],
        axis,
        left=build_kdtree(ordered[:median], depth + 1),
        right=build_kdtree(ordered[median + 1:], depth + 1),
    )

def distance(point1, point2):
    """Return the Euclidean distance between two points given as coordinate sequences."""
    delta = np.array(point1) - np.array(point2)
    return np.sqrt(np.square(delta).sum())

def nearest_neighbor_search(node, target, depth=0, best=None):
    """Return the point in the k-d tree rooted at ``node`` that is closest to ``target``.

    The search first descends into the side of the splitting plane that
    contains ``target``. It then also visits the other side whenever the
    plane is closer to ``target`` than the best point found so far, because
    only then can that side contain a closer point. Without this second step
    the search can miss the true nearest neighbour.

    Args:
        node: root of the (sub)tree to search, or ``None``.
        target: query point, indexable by axis.
        depth: depth of ``node``; kept for backward compatibility and passed
            down the recursion. The split axis is read from ``node.axis``.
        best: closest point found so far, or ``None``.

    Returns:
        The closest stored point, or ``best`` unchanged when ``node`` is ``None``.
    """
    if node is None:
        return best

    # Keep the current best unless this node is strictly closer.
    if best is None or distance(target, node.point) < distance(target, best):
        best = node.point

    # Signed offset of the target from this node's splitting plane.
    axis = node.axis
    offset = target[axis] - node.point[axis]
    if offset < 0:
        near_branch, far_branch = node.left, node.right
    else:
        near_branch, far_branch = node.right, node.left

    best = nearest_neighbor_search(near_branch, target, depth + 1, best)

    # The far side can only hold a closer point if the splitting plane itself
    # is nearer to the target than the current best.
    if abs(offset) < distance(target, best):
        best = nearest_neighbor_search(far_branch, target, depth + 1, best)

    return best

# Example usage: index six 2-D points, then look up the one closest to `target`
points = [
    (2, 3), (5, 4), (9, 6),
    (4, 7), (8, 1), (7, 2),
]
target = (10, 5)

tree = build_kdtree(points)
nearest = nearest_neighbor_search(tree, target)
print("Nearest neighbor to", target, "is", nearest)


In [None]:
from sklearn.neighbors import KDTree
import numpy as np
import matplotlib.pyplot as plt

# Reproducible toy data: 10 samples with 2 features each
np.random.seed(0)
X_train = np.random.rand(10, 2)

# Index the samples with scikit-learn's KD-tree
kdt = KDTree(X_train, leaf_size=30, metric='euclidean')

# A single query location, kept 2-D (one row, two features)
query_point = np.array([[0.5, 0.5]])

# Ask the tree for the k closest training samples
k = 4
distances, indices = kdt.query(query_point, k=k)

# indices has one row per query point; row 0 belongs to our only query
nearest_neighbors = X_train[indices[0]]
qx, qy = query_point[0]

# Draw everything on one explicit Axes
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(X_train[:, 0], X_train[:, 1], color='blue', label='Training Data')
ax.scatter(qx, qy, color='red', label='Query Point')
ax.scatter(nearest_neighbors[:, 0], nearest_neighbors[:, 1], color='green', label='Nearest Neighbors')

# Dashed segment from the query point to each of its neighbours
for neighbor in nearest_neighbors:
    ax.plot([qx, neighbor[0]], [qy, neighbor[1]], color='gray', linestyle='--')

ax.set_xlabel('Feature 1')
ax.set_ylabel('Feature 2')
ax.set_title('Visualization of KDTree KNN')
ax.legend()
ax.grid(True)
plt.show()
