# kdTree 

In [28]:
from collections import namedtuple
from operator import itemgetter
from pprint import pformat
import numpy as np
import math

def distance(x, y, p=2):
    """Return the Minkowski distance of order ``p`` between ``x`` and ``y``.

    With the default ``p=2`` this is the Euclidean distance, e.g.
    ``distance([1, 1], [5, 1])`` is ``4.0``.

    Args:
        x: Sequence of numeric coordinates.
        y: Sequence of numeric coordinates with the same length as ``x``.
        p: Order of the norm (1 = Manhattan, 2 = Euclidean).

    Returns:
        The distance as a float; ``0.0`` when both sequences are empty.

    Raises:
        ValueError: If ``x`` and ``y`` have different lengths.
    """
    if len(x) != len(y):
        # A silent 0 here would look like a perfect match to any caller
        # ranking candidates by distance, so fail loudly instead.
        raise ValueError(
            "dimension mismatch: len(x)=%d, len(y)=%d" % (len(x), len(y))
        )

    # Explicit accumulation (rather than the builtin sum) keeps the result
    # independent of the interpreter's float-summation strategy.
    total = 0
    for a, b in zip(x, y):
        total += math.pow(abs(a - b), p)
    return math.pow(total, 1 / p)
    
class Node(namedtuple('Node', ['location', 'left_child', 'right_child'])):
    """One k-d tree node: a stored point plus its two subtrees.

    Fields:
        location: the point stored at this node.
        left_child: left subtree (a ``Node``) or ``None``.
        right_child: right subtree (a ``Node``) or ``None``.
    """

    def __repr__(self):
        # Render as a plain (location, left, right) tuple; pformat wraps
        # the text across lines once it gets too wide.
        as_plain_tuple = tuple(self)
        return pformat(as_plain_tuple)
    
class kdTree():
    """A k-d tree over a fixed set of points with nearest-neighbour lookup.

    Attributes:
        tree: root ``Node`` of the tree, or ``None`` when built from no points.
        k: number of coordinates per point (taken from the first point),
            or ``None`` when built from no points.
        best: ``(distance, location)`` result of the most recent
            ``find_nearest`` call, or ``None`` if there is none.
    """

    def __init__(self, points):
        """Build the tree from ``points``.

        Args:
            points: sequence of points; each point is an indexable sequence
                of coordinates. All points are assumed to have the same
                length as the first one. The sequence is not modified.
        """
        self.tree = self._make_kdtree(points)
        if len(points) > 0:
            self.k = len(points[0])
        else:
            self.k = None
        self.best = None

    def _make_kdtree(self, points, depth=0):
        """Recursively build the subtree for ``points``.

        The splitting axis cycles through the coordinates with ``depth``;
        the median point along that axis becomes the node, the points
        before it form the left subtree and the points after it the right.

        Returns:
            The root ``Node`` of the subtree, or ``None`` for no points.
        """
        # len() instead of truthiness so array-like inputs, whose truth
        # value may be ambiguous, are accepted as well as lists.
        if len(points) == 0:
            return None

        k = len(points[0])
        axis = depth % k

        # sorted() returns a new list, so the caller's sequence keeps its
        # original order (an in-place sort would reorder the caller's data).
        ordered = sorted(points, key=itemgetter(axis))
        median = len(ordered) // 2

        return Node(location=ordered[median],
                    left_child=self._make_kdtree(ordered[:median], depth + 1),
                    right_child=self._make_kdtree(ordered[median + 1:], depth + 1))

    def find_nearest(self, point, root=None, axis=0):
        """Find the stored point closest to ``point``.

        Args:
            point: query coordinates, indexable by axis number.
            root: node to start the search from; defaults to the tree root.
            axis: splitting axis of ``root`` (0 for the tree root).

        Returns:
            ``(distance, location)`` for the nearest stored point, or
            ``None`` when the tree is empty. The same value is also stored
            in ``self.best``.
        """
        if root is None:
            root = self.tree
        if root is None:
            # Empty tree: nothing to compare against.
            self.best = None
            return None

        # Every public call starts from a clean slate so a previous query
        # can never leak into this one.
        self.best = self._search(point, root, axis, None)
        return self.best

    def _search(self, point, node, axis, best):
        """Return the best ``(distance, location)`` among ``best`` and the subtree at ``node``."""
        diff = point[axis] - node.location[axis]
        if diff < 0:
            near, far = node.left_child, node.right_child
        else:
            near, far = node.right_child, node.left_child
        next_axis = (axis + 1) % self.k

        # Descend first into the side of the splitting plane that holds the query.
        if near is not None:
            best = self._search(point, near, next_axis, best)

        # Backtrack: consider this node itself.
        dist = distance(node.location, point)
        if best is None or dist < best[0]:
            best = (dist, node.location)

        # The far side can only contain a closer point if the splitting
        # plane is nearer to the query than the best match found so far.
        if far is not None and abs(diff) < best[0]:
            best = self._search(point, far, next_axis, best)

        return best

In [29]:
# Six sample 2-D points used to build the tree.
data = [
    [2, 3], [5, 4], [9, 6],
    [4, 7], [8, 1], [7, 2],
]
kd = kdTree(data)

# Look up the stored point closest to (3, 4.5) and print the
# (distance, point) pair returned by the search.
point = np.array([3, 4.5])
print(kd.find_nearest(point))


(1.8027756377319946, [2, 3])
