Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

- A cute BK-Tree logo is added to README file and a small typo fixed.

- `SEARCH-VALUE' and `COLLECT-SEARCH-RESULTS' is superseded by the freshly
  introduced more performant `SEARCH-VALUE'. Now it is possible to issue
  searches with `ORDERED-RESULTS' and/or `ORDERED-TRAVERSAL' options. (Per idea
  and blueprints by Pawel Turnau.)

- Class, condition, and variable definitions moved to a separate "specials.lisp"
  file.

- "util.lisp" is renamed to "utils.lisp".

- And lots of small cosmetic modifications in the code.
  • Loading branch information...
commit f1b173c385fd5b18b051ff68b792b9d5bb366828 1 parent e5b4ae4
@vy authored
View
41 README
@@ -1,4 +1,24 @@
--- '(A . B O U T) --------------------------------------------------------------
+
+ ___ ___
+ / /\ / /\
+ / /::\ / /:/
+ / /:/\:\ / /:/
+ / /::\ \:\ / /::\____
+ /__/:/\:\_\:| /__/:/\:::::\
+ \ \:\ \:\/:/ \__\/~|:|~~~~ ___ ___ ___
+ \ \:\_\::/ | |:| ___ / /\ / /\ / /\
+ \ \:\/:/ | |:| /__/\ / /::\ / /::\ / /::\
+ \__\::/ |__|:| \ \:\ / /:/\:\ / /:/\:\ / /:/\:\
+ ~~ \__\| \__\:\ / /::\ \:\ / /::\ \:\ / /::\ \:\
+ / /::\ /__/:/\:\_\:\ /__/:/\:\ \:\ /__/:/\:\ \:\
+ / /:/\:\ \__\/~|::\/:/ \ \:\ \:\_\/ \ \:\ \:\_\/
+ / /:/__\/ | |:|::/ \ \:\ \:\ \ \:\ \:\
+ /__/:/ | |:|\/ \ \:\_\/ \ \:\_\/
+ \__\/ |__|:|~ \ \:\ \ \:\
+ \__\| \__\/ \__\/
+
+
+-- '( A B O U T ) --------------------------------------------------------------
This program implements a derivative of BK-Tree data structure described in
"Some Approaches to Best-Match File Searching" paper of W. A. Burkhard and
@@ -41,7 +61,7 @@ There is no restriction on the type of the value which will be stored in the
tree, as long as you supply appropriate metric function.
--- '(P . E R F O R M A N C E) --------------------------------------------------
+-- '( P E R F O R M A N C E ) --------------------------------------------------
Here is the results of a detailed test performed using BK-TREE package.
@@ -94,14 +114,16 @@ of this difference (which means no fluctuations in the difference) indicates the
stability of the convergence.
--- '(E . X A M P L E) ----------------------------------------------------------
+-- '( E X A M P L E ) ----------------------------------------------------------
Here is an example about how to used supplied interface.
(defpackage :bk-tree-test (:use :cl :bk-tree))
+
(in-package :bk-tree-test)
(defvar *words* nil)
+
(defvar *tree* (make-instance 'bk-tree))
;; Build *WORDS* list.
@@ -118,9 +140,10 @@ Here is an example about how to used supplied interface.
;; Fill the *TREE*.
(mapc
- (handler-case (insert-value word *tree*)
- (duplicate-value (ctx)
- (format t "Duplicated: ~a~%" (value-of ctx))))
+ (lambda (word)
+ (handler-case (insert-value word *tree*)
+ (duplicate-value (ctx)
+ (format t "Duplicated: ~a~%" (value-of ctx)))))
*words*)
;; Let's see that green tree.
@@ -131,7 +154,7 @@ Here is an example about how to used supplied interface.
(mapc
(lambda (result)
(format t "~a ~a~%" (distance-of result) (value-of result)))
- (collect-search-results "kernel" *tree* :threshold 2)))
+ (search-value "kernel" *tree* :threshold 2)))
;; Test brute levenshtein.
(time
@@ -149,7 +172,7 @@ Here is an example about how to used supplied interface.
do (format t "~a ~a~%" distance value)))
--- '(C . A U T I O N S) --------------------------------------------------------
+-- '( C A U T I O N S ) --------------------------------------------------------
For performance reasons, LEVENSHTEIN function coming with the package has some
limitations both on the input string and penalty costs.
@@ -165,7 +188,7 @@ limitations both on the input string and penalty costs.
Just in case, configure these variables suitable to your needs.
--- '(L . I C E N S E) ----------------------------------------------------------
+-- '( L I C E N S E ) ----------------------------------------------------------
Copyright (c) 2007-2009, Volkan YAZICI <volkan.yazici@gmail.com>
All rights reserved.
View
5 bk-tree.asd
@@ -28,5 +28,6 @@
(asdf:defsystem :bk-tree
:serial t
:components ((:file "packages")
- (:file "util")
- (:file "bk-tree")))
+ (:file "specials")
+ (:file "utils")
+ (:file "bk-tree")))
View
226 bk-tree.lisp
@@ -25,54 +25,32 @@
(in-package :bk-tree)
-(defclass bk-tree ()
- ((distance
- :initform 0
- :initarg :distance
- :type unsigned-byte
- :accessor distance-of
- :documentation "Metric distance between current node and its parent.")
- (value
- :initform nil
- :initarg :value
- :accessor value-of)
- (nodes
- :initform nil
- :initarg nodes
- :type list
- :accessor nodes-of
- :documentation "Nodes collected under this node.")))
-
-(defmethod print-object ((self bk-tree) stream)
- (print-unreadable-object (self stream :type t :identity t)
- (format stream ":DISTANCE ~D :VALUE ~S :NODES ~S"
- (distance-of self)
- (value-of self)
- (mapcar #'value-of (nodes-of self)))))
-
-(defclass search-result ()
- ((distance
- :initarg :distance
- :type unsigned-byte
- :accessor distance-of)
- (value
- :initarg :value
- :accessor value-of)))
-
-(defmethod print-object ((self search-result) stream)
- (print-unreadable-object (self stream :type t :identity t)
- (format stream ":DISTANCE ~D :VALUE ~S"
- (distance-of self)
- (value-of self))))
-
-(define-condition duplicate-value (error)
- ((value
- :initarg :value
- :accessor value-of))
- (:documentation "Signaled upon every duplicated entry insertion."))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;
+;;; Convenient Helper Utilities
+;;;
+
+(defun print-tree (tree &key (stream *standard-output*) (depth 0))
+ "Prints supplied `TREE' in a human-readable(?) format."
+ ;; Print current value first.
+ (format stream "~&")
+ (loop repeat depth do (write-char #\space stream))
+ (format stream "-> (~D) ~A" (distance-of tree) (value-of tree))
+ ;; Iterate across sub-trees, according to their distances from root
+ ;; node.
+ (mapc
+ (lambda (tree) (print-tree tree :stream stream :depth (+ 4 depth)))
+ (sort (copy-list (nodes-of tree)) #'< :key #'distance-of)))
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;
+;;; Tree Operations
+;;;
(defun insert-value (value tree &key (metric #'levenshtein))
- "Inserts given VALUE into supplied TREE."
+ "Inserts given `VALUE' into supplied `TREE'."
(if (null (value-of tree))
;; If tree has no value yet, insert value here.
(setf (value-of tree) value)
@@ -89,94 +67,86 @@
(push (make-instance 'bk-tree :distance distance :value value)
(nodes-of tree)))))))
-(defun search-value (value tree search-exhausted submit-result
- &key (threshold 1) (metric #'levenshtein))
- "Searches given VALUE in the supplied TREE. Calls SUBMIT-RESULT function
-during every found result satisfying supplied THRESHOLD. Finalizes search via
-calling SEARCH-EXHAUSTED when THRESHOLD could not be satisfied anymore."
- (let ((distance (funcall metric value (value-of tree))))
- ;; Check if there is any sub-trees available to search.
- (unless (endp (nodes-of tree))
- (loop for (sub-distance . sub-tree) in
- ;; Sort available sub-trees collected under this
- ;; tree, according to absolute difference between
- ;; DISTANCE and SUB-DISTANCE.
- (sort (mapcar
- (lambda (sub-tree)
- ;; (SUB-DISTANCE . SUB-TREE) pairs.
- (cons (abs (- distance (distance-of sub-tree)))
- sub-tree))
- (nodes-of tree))
- #'<
- :key #'car)
- ;; Scan sub-trees.
- while (<= sub-distance threshold)
- do (block sub-tree-search
- (flet ((sub-search-exhausted ()
- (return-from sub-tree-search)))
- (search-value value
- sub-tree
- #'sub-search-exhausted
- submit-result
- :threshold threshold
- :metric metric)))))
- ;; After scanning sub-trees, if appropriate, submit this one.
- (if (<= distance threshold)
- ;; Submit current node.
- (funcall submit-result distance (value-of tree))
- ;; If threshold limit is not reached, then search is exhausted.
- (funcall search-exhausted))))
-
-(defun collect-search-results (value tree &rest keys &key (limit 50) &allow-other-keys)
- "Convenient function to search supplied VALUE in the specified TREE. Function
-returns a list of SEARCH-RESULT objects."
- (let ((count 0)
- results)
- (labels ((search-exhausted ()
- "Return from the outmost stack with collected results so far."
- (return-from collect-search-results (nreverse results)))
- (submit-result (distance value)
- "Submit a found result."
- (push
- (make-instance 'search-result :distance distance :value value)
- results)
- (incf count)
- (if (not (< count limit))
- (search-exhausted))))
- (apply #'search-value
- value tree #'search-exhausted #'submit-result
- :allow-other-keys t keys)
- (search-exhausted))))
-
-(defun print-tree (tree &key (stream *standard-output*) (depth 0))
- "Prints supplied BK-TREE in a human-readable format."
- ;; Print current value first.
- (format stream "~&")
- (loop repeat depth do (write-char #\space stream))
- (format stream "-> (~d) ~a" (distance-of tree) (value-of tree))
- ;; Iterate across sub-trees, according to their distances from root
- ;; node.
- (mapc
- (lambda (tree) (print-tree tree :stream stream :depth (+ 4 depth)))
- (sort (copy-list (nodes-of tree)) #'< :key #'distance-of)))
+(defun search-value (value tree &key (threshold 1) (metric #'levenshtein)
+ limit ordered-results ordered-traversal)
+ "Return a list of `SEARCH-RESULT' instances built from `TREE' and its children
+whose value is no more distant from `VALUE' than `THRESHOLD', using `METRIC' to
+measure the distance.
+
+If `LIMIT' is non-NIL, given number of first found results will be
+returned.
+
+If `ORDERED-RESULTS' is non-NIL, returned results will be ordered according to
+their distances from `VALUES'.
+
+If `ORDERED-TRAVERSAL' is non-NIL, candidates in a level (e.g. children of a
+validated node) will be traversed in sorted order according to the absolute
+difference between the parent distance and child distance -- the (probably) more
+similar is first."
+ (when (and tree (value-of tree))
+ (let ((results)
+ (count 0))
+ (labels ((%search (tree)
+ (unless (and limit (< limit count))
+ (let ((distance (funcall metric value (value-of tree))))
+ ;; Try to submit this result first.
+ (unless (< threshold distance)
+ (push (make-instance 'search-result
+ :distance distance
+ :value (value-of tree))
+ results)
+ (incf count))
+ ;; Scan children.
+ (if ordered-traversal
+ ;; Sort available sub-trees collected under
+ ;; this tree, according to absolute difference
+ ;; between DISTANCE and SUB-DISTANCE.
+ (loop for (sub-distance . sub-tree)
+ in (sort
+ ;; (SUB-DISTANCE . SUB-TREE) pairs.
+ (mapcar
+ (lambda (sub-tree)
+ (cons (abs (- distance
+ (distance-of sub-tree)))
+ sub-tree))
+ (nodes-of tree))
+ #'< :key #'car)
+ until (or (< threshold sub-distance)
+ (and limit (< limit count)))
+ do (%search sub-tree))
+ ;; Or scan children without an ordering.
+ (loop for sub-tree in (nodes-of tree)
+ until (and limit (< limit count))
+ do (%search sub-tree)))))))
+ (%search tree))
+ (if ordered-results
+ (sort results #'< :key #'distance-of)
+ results))))
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;
+;;; Tree Statistics Routines
+;;;
(defun maximum-depth (tree)
- "Returns maximum depth of the TREE."
- (labels ((scan (tree depth)
+ "Returns maximum depth of the `TREE'."
+ (labels ((%scan (tree depth)
(max depth
(loop for node in (nodes-of tree)
- maximize (scan node (1+ depth))))))
- (scan tree 0)))
+ maximize (%scan node (1+ depth))))))
+ (%scan tree 0)))
(defun average-children-count (tree)
- "Returns average children count per node of the supplied TREE."
+ "Returns average children count per node of the supplied `TREE'."
(let ((n-children 0)
- (n-node 0))
- (labels ((scan (tree)
+ (n-nodes 0))
+ (labels ((%scan (tree)
;; Count children, unless this is a leaf node.
(unless (null (nodes-of tree))
- (incf n-node)
- (incf n-children (length (nodes-of tree)))
- (mapc #'scan (nodes-of tree)))))
- (scan tree))
- (if (zerop n-node) 0 (/ n-children n-node))))
+ (incf n-nodes)
+ (dolist (node (nodes-of tree))
+ (incf n-children)
+ (%scan node)))))
+ (%scan tree))
+ (if (zerop n-nodes) 0 (/ n-children n-nodes))))
View
7 packages.lisp
@@ -25,6 +25,12 @@
(in-package :cl-user)
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;
+;;; Package Definitions
+;;;
+
(defpackage :bk-tree
(:use :cl)
(:export :bk-tree
@@ -35,7 +41,6 @@
:search-result
:insert-value
:search-value
- :collect-search-results
:print-tree
:maximum-depth
:average-children-count
View
53 specials.lisp
@@ -0,0 +1,53 @@
+(in-package :bk-tree)
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;
+;;; Common Class, Condition & Variable Definitions
+;;;
+
+(defclass bk-tree ()
+ ((distance
+ :initform 0
+ :initarg :distance
+ :type unsigned-byte
+ :accessor distance-of
+ :documentation "Metric distance between current node and its parent.")
+ (value
+ :initform nil
+ :initarg :value
+ :accessor value-of)
+ (nodes
+ :initform nil
+ :initarg nodes
+ :type list
+ :accessor nodes-of
+ :documentation "Nodes collected under this node.")))
+
+(defmethod print-object ((self bk-tree) stream)
+ (print-unreadable-object (self stream :type t :identity t)
+ (format stream ":DISTANCE ~D :VALUE ~S :NODES ~S"
+ (distance-of self)
+ (value-of self)
+ (mapcar #'value-of (nodes-of self)))))
+
+(defclass search-result ()
+ ((distance
+ :initarg :distance
+ :type unsigned-byte
+ :accessor distance-of)
+ (value
+ :initarg :value
+ :accessor value-of)))
+
+(defmethod print-object ((self search-result) stream)
+ (print-unreadable-object (self stream :type t :identity t)
+ (format stream ":DISTANCE ~D :VALUE ~S"
+ (distance-of self)
+ (value-of self))))
+
+(define-condition duplicate-value (error)
+ ((value
+ :initarg :value
+ :accessor value-of))
+ (:documentation "Signaled upon every duplicated entry insertion."))
View
8 util.lisp → utils.lisp
@@ -25,6 +25,12 @@
(in-package :bk-tree)
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;
+;;; Levenshtein Metric Related Definitions
+;;;
+
(deftype levenshtein-cost ()
"Available penalty costs."
'(integer 0 7))
@@ -74,4 +80,4 @@
finally (rotatef prev curr)))
;; Because the final value was swapped from the previous row to the
;; current row, that's where we'll find it.
- (elt prev (1- ls)))))))
+ (elt prev (1- ls)))))))
Please sign in to comment.
Something went wrong with that request. Please try again.