Skip to content

Commit

Permalink
- A cute BK-Tree logo is added to README file and a small typo fixed.
Browse files Browse the repository at this point in the history
- `SEARCH-VALUE' and `COLLECT-SEARCH-RESULTS' is superseded by the freshly
  introduced more performant `SEARCH-VALUE'. Now it is possible to issue
  searches with `ORDERED-RESULTS' and/or `ORDERED-TRAVERSAL' options. (Per idea
  and blueprints by Pawel Turnau.)

- Class, condition, and variable definitions moved to a separate "specials.lisp"
  file.

- "util.lisp" is renamed to "utils.lisp".

- And lots of small cosmetic modifications in the code.
  • Loading branch information
vy committed Apr 7, 2009
1 parent e5b4ae4 commit f1b173c
Show file tree
Hide file tree
Showing 6 changed files with 199 additions and 141 deletions.
41 changes: 32 additions & 9 deletions README
@@ -1,4 +1,24 @@
-- '(A . B O U T) --------------------------------------------------------------

___ ___
/ /\ / /\
/ /::\ / /:/
/ /:/\:\ / /:/
/ /::\ \:\ / /::\____
/__/:/\:\_\:| /__/:/\:::::\
\ \:\ \:\/:/ \__\/~|:|~~~~ ___ ___ ___
\ \:\_\::/ | |:| ___ / /\ / /\ / /\
\ \:\/:/ | |:| /__/\ / /::\ / /::\ / /::\
\__\::/ |__|:| \ \:\ / /:/\:\ / /:/\:\ / /:/\:\
~~ \__\| \__\:\ / /::\ \:\ / /::\ \:\ / /::\ \:\
/ /::\ /__/:/\:\_\:\ /__/:/\:\ \:\ /__/:/\:\ \:\
/ /:/\:\ \__\/~|::\/:/ \ \:\ \:\_\/ \ \:\ \:\_\/
/ /:/__\/ | |:|::/ \ \:\ \:\ \ \:\ \:\
/__/:/ | |:|\/ \ \:\_\/ \ \:\_\/
\__\/ |__|:|~ \ \:\ \ \:\
\__\| \__\/ \__\/


-- '( A B O U T ) --------------------------------------------------------------

This program implements a derivative of BK-Tree data structure described in
"Some Approaches to Best-Match File Searching" paper of W. A. Burkhard and
Expand Down Expand Up @@ -41,7 +61,7 @@ There is no restriction on the type of the value which will be stored in the
tree, as long as you supply appropriate metric function.


-- '(P . E R F O R M A N C E) --------------------------------------------------
-- '( P E R F O R M A N C E ) --------------------------------------------------

Here is the results of a detailed test performed using BK-TREE package.

Expand Down Expand Up @@ -94,14 +114,16 @@ of this difference (which means no fluctuations in the difference) indicates the
stability of the convergence.


-- '(E . X A M P L E) ----------------------------------------------------------
-- '( E X A M P L E ) ----------------------------------------------------------

Here is an example about how to used supplied interface.

(defpackage :bk-tree-test (:use :cl :bk-tree))

(in-package :bk-tree-test)

(defvar *words* nil)

(defvar *tree* (make-instance 'bk-tree))

;; Build *WORDS* list.
Expand All @@ -118,9 +140,10 @@ Here is an example about how to used supplied interface.

;; Fill the *TREE*.
(mapc
(handler-case (insert-value word *tree*)
(duplicate-value (ctx)
(format t "Duplicated: ~a~%" (value-of ctx))))
(lambda (word)
(handler-case (insert-value word *tree*)
(duplicate-value (ctx)
(format t "Duplicated: ~a~%" (value-of ctx)))))
*words*)

;; Let's see that green tree.
Expand All @@ -131,7 +154,7 @@ Here is an example about how to used supplied interface.
(mapc
(lambda (result)
(format t "~a ~a~%" (distance-of result) (value-of result)))
(collect-search-results "kernel" *tree* :threshold 2)))
(search-value "kernel" *tree* :threshold 2)))

;; Test brute levenshtein.
(time
Expand All @@ -149,7 +172,7 @@ Here is an example about how to used supplied interface.
do (format t "~a ~a~%" distance value)))


-- '(C . A U T I O N S) --------------------------------------------------------
-- '( C A U T I O N S ) --------------------------------------------------------

For performance reasons, LEVENSHTEIN function coming with the package has some
limitations both on the input string and penalty costs.
Expand All @@ -165,7 +188,7 @@ limitations both on the input string and penalty costs.
Just in case, configure these variables suitable to your needs.


-- '(L . I C E N S E) ----------------------------------------------------------
-- '( L I C E N S E ) ----------------------------------------------------------

Copyright (c) 2007-2009, Volkan YAZICI <volkan.yazici@gmail.com>
All rights reserved.
Expand Down
5 changes: 3 additions & 2 deletions bk-tree.asd
Expand Up @@ -28,5 +28,6 @@
(asdf:defsystem :bk-tree
:serial t
:components ((:file "packages")
(:file "util")
(:file "bk-tree")))
(:file "specials")
(:file "utils")
(:file "bk-tree")))
226 changes: 98 additions & 128 deletions bk-tree.lisp
Expand Up @@ -25,54 +25,32 @@

(in-package :bk-tree)

(defclass bk-tree ()
((distance
:initform 0
:initarg :distance
:type unsigned-byte
:accessor distance-of
:documentation "Metric distance between current node and its parent.")
(value
:initform nil
:initarg :value
:accessor value-of)
(nodes
:initform nil
:initarg nodes
:type list
:accessor nodes-of
:documentation "Nodes collected under this node.")))

(defmethod print-object ((self bk-tree) stream)
(print-unreadable-object (self stream :type t :identity t)
(format stream ":DISTANCE ~D :VALUE ~S :NODES ~S"
(distance-of self)
(value-of self)
(mapcar #'value-of (nodes-of self)))))

(defclass search-result ()
((distance
:initarg :distance
:type unsigned-byte
:accessor distance-of)
(value
:initarg :value
:accessor value-of)))

(defmethod print-object ((self search-result) stream)
(print-unreadable-object (self stream :type t :identity t)
(format stream ":DISTANCE ~D :VALUE ~S"
(distance-of self)
(value-of self))))

(define-condition duplicate-value (error)
((value
:initarg :value
:accessor value-of))
(:documentation "Signaled upon every duplicated entry insertion."))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; Convenient Helper Utilities
;;;

(defun print-tree (tree &key (stream *standard-output*) (depth 0))
"Prints supplied `TREE' in a human-readable(?) format."
;; Print current value first.
(format stream "~&")
(loop repeat depth do (write-char #\space stream))
(format stream "-> (~D) ~A" (distance-of tree) (value-of tree))
;; Iterate across sub-trees, according to their distances from root
;; node.
(mapc
(lambda (tree) (print-tree tree :stream stream :depth (+ 4 depth)))
(sort (copy-list (nodes-of tree)) #'< :key #'distance-of)))


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; Tree Operations
;;;

(defun insert-value (value tree &key (metric #'levenshtein))
"Inserts given VALUE into supplied TREE."
"Inserts given `VALUE' into supplied `TREE'."
(if (null (value-of tree))
;; If tree has no value yet, insert value here.
(setf (value-of tree) value)
Expand All @@ -89,94 +67,86 @@
(push (make-instance 'bk-tree :distance distance :value value)
(nodes-of tree)))))))

(defun search-value (value tree search-exhausted submit-result
&key (threshold 1) (metric #'levenshtein))
"Searches given VALUE in the supplied TREE. Calls SUBMIT-RESULT function
during every found result satisfying supplied THRESHOLD. Finalizes search via
calling SEARCH-EXHAUSTED when THRESHOLD could not be satisfied anymore."
(let ((distance (funcall metric value (value-of tree))))
;; Check if there is any sub-trees available to search.
(unless (endp (nodes-of tree))
(loop for (sub-distance . sub-tree) in
;; Sort available sub-trees collected under this
;; tree, according to absolute difference between
;; DISTANCE and SUB-DISTANCE.
(sort (mapcar
(lambda (sub-tree)
;; (SUB-DISTANCE . SUB-TREE) pairs.
(cons (abs (- distance (distance-of sub-tree)))
sub-tree))
(nodes-of tree))
#'<
:key #'car)
;; Scan sub-trees.
while (<= sub-distance threshold)
do (block sub-tree-search
(flet ((sub-search-exhausted ()
(return-from sub-tree-search)))
(search-value value
sub-tree
#'sub-search-exhausted
submit-result
:threshold threshold
:metric metric)))))
;; After scanning sub-trees, if appropriate, submit this one.
(if (<= distance threshold)
;; Submit current node.
(funcall submit-result distance (value-of tree))
;; If threshold limit is not reached, then search is exhausted.
(funcall search-exhausted))))

(defun collect-search-results (value tree &rest keys &key (limit 50) &allow-other-keys)
"Convenient function to search supplied VALUE in the specified TREE. Function
returns a list of SEARCH-RESULT objects."
(let ((count 0)
results)
(labels ((search-exhausted ()
"Return from the outmost stack with collected results so far."
(return-from collect-search-results (nreverse results)))
(submit-result (distance value)
"Submit a found result."
(push
(make-instance 'search-result :distance distance :value value)
results)
(incf count)
(if (not (< count limit))
(search-exhausted))))
(apply #'search-value
value tree #'search-exhausted #'submit-result
:allow-other-keys t keys)
(search-exhausted))))

(defun print-tree (tree &key (stream *standard-output*) (depth 0))
"Prints supplied BK-TREE in a human-readable format."
;; Print current value first.
(format stream "~&")
(loop repeat depth do (write-char #\space stream))
(format stream "-> (~d) ~a" (distance-of tree) (value-of tree))
;; Iterate across sub-trees, according to their distances from root
;; node.
(mapc
(lambda (tree) (print-tree tree :stream stream :depth (+ 4 depth)))
(sort (copy-list (nodes-of tree)) #'< :key #'distance-of)))
(defun search-value (value tree &key (threshold 1) (metric #'levenshtein)
limit ordered-results ordered-traversal)
"Return a list of `SEARCH-RESULT' instances built from `TREE' and its children
whose value is no more distant from `VALUE' than `THRESHOLD', using `METRIC' to
measure the distance.
If `LIMIT' is non-NIL, given number of first found results will be
returned.
If `ORDERED-RESULTS' is non-NIL, returned results will be ordered according to
their distances from `VALUES'.
If `ORDERED-TRAVERSAL' is non-NIL, candidates in a level (e.g. children of a
validated node) will be traversed in sorted order according to the absolute
difference between the parent distance and child distance -- the (probably) more
similar is first."
(when (and tree (value-of tree))
(let ((results)
(count 0))
(labels ((%search (tree)
(unless (and limit (< limit count))
(let ((distance (funcall metric value (value-of tree))))
;; Try to submit this result first.
(unless (< threshold distance)
(push (make-instance 'search-result
:distance distance
:value (value-of tree))
results)
(incf count))
;; Scan children.
(if ordered-traversal
;; Sort available sub-trees collected under
;; this tree, according to absolute difference
;; between DISTANCE and SUB-DISTANCE.
(loop for (sub-distance . sub-tree)
in (sort
;; (SUB-DISTANCE . SUB-TREE) pairs.
(mapcar
(lambda (sub-tree)
(cons (abs (- distance
(distance-of sub-tree)))
sub-tree))
(nodes-of tree))
#'< :key #'car)
until (or (< threshold sub-distance)
(and limit (< limit count)))
do (%search sub-tree))
;; Or scan children without an ordering.
(loop for sub-tree in (nodes-of tree)
until (and limit (< limit count))
do (%search sub-tree)))))))
(%search tree))
(if ordered-results
(sort results #'< :key #'distance-of)
results))))


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; Tree Statistics Routines
;;;

(defun maximum-depth (tree)
"Returns maximum depth of the TREE."
(labels ((scan (tree depth)
"Returns maximum depth of the `TREE'."
(labels ((%scan (tree depth)
(max depth
(loop for node in (nodes-of tree)
maximize (scan node (1+ depth))))))
(scan tree 0)))
maximize (%scan node (1+ depth))))))
(%scan tree 0)))

(defun average-children-count (tree)
"Returns average children count per node of the supplied TREE."
"Returns average children count per node of the supplied `TREE'."
(let ((n-children 0)
(n-node 0))
(labels ((scan (tree)
(n-nodes 0))
(labels ((%scan (tree)
;; Count children, unless this is a leaf node.
(unless (null (nodes-of tree))
(incf n-node)
(incf n-children (length (nodes-of tree)))
(mapc #'scan (nodes-of tree)))))
(scan tree))
(if (zerop n-node) 0 (/ n-children n-node))))
(incf n-nodes)
(dolist (node (nodes-of tree))
(incf n-children)
(%scan node)))))
(%scan tree))
(if (zerop n-nodes) 0 (/ n-children n-nodes))))
7 changes: 6 additions & 1 deletion packages.lisp
Expand Up @@ -25,6 +25,12 @@

(in-package :cl-user)


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; Package Definitions
;;;

(defpackage :bk-tree
(:use :cl)
(:export :bk-tree
Expand All @@ -35,7 +41,6 @@
:search-result
:insert-value
:search-value
:collect-search-results
:print-tree
:maximum-depth
:average-children-count
Expand Down

0 comments on commit f1b173c

Please sign in to comment.