//---------------------------------------------------------------------- // File: bd_tree.cpp // Programmer: David Mount // Description: Basic methods for bd-trees. // Last modified: 01/04/05 (Version 1.0) //---------------------------------------------------------------------- // Copyright (c) 1997-2005 University of Maryland and Sunil Arya and // David Mount. All Rights Reserved. // // This software and related documentation is part of the Approximate // Nearest Neighbor Library (ANN). This software is provided under // the provisions of the Lesser GNU Public License (LGPL). See the // file ../ReadMe.txt for further information. // // The University of Maryland (U.M.) and the authors make no // representations about the suitability or fitness of this software for // any purpose. It is provided "as is" without express or implied // warranty. //---------------------------------------------------------------------- // History: // Revision 0.1 03/04/98 // Initial release // Revision l.0 04/01/05 // Fixed centroid shrink threshold condition to depend on the // dimension. // Moved dump routine to kd_dump.cpp. //---------------------------------------------------------------------- #include "bd_tree.hpp" // bd-tree declarations #include "kd_util.hpp" // kd-tree utilities #include "kd_split.hpp" // kd-tree splitting rules #include "ann_perf.hpp" // performance evaluation //---------------------------------------------------------------------- // Printing a bd-tree // These routines print a bd-tree. See the analogous procedure // in kd_tree.cpp for more information. //---------------------------------------------------------------------- void ANNbd_shrink::print( // print shrinking node int level, // depth of node in tree ostream &out) // output stream { child[ANN_OUT]->print(level+1, out); // print out-child out << " "; for (int i = 0; i < level; i++) // print indentation out << ".."; out << "Shrink"; for (int j = 0; j < n_bnds; j++) { // print sides, 2 per line if (j % 2 == 0) { out << "\n"; // newline and indentation for (int i = 0; i < level+2; i++) out << " "; } out << " ([" << bnds[j].cd << "]" << (bnds[j].sd > 0 ? ">=" : "< ") << bnds[j].cv << ")"; } out << "\n"; child[ANN_IN]->print(level+1, out); // print in-child } //---------------------------------------------------------------------- // kd_tree statistics utility (for performance evaluation) // This routine computes various statistics information for // shrinking nodes. See file kd_tree.cpp for more information. //---------------------------------------------------------------------- void ANNbd_shrink::getStats( // get subtree statistics int dim, // dimension of space ANNkdStats &st, // stats (modified) ANNorthRect &bnd_box) // bounding box { ANNkdStats ch_stats; // stats for children ANNorthRect inner_box(dim); // inner box of shrink annBnds2Box(bnd_box, // enclosing box dim, // dimension n_bnds, // number of bounds bnds, // bounds array inner_box); // inner box (modified) // get stats for inner child ch_stats.reset(); // reset child[ANN_IN]->getStats(dim, ch_stats, inner_box); st.merge(ch_stats); // merge them // get stats for outer child ch_stats.reset(); // reset child[ANN_OUT]->getStats(dim, ch_stats, bnd_box); st.merge(ch_stats); // merge them st.depth++; // increment depth st.n_shr++; // increment number of shrinks } //---------------------------------------------------------------------- // bd-tree constructor // This is the main constructor for bd-trees given a set of points. // It first builds a skeleton kd-tree as a basis, then computes the // bounding box of the data points, and then invokes rbd_tree() to // actually build the tree, passing it the appropriate splitting // and shrinking information. //---------------------------------------------------------------------- ANNkd_ptr rbd_tree( // recursive construction of bd-tree ANNpointArray pa, // point array ANNidxArray pidx, // point indices to store in subtree int n, // number of points int dim, // dimension of space int bsp, // bucket space ANNorthRect &bnd_box, // bounding box for current node ANNkd_splitter splitter, // splitting routine ANNshrinkRule shrink); // shrinking rule ANNbd_tree::ANNbd_tree( // construct from point array ANNpointArray pa, // point array (with at least n pts) int n, // number of points int dd, // dimension int bs, // bucket size ANNsplitRule split, // splitting rule ANNshrinkRule shrink) // shrinking rule : ANNkd_tree(n, dd, bs) // build skeleton base tree { pts = pa; // where the points are if (n == 0) return; // no points--no sweat ANNorthRect bnd_box(dd); // bounding box for points // construct bounding rectangle annEnclRect(pa, pidx, n, dd, bnd_box); // copy to tree structure bnd_box_lo = annCopyPt(dd, bnd_box.lo); bnd_box_hi = annCopyPt(dd, bnd_box.hi); switch (split) { // build by rule case ANN_KD_STD: // standard kd-splitting rule root = rbd_tree(pa, pidx, n, dd, bs, bnd_box, kd_split, shrink); break; case ANN_KD_MIDPT: // midpoint split root = rbd_tree(pa, pidx, n, dd, bs, bnd_box, midpt_split, shrink); break; case ANN_KD_SUGGEST: // best (in our opinion) case ANN_KD_SL_MIDPT: // sliding midpoint split root = rbd_tree(pa, pidx, n, dd, bs, bnd_box, sl_midpt_split, shrink); break; case ANN_KD_FAIR: // fair split root = rbd_tree(pa, pidx, n, dd, bs, bnd_box, fair_split, shrink); break; case ANN_KD_SL_FAIR: // sliding fair split root = rbd_tree(pa, pidx, n, dd, bs, bnd_box, sl_fair_split, shrink); break; default: annError("Illegal splitting method", ANNabort); } } //---------------------------------------------------------------------- // Shrinking rules //---------------------------------------------------------------------- enum ANNdecomp {SPLIT, SHRINK}; // decomposition methods //---------------------------------------------------------------------- // trySimpleShrink - Attempt a simple shrink // // We compute the tight bounding box of the points, and compute // the 2*dim ``gaps'' between the sides of the tight box and the // bounding box. If any of the gaps is large enough relative to // the longest side of the tight bounding box, then we shrink // all sides whose gaps are large enough. (The reason for // comparing against the tight bounding box, is that after // shrinking the longest box size will decrease, and if we use // the standard bounding box, we may decide to shrink twice in // a row. Since the tight box is fixed, we cannot shrink twice // consecutively.) //---------------------------------------------------------------------- const float BD_GAP_THRESH = 0.5; // gap threshold (must be < 1) const int BD_CT_THRESH = 2; // min number of shrink sides ANNdecomp trySimpleShrink( // try a simple shrink ANNpointArray pa, // point array ANNidxArray pidx, // point indices to store in subtree int n, // number of points int dim, // dimension of space const ANNorthRect &bnd_box, // current bounding box ANNorthRect &inner_box) // inner box if shrinking (returned) { int i; // compute tight bounding box annEnclRect(pa, pidx, n, dim, inner_box); ANNcoord max_length = 0; // find longest box side for (i = 0; i < dim; i++) { ANNcoord length = inner_box.hi[i] - inner_box.lo[i]; if (length > max_length) { max_length = length; } } int shrink_ct = 0; // number of sides we shrunk for (i = 0; i < dim; i++) { // select which sides to shrink // gap between boxes ANNcoord gap_hi = bnd_box.hi[i] - inner_box.hi[i]; // big enough gap to shrink? if (gap_hi < max_length*BD_GAP_THRESH) inner_box.hi[i] = bnd_box.hi[i]; // no - expand else shrink_ct++; // yes - shrink this side // repeat for high side ANNcoord gap_lo = inner_box.lo[i] - bnd_box.lo[i]; if (gap_lo < max_length*BD_GAP_THRESH) inner_box.lo[i] = bnd_box.lo[i]; // no - expand else shrink_ct++; // yes - shrink this side } if (shrink_ct >= BD_CT_THRESH) // did we shrink enough sides? return SHRINK; else return SPLIT; } //---------------------------------------------------------------------- // tryCentroidShrink - Attempt a centroid shrink // // We repeatedly apply the splitting rule, always to the larger subset // of points, until the number of points decreases by the constant // fraction BD_FRACTION. If this takes more than dim*BD_MAX_SPLIT_FAC // splits for this to happen, then we shrink to the final inner box // Otherwise we split. //---------------------------------------------------------------------- const float BD_MAX_SPLIT_FAC = 0.5; // maximum number of splits allowed const float BD_FRACTION = 0.5; // ...to reduce points by this fraction // ...This must be < 1. ANNdecomp tryCentroidShrink( // try a centroid shrink ANNpointArray pa, // point array ANNidxArray pidx, // point indices to store in subtree int n, // number of points int dim, // dimension of space const ANNorthRect &bnd_box, // current bounding box ANNkd_splitter splitter, // splitting procedure ANNorthRect &inner_box) // inner box if shrinking (returned) { int n_sub = n; // number of points in subset int n_goal = (int) (n*BD_FRACTION); // number of point in goal int n_splits = 0; // number of splits needed // initialize inner box to bounding box annAssignRect(dim, inner_box, bnd_box); while (n_sub > n_goal) { // keep splitting until goal reached int cd; // cut dim from splitter (ignored) ANNcoord cv; // cut value from splitter (ignored) int n_lo; // number of points on low side // invoke splitting procedure (*splitter)(pa, pidx, inner_box, n_sub, dim, cd, cv, n_lo); n_splits++; // increment split count if (n_lo >= n_sub/2) { // most points on low side inner_box.hi[cd] = cv; // collapse high side n_sub = n_lo; // recurse on lower points } else { // most points on high side inner_box.lo[cd] = cv; // collapse low side pidx += n_lo; // recurse on higher points n_sub -= n_lo; } } if (n_splits > dim*BD_MAX_SPLIT_FAC)// took too many splits return SHRINK; // shrink to final subset else return SPLIT; } //---------------------------------------------------------------------- // selectDecomp - select which decomposition to use //---------------------------------------------------------------------- ANNdecomp selectDecomp( // select decomposition method ANNpointArray pa, // point array ANNidxArray pidx, // point indices to store in subtree int n, // number of points int dim, // dimension of space const ANNorthRect &bnd_box, // current bounding box ANNkd_splitter splitter, // splitting procedure ANNshrinkRule shrink, // shrinking rule ANNorthRect &inner_box) // inner box if shrinking (returned) { ANNdecomp decomp = SPLIT; // decomposition switch (shrink) { // check shrinking rule case ANN_BD_NONE: // no shrinking allowed decomp = SPLIT; break; case ANN_BD_SUGGEST: // author's suggestion case ANN_BD_SIMPLE: // simple shrink decomp = trySimpleShrink( pa, pidx, // points and indices n, dim, // number of points and dimension bnd_box, // current bounding box inner_box); // inner box if shrinking (returned) break; case ANN_BD_CENTROID: // centroid shrink decomp = tryCentroidShrink( pa, pidx, // points and indices n, dim, // number of points and dimension bnd_box, // current bounding box splitter, // splitting procedure inner_box); // inner box if shrinking (returned) break; default: annError("Illegal shrinking rule", ANNabort); } return decomp; } //---------------------------------------------------------------------- // rbd_tree - recursive procedure to build a bd-tree // // This is analogous to rkd_tree, but for bd-trees. See the // procedure rkd_tree() in kd_split.cpp for more information. // // If the number of points falls below the bucket size, then a // leaf node is created for the points. Otherwise we invoke the // procedure selectDecomp() which determines whether we are to // split or shrink. If splitting is chosen, then we essentially // do exactly as rkd_tree() would, and invoke the specified // splitting procedure to the points. Otherwise, the selection // procedure returns a bounding box, from which we extract the // appropriate shrinking bounds, and create a shrinking node. // Finally the points are subdivided, and the procedure is // invoked recursively on the two subsets to form the children. //---------------------------------------------------------------------- ANNkd_ptr rbd_tree( // recursive construction of bd-tree ANNpointArray pa, // point array ANNidxArray pidx, // point indices to store in subtree int n, // number of points int dim, // dimension of space int bsp, // bucket space ANNorthRect &bnd_box, // bounding box for current node ANNkd_splitter splitter, // splitting routine ANNshrinkRule shrink) // shrinking rule { ANNdecomp decomp; // decomposition method ANNorthRect inner_box(dim); // inner box (if shrinking) if (n <= bsp) { // n small, make a leaf node if (n == 0) // empty leaf node return KD_TRIVIAL; // return (canonical) empty leaf else // construct the node and return return new ANNkd_leaf(n, pidx); } decomp = selectDecomp( // select decomposition method pa, pidx, // points and indices n, dim, // number of points and dimension bnd_box, // current bounding box splitter, shrink, // splitting/shrinking methods inner_box); // inner box if shrinking (returned) if (decomp == SPLIT) { // split selected int cd; // cutting dimension ANNcoord cv; // cutting value int n_lo; // number on low side of cut // invoke splitting procedure (*splitter)(pa, pidx, bnd_box, n, dim, cd, cv, n_lo); ANNcoord lv = bnd_box.lo[cd]; // save bounds for cutting dimension ANNcoord hv = bnd_box.hi[cd]; bnd_box.hi[cd] = cv; // modify bounds for left subtree ANNkd_ptr lo = rbd_tree( // build left subtree pa, pidx, n_lo, // ...from pidx[0..n_lo-1] dim, bsp, bnd_box, splitter, shrink); bnd_box.hi[cd] = hv; // restore bounds bnd_box.lo[cd] = cv; // modify bounds for right subtree ANNkd_ptr hi = rbd_tree( // build right subtree pa, pidx + n_lo, n-n_lo,// ...from pidx[n_lo..n-1] dim, bsp, bnd_box, splitter, shrink); bnd_box.lo[cd] = lv; // restore bounds // create the splitting node return new ANNkd_split(cd, cv, lv, hv, lo, hi); } else { // shrink selected int n_in; // number of points in box int n_bnds; // number of bounding sides annBoxSplit( // split points around inner box pa, // points to split pidx, // point indices n, // number of points dim, // dimension inner_box, // inner box n_in); // number of points inside (returned) ANNkd_ptr in = rbd_tree( // build inner subtree pidx[0..n_in-1] pa, pidx, n_in, dim, bsp, inner_box, splitter, shrink); ANNkd_ptr out = rbd_tree( // build outer subtree pidx[n_in..n] pa, pidx+n_in, n - n_in, dim, bsp, bnd_box, splitter, shrink); ANNorthHSArray bnds = NULL; // bounds (alloc in Box2Bnds and // ...freed in bd_shrink destroyer) annBox2Bnds( // convert inner box to bounds inner_box, // inner box bnd_box, // enclosing box dim, // dimension n_bnds, // number of bounds (returned) bnds); // bounds array (modified) // return shrinking node return new ANNbd_shrink(n_bnds, bnds, in, out); } }