tesseract  5.0.0
kdtree.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  ** Filename: kdtree.cpp
3  ** Purpose: Routines for managing K-D search trees
4  ** Author: Dan Johnson
5  **
6  ** (c) Copyright Hewlett-Packard Company, 1988.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  ******************************************************************************/
17 
18 /*-----------------------------------------------------------------------------
19  Include Files and Type Defines
20 -----------------------------------------------------------------------------*/
21 #include "kdtree.h"
22 
23 #include <algorithm>
24 #include <cfloat> // for FLT_MAX
25 #include <cmath>
26 #include <cstdio>
27 
28 namespace tesseract {
29 
30 #define Magnitude(X) ((X) < 0 ? -(X) : (X))
31 #define NodeFound(N, K, D) (((N)->Key == (K)) && ((N)->Data == (D)))
32 
33 /*-----------------------------------------------------------------------------
34  Global Data Definitions and Declarations
35 -----------------------------------------------------------------------------*/
36 #define MINSEARCH (-FLT_MAX)
37 #define MAXSEARCH FLT_MAX
38 
39 // Helper function to find the next essential dimension in a cycle.
40 static int NextLevel(KDTREE *tree, int level) {
41  do {
42  ++level;
43  if (level >= tree->KeySize) {
44  level = 0;
45  }
46  } while (tree->KeyDesc[level].NonEssential);
47  return level;
48 }
49 
50 //-----------------------------------------------------------------------------
52 template <typename Key, typename Value>
53 class MinK {
54 public:
55  MinK(Key max_key, int k);
56  ~MinK();
57 
58  struct Element {
59  Element() = default;
60  Element(const Key &k, const Value &v) : key(k), value(v) {}
61 
62  Key key;
63  Value value;
64  };
65 
66  bool insert(Key k, Value v);
67  const Key &max_insertable_key();
68 
70  return elements_count_;
71  }
72  const Element *elements() {
73  return elements_;
74  }
75 
76 private:
77  const Key max_key_;
78  Element *elements_;
79  int elements_count_;
80  int k_;
81  int max_index_;
82 };
83 
84 template <typename Key, typename Value>
85 MinK<Key, Value>::MinK(Key max_key, int k)
86  : max_key_(max_key), elements_count_(0), k_(k < 1 ? 1 : k), max_index_(0) {
87  elements_ = new Element[k_];
88 }
89 
90 template <typename Key, typename Value>
92  delete[] elements_;
93 }
94 
95 template <typename Key, typename Value>
97  if (elements_count_ < k_) {
98  return max_key_;
99  }
100  return elements_[max_index_].key;
101 }
102 
103 template <typename Key, typename Value>
104 bool MinK<Key, Value>::insert(Key key, Value value) {
105  if (elements_count_ < k_) {
106  elements_[elements_count_++] = Element(key, value);
107  if (key > elements_[max_index_].key) {
108  max_index_ = elements_count_ - 1;
109  }
110  return true;
111  } else if (key < elements_[max_index_].key) {
112  // evict the largest element.
113  elements_[max_index_] = Element(key, value);
114  // recompute max_index_
115  for (int i = 0; i < elements_count_; i++) {
116  if (elements_[i].key > elements_[max_index_].key) {
117  max_index_ = i;
118  }
119  }
120  return true;
121  }
122  return false;
123 }
124 
125 //-----------------------------------------------------------------------------
129 public:
130  KDTreeSearch(KDTREE *tree, float *query_point, int k_closest);
131  ~KDTreeSearch();
132 
134  void Search(int *result_count, float *distances, void **results);
135 
136 private:
137  void SearchRec(int Level, KDNODE *SubTree);
138  bool BoxIntersectsSearch(float *lower, float *upper);
139 
140  KDTREE *tree_;
141  float *query_point_;
142  float *sb_min_;
143  float *sb_max_;
144  MinK<float, void *> results_;
145 };
146 
147 KDTreeSearch::KDTreeSearch(KDTREE *tree, float *query_point, int k_closest)
148  : tree_(tree), query_point_(query_point), results_(MAXSEARCH, k_closest) {
149  sb_min_ = new float[tree->KeySize];
150  sb_max_ = new float[tree->KeySize];
151 }
152 
154  delete[] sb_min_;
155  delete[] sb_max_;
156 }
157 
160 void KDTreeSearch::Search(int *result_count, float *distances, void **results) {
161  if (tree_->Root.Left == nullptr) {
162  *result_count = 0;
163  } else {
164  for (int i = 0; i < tree_->KeySize; i++) {
165  sb_min_[i] = tree_->KeyDesc[i].Min;
166  sb_max_[i] = tree_->KeyDesc[i].Max;
167  }
168  SearchRec(0, tree_->Root.Left);
169  int count = results_.elements_count();
170  *result_count = count;
171  for (int j = 0; j < count; j++) {
172  // Pre-cast to float64 as key is a template type and we have no control
173  // over its actual type.
174  distances[j] = static_cast<float>(sqrt(static_cast<double>(results_.elements()[j].key)));
175  results[j] = results_.elements()[j].value;
176  }
177  }
178 }
179 
180 /*-----------------------------------------------------------------------------
181  Public Code
182 -----------------------------------------------------------------------------*/
186 KDTREE *MakeKDTree(int16_t KeySize, const PARAM_DESC KeyDesc[]) {
187  auto *KDTree = new KDTREE(KeySize);
188  for (int i = 0; i < KeySize; i++) {
189  KDTree->KeyDesc[i].NonEssential = KeyDesc[i].NonEssential;
190  KDTree->KeyDesc[i].Circular = KeyDesc[i].Circular;
191  if (KeyDesc[i].Circular) {
192  KDTree->KeyDesc[i].Min = KeyDesc[i].Min;
193  KDTree->KeyDesc[i].Max = KeyDesc[i].Max;
194  KDTree->KeyDesc[i].Range = KeyDesc[i].Max - KeyDesc[i].Min;
195  KDTree->KeyDesc[i].HalfRange = KDTree->KeyDesc[i].Range / 2;
196  KDTree->KeyDesc[i].MidRange = (KeyDesc[i].Max + KeyDesc[i].Min) / 2;
197  } else {
198  KDTree->KeyDesc[i].Min = MINSEARCH;
199  KDTree->KeyDesc[i].Max = MAXSEARCH;
200  }
201  }
202  KDTree->Root.Left = nullptr;
203  KDTree->Root.Right = nullptr;
204  return KDTree;
205 }
206 
215 void KDStore(KDTREE *Tree, float *Key, void *Data) {
216  auto PtrToNode = &(Tree->Root.Left);
217  auto Node = *PtrToNode;
218  auto Level = NextLevel(Tree, -1);
219  while (Node != nullptr) {
220  if (Key[Level] < Node->BranchPoint) {
221  PtrToNode = &(Node->Left);
222  if (Key[Level] > Node->LeftBranch) {
223  Node->LeftBranch = Key[Level];
224  }
225  } else {
226  PtrToNode = &(Node->Right);
227  if (Key[Level] < Node->RightBranch) {
228  Node->RightBranch = Key[Level];
229  }
230  }
231  Level = NextLevel(Tree, Level);
232  Node = *PtrToNode;
233  }
234 
235  *PtrToNode = new KDNODE(Tree, Key, Data, Level);
236 } /* KDStore */
237 
252 void KDDelete(KDTREE *Tree, float Key[], void *Data) {
253  int Level;
254  KDNODE *Current;
255  KDNODE *Father;
256 
257  /* initialize search at root of tree */
258  Father = &(Tree->Root);
259  Current = Father->Left;
260  Level = NextLevel(Tree, -1);
261 
262  /* search tree for node to be deleted */
263  while ((Current != nullptr) && (!NodeFound(Current, Key, Data))) {
264  Father = Current;
265  if (Key[Level] < Current->BranchPoint) {
266  Current = Current->Left;
267  } else {
268  Current = Current->Right;
269  }
270 
271  Level = NextLevel(Tree, Level);
272  }
273 
274  if (Current != nullptr) { /* if node to be deleted was found */
275  if (Current == Father->Left) {
276  Father->Left = nullptr;
277  Father->LeftBranch = Tree->KeyDesc[Level].Min;
278  } else {
279  Father->Right = nullptr;
280  Father->RightBranch = Tree->KeyDesc[Level].Max;
281  }
282 
283  InsertNodes(Tree, Current->Left);
284  InsertNodes(Tree, Current->Right);
285  delete Current;
286  }
287 } /* KDDelete */
288 
305 void KDNearestNeighborSearch(KDTREE *Tree, float Query[], int QuerySize, float MaxDistance,
306  int *NumberOfResults, void **NBuffer, float DBuffer[]) {
307  KDTreeSearch search(Tree, Query, QuerySize);
308  search.Search(NumberOfResults, DBuffer, NBuffer);
309 }
310 
311 /*---------------------------------------------------------------------------*/
313 void KDWalk(KDTREE *Tree, void_proc action, void *context) {
314  if (Tree->Root.Left != nullptr) {
315  Walk(Tree, action, context, Tree->Root.Left, NextLevel(Tree, -1));
316  }
317 }
318 
319 /*-----------------------------------------------------------------------------
320  Private Code
321 -----------------------------------------------------------------------------*/
322 
323 /*---------------------------------------------------------------------------*/
329 void KDTreeSearch::SearchRec(int level, KDNODE *sub_tree) {
330  if (level >= tree_->KeySize) {
331  level = 0;
332  }
333 
334  if (!BoxIntersectsSearch(sb_min_, sb_max_)) {
335  return;
336  }
337 
338  results_.insert(DistanceSquared(tree_->KeySize, &tree_->KeyDesc[0], query_point_, sub_tree->Key),
339  sub_tree->Data);
340 
341  if (query_point_[level] < sub_tree->BranchPoint) {
342  if (sub_tree->Left != nullptr) {
343  float tmp = sb_max_[level];
344  sb_max_[level] = sub_tree->LeftBranch;
345  SearchRec(NextLevel(tree_, level), sub_tree->Left);
346  sb_max_[level] = tmp;
347  }
348  if (sub_tree->Right != nullptr) {
349  float tmp = sb_min_[level];
350  sb_min_[level] = sub_tree->RightBranch;
351  SearchRec(NextLevel(tree_, level), sub_tree->Right);
352  sb_min_[level] = tmp;
353  }
354  } else {
355  if (sub_tree->Right != nullptr) {
356  float tmp = sb_min_[level];
357  sb_min_[level] = sub_tree->RightBranch;
358  SearchRec(NextLevel(tree_, level), sub_tree->Right);
359  sb_min_[level] = tmp;
360  }
361  if (sub_tree->Left != nullptr) {
362  float tmp = sb_max_[level];
363  sb_max_[level] = sub_tree->LeftBranch;
364  SearchRec(NextLevel(tree_, level), sub_tree->Left);
365  sb_max_[level] = tmp;
366  }
367  }
368 }
369 
370 /*---------------------------------------------------------------------------*/
378 float DistanceSquared(int k, PARAM_DESC *dim, float p1[], float p2[]) {
379  float total_distance = 0;
380 
381  for (; k > 0; k--, p1++, p2++, dim++) {
382  if (dim->NonEssential) {
383  continue;
384  }
385 
386  float dimension_distance = *p1 - *p2;
387 
388  /* if this dimension is circular - check wraparound distance */
389  if (dim->Circular) {
390  dimension_distance = Magnitude(dimension_distance);
391  float wrap_distance = dim->Max - dim->Min - dimension_distance;
392  dimension_distance = std::min(dimension_distance, wrap_distance);
393  }
394 
395  total_distance += dimension_distance * dimension_distance;
396  }
397  return total_distance;
398 }
399 
400 float ComputeDistance(int k, PARAM_DESC *dim, float p1[], float p2[]) {
401  return std::sqrt(DistanceSquared(k, dim, p1, p2));
402 }
403 
404 /*---------------------------------------------------------------------------*/
409 bool KDTreeSearch::BoxIntersectsSearch(float *lower, float *upper) {
410  float *query = query_point_;
411  // Compute the sum in higher precision.
412  double total_distance = 0.0;
413  double radius_squared =
414  static_cast<double>(results_.max_insertable_key()) * results_.max_insertable_key();
415  PARAM_DESC *dim = &tree_->KeyDesc[0];
416 
417  for (int i = tree_->KeySize; i > 0; i--, dim++, query++, lower++, upper++) {
418  if (dim->NonEssential) {
419  continue;
420  }
421 
422  float dimension_distance;
423  if (*query < *lower) {
424  dimension_distance = *lower - *query;
425  } else if (*query > *upper) {
426  dimension_distance = *query - *upper;
427  } else {
428  dimension_distance = 0;
429  }
430 
431  /* if this dimension is circular - check wraparound distance */
432  if (dim->Circular) {
433  float wrap_distance = FLT_MAX;
434  if (*query < *lower) {
435  wrap_distance = *query + dim->Max - dim->Min - *upper;
436  } else if (*query > *upper) {
437  wrap_distance = *lower - (*query - (dim->Max - dim->Min));
438  }
439  dimension_distance = std::min(dimension_distance, wrap_distance);
440  }
441 
442  total_distance += static_cast<double>(dimension_distance) * dimension_distance;
443  if (total_distance >= radius_squared) {
444  return false;
445  }
446  }
447  return true;
448 }
449 
450 /*---------------------------------------------------------------------------*/
466 void Walk(KDTREE *tree, void_proc action, void *context, KDNODE *sub_tree, int32_t level) {
467  (*action)(context, sub_tree->Data, level);
468  if (sub_tree->Left != nullptr) {
469  Walk(tree, action, context, sub_tree->Left, NextLevel(tree, level));
470  }
471  if (sub_tree->Right != nullptr) {
472  Walk(tree, action, context, sub_tree->Right, NextLevel(tree, level));
473  }
474 }
475 
477 void InsertNodes(KDTREE *tree, KDNODE *nodes) {
478  if (nodes == nullptr) {
479  return;
480  }
481 
482  KDStore(tree, nodes->Key, nodes->Data);
483  InsertNodes(tree, nodes->Left);
484  InsertNodes(tree, nodes->Right);
485 }
486 
487 } // namespace tesseract
#define MAXSEARCH
Definition: kdtree.cpp:37
#define NodeFound(N, K, D)
Definition: kdtree.cpp:31
#define MINSEARCH
Definition: kdtree.cpp:36
#define Magnitude(X)
Definition: kdtree.cpp:30
void KDDelete(KDTREE *Tree, float Key[], void *Data)
Definition: kdtree.cpp:252
void Walk(KDTREE *tree, void_proc action, void *context, KDNODE *sub_tree, int32_t level)
Definition: kdtree.cpp:466
void KDStore(KDTREE *Tree, float *Key, void *Data)
Definition: kdtree.cpp:215
float ComputeDistance(int k, PARAM_DESC *dim, float p1[], float p2[])
Definition: kdtree.cpp:400
float DistanceSquared(int k, PARAM_DESC *dim, float p1[], float p2[])
Definition: kdtree.cpp:378
void(*)(...) void_proc
Definition: kdtree.h:25
void InsertNodes(KDTREE *tree, KDNODE *nodes)
Definition: kdtree.cpp:477
KDTREE * MakeKDTree(int16_t KeySize, const PARAM_DESC KeyDesc[])
Definition: kdtree.cpp:186
void KDWalk(KDTREE *Tree, void_proc action, void *context)
Definition: kdtree.cpp:313
LIST search(LIST list, void *key, int_compare is_equal)
Definition: oldlist.cpp:211
void KDNearestNeighborSearch(KDTREE *Tree, float Query[], int QuerySize, float MaxDistance, int *NumberOfResults, void **NBuffer, float DBuffer[])
Definition: kdtree.cpp:305
int elements_count()
Definition: kdtree.cpp:69
const Element * elements()
Definition: kdtree.cpp:72
MinK(Key max_key, int k)
Definition: kdtree.cpp:85
bool insert(Key k, Value v)
Definition: kdtree.cpp:104
const Key & max_insertable_key()
Definition: kdtree.cpp:96
Element(const Key &k, const Value &v)
Definition: kdtree.cpp:60
KDTreeSearch(KDTREE *tree, float *query_point, int k_closest)
Definition: kdtree.cpp:147
void Search(int *result_count, float *distances, void **results)
Definition: kdtree.cpp:160
float * Key
Definition: kdtree.h:55
KDNODE * Right
Definition: kdtree.h:61
void * Data
Definition: kdtree.h:56
float LeftBranch
Definition: kdtree.h:58
KDNODE * Left
Definition: kdtree.h:60
float RightBranch
Definition: kdtree.h:59
float BranchPoint
Definition: kdtree.h:57
std::vector< PARAM_DESC > KeyDesc
Definition: kdtree.h:80
int16_t KeySize
Definition: kdtree.h:78
KDNODE Root
Definition: kdtree.h:79