tesseract  5.0.0
equationdetect_test.cc
Go to the documentation of this file.
1 // (C) Copyright 2017, Google Inc.
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 // http://www.apache.org/licenses/LICENSE-2.0
6 // Unless required by applicable law or agreed to in writing, software
7 // distributed under the License is distributed on an "AS IS" BASIS,
8 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 // See the License for the specific language governing permissions and
10 // limitations under the License.
11 
12 #include "include_gunit.h"
13 
14 #include "colpartitiongrid.h"
15 #include "equationdetect.h"
16 #include "tesseractclass.h"
17 
18 #include <allheaders.h>
19 
20 #include <memory>
21 #include <string>
22 #include <utility>
23 
24 #define ENABLE_IdentifySpecialText_TEST 0
25 #if ENABLE_IdentifySpecialText_TEST
26 # define EQU_TRAINEDDATA_NAME "equ"
27 #else
28 # define EQU_TRAINEDDATA_NAME "equINTENTIONALLY_MISSING_FILE"
29 #endif
30 
31 namespace tesseract {
32 
34 public:
35  TestableEquationDetect(const char *tessdata, Tesseract *lang_tesseract)
37  SetLangTesseract(lang_tesseract);
38  }
39 
40  // Insert a certain math and digit blobs into part.
41  void AddMathDigitBlobs(const int math_blobs, const int digit_blobs, const int total_blobs,
42  ColPartition *part) {
43  CHECK(part != nullptr);
44  CHECK_LE(math_blobs + digit_blobs, total_blobs);
45  int count = 0;
46  for (int i = 0; i < math_blobs; i++, count++) {
47  auto *blob = new BLOBNBOX();
48  blob->set_special_text_type(BSTT_MATH);
49  part->AddBox(blob);
50  }
51  for (int i = 0; i < digit_blobs; i++, count++) {
52  auto *blob = new BLOBNBOX();
53  blob->set_special_text_type(BSTT_DIGIT);
54  part->AddBox(blob);
55  }
56  for (int i = count; i < total_blobs; i++) {
57  auto *blob = new BLOBNBOX();
58  blob->set_special_text_type(BSTT_NONE);
59  part->AddBox(blob);
60  }
61  }
62 
63  // Set up pix_binary for lang_tesseract_.
64  void SetPixBinary(Image pix) {
65  CHECK_EQ(1, pixGetDepth(pix));
67  }
68 
69  void RunIdentifySpecialText(BLOBNBOX *blob, const int height_th) {
70  IdentifySpecialText(blob, height_th);
71  }
72 
74  const UNICHARSET &unicharset = lang_tesseract_->unicharset;
75  return EstimateTypeForUnichar(unicharset, unicharset.unichar_to_id(val));
76  }
77 
79  this->part_grid_ = part_grid;
80  return IsIndented(part);
81  }
82 
83  bool RunIsNearSmallNeighbor(const TBOX &seed_box, const TBOX &part_box) {
84  return IsNearSmallNeighbor(seed_box, part_box);
85  }
86 
88  return CheckSeedBlobsCount(part);
89  }
90 
91  float RunComputeForegroundDensity(const TBOX &tbox) {
92  return ComputeForegroundDensity(tbox);
93  }
94 
95  int RunCountAlignment(const std::vector<int> &sorted_vec, const int val) {
96  return CountAlignment(sorted_vec, val);
97  }
98 
99  void RunSplitCPHorLite(ColPartition *part, std::vector<TBOX> *splitted_boxes) {
100  SplitCPHorLite(part, splitted_boxes);
101  }
102 
103  void RunSplitCPHor(ColPartition *part, std::vector<ColPartition *> *parts_splitted) {
104  SplitCPHor(part, parts_splitted);
105  }
106 
107  void TestComputeCPsSuperBBox(const TBOX &box, ColPartitionGrid *part_grid) {
108  CHECK(part_grid != nullptr);
109  part_grid_ = part_grid;
111  EXPECT_TRUE(*cps_super_bbox_ == box);
112  }
113 };
114 
115 class EquationFinderTest : public testing::Test {
116 protected:
117  std::unique_ptr<TestableEquationDetect> equation_det_;
118  std::unique_ptr<Tesseract> tesseract_;
119 
120  // The directory for testdata;
121  std::string testdata_dir_;
122 
123  void SetUp() override {
124  std::locale::global(std::locale(""));
125  tesseract_ = std::make_unique<Tesseract>();
126  tesseract_->init_tesseract(TESSDATA_DIR, "eng", OEM_TESSERACT_ONLY);
127  tesseract_->set_source_resolution(300);
128  equation_det_ = std::make_unique<TestableEquationDetect>(TESSDATA_DIR, tesseract_.get());
129  equation_det_->SetResolution(300);
130 
131  testdata_dir_ = TESTDATA_DIR;
132  }
133 
134  void TearDown() override {
135  tesseract_.reset(nullptr);
136  equation_det_.reset(nullptr);
137  }
138 
139  // Add a BLOCK covering the whole page.
140  void AddPageBlock(Image pix, BLOCK_LIST *blocks) {
141  CHECK(pix != nullptr);
142  CHECK(blocks != nullptr);
143  BLOCK_IT block_it(blocks);
144  auto *block = new BLOCK("", true, 0, 0, 0, 0, pixGetWidth(pix), pixGetHeight(pix));
145  block_it.add_to_end(block);
146  }
147 
148  // Create col partitions, add into part_grid, and put them into all_parts.
149  void CreateColParts(const int rows, const int cols, ColPartitionGrid *part_grid,
150  std::vector<ColPartition *> *all_parts) {
151  const int kWidth = 10, kHeight = 10;
152  ClearParts(all_parts);
153  for (int y = 0; y < rows; ++y) {
154  for (int x = 0; x < cols; ++x) {
155  int left = x * kWidth * 2, bottom = y * kHeight * 2;
156  TBOX box(left, bottom, left + kWidth, bottom + kHeight);
158  part_grid->InsertBBox(true, true, part);
159  all_parts->push_back(part);
160  }
161  }
162  }
163 
164  void ClearParts(std::vector<ColPartition *> *all_parts) {
165  for (auto &all_part : *all_parts) {
166  all_part->DeleteBoxes();
167  delete all_part;
168  }
169  }
170 
171  // Create a BLOBNBOX object with bounding box tbox, and add it into part.
172  void AddBlobIntoPart(const TBOX &tbox, ColPartition *part) {
173  CHECK(part != nullptr);
174  auto *blob = new BLOBNBOX();
175  blob->set_bounding_box(tbox);
176  part->AddBox(blob);
177  }
178 };
179 
180 TEST_F(EquationFinderTest, IdentifySpecialText) {
181 #if !ENABLE_IdentifySpecialText_TEST
182  GTEST_SKIP();
183 #else // TODO: missing equ_gt1.tif
184  // Load Image.
185  std::string imagefile = file::JoinPath(testdata_dir_, "equ_gt1.tif");
186  Image pix_binary = pixRead(imagefile.c_str());
187  CHECK(pix_binary != nullptr && pixGetDepth(pix_binary) == 1);
188 
189  // Get components.
190  BLOCK_LIST blocks;
191  TO_BLOCK_LIST to_blocks;
192  AddPageBlock(pix_binary, &blocks);
193  Textord *textord = tesseract_->mutable_textord();
194  textord->find_components(pix_binary, &blocks, &to_blocks);
195 
196  // Identify special texts from to_blocks.
197  TO_BLOCK_IT to_block_it(&to_blocks);
198  std::map<int, int> stt_count;
199  for (to_block_it.mark_cycle_pt(); !to_block_it.cycled_list(); to_block_it.forward()) {
200  TO_BLOCK *to_block = to_block_it.data();
201  BLOBNBOX_IT blob_it(&(to_block->blobs));
202  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
203  BLOBNBOX *blob = blob_it.data();
204  // blob->set_special_text_type(BSTT_NONE);
205  equation_det_->RunIdentifySpecialText(blob, 0);
206  tensorflow::gtl::InsertIfNotPresent(&stt_count, blob->special_text_type(), 0);
207  stt_count[blob->special_text_type()]++;
208  }
209  }
210 
211  // Verify the number, but allow a range of +/- kCountRange before squealing.
212  const int kCountRange = 3;
213  EXPECT_GE(39 + kCountRange, stt_count[BSTT_NONE]);
214  EXPECT_LE(39 - kCountRange, stt_count[BSTT_NONE]);
215 
216  // if you count all the subscripts etc, there are ~45 italic chars.
217  EXPECT_GE(45 + kCountRange, stt_count[BSTT_ITALIC]);
218  EXPECT_LE(45 - kCountRange, stt_count[BSTT_ITALIC]);
219  EXPECT_GE(41 + kCountRange, stt_count[BSTT_DIGIT]);
220  EXPECT_LE(41 - kCountRange, stt_count[BSTT_DIGIT]);
221  EXPECT_GE(50 + kCountRange, stt_count[BSTT_MATH]);
222  EXPECT_LE(50 - kCountRange, stt_count[BSTT_MATH]);
223  EXPECT_GE(10 + kCountRange, stt_count[BSTT_UNCLEAR]);
224  EXPECT_LE(10 - kCountRange, stt_count[BSTT_UNCLEAR]);
225 
226  // Release memory.
227  pix_binary.destroy();
228 #endif
229 }
230 
231 TEST_F(EquationFinderTest, EstimateTypeForUnichar) {
232  // Test abc characters.
233  EXPECT_EQ(BSTT_NONE, equation_det_->RunEstimateTypeForUnichar("a"));
234  EXPECT_EQ(BSTT_NONE, equation_det_->RunEstimateTypeForUnichar("c"));
235 
236  // Test punctuation characters.
237  EXPECT_EQ(BSTT_NONE, equation_det_->RunEstimateTypeForUnichar("'"));
238  EXPECT_EQ(BSTT_NONE, equation_det_->RunEstimateTypeForUnichar(","));
239 
240  // Test digits.
241  EXPECT_EQ(BSTT_DIGIT, equation_det_->RunEstimateTypeForUnichar("1"));
242  EXPECT_EQ(BSTT_DIGIT, equation_det_->RunEstimateTypeForUnichar("4"));
243  EXPECT_EQ(BSTT_DIGIT, equation_det_->RunEstimateTypeForUnichar("|"));
244 
245  // Test math symbols.
246  EXPECT_EQ(BSTT_MATH, equation_det_->RunEstimateTypeForUnichar("("));
247  EXPECT_EQ(BSTT_MATH, equation_det_->RunEstimateTypeForUnichar("+"));
248 }
249 
251  ColPartitionGrid part_grid(10, ICOORD(0, 0), ICOORD(1000, 1000));
252 
253  // Create five ColPartitions:
254  // part 1: ************
255  // part 2: *********
256  // part 3: *******
257  // part 4: *****
258  //
259  // part 5: ********
260  TBOX box1(0, 950, 999, 999);
262  part_grid.InsertBBox(true, true, part1);
263  TBOX box2(300, 920, 900, 940);
265  part_grid.InsertBBox(true, true, part2);
266  TBOX box3(0, 900, 600, 910);
268  part_grid.InsertBBox(true, true, part3);
269  TBOX box4(300, 890, 600, 899);
271  part_grid.InsertBBox(true, true, part4);
272  TBOX box5(300, 500, 900, 510);
274  part_grid.InsertBBox(true, true, part5);
275 
276  // Test
277  // part1 should be no indent.
278  EXPECT_EQ(EquationDetect::NO_INDENT, equation_det_->RunIsIndented(&part_grid, part1));
279  // part2 should be left indent in terms of part1.
280  EXPECT_EQ(EquationDetect::LEFT_INDENT, equation_det_->RunIsIndented(&part_grid, part2));
281  // part3 should be right indent.
282  EXPECT_EQ(EquationDetect::RIGHT_INDENT, equation_det_->RunIsIndented(&part_grid, part3));
283  // part4 should be both indented.
284  EXPECT_EQ(EquationDetect::BOTH_INDENT, equation_det_->RunIsIndented(&part_grid, part4));
285  // part5 should be no indent because it is too far from part1.
286  EXPECT_EQ(EquationDetect::NO_INDENT, equation_det_->RunIsIndented(&part_grid, part5));
287 
288  // Release memory.
289  part1->DeleteBoxes();
290  delete (part1);
291  part2->DeleteBoxes();
292  delete (part2);
293  part3->DeleteBoxes();
294  delete (part3);
295  part4->DeleteBoxes();
296  delete (part4);
297  part5->DeleteBoxes();
298  delete (part5);
299 }
300 
301 TEST_F(EquationFinderTest, IsNearSmallNeighbor) {
302  // Create four tboxes:
303  // part 1, part 2
304  // ***** *****
305  // part 3: *****
306  //
307  // part 4: *****************
308  TBOX box1(0, 950, 499, 999);
309  TBOX box2(500, 950, 999, 998);
310  TBOX box3(0, 900, 499, 949);
311  TBOX box4(0, 550, 499, 590);
312 
313  // Test
314  // box2 should be box1's near neighbor but not vice versa.
315  EXPECT_TRUE(equation_det_->RunIsNearSmallNeighbor(box1, box2));
316  EXPECT_FALSE(equation_det_->RunIsNearSmallNeighbor(box2, box1));
317  // box1 and box3 should be near neighbors of each other.
318  EXPECT_TRUE(equation_det_->RunIsNearSmallNeighbor(box1, box3));
319  EXPECT_FALSE(equation_det_->RunIsNearSmallNeighbor(box2, box3));
320  // box2 and box3 should not be near neighbors of each other.
321  EXPECT_FALSE(equation_det_->RunIsNearSmallNeighbor(box2, box3));
322  EXPECT_FALSE(equation_det_->RunIsNearSmallNeighbor(box3, box2));
323 
324  // box4 should not be the near neighbor of any one.
325  EXPECT_FALSE(equation_det_->RunIsNearSmallNeighbor(box1, box4));
326  EXPECT_FALSE(equation_det_->RunIsNearSmallNeighbor(box2, box4));
327  EXPECT_FALSE(equation_det_->RunIsNearSmallNeighbor(box3, box4));
328 }
329 
330 TEST_F(EquationFinderTest, CheckSeedBlobsCount) {
331  TBOX box(0, 950, 999, 999);
336 
337  // Part 1: 8 math, 0 digit, 20 total.
338  equation_det_->AddMathDigitBlobs(8, 0, 20, part1);
339  EXPECT_TRUE(equation_det_->RunCheckSeedBlobsCount(part1));
340 
341  // Part 2: 1 math, 8 digit, 20 total.
342  equation_det_->AddMathDigitBlobs(1, 8, 20, part2);
343  EXPECT_FALSE(equation_det_->RunCheckSeedBlobsCount(part2));
344 
345  // Part 3: 3 math, 8 digit, 20 total.
346  equation_det_->AddMathDigitBlobs(3, 8, 20, part3);
347  EXPECT_TRUE(equation_det_->RunCheckSeedBlobsCount(part3));
348 
349  // Part 4: 0 math, 0 digit, 8 total.
350  equation_det_->AddMathDigitBlobs(0, 0, 8, part4);
351  EXPECT_FALSE(equation_det_->RunCheckSeedBlobsCount(part4));
352 
353  // Release memory.
354  part1->DeleteBoxes();
355  delete (part1);
356  part2->DeleteBoxes();
357  delete (part2);
358  part3->DeleteBoxes();
359  delete (part3);
360  part4->DeleteBoxes();
361  delete (part4);
362 }
363 
364 TEST_F(EquationFinderTest, ComputeForegroundDensity) {
365  // Create the pix with top half foreground, bottom half background.
366  int width = 1024, height = 768;
367  Image pix = pixCreate(width, height, 1);
368  pixRasterop(pix, 0, 0, width, height / 2, PIX_SET, nullptr, 0, 0);
369  TBOX box1(100, 0, 140, 140), box2(100, height / 2 - 20, 140, height / 2 + 20),
370  box3(100, height - 40, 140, height);
371  equation_det_->SetPixBinary(pix);
372 
373  // Verify
374  EXPECT_NEAR(0.0, equation_det_->RunComputeForegroundDensity(box1), 0.0001f);
375  EXPECT_NEAR(0.5, equation_det_->RunComputeForegroundDensity(box2), 0.0001f);
376  EXPECT_NEAR(1.0, equation_det_->RunComputeForegroundDensity(box3), 0.0001f);
377 }
378 
379 TEST_F(EquationFinderTest, CountAlignment) {
380  std::vector<int> vec;
381  vec.push_back(1);
382  vec.push_back(1);
383  vec.push_back(1);
384  vec.push_back(100);
385  vec.push_back(200);
386  vec.push_back(200);
387 
388  // Test the right point.
389  EXPECT_EQ(3, equation_det_->RunCountAlignment(vec, 1));
390  EXPECT_EQ(1, equation_det_->RunCountAlignment(vec, 100));
391  EXPECT_EQ(2, equation_det_->RunCountAlignment(vec, 200));
392 
393  // Test the near neighbors.
394  EXPECT_EQ(3, equation_det_->RunCountAlignment(vec, 3));
395  EXPECT_EQ(1, equation_det_->RunCountAlignment(vec, 99));
396  EXPECT_EQ(2, equation_det_->RunCountAlignment(vec, 202));
397 
398  // Test the far neighbors.
399  EXPECT_EQ(0, equation_det_->RunCountAlignment(vec, 150));
400  EXPECT_EQ(0, equation_det_->RunCountAlignment(vec, 50));
401  EXPECT_EQ(0, equation_det_->RunCountAlignment(vec, 250));
402 }
403 
404 TEST_F(EquationFinderTest, ComputeCPsSuperBBox) {
405  Image pix = pixCreate(1001, 1001, 1);
406  equation_det_->SetPixBinary(pix);
407  ColPartitionGrid part_grid(10, ICOORD(0, 0), ICOORD(1000, 1000));
408 
409  TBOX box1(0, 0, 999, 99);
411  TBOX box2(0, 100, 499, 199);
413  TBOX box3(500, 100, 999, 199);
415  TBOX box4(0, 200, 999, 299);
417  TBOX box5(0, 900, 999, 999);
419 
420  // Add part1->part3 into part_grid and test.
421  part_grid.InsertBBox(true, true, part1);
422  part_grid.InsertBBox(true, true, part2);
423  part_grid.InsertBBox(true, true, part3);
424  TBOX super_box(0, 0, 999, 199);
425  equation_det_->TestComputeCPsSuperBBox(super_box, &part_grid);
426 
427  // Add part4 and test.
428  part_grid.InsertBBox(true, true, part4);
429  TBOX super_box2(0, 0, 999, 299);
430  equation_det_->TestComputeCPsSuperBBox(super_box2, &part_grid);
431 
432  // Add part5 and test.
433  part_grid.InsertBBox(true, true, part5);
434  TBOX super_box3(0, 0, 999, 999);
435  equation_det_->TestComputeCPsSuperBBox(super_box3, &part_grid);
436 
437  // Release memory.
438  part1->DeleteBoxes();
439  delete (part1);
440  part2->DeleteBoxes();
441  delete (part2);
442  part3->DeleteBoxes();
443  delete (part3);
444  part4->DeleteBoxes();
445  delete (part4);
446  part5->DeleteBoxes();
447  delete (part5);
448 }
449 
450 TEST_F(EquationFinderTest, SplitCPHorLite) {
451  TBOX box(0, 0, 999, 99);
453  part->DeleteBoxes();
454  part->set_median_width(10);
455  std::vector<TBOX> splitted_boxes;
456 
457  // Test an empty part.
458  equation_det_->RunSplitCPHorLite(part, &splitted_boxes);
459  EXPECT_TRUE(splitted_boxes.empty());
460 
461  // Test with one blob.
462  AddBlobIntoPart(TBOX(0, 0, 10, 50), part);
463  equation_det_->RunSplitCPHorLite(part, &splitted_boxes);
464  EXPECT_EQ(1, splitted_boxes.size());
465  EXPECT_TRUE(TBOX(0, 0, 10, 50) == splitted_boxes[0]);
466 
467  // Add more blob and test.
468  AddBlobIntoPart(TBOX(11, 0, 20, 60), part);
469  AddBlobIntoPart(TBOX(25, 0, 30, 55), part); // break point.
470  AddBlobIntoPart(TBOX(100, 0, 110, 15), part);
471  AddBlobIntoPart(TBOX(125, 0, 140, 45), part); // break point.
472  AddBlobIntoPart(TBOX(500, 0, 540, 35), part); // break point.
473  equation_det_->RunSplitCPHorLite(part, &splitted_boxes);
474  // Verify.
475  EXPECT_EQ(3, splitted_boxes.size());
476  EXPECT_TRUE(TBOX(0, 0, 30, 60) == splitted_boxes[0]);
477  EXPECT_TRUE(TBOX(100, 0, 140, 45) == splitted_boxes[1]);
478  EXPECT_TRUE(TBOX(500, 0, 540, 35) == splitted_boxes[2]);
479 
480  part->DeleteBoxes();
481  delete (part);
482 }
483 
485  TBOX box(0, 0, 999, 99);
487  part->DeleteBoxes();
488  part->set_median_width(10);
489  std::vector<ColPartition *> parts_splitted;
490 
491  // Test an empty part.
492  equation_det_->RunSplitCPHor(part, &parts_splitted);
493  EXPECT_TRUE(parts_splitted.empty());
494  // Test with one blob.
495  AddBlobIntoPart(TBOX(0, 0, 10, 50), part);
496 
497  equation_det_->RunSplitCPHor(part, &parts_splitted);
498  EXPECT_EQ(1, parts_splitted.size());
499  EXPECT_TRUE(TBOX(0, 0, 10, 50) == parts_splitted[0]->bounding_box());
500 
501  // Add more blob and test.
502  AddBlobIntoPart(TBOX(11, 0, 20, 60), part);
503  AddBlobIntoPart(TBOX(25, 0, 30, 55), part); // break point.
504  AddBlobIntoPart(TBOX(100, 0, 110, 15), part);
505  AddBlobIntoPart(TBOX(125, 0, 140, 45), part); // break point.
506  AddBlobIntoPart(TBOX(500, 0, 540, 35), part); // break point.
507  equation_det_->RunSplitCPHor(part, &parts_splitted);
508 
509  // Verify.
510  EXPECT_EQ(3, parts_splitted.size());
511  EXPECT_TRUE(TBOX(0, 0, 30, 60) == parts_splitted[0]->bounding_box());
512  EXPECT_TRUE(TBOX(100, 0, 140, 45) == parts_splitted[1]->bounding_box());
513  EXPECT_TRUE(TBOX(500, 0, 540, 35) == parts_splitted[2]->bounding_box());
514 
515  for (auto part_splitted : parts_splitted) {
516  delete part_splitted;
517  }
518  part->DeleteBoxes();
519  delete (part);
520 }
521 
522 } // namespace tesseract
@ TBOX
#define EQU_TRAINEDDATA_NAME
#define CHECK_EQ(test, value)
Definition: include_gunit.h:79
#define CHECK(condition)
Definition: include_gunit.h:76
#define CHECK_LE(test, value)
Definition: include_gunit.h:83
@ OEM_TESSERACT_ONLY
Definition: publictypes.h:266
@ BRT_TEXT
Definition: blobbox.h:82
BlobSpecialTextType
Definition: blobbox.h:92
@ BSTT_MATH
Definition: blobbox.h:96
@ BSTT_UNCLEAR
Definition: blobbox.h:97
@ BSTT_DIGIT
Definition: blobbox.h:95
@ BSTT_ITALIC
Definition: blobbox.h:94
@ BSTT_NONE
Definition: blobbox.h:93
@ BTFT_NONE
Definition: blobbox.h:111
TEST_F(EuroText, FastLatinOCR)
@ PT_FLOWING_TEXT
Definition: publictypes.h:55
bool CheckSeedBlobsCount(ColPartition *part)
int CountAlignment(const std::vector< int > &sorted_vec, const int val) const
IndentType IsIndented(ColPartition *part)
void SplitCPHor(ColPartition *part, std::vector< ColPartition * > *parts_splitted)
ColPartitionGrid * part_grid_
BlobSpecialTextType EstimateTypeForUnichar(const UNICHARSET &unicharset, const UNICHAR_ID id) const
float ComputeForegroundDensity(const TBOX &tbox)
bool IsNearSmallNeighbor(const TBOX &seed_box, const TBOX &part_box) const
void SplitCPHorLite(ColPartition *part, std::vector< TBOX > *splitted_boxes)
void SetLangTesseract(Tesseract *lang_tesseract)
Image * mutable_pix_binary()
BlobSpecialTextType special_text_type() const
Definition: blobbox.h:304
BLOBNBOX_LIST blobs
Definition: blobbox.h:776
void destroy()
Definition: image.cpp:32
integer coordinate
Definition: points.h:36
UNICHARSET unicharset
Definition: ccutil.h:61
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:186
void InsertBBox(bool h_spread, bool v_spread, BBC *bbox)
Definition: bbgrid.h:529
static ColPartition * FakePartition(const TBOX &box, PolyBlockType block_type, BlobRegionType blob_type, BlobTextFlowType flow)
void AddBox(BLOBNBOX *box)
void set_median_width(int width)
Definition: colpartition.h:144
void find_components(Image pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
Definition: tordmain.cpp:211
void RunIdentifySpecialText(BLOBNBOX *blob, const int height_th)
EquationDetect::IndentType RunIsIndented(ColPartitionGrid *part_grid, ColPartition *part)
void RunSplitCPHor(ColPartition *part, std::vector< ColPartition * > *parts_splitted)
bool RunCheckSeedBlobsCount(ColPartition *part)
TestableEquationDetect(const char *tessdata, Tesseract *lang_tesseract)
float RunComputeForegroundDensity(const TBOX &tbox)
BlobSpecialTextType RunEstimateTypeForUnichar(const char *val)
void AddMathDigitBlobs(const int math_blobs, const int digit_blobs, const int total_blobs, ColPartition *part)
bool RunIsNearSmallNeighbor(const TBOX &seed_box, const TBOX &part_box)
void RunSplitCPHorLite(ColPartition *part, std::vector< TBOX > *splitted_boxes)
int RunCountAlignment(const std::vector< int > &sorted_vec, const int val)
void TestComputeCPsSuperBBox(const TBOX &box, ColPartitionGrid *part_grid)
std::unique_ptr< Tesseract > tesseract_
void ClearParts(std::vector< ColPartition * > *all_parts)
std::unique_ptr< TestableEquationDetect > equation_det_
void AddPageBlock(Image pix, BLOCK_LIST *blocks)
void CreateColParts(const int rows, const int cols, ColPartitionGrid *part_grid, std::vector< ColPartition * > *all_parts)
void AddBlobIntoPart(const TBOX &tbox, ColPartition *part)
static std::string JoinPath(const std::string &s1, const std::string &s2)
Definition: include_gunit.h:65