tesseract  5.0.0
tablefind_test.cc
Go to the documentation of this file.
1 // (C) Copyright 2017, Google Inc.
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 // http://www.apache.org/licenses/LICENSE-2.0
6 // Unless required by applicable law or agreed to in writing, software
7 // distributed under the License is distributed on an "AS IS" BASIS,
8 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 // See the License for the specific language governing permissions and
10 // limitations under the License.
11 
12 #include <memory>
13 
14 #include "colpartition.h"
15 #include "colpartitiongrid.h"
16 #include "tablefind.h"
17 
18 #include "include_gunit.h"
19 
20 namespace tesseract {
21 
23 public:
32 
// Expects that at least one partition returned by the full search has a
// bounding box whose left, bottom, right and top all equal those of `box`.
// NOTE(review): `gsearch` is not declared in the visible lines (listing line
// 34 is absent from this extract) - presumably a grid search object is
// constructed there; confirm against the upstream file.
33  void ExpectPartition(const TBOX &box) {
35  gsearch.SetUniqueMode(true);
36  gsearch.StartFullSearch();
37  ColPartition *part = nullptr;
38  bool found = false;
// Visit every partition; the loop keeps going after a match is seen.
39  while ((part = gsearch.NextFullSearch()) != nullptr) {
40  if (part->bounding_box().left() == box.left() &&
41  part->bounding_box().bottom() == box.bottom() &&
42  part->bounding_box().right() == box.right() && part->bounding_box().top() == box.top()) {
43  found = true;
44  }
45  }
// Non-fatal expectation: fails the current test if no exact match was found.
46  EXPECT_TRUE(found);
47  }
// Expects that the full search yields exactly `expected_count` partitions.
// NOTE(review): `gsearch` is not declared in the visible lines (listing line
// 49 is absent from this extract) - presumably a grid search object is
// constructed there; confirm against the upstream file.
48  void ExpectPartitionCount(int expected_count) {
50  gsearch.SetUniqueMode(true);
51  gsearch.StartFullSearch();
52  ColPartition *part = nullptr;
53  int count = 0;
// Count every partition handed back until the search returns nullptr.
54  while ((part = gsearch.NextFullSearch()) != nullptr) {
55  ++count;
56  }
57  EXPECT_EQ(expected_count, count);
58  }
59 };
60 
// Test fixture that owns a TestableTableFinder (`finder_`), a scratch
// partition (`partition_`) and a list of partitions that were handed to the
// finder (`free_boxes_`).
// NOTE(review): several listing lines inside this class (77, 92, 97, 111 and
// 118) are absent from this extract; the notes below mark each gap.
61 class TableFinderTest : public testing::Test {
62 protected:
// Per-test setup: sets the global C++ locale, binds the list iterator to
// free_boxes_, creates the finder and initializes it.
// NOTE(review): std::locale lives in <locale>, but <memory> is the only
// standard header in the visible include list - confirm it is pulled in
// transitively.
63  void SetUp() override {
64  std::locale::global(std::locale(""));
65  free_boxes_it_.set_to_list(&free_boxes_);
66  finder_ = std::make_unique<TestableTableFinder>();
// presumably (grid size, bottom-left, top-right) - TODO confirm against Init().
67  finder_->Init(1, ICOORD(0, 0), ICOORD(500, 500));
68  // gap finding
69  finder_->set_global_median_xheight(5);
70  finder_->set_global_median_blob_width(5);
71  }
72 
// Per-test cleanup: deletes the boxes of partition_ (when one exists) and
// releases the finder.
// NOTE(review): listing line 77 is absent here - presumably a call that
// cleans up the partitions recorded in free_boxes_; confirm upstream.
73  void TearDown() override {
74  if (partition_.get() != nullptr) {
75  partition_->DeleteBoxes();
76  }
78  finder_.reset(nullptr);
79  }
80 
// Convenience overload: forwards with first_column = last_column = 0.
81  void MakePartition(int x_min, int y_min, int x_max, int y_max) {
82  MakePartition(x_min, y_min, x_max, y_max, 0, 0);
83  }
84 
// Deletes the boxes of any existing partition_, builds a TBOX from the given
// coordinates and sets the first/last column on partition_.
// NOTE(review): listing line 92 is absent - presumably partition_ is
// re-created from `box` there (`box` is otherwise unused); confirm upstream.
85  void MakePartition(int x_min, int y_min, int x_max, int y_max, int first_column,
86  int last_column) {
87  if (partition_.get() != nullptr) {
88  partition_->DeleteBoxes();
89  }
90  TBOX box;
91  box.set_to_given_coords(x_min, y_min, x_max, y_max);
93  partition_->set_first_column(first_column);
94  partition_->set_last_column(last_column);
95  }
96 
// NOTE(review): the header of this method (listing line 97) is absent; the
// tests call it as InsertTextPartition(ColPartition *). It passes `part` to
// the finder and also appends it to free_boxes_.
98  finder_->InsertTextPartition(part);
99  free_boxes_it_.add_after_then_move(part);
100  }
101 
// Convenience overload: forwards with first_column = last_column = 0.
102  void InsertLeaderPartition(int x_min, int y_min, int x_max, int y_max) {
103  InsertLeaderPartition(x_min, y_min, x_max, y_max, 0, 0);
104  }
105 
// Builds a TBOX from the given coordinates, sets the column span on `part`,
// passes it to the finder as a leader partition and appends it to free_boxes_.
// NOTE(review): listing line 111 (the initializer of `part`) is absent -
// presumably a partition created from `box`; confirm upstream.
106  void InsertLeaderPartition(int x_min, int y_min, int x_max, int y_max, int first_column,
107  int last_column) {
108  TBOX box;
109  box.set_to_given_coords(x_min, y_min, x_max, y_max);
110  ColPartition *part =
112  part->set_first_column(first_column);
113  part->set_last_column(last_column);
114  finder_->InsertLeaderPartition(part);
115  free_boxes_it_.add_after_then_move(part);
116  }
117 
// NOTE(review): the header of this method (listing line 118) is absent. The
// visible body walks free_boxes_ once and calls DeleteBoxes() on each entry.
119  for (free_boxes_it_.mark_cycle_pt(); !free_boxes_it_.cycled_list(); free_boxes_it_.forward()) {
120  ColPartition *part = free_boxes_it_.data();
121  part->DeleteBoxes();
122  }
123  }
124 
// Finder under test; created in SetUp() and reset in TearDown().
125  std::unique_ptr<TestableTableFinder> finder_;
// Scratch partition that MakePartition() configures for the current check.
126  std::unique_ptr<ColPartition> partition_;
127 
128 private:
// Partitions handed to the finder by the Insert* helpers, plus the iterator
// used to append to and walk that list.
129  tesseract::ColPartition_CLIST free_boxes_;
130  tesseract::ColPartition_C_IT free_boxes_it_;
131 };
132 
133 TEST_F(TableFinderTest, GapInXProjectionNoGap) {
134  int data[100];
135  for (int &i : data) {
136  i = 10;
137  }
138  EXPECT_FALSE(finder_->GapInXProjection(data, 100));
139 }
140 
141 TEST_F(TableFinderTest, GapInXProjectionEdgeGap) {
142  int data[100];
143  for (int i = 0; i < 10; ++i) {
144  data[i] = 2;
145  }
146  for (int i = 10; i < 90; ++i) {
147  data[i] = 10;
148  }
149  for (int i = 90; i < 100; ++i) {
150  data[i] = 2;
151  }
152  EXPECT_FALSE(finder_->GapInXProjection(data, 100));
153 }
154 
155 TEST_F(TableFinderTest, GapInXProjectionExists) {
156  int data[100];
157  for (int i = 0; i < 10; ++i) {
158  data[i] = 10;
159  }
160  for (int i = 10; i < 90; ++i) {
161  data[i] = 2;
162  }
163  for (int i = 90; i < 100; ++i) {
164  data[i] = 10;
165  }
166  EXPECT_TRUE(finder_->GapInXProjection(data, 100));
167 }
168 
169 TEST_F(TableFinderTest, HasLeaderAdjacentOverlapping) {
170  InsertLeaderPartition(90, 0, 150, 5);
171  MakePartition(0, 0, 100, 10);
172  EXPECT_TRUE(finder_->HasLeaderAdjacent(*partition_));
173  MakePartition(0, 25, 100, 40);
174  EXPECT_FALSE(finder_->HasLeaderAdjacent(*partition_));
175  MakePartition(145, 0, 200, 20);
176  EXPECT_TRUE(finder_->HasLeaderAdjacent(*partition_));
177  MakePartition(40, 0, 50, 4);
178  EXPECT_TRUE(finder_->HasLeaderAdjacent(*partition_));
179 }
180 
181 TEST_F(TableFinderTest, HasLeaderAdjacentNoOverlap) {
182  InsertLeaderPartition(90, 10, 150, 15);
183  MakePartition(0, 10, 85, 20);
184  EXPECT_TRUE(finder_->HasLeaderAdjacent(*partition_));
185  MakePartition(0, 25, 100, 40);
186  EXPECT_FALSE(finder_->HasLeaderAdjacent(*partition_));
187  MakePartition(0, 0, 100, 10);
188  EXPECT_FALSE(finder_->HasLeaderAdjacent(*partition_));
189  // TODO(nbeato): is this a useful metric? case fails
190  // MakePartition(160, 0, 200, 15); // leader is primarily above it
191  // EXPECT_FALSE(finder_->HasLeaderAdjacent(*partition_));
192 }
193 
194 TEST_F(TableFinderTest, HasLeaderAdjacentPreservesColumns) {
195  InsertLeaderPartition(90, 0, 150, 5, 1, 2);
196  MakePartition(0, 0, 85, 10, 0, 0);
197  EXPECT_FALSE(finder_->HasLeaderAdjacent(*partition_));
198  MakePartition(0, 0, 100, 10, 0, 1);
199  EXPECT_TRUE(finder_->HasLeaderAdjacent(*partition_));
200  MakePartition(0, 0, 200, 10, 0, 5);
201  EXPECT_TRUE(finder_->HasLeaderAdjacent(*partition_));
202  MakePartition(155, 0, 200, 10, 5, 5);
203  EXPECT_FALSE(finder_->HasLeaderAdjacent(*partition_));
204 }
205 
206 // TODO(nbeato): Only testing a splitting case. Add more...
207 // Also test non-split cases.
208 TEST_F(TableFinderTest, SplitAndInsertFragmentedPartitionsBasicPass) {
209  finder_->set_global_median_blob_width(3);
210  finder_->set_global_median_xheight(10);
211 
212  TBOX part_box(10, 5, 100, 15);
213  auto *all = new ColPartition(BRT_UNKNOWN, ICOORD(0, 1));
214  all->set_type(PT_FLOWING_TEXT);
215  all->set_blob_type(BRT_TEXT);
216  all->set_flow(BTFT_CHAIN);
217  all->set_left_margin(10);
218  all->set_right_margin(100);
219  TBOX blob_box = part_box;
220  for (int i = 10; i <= 20; i += 5) {
221  blob_box.set_left(i + 1);
222  blob_box.set_right(i + 4);
223  all->AddBox(new BLOBNBOX(C_BLOB::FakeBlob(blob_box)));
224  }
225  for (int i = 35; i <= 55; i += 5) {
226  blob_box.set_left(i + 1);
227  blob_box.set_right(i + 4);
228  all->AddBox(new BLOBNBOX(C_BLOB::FakeBlob(blob_box)));
229  }
230  for (int i = 80; i <= 95; i += 5) {
231  blob_box.set_left(i + 1);
232  blob_box.set_right(i + 4);
233  all->AddBox(new BLOBNBOX(C_BLOB::FakeBlob(blob_box)));
234  }
235  // TODO(nbeato): Ray's newer code...
236  // all->ClaimBoxes();
237  all->ComputeLimits(); // This is to make sure median iinfo is set.
238  InsertTextPartition(all); // This is to delete blobs
239  ColPartition *fragment_me = all->CopyButDontOwnBlobs();
240 
241  finder_->SplitAndInsertFragmentedTextPartition(fragment_me);
242  finder_->ExpectPartition(TBOX(11, 5, 24, 15));
243  finder_->ExpectPartition(TBOX(36, 5, 59, 15));
244  finder_->ExpectPartition(TBOX(81, 5, 99, 15));
245  finder_->ExpectPartitionCount(3);
246 }
247 
248 TEST_F(TableFinderTest, SplitAndInsertFragmentedPartitionsBasicFail) {
249  finder_->set_global_median_blob_width(3);
250  finder_->set_global_median_xheight(10);
251 
252  TBOX part_box(10, 5, 100, 15);
253  auto *all = new ColPartition(BRT_UNKNOWN, ICOORD(0, 1));
254  all->set_type(PT_FLOWING_TEXT);
255  all->set_blob_type(BRT_TEXT);
256  all->set_flow(BTFT_CHAIN);
257  all->set_left_margin(10);
258  all->set_right_margin(100);
259  TBOX blob_box = part_box;
260  for (int i = 10; i <= 95; i += 5) {
261  blob_box.set_left(i + 1);
262  blob_box.set_right(i + 4);
263  all->AddBox(new BLOBNBOX(C_BLOB::FakeBlob(blob_box)));
264  }
265  // TODO(nbeato): Ray's newer code...
266  // all->ClaimBoxes();
267  all->ComputeLimits(); // This is to make sure median iinfo is set.
268  InsertTextPartition(all); // This is to delete blobs
269  ColPartition *fragment_me = all->CopyButDontOwnBlobs();
270 
271  finder_->SplitAndInsertFragmentedTextPartition(fragment_me);
272  finder_->ExpectPartition(TBOX(11, 5, 99, 15));
273  finder_->ExpectPartitionCount(1);
274 }
275 
276 } // namespace tesseract
@ TBOX
@ BRT_TEXT
Definition: blobbox.h:82
@ BRT_UNKNOWN
Definition: blobbox.h:80
@ BTFT_NONE
Definition: blobbox.h:111
@ BTFT_CHAIN
Definition: blobbox.h:114
@ BTFT_LEADER
Definition: blobbox.h:117
TEST_F(EuroText, FastLatinOCR)
@ PT_FLOWING_TEXT
Definition: publictypes.h:55
integer coordinate
Definition: points.h:36
TDimension left() const
Definition: rect.h:82
void set_right(int x)
Definition: rect.h:92
void set_to_given_coords(int x_min, int y_min, int x_max, int y_max)
Definition: rect.h:282
void set_left(int x)
Definition: rect.h:85
TDimension top() const
Definition: rect.h:68
TDimension right() const
Definition: rect.h:89
TDimension bottom() const
Definition: rect.h:75
static C_BLOB * FakeBlob(const TBOX &box)
Definition: stepblob.cpp:238
void SetUniqueMode(bool mode)
Definition: bbgrid.h:249
void StartFullSearch()
Definition: bbgrid.h:701
BBC * NextFullSearch()
Definition: bbgrid.h:711
static ColPartition * FakePartition(const TBOX &box, PolyBlockType block_type, BlobRegionType blob_type, BlobTextFlowType flow)
ColPartition * CopyButDontOwnBlobs()
void set_last_column(int column)
Definition: colpartition.h:732
const TBOX & bounding_box() const
Definition: colpartition.h:108
void set_first_column(int column)
Definition: colpartition.h:729
void SplitAndInsertFragmentedTextPartition(ColPartition *part)
Definition: tablefind.cpp:437
bool HasLeaderAdjacent(const ColPartition &part)
Definition: tablefind.cpp:969
void set_global_median_blob_width(int width)
Definition: tablefind.cpp:766
void InsertLeaderPartition(ColPartition *part)
Definition: tablefind.cpp:411
bool GapInXProjection(int *xprojection, int length)
Definition: tablefind.cpp:1838
void set_global_median_xheight(int xheight)
Definition: tablefind.cpp:763
void set_global_median_ledding(int ledding)
Definition: tablefind.cpp:769
ColPartitionGrid fragmented_text_grid_
Definition: tablefind.h:401
void InsertTextPartition(ColPartition *part)
Definition: tablefind.cpp:395
void ExpectPartitionCount(int expected_count)
void ExpectPartition(const TBOX &box)
void InsertLeaderPartition(int x_min, int y_min, int x_max, int y_max)
void MakePartition(int x_min, int y_min, int x_max, int y_max, int first_column, int last_column)
void InsertLeaderPartition(int x_min, int y_min, int x_max, int y_max, int first_column, int last_column)
std::unique_ptr< ColPartition > partition_
void InsertTextPartition(ColPartition *part)
std::unique_ptr< TestableTableFinder > finder_
void MakePartition(int x_min, int y_min, int x_max, int y_max)