tesseract  5.0.0
tesseract::TestableStructuredTable Class Reference
Inheritance diagram for tesseract::TestableStructuredTable:
tesseract::StructuredTable

Public Member Functions

void InjectCellY (int y)
 
void InjectCellX (int x)
 
void ExpectCellX (int x_min, int second, int add, int almost_done, int x_max)
 
void ExpectSortedX ()
 
int CountHorizontalIntersections (int y)
 
int CountVerticalIntersections (int x)
 
bool FindLinedStructure ()
 
void FindWhitespacedColumns ()
 
bool FindWhitespacedStructure ()
 
bool VerifyLinedTableCells ()
 
- Public Member Functions inherited from tesseract::StructuredTable
 StructuredTable ()
 
 ~StructuredTable ()=default
 
void Init ()
 
void set_text_grid (ColPartitionGrid *text)
 
void set_line_grid (ColPartitionGrid *lines)
 
void set_max_text_height (int height)
 
bool is_lined () const
 
unsigned row_count () const
 
unsigned column_count () const
 
unsigned cell_count () const
 
void set_bounding_box (const TBOX &box)
 
const TBOX & bounding_box () const
 
int median_cell_height ()
 
int median_cell_width ()
 
int row_height (unsigned row) const
 
int column_width (unsigned column) const
 
int space_above () const
 
int space_below () const
 
bool FindLinedStructure ()
 
bool FindWhitespacedStructure ()
 
bool DoesPartitionFit (const ColPartition &part) const
 
int CountFilledCells ()
 
int CountFilledCellsInRow (int row)
 
int CountFilledCellsInColumn (int column)
 
int CountFilledCells (unsigned row_start, unsigned row_end, unsigned column_start, unsigned column_end)
 
bool VerifyRowFilled (int row)
 
double CalculateCellFilledPercentage (unsigned row, unsigned column)
 
void Display (ScrollView *window, ScrollView::Color color)
 

Additional Inherited Members

- Protected Member Functions inherited from tesseract::StructuredTable
void ClearStructure ()
 
bool VerifyLinedTableCells ()
 
bool VerifyWhitespacedTable ()
 
void FindWhitespacedColumns ()
 
void FindWhitespacedRows ()
 
void CalculateMargins ()
 
void UpdateMargins (ColPartitionGrid *grid)
 
int FindVerticalMargin (ColPartitionGrid *grid, int start_x, bool decrease) const
 
int FindHorizontalMargin (ColPartitionGrid *grid, int start_y, bool decrease) const
 
void CalculateStats ()
 
void AbsorbNearbyLines ()
 
int CountVerticalIntersections (int x)
 
int CountHorizontalIntersections (int y)
 
int CountPartitions (const TBOX &box)
 
- Static Protected Member Functions inherited from tesseract::StructuredTable
static void FindCellSplitLocations (const std::vector< int > &min_list, const std::vector< int > &max_list, int max_merged, std::vector< int > *locations)
 
- Protected Attributes inherited from tesseract::StructuredTable
ColPartitionGrid * text_grid_
 
ColPartitionGrid * line_grid_
 
TBOX bounding_box_
 
std::vector< int > cell_x_
 
std::vector< int > cell_y_
 
bool is_lined_
 
int space_above_
 
int space_below_
 
int space_left_
 
int space_right_
 
int median_cell_height_
 
int median_cell_width_
 
int max_text_height_
 

Detailed Description

Definition at line 31 of file tablerecog_test.cc.

Member Function Documentation

◆ CountHorizontalIntersections()

int tesseract::StructuredTable::CountHorizontalIntersections

Definition at line 221 of file tablerecog.cpp.

677  {
678  int count = 0;
679  // Make a small box to keep the search time down.
680  const int kGridSize = text_grid_->gridsize();
681  TBOX horizontal_box = bounding_box_;
682  horizontal_box.set_bottom(y - kGridSize);
683  horizontal_box.set_top(y + kGridSize);
684 
685  ColPartitionGridSearch gsearch(text_grid_);
686  gsearch.SetUniqueMode(true);
687  gsearch.StartRectSearch(horizontal_box);
688  ColPartition *text = nullptr;
689  while ((text = gsearch.NextRectSearch()) != nullptr) {
690  if (!text->IsTextType()) {
691  continue;
692  }
693 
694  const TBOX &box = text->bounding_box();
695  if (box.bottom() < y && y < box.top()) {
696  ++count;
697  }
698  }
699  return count;
700 }
@ TBOX
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:919
void set_bottom(int y)
Definition: rect.h:78
int gridsize() const
Definition: bbgrid.h:63
ColPartitionGrid * text_grid_
Definition: tablerecog.h:231

◆ CountVerticalIntersections()

int tesseract::StructuredTable::CountVerticalIntersections

Definition at line 220 of file tablerecog.cpp.

651  {
652  int count = 0;
653  // Make a small box to keep the search time down.
654  const int kGridSize = text_grid_->gridsize();
655  TBOX vertical_box = bounding_box_;
656  vertical_box.set_left(x - kGridSize);
657  vertical_box.set_right(x + kGridSize);
658 
659  ColPartitionGridSearch gsearch(text_grid_);
660  gsearch.SetUniqueMode(true);
661  gsearch.StartRectSearch(vertical_box);
662  ColPartition *text = nullptr;
663  while ((text = gsearch.NextRectSearch()) != nullptr) {
664  if (!text->IsTextType()) {
665  continue;
666  }
667  const TBOX &box = text->bounding_box();
668  if (box.left() < x && x < box.right()) {
669  ++count;
670  }
671  }
672  return count;
673 }
void set_left(int x)
Definition: rect.h:85

◆ ExpectCellX()

void tesseract::TestableStructuredTable::ExpectCellX ( int  x_min,
int  second,
int  add,
int  almost_done,
int  x_max 
)
inline

Definition at line 49 of file tablerecog_test.cc.

49  {
50  ASSERT_EQ(0, (almost_done - second) % add);
51  EXPECT_EQ(3 + (almost_done - second) / add, cell_x_.size());
52  EXPECT_EQ(x_min, cell_x_.at(0));
53  EXPECT_EQ(x_max, cell_x_.at(cell_x_.size() - 1));
54  for (unsigned i = 1; i < cell_x_.size() - 1; ++i) {
55  EXPECT_EQ(second + add * (i - 1), cell_x_.at(i));
56  }
57  }
std::vector< int > cell_x_
Definition: tablerecog.h:237

◆ ExpectSortedX()

void tesseract::TestableStructuredTable::ExpectSortedX ( )
inline

Definition at line 59 of file tablerecog_test.cc.

59  {
60  EXPECT_GT(cell_x_.size(), 0);
61  for (unsigned i = 1; i < cell_x_.size(); ++i) {
62  EXPECT_LT(cell_x_.at(i - 1), cell_x_.at(i));
63  }
64  }

◆ FindLinedStructure()

bool tesseract::StructuredTable::FindLinedStructure

Definition at line 105 of file tablerecog.cpp.

135  {
136  ClearStructure();
137 
138  // Search for all of the lines in the current box.
139  // Update the cellular structure with the exact lines.
140  ColPartitionGridSearch box_search(line_grid_);
141  box_search.SetUniqueMode(true);
142  box_search.StartRectSearch(bounding_box_);
143  ColPartition *line = nullptr;
144 
145  while ((line = box_search.NextRectSearch()) != nullptr) {
146  if (line->IsHorizontalLine()) {
147  cell_y_.push_back(line->MidY());
148  }
149  if (line->IsVerticalLine()) {
150  cell_x_.push_back(line->MidX());
151  }
152  }
153 
154  // HasSignificantLines should guarantee cells.
155  // Because that code is a different class, just gracefully
156  // return false. This could be an assert.
157  if (cell_x_.size() < 3 || cell_y_.size() < 3) {
158  return false;
159  }
160 
161  // Sort and remove duplicates that may have occurred due to split lines.
162  std::sort(cell_x_.begin(), cell_x_.end());
163  auto last_x = std::unique(cell_x_.begin(), cell_x_.end());
164  cell_x_.erase(last_x, cell_x_.end());
165  std::sort(cell_y_.begin(), cell_y_.end());
166  auto last_y = std::unique(cell_y_.begin(), cell_y_.end());
167  cell_y_.erase(last_y, cell_y_.end());
168 
169  // The border should be the extents of line boxes, not middle.
170  cell_x_[0] = bounding_box_.left();
171  cell_x_[cell_x_.size() - 1] = bounding_box_.right();
172  cell_y_[0] = bounding_box_.bottom();
173  cell_y_[cell_y_.size() - 1] = bounding_box_.top();
174 
175  // Remove duplicates that may have occurred due to moving the borders.
176  last_x = std::unique(cell_x_.begin(), cell_x_.end());
177  cell_x_.erase(last_x, cell_x_.end());
178  last_y = std::unique(cell_y_.begin(), cell_y_.end());
179  cell_y_.erase(last_y, cell_y_.end());
180 
182  CalculateStats();
184  return is_lined_;
185 }
TDimension left() const
Definition: rect.h:82
TDimension top() const
Definition: rect.h:68
TDimension right() const
Definition: rect.h:89
TDimension bottom() const
Definition: rect.h:75
std::vector< int > cell_y_
Definition: tablerecog.h:238
ColPartitionGrid * line_grid_
Definition: tablerecog.h:232

◆ FindWhitespacedColumns()

void tesseract::StructuredTable::FindWhitespacedColumns

Definition at line 160 of file tablerecog.cpp.

362  {
363  // Set of the extents of all partitions on the page.
364  std::vector<int> left_sides;
365  std::vector<int> right_sides;
366 
367  // Look at each text partition. We want to find the partitions
368  // that have extremal left/right sides. These will give us a basis
369  // for the table columns.
371  gsearch.SetUniqueMode(true);
372  gsearch.StartRectSearch(bounding_box_);
373  ColPartition *text = nullptr;
374  while ((text = gsearch.NextRectSearch()) != nullptr) {
375  if (!text->IsTextType()) {
376  continue;
377  }
378 
379  ASSERT_HOST(text->bounding_box().left() < text->bounding_box().right());
380  int spacing = static_cast<int>(text->median_width() * kHorizontalSpacing / 2.0 + 0.5);
381  left_sides.push_back(text->bounding_box().left() - spacing);
382  right_sides.push_back(text->bounding_box().right() + spacing);
383  }
384  // It causes disaster below, so avoid it!
385  if (left_sides.empty() || right_sides.empty()) {
386  return;
387  }
388 
389  // Since data may be inserted in grid order, we sort the left/right sides.
390  std::sort(left_sides.begin(), left_sides.end());
391  std::sort(right_sides.begin(), right_sides.end());
392 
393  // At this point, in the "merged list", we expect to have a left side,
394  // followed by either more left sides or a right side. The last number
395  // should be a right side. We find places where the splits occur by looking
396  // for "valleys". If we want to force gap sizes or allow overlap, change
397  // the spacing above. If you want to let lines "slice" partitions as long
398  // as it is infrequent, change the following function.
399  FindCellSplitLocations(left_sides, right_sides, kCellSplitColumnThreshold, &cell_x_);
400 }
#define ASSERT_HOST(x)
Definition: errcode.h:59
const double kHorizontalSpacing
Definition: tablerecog.cpp:34
const int kCellSplitColumnThreshold
Definition: tablerecog.cpp:41
static void FindCellSplitLocations(const std::vector< int > &min_list, const std::vector< int > &max_list, int max_merged, std::vector< int > *locations)
Definition: tablerecog.cpp:608

◆ FindWhitespacedStructure()

bool tesseract::StructuredTable::FindWhitespacedStructure

Definition at line 110 of file tablerecog.cpp.

188  {
189  ClearStructure();
192 
193  if (!VerifyWhitespacedTable()) {
194  return false;
195  } else {
196  bounding_box_.set_left(cell_x_[0]);
197  bounding_box_.set_right(cell_x_[cell_x_.size() - 1]);
198  bounding_box_.set_bottom(cell_y_[0]);
199  bounding_box_.set_top(cell_y_[cell_y_.size() - 1]);
202  CalculateStats();
203  return true;
204  }
205 }
void set_right(int x)
Definition: rect.h:92
void set_top(int y)
Definition: rect.h:71

◆ InjectCellX()

void tesseract::TestableStructuredTable::InjectCellX ( int  x)
inline

Definition at line 44 of file tablerecog_test.cc.

44  {
45  cell_x_.push_back(x);
46  std::sort(cell_x_.begin(), cell_x_.end());
47  }

◆ InjectCellY()

void tesseract::TestableStructuredTable::InjectCellY ( int  y)
inline

Definition at line 40 of file tablerecog_test.cc.

40  {
41  cell_y_.push_back(y);
42  std::sort(cell_y_.begin(), cell_y_.end());
43  }

◆ VerifyLinedTableCells()

bool tesseract::StructuredTable::VerifyLinedTableCells

Definition at line 148 of file tablerecog.cpp.

328  {
329  // Function only called when lines exist.
330  ASSERT_HOST(cell_y_.size() >= 2 && cell_x_.size() >= 2);
331  for (int i : cell_y_) {
332  if (CountHorizontalIntersections(i) > 0) {
333  return false;
334  }
335  }
336  for (int i : cell_x_) {
337  if (CountVerticalIntersections(i) > 0) {
338  return false;
339  }
340  }
341  return true;
342 }

The documentation for this class was generated from the following file: