tesseract  5.0.0
tabvector.h
Go to the documentation of this file.
1 // File: tabvector.h
3 // Description: Class to hold a near-vertical vector representing a tab-stop.
4 // Author: Ray Smith
5 //
6 // (C) Copyright 2008, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
18 
19 #ifndef TESSERACT_TEXTORD_TABVECTOR_H_
20 #define TESSERACT_TEXTORD_TABVECTOR_H_
21 
22 #include "bbgrid.h"
23 #include "blobgrid.h"
24 #include "clst.h"
25 #include "elst.h"
26 #include "elst2.h"
27 #include "rect.h"
28 
29 #include <algorithm>
30 
31 class BLOBNBOX;
32 class ScrollView;
33 
34 namespace tesseract {
35 
38 
39 // The alignment type that a tab vector represents.
40 // Keep this enum synced with kAlignmentNames in tabvector.cpp.
48  TA_COUNT
49 };
50 
51 // Forward declarations. The classes use their own list types, so we
52 // need to make the list types first.
53 class TabFind;
54 class TabVector;
55 class TabConstraint;
56 
57 ELIST2IZEH(TabVector)
58 CLISTIZEH(TabVector)
59 ELISTIZEH(TabConstraint)
60 
61 // TabConstraint is a totally self-contained class to maintain
62 // a list of [min,max] constraints, each referring to a TabVector.
63 // The constraints are manipulated through static methods that act
64 // on a list of constraints. The list itself is cooperatively owned
65 // by the TabVectors of the constraints on the list and managed
66 // by implicit reference counting via the elements of the list.
67 class TabConstraint : public ELIST_LINK {
68 public:
69  // This empty constructor is here only so that the class can be ELISTIZED.
70  // TODO(rays) change deep_copy in elst.h line 955 to take a callback copier
71  // and eliminate CLASSNAME##_copier.
72  TabConstraint() = default;
73 
74  // Create a constraint for the top or bottom of this TabVector.
75  static void CreateConstraint(TabVector *vector, bool is_top);
76 
77  // Test to see if the constraints are compatible enough to merge.
78  static bool CompatibleConstraints(TabConstraint_LIST *list1, TabConstraint_LIST *list2);
79 
80  // Merge the lists of constraints and update the TabVector pointers.
81  // The second list is deleted.
82  static void MergeConstraints(TabConstraint_LIST *list1, TabConstraint_LIST *list2);
83 
84  // Set all the tops and bottoms as appropriate to a mean of the
85  // constrained range. Delete all the constraints and list.
86  static void ApplyConstraints(TabConstraint_LIST *constraints);
87 
88 private:
89  TabConstraint(TabVector *vector, bool is_top);
90 
91  // Get the max of the mins and the min of the maxes.
92  static void GetConstraints(TabConstraint_LIST *constraints, int *y_min, int *y_max);
93 
94  // The TabVector this constraint applies to.
95  TabVector *vector_;
96  // If true then we refer to the top of the vector_.
97  bool is_top_;
98  // The allowed range of this vector_.
99  int y_min_;
100  int y_max_;
101 };
102 
103 // Class to hold information about a single vector
104 // that represents a tab stop or a rule line.
105 class TabVector : public ELIST2_LINK {
106 public:
107  // TODO(rays) fix this in elst.h line 1076, where it should use the
108  // copy constructor instead of operator=.
109  TabVector() = default;
110  ~TabVector() = default;
111 
112  // Public factory to build a TabVector from a list of boxes.
113  // The TabVector will be of the given alignment type.
114  // The input vertical vector is used in fitting, and the output
115  // vertical_x, vertical_y have the resulting line vector added to them
116  // if the alignment is not ragged.
117  // The extended_start_y and extended_end_y are the maximum possible
118  // extension to the line segment that can be used to align with others.
119  // The input CLIST of BLOBNBOX good_points is consumed and taken over.
120  static TabVector *FitVector(TabAlignment alignment, ICOORD vertical, int extended_start_y,
121  int extended_end_y, BLOBNBOX_CLIST *good_points, int *vertical_x,
122  int *vertical_y);
123 
124  // Build a ragged TabVector by copying another's direction, shifting it
125  // to match the given blob, and making its initial extent the height
126  // of the blob, but its extended bounds from the bounds of the original.
127  TabVector(const TabVector &src, TabAlignment alignment, const ICOORD &vertical_skew,
128  BLOBNBOX *blob);
129 
130  // Copies basic attributes of a tab vector for simple operations.
131  // Copies things such startpt, endpt, range, width.
132  // Does not copy things such as partners, boxes, or constraints.
133  // This is useful if you only need vector information for processing, such
134  // as in the table detection code.
135  TabVector *ShallowCopy() const;
136 
137  // Simple accessors.
138  const ICOORD &startpt() const {
139  return startpt_;
140  }
141  const ICOORD &endpt() const {
142  return endpt_;
143  }
144  int extended_ymax() const {
145  return extended_ymax_;
146  }
147  int extended_ymin() const {
148  return extended_ymin_;
149  }
150  int sort_key() const {
151  return sort_key_;
152  }
153  int mean_width() const {
154  return mean_width_;
155  }
156  void set_top_constraints(TabConstraint_LIST *constraints) {
157  top_constraints_ = constraints;
158  }
159  void set_bottom_constraints(TabConstraint_LIST *constraints) {
160  bottom_constraints_ = constraints;
161  }
162  TabVector_CLIST *partners() {
163  return &partners_;
164  }
165  void set_startpt(const ICOORD &start) {
166  startpt_ = start;
167  }
168  void set_endpt(const ICOORD &end) {
169  endpt_ = end;
170  }
171  bool intersects_other_lines() const {
172  return intersects_other_lines_;
173  }
174  void set_intersects_other_lines(bool value) {
175  intersects_other_lines_ = value;
176  }
177 
178  // Inline quasi-accessors that require some computation.
179 
180  // Compute the x coordinate at the given y coordinate.
181  int XAtY(int y) const {
182  int height = endpt_.y() - startpt_.y();
183  if (height != 0) {
184  return (y - startpt_.y()) * (endpt_.x() - startpt_.x()) / height + startpt_.x();
185  } else {
186  return startpt_.x();
187  }
188  }
189 
190  // Compute the vertical overlap with the other TabVector.
191  int VOverlap(const TabVector &other) const {
192  return std::min(other.endpt_.y(), endpt_.y()) - std::max(other.startpt_.y(), startpt_.y());
193  }
194  // Compute the vertical overlap with the given y bounds.
195  int VOverlap(int top_y, int bottom_y) const {
196  return std::min(top_y, static_cast<int>(endpt_.y())) -
197  std::max(bottom_y, static_cast<int>(startpt_.y()));
198  }
199  // Compute the extended vertical overlap with the given y bounds.
200  int ExtendedOverlap(int top_y, int bottom_y) const {
201  return std::min(top_y, extended_ymax_) - std::max(bottom_y, extended_ymin_);
202  }
203 
204  // Return true if this is a left tab stop, either aligned, or ragged.
205  bool IsLeftTab() const {
206  return alignment_ == TA_LEFT_ALIGNED || alignment_ == TA_LEFT_RAGGED;
207  }
208  // Return true if this is a right tab stop, either aligned, or ragged.
209  bool IsRightTab() const {
210  return alignment_ == TA_RIGHT_ALIGNED || alignment_ == TA_RIGHT_RAGGED;
211  }
212  // Return true if this is a separator.
213  bool IsSeparator() const {
214  return alignment_ == TA_SEPARATOR;
215  }
216  // Return true if this is a center aligned tab stop.
217  bool IsCenterTab() const {
218  return alignment_ == TA_CENTER_JUSTIFIED;
219  }
220  // Return true if this is a ragged tab top, either left or right.
221  bool IsRagged() const {
222  return alignment_ == TA_LEFT_RAGGED || alignment_ == TA_RIGHT_RAGGED;
223  }
224 
225  // Return true if this vector is to the left of the other in terms
226  // of sort_key_.
227  bool IsLeftOf(const TabVector &other) const {
228  return sort_key_ < other.sort_key_;
229  }
230 
231  // Return true if the vector has no partners.
232  bool Partnerless() {
233  return partners_.empty();
234  }
235 
236  // Return the number of tab boxes in this vector.
237  int BoxCount() {
238  return boxes_.length();
239  }
240 
241  // Lock the vector from refits by clearing the boxes_ list.
242  void Freeze() {
243  boxes_.shallow_clear();
244  }
245 
246  // Flip x and y on the ends so a vector can be created from flipped input.
247  void XYFlip() {
248  int x = startpt_.y();
249  startpt_.set_y(startpt_.x());
250  startpt_.set_x(x);
251  x = endpt_.y();
252  endpt_.set_y(endpt_.x());
253  endpt_.set_x(x);
254  }
255 
256  // Reflect the tab vector in the y-axis.
257  void ReflectInYAxis() {
258  startpt_.set_x(-startpt_.x());
259  endpt_.set_x(-endpt_.x());
260  sort_key_ = -sort_key_;
261  if (alignment_ == TA_LEFT_ALIGNED) {
262  alignment_ = TA_RIGHT_ALIGNED;
263  } else if (alignment_ == TA_RIGHT_ALIGNED) {
264  alignment_ = TA_LEFT_ALIGNED;
265  }
266  if (alignment_ == TA_LEFT_RAGGED) {
267  alignment_ = TA_RIGHT_RAGGED;
268  } else if (alignment_ == TA_RIGHT_RAGGED) {
269  alignment_ = TA_LEFT_RAGGED;
270  }
271  }
272 
273  // Separate function to compute the sort key for a given coordinate pair.
274  static int SortKey(const ICOORD &vertical, int x, int y) {
275  ICOORD pt(x, y);
276  return pt * vertical;
277  }
278 
279  // Return the x at the given y for the given sort key.
280  static int XAtY(const ICOORD &vertical, int sort_key, int y) {
281  if (vertical.y() != 0) {
282  return (vertical.x() * y + sort_key) / vertical.y();
283  } else {
284  return sort_key;
285  }
286  }
287 
288  // Sort function for E2LIST::sort to sort by sort_key_.
289  static int SortVectorsByKey(const void *v1, const void *v2) {
290  const TabVector *tv1 = *static_cast<const TabVector *const *>(v1);
291  const TabVector *tv2 = *static_cast<const TabVector *const *>(v2);
292  return tv1->sort_key_ - tv2->sort_key_;
293  }
294 
295  // More complex members.
296 
297  // Extend this vector to include the supplied blob if it doesn't
298  // already have it.
299  void ExtendToBox(BLOBNBOX *blob);
300 
301  // Set the ycoord of the start and move the xcoord to match.
302  void SetYStart(int start_y);
303  // Set the ycoord of the end and move the xcoord to match.
304  void SetYEnd(int end_y);
305 
306  // Rotate the ends by the given vector.
307  void Rotate(const FCOORD &rotation);
308 
309  // Setup the initial constraints, being the limits of
310  // the vector and the extended ends.
311  void SetupConstraints();
312 
313  // Setup the constraints between the partners of this TabVector.
315 
316  // Setup the constraints between this and its partner.
317  void SetupPartnerConstraints(TabVector *partner);
318 
319  // Use the constraints to modify the top and bottom.
320  void ApplyConstraints();
321 
322  // Merge close tab vectors of the same side that overlap.
323  static void MergeSimilarTabVectors(const ICOORD &vertical, TabVector_LIST *vectors,
324  BlobGrid *grid);
325 
326  // Return true if this vector is the same side, overlaps, and close
327  // enough to the other to be merged.
328  bool SimilarTo(const ICOORD &vertical, const TabVector &other, BlobGrid *grid) const;
329 
330  // Eat the other TabVector into this and delete it.
331  void MergeWith(const ICOORD &vertical, TabVector *other);
332 
333  // Add a new element to the list of partner TabVectors.
334  // Partners must be added in order of increasing y coordinate of the text line
335  // that makes them partners.
336  // Groups of identical partners are merged into one.
337  void AddPartner(TabVector *partner);
338 
339  // Return true if other is a partner of this.
340  bool IsAPartner(const TabVector *other);
341 
342  // Print basic information about this tab vector.
343  void Print(const char *prefix);
344 
345  // Print basic information about this tab vector and every box in it.
346  void Debug(const char *prefix);
347 
348  // Draw this tabvector in place in the given window.
349  void Display(ScrollView *tab_win);
350 
351  // Refit the line and/or re-evaluate the vector if the dirty flags are set.
352  void FitAndEvaluateIfNeeded(const ICOORD &vertical, TabFind *finder);
353 
354  // Evaluate the vector in terms of coverage of its length by good-looking
355  // box edges. A good looking box is one where its nearest neighbour on the
356  // inside is nearer than half the distance its nearest neighbour on the
357  // outside of the putative column. Bad boxes are removed from the line.
358  // A second pass then further filters boxes by requiring that the gutter
359  // width be a minimum fraction of the mean gutter along the line.
360  void Evaluate(const ICOORD &vertical, TabFind *finder);
361 
362  // (Re)Fit a line to the stored points. Returns false if the line
363  // is degenerate. Although the TabVector code mostly doesn't care about the
364  // direction of lines, XAtY would give silly results for a horizontal line.
365  // The class is mostly aimed at use for vertical lines representing
366  // horizontal tab stops.
367  bool Fit(ICOORD vertical, bool force_parallel);
368 
369  // Return the partner of this TabVector if the vector qualifies as
370  // being a vertical text line, otherwise nullptr.
372 
373  // Return the matching tabvector if there is exactly one partner, or
374  // nullptr otherwise. This can be used after matching is done, eg. by
375  // VerticalTextlinePartner(), without checking if the line is vertical.
377 
378 private:
379  // Constructor is private as the static factory is the external way
380  // to build a TabVector.
381  TabVector(int extended_ymin, int extended_ymax, TabAlignment alignment, BLOBNBOX_CLIST *boxes);
382 
383  // Delete this, but first, repoint all the partners to point to
384  // replacement. If replacement is nullptr, then partner relationships
385  // are removed.
386  void Delete(TabVector *replacement);
387 
388 private:
389  // The bottom of the tab line.
390  ICOORD startpt_;
391  // The top of the tab line.
392  ICOORD endpt_;
393  // The lowest y that the vector might extend to.
394  int extended_ymin_ = 0;
395  // The highest y that the vector might extend to.
396  int extended_ymax_ = 0;
397  // Perpendicular distance of vector from a given vertical for sorting.
398  int sort_key_ = 0;
399  // Result of Evaluate 0-100. Coverage of line with good boxes.
400  int percent_score_ = 0;
401  // The mean width of the blobs. Meaningful only for separator lines.
402  int mean_width_ = 0;
403  // True if the boxes_ list has been modified, so a refit is needed.
404  bool needs_refit_ = false;
405  // True if a fit has been done, so re-evaluation is needed.
406  bool needs_evaluation_ = false;
407  // True if a separator line intersects at least 2 other lines.
408  bool intersects_other_lines_ = false;
409  // The type of this TabVector.
410  TabAlignment alignment_ = TA_LEFT_ALIGNED;
411  // The list of boxes whose edges are aligned at this TabVector.
412  BLOBNBOX_CLIST boxes_;
413  // List of TabVectors that have a connection with this via a text line.
414  TabVector_CLIST partners_;
415  // Constraints used to resolve the exact location of the top and bottom
416  // of the tab line.
417  TabConstraint_LIST *top_constraints_ = nullptr;
418  TabConstraint_LIST *bottom_constraints_ = nullptr;
419 };
420 
421 } // namespace tesseract.
422 
423 #endif // TESSERACT_TEXTORD_TABVECTOR_H_
#define CLISTIZEH(CLASSNAME)
Definition: clst.h:705
#define ELISTIZEH(CLASSNAME)
Definition: elst.h:803
#define ELIST2IZEH(CLASSNAME)
Definition: elst2.h:822
double textord_tabvector_vertical_gap_fraction
Definition: tabvector.cpp:57
double_VAR_H(classify_min_slope)
@ TA_COUNT
Definition: tabvector.h:48
@ TA_RIGHT_ALIGNED
Definition: tabvector.h:45
@ TA_RIGHT_RAGGED
Definition: tabvector.h:46
@ TA_LEFT_ALIGNED
Definition: tabvector.h:42
@ TA_SEPARATOR
Definition: tabvector.h:47
@ TA_LEFT_RAGGED
Definition: tabvector.h:43
@ TA_CENTER_JUSTIFIED
Definition: tabvector.h:44
double textord_tabvector_vertical_box_ratio
Definition: tabvector.cpp:60
integer coordinate
Definition: points.h:36
void set_x(TDimension xin)
rewrite function
Definition: points.h:67
TDimension y() const
access_function
Definition: points.h:62
void set_y(TDimension yin)
rewrite function
Definition: points.h:71
TDimension x() const
access function
Definition: points.h:58
int XAtY(int y) const
Definition: tabvector.h:181
void set_bottom_constraints(TabConstraint_LIST *constraints)
Definition: tabvector.h:159
static int SortKey(const ICOORD &vertical, int x, int y)
Definition: tabvector.h:274
void Rotate(const FCOORD &rotation)
Definition: tabvector.cpp:274
void set_startpt(const ICOORD &start)
Definition: tabvector.h:165
void AddPartner(TabVector *partner)
Definition: tabvector.cpp:488
bool IsSeparator() const
Definition: tabvector.h:213
void set_endpt(const ICOORD &end)
Definition: tabvector.h:168
static void MergeSimilarTabVectors(const ICOORD &vertical, TabVector_LIST *vectors, BlobGrid *grid)
Definition: tabvector.cpp:352
int ExtendedOverlap(int top_y, int bottom_y) const
Definition: tabvector.h:200
bool IsCenterTab() const
Definition: tabvector.h:217
int extended_ymin() const
Definition: tabvector.h:147
void set_intersects_other_lines(bool value)
Definition: tabvector.h:174
bool IsAPartner(const TabVector *other)
Definition: tabvector.cpp:503
TabVector_CLIST * partners()
Definition: tabvector.h:162
int sort_key() const
Definition: tabvector.h:150
bool intersects_other_lines() const
Definition: tabvector.h:171
static TabVector * FitVector(TabAlignment alignment, ICOORD vertical, int extended_start_y, int extended_end_y, BLOBNBOX_CLIST *good_points, int *vertical_x, int *vertical_y)
Definition: tabvector.cpp:176
const ICOORD & startpt() const
Definition: tabvector.h:138
bool IsRightTab() const
Definition: tabvector.h:209
TabVector * VerticalTextlinePartner()
Definition: tabvector.cpp:883
static int XAtY(const ICOORD &vertical, int sort_key, int y)
Definition: tabvector.h:280
void Evaluate(const ICOORD &vertical, TabFind *finder)
Definition: tabvector.cpp:581
int extended_ymax() const
Definition: tabvector.h:144
void SetupPartnerConstraints()
Definition: tabvector.cpp:295
void SetYEnd(int end_y)
Definition: tabvector.cpp:268
int mean_width() const
Definition: tabvector.h:153
void set_top_constraints(TabConstraint_LIST *constraints)
Definition: tabvector.h:156
bool Fit(ICOORD vertical, bool force_parallel)
Definition: tabvector.cpp:783
bool IsRagged() const
Definition: tabvector.h:221
void SetYStart(int start_y)
Definition: tabvector.cpp:263
bool IsLeftTab() const
Definition: tabvector.h:205
void Print(const char *prefix)
Definition: tabvector.cpp:518
void FitAndEvaluateIfNeeded(const ICOORD &vertical, TabFind *finder)
Definition: tabvector.cpp:566
void Display(ScrollView *tab_win)
Definition: tabvector.cpp:540
int VOverlap(const TabVector &other) const
Definition: tabvector.h:191
TabVector * ShallowCopy() const
Definition: tabvector.cpp:225
void MergeWith(const ICOORD &vertical, TabVector *other)
Definition: tabvector.cpp:454
bool IsLeftOf(const TabVector &other) const
Definition: tabvector.h:227
int VOverlap(int top_y, int bottom_y) const
Definition: tabvector.h:195
static int SortVectorsByKey(const void *v1, const void *v2)
Definition: tabvector.h:289
void Debug(const char *prefix)
Definition: tabvector.cpp:527
TabVector * GetSinglePartner()
Definition: tabvector.cpp:872
const ICOORD & endpt() const
Definition: tabvector.h:141
void ExtendToBox(BLOBNBOX *blob)
Definition: tabvector.cpp:238
bool SimilarTo(const ICOORD &vertical, const TabVector &other, BlobGrid *grid) const
Definition: tabvector.cpp:384