tesseract  5.0.0
textord.h
Go to the documentation of this file.
1 // File: textord.h
3 // Description: The Textord class definition gathers text line and word
4 // finding functionality.
5 // Author: Ray Smith
6 // Created: Fri Mar 13 14:29:01 PDT 2009
7 //
8 // (C) Copyright 2009, Google Inc.
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 // http://www.apache.org/licenses/LICENSE-2.0
13 // Unless required by applicable law or agreed to in writing, software
14 // distributed under the License is distributed on an "AS IS" BASIS,
15 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 // See the License for the specific language governing permissions and
17 // limitations under the License.
18 //
20 
21 #ifndef TESSERACT_TEXTORD_TEXTORD_H_
22 #define TESSERACT_TEXTORD_TEXTORD_H_
23 
24 #include "bbgrid.h"
25 #include "blobbox.h"
26 #include "ccstruct.h"
27 #include "gap_map.h"
28 
29 #include <tesseract/publictypes.h> // For PageSegMode.
30 
31 namespace tesseract {
32 
// Forward declarations: these types appear below only behind pointers or
// references, so their full definitions are not needed in this header.
class BLOCK_LIST;
class FCOORD;
class PAGE_RES;
class ScrollView;
class TO_BLOCK;
class TO_BLOCK_LIST;
39 
40 // A simple class that can be used by BBGrid to hold a word and an expanded
41 // bounding box that makes it easy to find words to put diacritics.
42 class WordWithBox {
43 public:
44  WordWithBox() : word_(nullptr) {}
45  explicit WordWithBox(WERD *word) : word_(word), bounding_box_(word->bounding_box()) {
46  int height = bounding_box_.height();
47  bounding_box_.pad(height, height);
48  }
49 
50  const TBOX &bounding_box() const {
51  return bounding_box_;
52  }
53  // Returns the bounding box of only the good blobs.
55  return word_->true_bounding_box();
56  }
57  C_BLOB_LIST *RejBlobs() const {
58  return word_->rej_cblob_list();
59  }
60  const WERD *word() const {
61  return word_;
62  }
63 
64 private:
65  // Borrowed pointer to a real word somewhere that must outlive this class.
66  WERD *word_;
67  // Cached expanded bounding box of the word, padded all round by its height.
68  TBOX bounding_box_;
69 };
70 
71 // Make it usable by BBGrid.
72 CLISTIZEH(WordWithBox)
73 using WordGrid = BBGrid<WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT>;
74 using WordSearch = GridSearch<WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT>;
75 
76 class Textord {
77 public:
78  explicit Textord(CCStruct *ccstruct);
79  ~Textord() = default;
80 
81  // Make the textlines and words inside each block.
82  // binary_pix is mandatory and is the binarized input after line removal.
83  // grey_pix is optional, but if present must match the binary_pix in size,
84  // and must be a *real* grey image instead of binary_pix * 255.
85  // thresholds_pix is expected to be present iff grey_pix is present and
86  // can be an integer factor reduction of the grey_pix. It represents the
87  // thresholds that were used to create the binary_pix from the grey_pix.
88  // diacritic_blobs contain small confusing components that should be added
89  // to the appropriate word(s) in case they are really diacritics.
90  void TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height,
91  Image binary_pix, Image thresholds_pix, Image grey_pix, bool use_box_bottoms,
92  BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks);
93 
94  // If we were supposed to return only a single textline, and there is more
95  // than one, clean up and leave only the best.
96  void CleanupSingleRowResult(PageSegMode pageseg_mode, PAGE_RES *page_res);
97 
98  bool use_cjk_fp_model() const {
99  return use_cjk_fp_model_;
100  }
101  void set_use_cjk_fp_model(bool flag) {
102  use_cjk_fp_model_ = flag;
103  }
104 
105  // tospace.cpp ///////////////////////////////////////////
106  void to_spacing(ICOORD page_tr, // topright of page
107  TO_BLOCK_LIST *blocks // blocks on page
108  );
109  ROW *make_prop_words(TO_ROW *row, // row to make
110  FCOORD rotation // for drawing
111  );
112  ROW *make_blob_words(TO_ROW *row, // row to make
113  FCOORD rotation // for drawing
114  );
115  // tordmain.cpp ///////////////////////////////////////////
116  void find_components(Image pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks);
117  void filter_blobs(ICOORD page_tr, TO_BLOCK_LIST *blocks, bool testing_on);
118 
119 private:
120  // For underlying memory management and other utilities.
121  CCStruct *ccstruct_;
122 
123  // The size of the input image.
124  ICOORD page_tr_;
125 
126  bool use_cjk_fp_model_;
127 
128  // makerow.cpp ///////////////////////////////////////////
129  // Make the textlines inside each block.
130  void MakeRows(PageSegMode pageseg_mode, const FCOORD &skew, int width, int height,
131  TO_BLOCK_LIST *to_blocks);
132  // Make the textlines inside a single block.
133  void MakeBlockRows(int min_spacing, int max_spacing, const FCOORD &skew, TO_BLOCK *block,
134  ScrollView *win);
135 
136 public:
137  void compute_block_xheight(TO_BLOCK *block, float gradient);
138  void compute_row_xheight(TO_ROW *row, // row to do
139  const FCOORD &rotation,
140  float gradient, // global skew
141  int block_line_size);
142  void make_spline_rows(TO_BLOCK *block, // block to do
143  float gradient, // gradient to fit
144  bool testing_on);
145 
146 private:
148  void make_old_baselines(TO_BLOCK *block, // block to do
149  bool testing_on, // correct orientation
150  float gradient);
151  void correlate_lines(TO_BLOCK *block, float gradient);
152  void correlate_neighbours(TO_BLOCK *block, // block rows are in.
153  TO_ROW **rows, // rows of block.
154  int rowcount); // no of rows to do.
155  int correlate_with_stats(TO_ROW **rows, // rows of block.
156  int rowcount, // no of rows to do.
157  TO_BLOCK *block);
158  void find_textlines(TO_BLOCK *block, // block row is in
159  TO_ROW *row, // row to do
160  int degree, // required approximation
161  QSPLINE *spline); // starting spline
162  // tospace.cpp ///////////////////////////////////////////
163  // DEBUG USE ONLY
164  void block_spacing_stats(TO_BLOCK *block, GAPMAP *gapmap, bool &old_text_ord_proportional,
165  // resulting estimate
166  int16_t &block_space_gap_width,
167  // resulting estimate
168  int16_t &block_non_space_gap_width);
169  void row_spacing_stats(TO_ROW *row, GAPMAP *gapmap, int16_t block_idx, int16_t row_idx,
170  // estimate for block
171  int16_t block_space_gap_width,
172  // estimate for block
173  int16_t block_non_space_gap_width);
174  void old_to_method(TO_ROW *row, STATS *all_gap_stats, STATS *space_gap_stats,
175  STATS *small_gap_stats, int16_t block_space_gap_width,
176  // estimate for block
177  int16_t block_non_space_gap_width);
178  bool isolated_row_stats(TO_ROW *row, GAPMAP *gapmap, STATS *all_gap_stats, bool suspected_table,
179  int16_t block_idx, int16_t row_idx);
180  int16_t stats_count_under(STATS *stats, int16_t threshold);
181  void improve_row_threshold(TO_ROW *row, STATS *all_gap_stats);
182  bool make_a_word_break(TO_ROW *row, // row being made
183  TBOX blob_box, // for next_blob // how many blanks?
184  int16_t prev_gap, TBOX prev_blob_box, int16_t real_current_gap,
185  int16_t within_xht_current_gap, TBOX next_blob_box, int16_t next_gap,
186  uint8_t &blanks, bool &fuzzy_sp, bool &fuzzy_non,
187  bool &prev_gap_was_a_space, bool &break_at_next_gap);
188  bool narrow_blob(TO_ROW *row, TBOX blob_box);
189  bool wide_blob(TO_ROW *row, TBOX blob_box);
190  bool suspected_punct_blob(TO_ROW *row, TBOX box);
191  void peek_at_next_gap(TO_ROW *row, BLOBNBOX_IT box_it, TBOX &next_blob_box, int16_t &next_gap,
192  int16_t &next_within_xht_gap);
193  void mark_gap(TBOX blob, // blob following gap
194  int16_t rule, // heuristic id
195  int16_t prev_gap, int16_t prev_blob_width, int16_t current_gap,
196  int16_t next_blob_width, int16_t next_gap);
197  float find_mean_blob_spacing(WERD *word);
198  bool ignore_big_gap(TO_ROW *row, int32_t row_length, GAPMAP *gapmap, int16_t left, int16_t right);
199  // get bounding box
200  TBOX reduced_box_next(TO_ROW *row, // current row
201  BLOBNBOX_IT *it // iterator to blobds
202  );
203  TBOX reduced_box_for_blob(BLOBNBOX *blob, TO_ROW *row, int16_t *left_above_xht);
204  // tordmain.cpp ///////////////////////////////////////////
205  float filter_noise_blobs(BLOBNBOX_LIST *src_list, BLOBNBOX_LIST *noise_list,
206  BLOBNBOX_LIST *small_list, BLOBNBOX_LIST *large_list);
207  // Fixes the block so it obeys all the rules:
208  // Must have at least one ROW.
209  // Must have at least one WERD.
210  // WERDs contain a fake blob.
211  void cleanup_nontext_block(BLOCK *block);
212  void cleanup_blocks(bool clean_noise, BLOCK_LIST *blocks);
213  bool clean_noise_from_row(ROW *row);
214  void clean_noise_from_words(ROW *row);
215  // Remove outlines that are a tiny fraction in either width or height
216  // of the word height.
217  void clean_small_noise_from_words(ROW *row);
218  // Groups blocks by rotation, then, for each group, makes a WordGrid and calls
219  // TransferDiacriticsToWords to copy the diacritic blobs to the most
220  // appropriate words in the group of blocks. Source blobs are not touched.
221  void TransferDiacriticsToBlockGroups(BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks);
222  // Places a copy of blobs that are near a word (after applying rotation to the
223  // blob) in the most appropriate word, unless there is doubt, in which case a
224  // blob can end up in two words. Source blobs are not touched.
225  void TransferDiacriticsToWords(BLOBNBOX_LIST *diacritic_blobs, const FCOORD &rotation,
226  WordGrid *word_grid);
227 
228 public:
229  // makerow.cpp ///////////////////////////////////////////
230  BOOL_VAR_H(textord_single_height_mode);
231  // tospace.cpp ///////////////////////////////////////////
232  BOOL_VAR_H(tosp_old_to_method);
233  BOOL_VAR_H(tosp_old_to_constrain_sp_kn);
234  BOOL_VAR_H(tosp_only_use_prop_rows);
235  BOOL_VAR_H(tosp_force_wordbreak_on_punct);
236  BOOL_VAR_H(tosp_use_pre_chopping);
237  BOOL_VAR_H(tosp_old_to_bug_fix);
238  BOOL_VAR_H(tosp_block_use_cert_spaces);
239  BOOL_VAR_H(tosp_row_use_cert_spaces);
240  BOOL_VAR_H(tosp_narrow_blobs_not_cert);
241  BOOL_VAR_H(tosp_row_use_cert_spaces1);
242  BOOL_VAR_H(tosp_recovery_isolated_row_stats);
243  BOOL_VAR_H(tosp_only_small_gaps_for_kern);
244  BOOL_VAR_H(tosp_all_flips_fuzzy);
245  BOOL_VAR_H(tosp_fuzzy_limit_all);
246  BOOL_VAR_H(tosp_stats_use_xht_gaps);
247  BOOL_VAR_H(tosp_use_xht_gaps);
248  BOOL_VAR_H(tosp_only_use_xht_gaps);
249  BOOL_VAR_H(tosp_rule_9_test_punct);
250  BOOL_VAR_H(tosp_flip_fuzz_kn_to_sp);
251  BOOL_VAR_H(tosp_flip_fuzz_sp_to_kn);
252  BOOL_VAR_H(tosp_improve_thresh);
253  INT_VAR_H(tosp_debug_level);
254  INT_VAR_H(tosp_enough_space_samples_for_median);
255  INT_VAR_H(tosp_redo_kern_limit);
256  INT_VAR_H(tosp_few_samples);
257  INT_VAR_H(tosp_short_row);
258  INT_VAR_H(tosp_sanity_method);
259  double_VAR_H(tosp_old_sp_kn_th_factor);
260  double_VAR_H(tosp_threshold_bias1);
261  double_VAR_H(tosp_threshold_bias2);
262  double_VAR_H(tosp_narrow_fraction);
263  double_VAR_H(tosp_narrow_aspect_ratio);
264  double_VAR_H(tosp_wide_fraction);
265  double_VAR_H(tosp_wide_aspect_ratio);
266  double_VAR_H(tosp_fuzzy_space_factor);
267  double_VAR_H(tosp_fuzzy_space_factor1);
268  double_VAR_H(tosp_fuzzy_space_factor2);
269  double_VAR_H(tosp_gap_factor);
270  double_VAR_H(tosp_kern_gap_factor1);
271  double_VAR_H(tosp_kern_gap_factor2);
272  double_VAR_H(tosp_kern_gap_factor3);
273  double_VAR_H(tosp_ignore_big_gaps);
274  double_VAR_H(tosp_ignore_very_big_gaps);
275  double_VAR_H(tosp_rep_space);
276  double_VAR_H(tosp_enough_small_gaps);
277  double_VAR_H(tosp_table_kn_sp_ratio);
278  double_VAR_H(tosp_table_xht_sp_ratio);
279  double_VAR_H(tosp_table_fuzzy_kn_sp_ratio);
280  double_VAR_H(tosp_fuzzy_kn_fraction);
281  double_VAR_H(tosp_fuzzy_sp_fraction);
282  double_VAR_H(tosp_min_sane_kn_sp);
283  double_VAR_H(tosp_init_guess_kn_mult);
284  double_VAR_H(tosp_init_guess_xht_mult);
285  double_VAR_H(tosp_max_sane_kn_thresh);
286  double_VAR_H(tosp_flip_caution);
287  double_VAR_H(tosp_large_kerning);
288  double_VAR_H(tosp_dont_fool_with_small_kerns);
289  double_VAR_H(tosp_near_lh_edge);
290  double_VAR_H(tosp_silly_kn_sp_gap);
291  double_VAR_H(tosp_pass_wide_fuzz_sp_to_context);
292  // tordmain.cpp ///////////////////////////////////////////
293  BOOL_VAR_H(textord_no_rejects);
294  BOOL_VAR_H(textord_show_blobs);
295  BOOL_VAR_H(textord_show_boxes);
296  INT_VAR_H(textord_max_noise_size);
297  INT_VAR_H(textord_baseline_debug);
298  double_VAR_H(textord_noise_area_ratio);
299  double_VAR_H(textord_initialx_ile);
300  double_VAR_H(textord_initialasc_ile);
301  INT_VAR_H(textord_noise_sizefraction);
302  double_VAR_H(textord_noise_sizelimit);
303  INT_VAR_H(textord_noise_translimit);
304  double_VAR_H(textord_noise_normratio);
305  BOOL_VAR_H(textord_noise_rejwords);
306  BOOL_VAR_H(textord_noise_rejrows);
307  double_VAR_H(textord_noise_syfract);
308  double_VAR_H(textord_noise_sxfract);
309  double_VAR_H(textord_noise_hfract);
310  INT_VAR_H(textord_noise_sncount);
311  double_VAR_H(textord_noise_rowratio);
312  BOOL_VAR_H(textord_noise_debug);
313  double_VAR_H(textord_blshift_maxshift);
314  double_VAR_H(textord_blshift_xfraction);
315 };
316 
317 } // namespace tesseract
318 
319 #endif // TESSERACT_TEXTORD_TEXTORD_H_
#define CLISTIZEH(CLASSNAME)
Definition: clst.h:705
integer coordinate
Definition: points.h:36
TDimension height() const
Definition: rect.h:118
void pad(int xpad, int ypad)
Definition: rect.h:144
C_BLOB_LIST * rej_cblob_list()
Definition: werd.h:91
TBOX true_bounding_box() const
Definition: werd.cpp:177
const WERD * word() const
Definition: textord.h:60
C_BLOB_LIST * RejBlobs() const
Definition: textord.h:57
WordWithBox(WERD *word)
Definition: textord.h:45
const TBOX & bounding_box() const
Definition: textord.h:50
TBOX true_bounding_box() const
Definition: textord.h:54
INT_VAR_H(textord_noise_sizefraction)
double_VAR_H(tosp_enough_small_gaps)
double_VAR_H(tosp_pass_wide_fuzz_sp_to_context)
INT_VAR_H(tosp_short_row)
BOOL_VAR_H(textord_single_height_mode)
BOOL_VAR_H(tosp_old_to_bug_fix)
BOOL_VAR_H(tosp_flip_fuzz_kn_to_sp)
double_VAR_H(tosp_fuzzy_sp_fraction)
double_VAR_H(tosp_threshold_bias1)
INT_VAR_H(tosp_sanity_method)
BOOL_VAR_H(tosp_narrow_blobs_not_cert)
double_VAR_H(tosp_max_sane_kn_thresh)
double_VAR_H(tosp_kern_gap_factor2)
bool use_cjk_fp_model() const
Definition: textord.h:98
double_VAR_H(tosp_table_fuzzy_kn_sp_ratio)
INT_VAR_H(tosp_debug_level)
INT_VAR_H(textord_noise_translimit)
BOOL_VAR_H(tosp_block_use_cert_spaces)
BOOL_VAR_H(textord_noise_rejrows)
double_VAR_H(tosp_dont_fool_with_small_kerns)
BOOL_VAR_H(tosp_fuzzy_limit_all)
BOOL_VAR_H(tosp_row_use_cert_spaces1)
double_VAR_H(tosp_near_lh_edge)
BOOL_VAR_H(tosp_recovery_isolated_row_stats)
double_VAR_H(tosp_wide_aspect_ratio)
BOOL_VAR_H(tosp_flip_fuzz_sp_to_kn)
double_VAR_H(tosp_fuzzy_space_factor)
double_VAR_H(tosp_wide_fraction)
double_VAR_H(tosp_rep_space)
INT_VAR_H(tosp_few_samples)
double_VAR_H(tosp_large_kerning)
double_VAR_H(textord_noise_hfract)
BOOL_VAR_H(textord_no_rejects)
BOOL_VAR_H(textord_show_blobs)
BOOL_VAR_H(tosp_improve_thresh)
double_VAR_H(tosp_threshold_bias2)
BOOL_VAR_H(textord_noise_rejwords)
double_VAR_H(tosp_kern_gap_factor3)
BOOL_VAR_H(textord_noise_debug)
BOOL_VAR_H(tosp_only_use_prop_rows)
double_VAR_H(textord_noise_sxfract)
INT_VAR_H(textord_max_noise_size)
INT_VAR_H(tosp_enough_space_samples_for_median)
double_VAR_H(tosp_ignore_big_gaps)
void set_use_cjk_fp_model(bool flag)
Definition: textord.h:101
INT_VAR_H(textord_baseline_debug)
BOOL_VAR_H(textord_show_boxes)
double_VAR_H(tosp_table_kn_sp_ratio)
BOOL_VAR_H(tosp_use_pre_chopping)
double_VAR_H(tosp_table_xht_sp_ratio)
BOOL_VAR_H(tosp_old_to_method)
BOOL_VAR_H(tosp_only_use_xht_gaps)
double_VAR_H(tosp_init_guess_kn_mult)
INT_VAR_H(tosp_redo_kern_limit)
double_VAR_H(textord_noise_syfract)
BOOL_VAR_H(tosp_use_xht_gaps)
double_VAR_H(tosp_fuzzy_space_factor1)
BOOL_VAR_H(tosp_stats_use_xht_gaps)
double_VAR_H(tosp_kern_gap_factor1)
double_VAR_H(textord_blshift_maxshift)
BOOL_VAR_H(tosp_only_small_gaps_for_kern)
double_VAR_H(textord_blshift_xfraction)
double_VAR_H(tosp_fuzzy_space_factor2)
double_VAR_H(tosp_min_sane_kn_sp)
double_VAR_H(textord_initialasc_ile)
BOOL_VAR_H(tosp_force_wordbreak_on_punct)
double_VAR_H(tosp_flip_caution)
double_VAR_H(tosp_gap_factor)
BOOL_VAR_H(tosp_row_use_cert_spaces)
double_VAR_H(textord_noise_rowratio)
double_VAR_H(tosp_init_guess_xht_mult)
BOOL_VAR_H(tosp_old_to_constrain_sp_kn)
double_VAR_H(tosp_old_sp_kn_th_factor)
double_VAR_H(tosp_silly_kn_sp_gap)
double_VAR_H(textord_noise_area_ratio)
double_VAR_H(tosp_fuzzy_kn_fraction)
double_VAR_H(tosp_ignore_very_big_gaps)
INT_VAR_H(textord_noise_sncount)
double_VAR_H(tosp_narrow_aspect_ratio)
double_VAR_H(tosp_narrow_fraction)
double_VAR_H(textord_noise_normratio)
double_VAR_H(textord_noise_sizelimit)
BOOL_VAR_H(tosp_rule_9_test_punct)
double_VAR_H(textord_initialx_ile)
BOOL_VAR_H(tosp_all_flips_fuzzy)