tesseract  5.0.0
superscript.cpp
Go to the documentation of this file.
1 /******************************************************************
2  * File: superscript.cpp
3  * Description: Correction pass to fix superscripts and subscripts.
4  * Author: David Eger
5  *
6  * (C) Copyright 2012, Google, Inc.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  *
17  **********************************************************************/
18 
19 #include "normalis.h"
20 #include "tesseractclass.h"
21 
22 namespace tesseract {
23 
24 static int LeadingUnicharsToChopped(WERD_RES *word, int num_unichars) {
25  int num_chopped = 0;
26  for (int i = 0; i < num_unichars; i++) {
27  num_chopped += word->best_state[i];
28  }
29  return num_chopped;
30 }
31 
32 static int TrailingUnicharsToChopped(WERD_RES *word, int num_unichars) {
33  int num_chopped = 0;
34  for (int i = 0; i < num_unichars; i++) {
35  num_chopped += word->best_state[word->best_state.size() - 1 - i];
36  }
37  return num_chopped;
38 }
39 
46 static void YOutlierPieces(WERD_RES *word, int rebuilt_blob_index, int super_y_bottom,
47  int sub_y_top, ScriptPos *leading_pos, int *num_leading_outliers,
48  ScriptPos *trailing_pos, int *num_trailing_outliers) {
49  ScriptPos sp_unused1, sp_unused2;
50  int unused1, unused2;
51  if (!leading_pos) {
52  leading_pos = &sp_unused1;
53  }
54  if (!num_leading_outliers) {
55  num_leading_outliers = &unused1;
56  }
57  if (!trailing_pos) {
58  trailing_pos = &sp_unused2;
59  }
60  if (!num_trailing_outliers) {
61  num_trailing_outliers = &unused2;
62  }
63 
64  *num_leading_outliers = *num_trailing_outliers = 0;
65  *leading_pos = *trailing_pos = SP_NORMAL;
66 
67  int chopped_start = LeadingUnicharsToChopped(word, rebuilt_blob_index);
68  int num_chopped_pieces = word->best_state[rebuilt_blob_index];
69  ScriptPos last_pos = SP_NORMAL;
70  int trailing_outliers = 0;
71  for (int i = 0; i < num_chopped_pieces; i++) {
72  TBOX box = word->chopped_word->blobs[chopped_start + i]->bounding_box();
73  ScriptPos pos = SP_NORMAL;
74  if (box.bottom() >= super_y_bottom) {
75  pos = SP_SUPERSCRIPT;
76  } else if (box.top() <= sub_y_top) {
77  pos = SP_SUBSCRIPT;
78  }
79  if (pos == SP_NORMAL) {
80  if (trailing_outliers == i) {
81  *num_leading_outliers = trailing_outliers;
82  *leading_pos = last_pos;
83  }
84  trailing_outliers = 0;
85  } else {
86  if (pos == last_pos) {
87  trailing_outliers++;
88  } else {
89  trailing_outliers = 1;
90  }
91  }
92  last_pos = pos;
93  }
94  *num_trailing_outliers = trailing_outliers;
95  *trailing_pos = last_pos;
96 }
97 
109  if (word->tess_failed || word->word->flag(W_REP_CHAR) || !word->best_choice) {
110  return false;
111  }
112  int num_leading, num_trailing;
113  ScriptPos sp_leading, sp_trailing;
114  float leading_certainty, trailing_certainty;
115  float avg_certainty, unlikely_threshold;
116 
117  // Calculate the number of whole suspicious characters at the edges.
118  GetSubAndSuperscriptCandidates(word, &num_leading, &sp_leading, &leading_certainty, &num_trailing,
119  &sp_trailing, &trailing_certainty, &avg_certainty,
120  &unlikely_threshold);
121 
122  const char *leading_pos = sp_leading == SP_SUBSCRIPT ? "sub" : "super";
123  const char *trailing_pos = sp_trailing == SP_SUBSCRIPT ? "sub" : "super";
124 
125  int num_blobs = word->best_choice->length();
126 
127  // Calculate the remainder (partial characters) at the edges.
128  // This accounts for us having classified the best version of
129  // a word as [speaker?'] when it was instead [speaker.^{21}]
130  // (that is we accidentally thought the 2 was attached to the period).
131  int num_remainder_leading = 0, num_remainder_trailing = 0;
132  if (num_leading + num_trailing < num_blobs && unlikely_threshold < 0.0) {
133  int super_y_bottom = kBlnBaselineOffset + kBlnXHeight * superscript_min_y_bottom;
134  int sub_y_top = kBlnBaselineOffset + kBlnXHeight * subscript_max_y_top;
135  int last_word_char = num_blobs - 1 - num_trailing;
136  float last_char_certainty = word->best_choice->certainty(last_word_char);
137  if (word->best_choice->unichar_id(last_word_char) != 0 &&
138  last_char_certainty <= unlikely_threshold) {
139  ScriptPos rpos;
140  YOutlierPieces(word, last_word_char, super_y_bottom, sub_y_top, nullptr, nullptr, &rpos,
141  &num_remainder_trailing);
142  if (num_trailing > 0 && rpos != sp_trailing) {
143  num_remainder_trailing = 0;
144  }
145  if (num_remainder_trailing > 0 && last_char_certainty < trailing_certainty) {
146  trailing_certainty = last_char_certainty;
147  }
148  }
149  bool another_blob_available =
150  (num_remainder_trailing == 0) || num_leading + num_trailing + 1 < num_blobs;
151  int first_char_certainty = word->best_choice->certainty(num_leading);
152  if (another_blob_available && word->best_choice->unichar_id(num_leading) != 0 &&
153  first_char_certainty <= unlikely_threshold) {
154  ScriptPos lpos;
155  YOutlierPieces(word, num_leading, super_y_bottom, sub_y_top, &lpos, &num_remainder_leading,
156  nullptr, nullptr);
157  if (num_leading > 0 && lpos != sp_leading) {
158  num_remainder_leading = 0;
159  }
160  if (num_remainder_leading > 0 && first_char_certainty < leading_certainty) {
161  leading_certainty = first_char_certainty;
162  }
163  }
164  }
165 
166  // If nothing to do, bail now.
167  if (num_leading + num_trailing + num_remainder_leading + num_remainder_trailing == 0) {
168  return false;
169  }
170 
171  if (superscript_debug >= 1) {
172  tprintf("Candidate for superscript detection: %s (",
173  word->best_choice->unichar_string().c_str());
174  if (num_leading || num_remainder_leading) {
175  tprintf("%d.%d %s-leading ", num_leading, num_remainder_leading, leading_pos);
176  }
177  if (num_trailing || num_remainder_trailing) {
178  tprintf("%d.%d %s-trailing ", num_trailing, num_remainder_trailing, trailing_pos);
179  }
180  tprintf(")\n");
181  }
182  if (superscript_debug >= 3) {
183  word->best_choice->print();
184  }
185  if (superscript_debug >= 2) {
186  tprintf(" Certainties -- Average: %.2f Unlikely thresh: %.2f ", avg_certainty,
187  unlikely_threshold);
188  if (num_leading) {
189  tprintf("Orig. leading (min): %.2f ", leading_certainty);
190  }
191  if (num_trailing) {
192  tprintf("Orig. trailing (min): %.2f ", trailing_certainty);
193  }
194  tprintf("\n");
195  }
196 
197  // We've now calculated the number of rebuilt blobs we want to carve off.
198  // However, split_word() works from TBLOBs in chopped_word, so we need to
199  // convert to those.
200  int num_chopped_leading = LeadingUnicharsToChopped(word, num_leading) + num_remainder_leading;
201  int num_chopped_trailing = TrailingUnicharsToChopped(word, num_trailing) + num_remainder_trailing;
202 
203  int retry_leading = 0;
204  int retry_trailing = 0;
205  bool is_good = false;
206  WERD_RES *revised = TrySuperscriptSplits(num_chopped_leading, leading_certainty, sp_leading,
207  num_chopped_trailing, trailing_certainty, sp_trailing,
208  word, &is_good, &retry_leading, &retry_trailing);
209  if (is_good) {
210  word->ConsumeWordResults(revised);
211  } else if (retry_leading || retry_trailing) {
212  int retry_chopped_leading = LeadingUnicharsToChopped(revised, retry_leading);
213  int retry_chopped_trailing = TrailingUnicharsToChopped(revised, retry_trailing);
214  WERD_RES *revised2 = TrySuperscriptSplits(
215  retry_chopped_leading, leading_certainty, sp_leading, retry_chopped_trailing,
216  trailing_certainty, sp_trailing, revised, &is_good, &retry_leading, &retry_trailing);
217  if (is_good) {
218  word->ConsumeWordResults(revised2);
219  }
220  delete revised2;
221  }
222  delete revised;
223  return is_good;
224 }
225 
250 void Tesseract::GetSubAndSuperscriptCandidates(const WERD_RES *word, int *num_rebuilt_leading,
251  ScriptPos *leading_pos, float *leading_certainty,
252  int *num_rebuilt_trailing, ScriptPos *trailing_pos,
253  float *trailing_certainty, float *avg_certainty,
254  float *unlikely_threshold) {
255  *avg_certainty = *unlikely_threshold = 0.0f;
256  *num_rebuilt_leading = *num_rebuilt_trailing = 0;
257  *leading_certainty = *trailing_certainty = 0.0f;
258 
259  int super_y_bottom = kBlnBaselineOffset + kBlnXHeight * superscript_min_y_bottom;
260  int sub_y_top = kBlnBaselineOffset + kBlnXHeight * subscript_max_y_top;
261 
262  // Step one: Get an average certainty for "normally placed" characters.
263 
264  // Counts here are of blobs in the rebuild_word / unichars in best_choice.
265  *leading_pos = *trailing_pos = SP_NORMAL;
266  int leading_outliers = 0;
267  int trailing_outliers = 0;
268  int num_normal = 0;
269  float normal_certainty_total = 0.0f;
270  float worst_normal_certainty = 0.0f;
271  ScriptPos last_pos = SP_NORMAL;
272  int num_blobs = word->rebuild_word->NumBlobs();
273  for (int b = 0; b < num_blobs; ++b) {
274  TBOX box = word->rebuild_word->blobs[b]->bounding_box();
275  ScriptPos pos = SP_NORMAL;
276  if (box.bottom() >= super_y_bottom) {
277  pos = SP_SUPERSCRIPT;
278  } else if (box.top() <= sub_y_top) {
279  pos = SP_SUBSCRIPT;
280  }
281  if (pos == SP_NORMAL) {
282  if (word->best_choice->unichar_id(b) != 0) {
283  float char_certainty = word->best_choice->certainty(b);
284  if (char_certainty < worst_normal_certainty) {
285  worst_normal_certainty = char_certainty;
286  }
287  num_normal++;
288  normal_certainty_total += char_certainty;
289  }
290  if (trailing_outliers == b) {
291  leading_outliers = trailing_outliers;
292  *leading_pos = last_pos;
293  }
294  trailing_outliers = 0;
295  } else {
296  if (last_pos == pos) {
297  trailing_outliers++;
298  } else {
299  trailing_outliers = 1;
300  }
301  }
302  last_pos = pos;
303  }
304  *trailing_pos = last_pos;
305  if (num_normal >= 3) { // throw out the worst as an outlier.
306  num_normal--;
307  normal_certainty_total -= worst_normal_certainty;
308  }
309  if (num_normal > 0) {
310  *avg_certainty = normal_certainty_total / num_normal;
311  *unlikely_threshold = superscript_worse_certainty * (*avg_certainty);
312  }
313  if (num_normal == 0 || (leading_outliers == 0 && trailing_outliers == 0)) {
314  return;
315  }
316 
317  // Step two: Try to split off bits of the word that are both outliers
318  // and have much lower certainty than average
319  // Calculate num_leading and leading_certainty.
320  for (*leading_certainty = 0.0f, *num_rebuilt_leading = 0; *num_rebuilt_leading < leading_outliers;
321  (*num_rebuilt_leading)++) {
322  float char_certainty = word->best_choice->certainty(*num_rebuilt_leading);
323  if (char_certainty > *unlikely_threshold) {
324  break;
325  }
326  if (char_certainty < *leading_certainty) {
327  *leading_certainty = char_certainty;
328  }
329  }
330 
331  // Calculate num_trailing and trailing_certainty.
332  for (*trailing_certainty = 0.0f, *num_rebuilt_trailing = 0;
333  *num_rebuilt_trailing < trailing_outliers; (*num_rebuilt_trailing)++) {
334  int blob_idx = num_blobs - 1 - *num_rebuilt_trailing;
335  float char_certainty = word->best_choice->certainty(blob_idx);
336  if (char_certainty > *unlikely_threshold) {
337  break;
338  }
339  if (char_certainty < *trailing_certainty) {
340  *trailing_certainty = char_certainty;
341  }
342  }
343 }
344 
369 WERD_RES *Tesseract::TrySuperscriptSplits(int num_chopped_leading, float leading_certainty,
370  ScriptPos leading_pos, int num_chopped_trailing,
371  float trailing_certainty, ScriptPos trailing_pos,
372  WERD_RES *word, bool *is_good, int *retry_rebuild_leading,
373  int *retry_rebuild_trailing) {
374  int num_chopped = word->chopped_word->NumBlobs();
375 
376  *retry_rebuild_leading = *retry_rebuild_trailing = 0;
377 
378  // Chop apart the word into up to three pieces.
379 
380  BlamerBundle *bb0 = nullptr;
381  BlamerBundle *bb1 = nullptr;
382  WERD_RES *prefix = nullptr;
383  WERD_RES *core = nullptr;
384  WERD_RES *suffix = nullptr;
385  if (num_chopped_leading > 0) {
386  prefix = new WERD_RES(*word);
387  split_word(prefix, num_chopped_leading, &core, &bb0);
388  } else {
389  core = new WERD_RES(*word);
390  }
391 
392  if (num_chopped_trailing > 0) {
393  int split_pt = num_chopped - num_chopped_trailing - num_chopped_leading;
394  split_word(core, split_pt, &suffix, &bb1);
395  }
396 
397  // Recognize the pieces in turn.
398  int saved_cp_multiplier = classify_class_pruner_multiplier;
399  int saved_im_multiplier = classify_integer_matcher_multiplier;
400  if (prefix) {
401  // Turn off Tesseract's y-position penalties for the leading superscript.
402  classify_class_pruner_multiplier.set_value(0);
403  classify_integer_matcher_multiplier.set_value(0);
404 
405  // Adjust our expectations about the baseline for this prefix.
406  if (superscript_debug >= 3) {
407  tprintf(" recognizing first %d chopped blobs\n", num_chopped_leading);
408  }
409  recog_word_recursive(prefix);
410  if (superscript_debug >= 2) {
411  tprintf(" The leading bits look like %s %s\n", ScriptPosToString(leading_pos),
412  prefix->best_choice->unichar_string().c_str());
413  }
414 
415  // Restore the normal y-position penalties.
416  classify_class_pruner_multiplier.set_value(saved_cp_multiplier);
417  classify_integer_matcher_multiplier.set_value(saved_im_multiplier);
418  }
419 
420  if (superscript_debug >= 3) {
421  tprintf(" recognizing middle %d chopped blobs\n",
422  num_chopped - num_chopped_leading - num_chopped_trailing);
423  }
424 
425  if (suffix) {
426  // Turn off Tesseract's y-position penalties for the trailing superscript.
427  classify_class_pruner_multiplier.set_value(0);
428  classify_integer_matcher_multiplier.set_value(0);
429 
430  if (superscript_debug >= 3) {
431  tprintf(" recognizing last %d chopped blobs\n", num_chopped_trailing);
432  }
433  recog_word_recursive(suffix);
434  if (superscript_debug >= 2) {
435  tprintf(" The trailing bits look like %s %s\n", ScriptPosToString(trailing_pos),
436  suffix->best_choice->unichar_string().c_str());
437  }
438 
439  // Restore the normal y-position penalties.
440  classify_class_pruner_multiplier.set_value(saved_cp_multiplier);
441  classify_integer_matcher_multiplier.set_value(saved_im_multiplier);
442  }
443 
444  // Evaluate whether we think the results are believably better
445  // than what we already had.
446  bool good_prefix =
447  !prefix || BelievableSuperscript(superscript_debug >= 1, *prefix,
448  superscript_bettered_certainty * leading_certainty,
449  retry_rebuild_leading, nullptr);
450  bool good_suffix =
451  !suffix || BelievableSuperscript(superscript_debug >= 1, *suffix,
452  superscript_bettered_certainty * trailing_certainty, nullptr,
453  retry_rebuild_trailing);
454 
455  *is_good = good_prefix && good_suffix;
456  if (!*is_good && !*retry_rebuild_leading && !*retry_rebuild_trailing) {
457  // None of it is any good. Quit now.
458  delete core;
459  delete prefix;
460  delete suffix;
461  delete bb1;
462  return nullptr;
463  }
464  recog_word_recursive(core);
465 
466  // Now paste the results together into core.
467  if (suffix) {
468  suffix->SetAllScriptPositions(trailing_pos);
469  join_words(core, suffix, bb1);
470  }
471  if (prefix) {
472  prefix->SetAllScriptPositions(leading_pos);
473  join_words(prefix, core, bb0);
474  core = prefix;
475  prefix = nullptr;
476  }
477 
478  if (superscript_debug >= 1) {
479  tprintf("%s superscript fix: %s\n", *is_good ? "ACCEPT" : "REJECT",
480  core->best_choice->unichar_string().c_str());
481  }
482  return core;
483 }
484 
503 bool Tesseract::BelievableSuperscript(bool debug, const WERD_RES &word, float certainty_threshold,
504  int *left_ok, int *right_ok) const {
505  unsigned initial_ok_run_count = 0;
506  unsigned ok_run_count = 0;
507  float worst_certainty = 0.0f;
508  const WERD_CHOICE &wc = *word.best_choice;
509 
510  const UnicityTable<FontInfo> &fontinfo_table = get_fontinfo_table();
511  for (unsigned i = 0; i < wc.length(); i++) {
512  TBLOB *blob = word.rebuild_word->blobs[i];
513  UNICHAR_ID unichar_id = wc.unichar_id(i);
514  float char_certainty = wc.certainty(i);
515  bool bad_certainty = char_certainty < certainty_threshold;
516  bool is_punc = wc.unicharset()->get_ispunctuation(unichar_id);
517  bool is_italic = word.fontinfo && word.fontinfo->is_italic();
518  BLOB_CHOICE *choice = word.GetBlobChoice(i);
519  if (choice && fontinfo_table.size() > 0) {
520  // Get better information from the specific choice, if available.
521  int font_id1 = choice->fontinfo_id();
522  bool font1_is_italic = font_id1 >= 0 ? fontinfo_table.at(font_id1).is_italic() : false;
523  int font_id2 = choice->fontinfo_id2();
524  is_italic = font1_is_italic && (font_id2 < 0 || fontinfo_table.at(font_id2).is_italic());
525  }
526 
527  float height_fraction = 1.0f;
528  float char_height = blob->bounding_box().height();
529  float normal_height = char_height;
530  if (wc.unicharset()->top_bottom_useful()) {
531  int min_bot, max_bot, min_top, max_top;
532  wc.unicharset()->get_top_bottom(unichar_id, &min_bot, &max_bot, &min_top, &max_top);
533  float hi_height = max_top - max_bot;
534  float lo_height = min_top - min_bot;
535  normal_height = (hi_height + lo_height) / 2;
536  if (normal_height >= kBlnXHeight) {
537  // Only ding characters that we have decent information for because
538  // they're supposed to be normal sized, not tiny specks or dashes.
539  height_fraction = char_height / normal_height;
540  }
541  }
542  bool bad_height = height_fraction < superscript_scaledown_ratio;
543 
544  if (debug) {
545  if (is_italic) {
546  tprintf(" Rejecting: superscript is italic.\n");
547  }
548  if (is_punc) {
549  tprintf(" Rejecting: punctuation present.\n");
550  }
551  const char *char_str = wc.unicharset()->id_to_unichar(unichar_id);
552  if (bad_certainty) {
553  tprintf(
554  " Rejecting: don't believe character %s with certainty %.2f "
555  "which is less than threshold %.2f\n",
556  char_str, char_certainty, certainty_threshold);
557  }
558  if (bad_height) {
559  tprintf(
560  " Rejecting: character %s seems too small @ %.2f versus "
561  "expected %.2f\n",
562  char_str, char_height, normal_height);
563  }
564  }
565  if (bad_certainty || bad_height || is_punc || is_italic) {
566  if (ok_run_count == i) {
567  initial_ok_run_count = ok_run_count;
568  }
569  ok_run_count = 0;
570  } else {
571  ok_run_count++;
572  }
573  if (char_certainty < worst_certainty) {
574  worst_certainty = char_certainty;
575  }
576  }
577  bool all_ok = ok_run_count == wc.length();
578  if (all_ok && debug) {
579  tprintf(" Accept: worst revised certainty is %.2f\n", worst_certainty);
580  }
581  if (!all_ok) {
582  if (left_ok) {
583  *left_ok = initial_ok_run_count;
584  }
585  if (right_ok) {
586  *right_ok = ok_run_count;
587  }
588  }
589  return all_ok;
590 }
591 
592 } // namespace tesseract
@ TBOX
@ W_REP_CHAR
repeated character
Definition: werd.h:40
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
@ SP_SUBSCRIPT
Definition: ratngs.h:250
@ SP_NORMAL
Definition: ratngs.h:250
@ SP_SUPERSCRIPT
Definition: ratngs.h:250
const int kBlnXHeight
Definition: normalis.h:33
int UNICHAR_ID
Definition: unichar.h:36
const char * ScriptPosToString(enum ScriptPos script_pos)
Definition: ratngs.cpp:193
const int kBlnBaselineOffset
Definition: normalis.h:34
void GetSubAndSuperscriptCandidates(const WERD_RES *word, int *num_rebuilt_leading, ScriptPos *leading_pos, float *leading_certainty, int *num_rebuilt_trailing, ScriptPos *trailing_pos, float *trailing_certainty, float *avg_certainty, float *unlikely_threshold)
void split_word(WERD_RES *word, unsigned split_pt, WERD_RES **right_piece, BlamerBundle **orig_blamer_bundle) const
Definition: tfacepp.cpp:163
bool SubAndSuperscriptFix(WERD_RES *word_res)
void recog_word_recursive(WERD_RES *word)
Definition: tfacepp.cpp:94
bool BelievableSuperscript(bool debug, const WERD_RES &word, float certainty_threshold, int *left_ok, int *right_ok) const
WERD_RES * TrySuperscriptSplits(int num_chopped_leading, float leading_certainty, ScriptPos leading_pos, int num_chopped_trailing, float trailing_certainty, ScriptPos trailing_pos, WERD_RES *word, bool *is_good, int *retry_leading, int *retry_trailing)
void join_words(WERD_RES *word, WERD_RES *word2, BlamerBundle *orig_bb) const
Definition: tfacepp.cpp:216
TBOX bounding_box() const
Definition: blobs.cpp:466
std::vector< TBLOB * > blobs
Definition: blobs.h:462
unsigned NumBlobs() const
Definition: blobs.h:449
int size() const
Return the size used.
Definition: unicity_table.h:51
const T & at(int id) const
Return the object from an id.
Definition: unicity_table.h:56
bool is_italic() const
Definition: fontinfo.h:118
WERD_CHOICE * best_choice
Definition: pageres.h:239
TWERD * chopped_word
Definition: pageres.h:210
void ConsumeWordResults(WERD_RES *word)
Definition: pageres.cpp:785
const FontInfo * fontinfo
Definition: pageres.h:307
void SetAllScriptPositions(tesseract::ScriptPos position)
Definition: pageres.cpp:895
BLOB_CHOICE * GetBlobChoice(unsigned index) const
Definition: pageres.cpp:768
TWERD * rebuild_word
Definition: pageres.h:264
int16_t fontinfo_id2() const
Definition: ratngs.h:93
int16_t fontinfo_id() const
Definition: ratngs.h:90
float certainty() const
Definition: ratngs.h:311
UNICHAR_ID unichar_id(unsigned index) const
Definition: ratngs.h:295
const UNICHARSET * unicharset() const
Definition: ratngs.h:277
unsigned length() const
Definition: ratngs.h:283
void print() const
Definition: ratngs.h:557
std::string & unichar_string()
Definition: ratngs.h:515
TDimension height() const
Definition: rect.h:118
TDimension top() const
Definition: rect.h:68
TDimension bottom() const
Definition: rect.h:75
bool flag(WERD_FLAGS mask) const
Definition: werd.h:128
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:279
void get_top_bottom(UNICHAR_ID unichar_id, int *min_bottom, int *max_bottom, int *min_top, int *max_top) const
Definition: unicharset.h:586
bool top_bottom_useful() const
Definition: unicharset.h:555
bool get_ispunctuation(UNICHAR_ID unichar_id) const
Definition: unicharset.h:533
UnicityTable< FontInfo > & get_fontinfo_table()
Definition: classify.h:324