tesseract  5.0.0
ratngs.h
Go to the documentation of this file.
1 /**********************************************************************
2  * File: ratngs.h (Formerly ratings.h)
3  * Description: Definition of the WERD_CHOICE and BLOB_CHOICE classes.
4  * Author: Ray Smith
5  *
6  * (C) Copyright 1992, Hewlett-Packard Ltd.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  *
17  **********************************************************************/
18 
19 #ifndef RATNGS_H
20 #define RATNGS_H
21 
22 #ifdef HAVE_CONFIG_H
23 # include "config_auto.h" // DISABLED_LEGACY_ENGINE
24 #endif
25 
26 #include "clst.h"
27 #include "elst.h"
28 #ifndef DISABLED_LEGACY_ENGINE
29 # include "fontinfo.h"
30 #endif // undef DISABLED_LEGACY_ENGINE
31 #include "matrix.h"
32 #include "unicharset.h"
33 #include "werd.h"
34 
35 #include <tesseract/unichar.h>
36 
37 #include <cassert>
38 #include <cfloat> // for FLT_MAX
39 
40 namespace tesseract {
41 
42 class MATRIX;
43 struct TBLOB;
44 struct TWERD;
45 
46 // Enum to describe the source of a BLOB_CHOICE to make it possible to determine
47 // whether a blob has been classified by inspecting the BLOB_CHOICEs.
49  BCC_STATIC_CLASSIFIER, // From the char_norm classifier.
50  BCC_ADAPTED_CLASSIFIER, // From the adaptive classifier.
51  BCC_SPECKLE_CLASSIFIER, // Backup for failed classification.
52  BCC_AMBIG, // Generated by ambiguity detection.
53  BCC_FAKE, // From some other process.
54 };
55 
56 class BLOB_CHOICE : public ELIST_LINK {
57 public:
59  unichar_id_ = UNICHAR_SPACE;
60  fontinfo_id_ = -1;
61  fontinfo_id2_ = -1;
62  rating_ = 10.0;
63  certainty_ = -1.0;
64  script_id_ = -1;
65  min_xheight_ = 0.0f;
66  max_xheight_ = 0.0f;
67  yshift_ = 0.0f;
68  classifier_ = BCC_FAKE;
69  }
70  BLOB_CHOICE(UNICHAR_ID src_unichar_id, // character id
71  float src_rating, // rating
72  float src_cert, // certainty
73  int script_id, // script
74  float min_xheight, // min xheight in image pixel units
75  float max_xheight, // max xheight allowed by this char
76  float yshift, // the larger of y shift (top or bottom)
77  BlobChoiceClassifier c); // adapted match or other
78  BLOB_CHOICE(const BLOB_CHOICE &other);
79  ~BLOB_CHOICE() = default;
80 
82  return unichar_id_;
83  }
// Returns the stored rating_ of this choice.
84  float rating() const {
85  return rating_;
86  }
// Returns the stored certainty_ of this choice.
87  float certainty() const {
88  return certainty_;
89  }
// Returns the id of the first-choice font (fontinfo_id_).
90  int16_t fontinfo_id() const {
91  return fontinfo_id_;
92  }
// Returns the id of the second-choice font (fontinfo_id2_).
93  int16_t fontinfo_id2() const {
94  return fontinfo_id2_;
95  }
96 #ifndef DISABLED_LEGACY_ENGINE
97  const std::vector<ScoredFont> &fonts() const {
98  return fonts_;
99  }
100  void set_fonts(const std::vector<ScoredFont> &fonts) {
101  fonts_ = fonts;
102  int score1 = 0, score2 = 0;
103  fontinfo_id_ = -1;
104  fontinfo_id2_ = -1;
105  for (auto &f : fonts_) {
106  if (f.score > score1) {
107  score2 = score1;
108  fontinfo_id2_ = fontinfo_id_;
109  score1 = f.score;
110  fontinfo_id_ = f.fontinfo_id;
111  } else if (f.score > score2) {
112  score2 = f.score;
113  fontinfo_id2_ = f.fontinfo_id;
114  }
115  }
116  }
117 #endif // ndef DISABLED_LEGACY_ENGINE
118  int script_id() const {
119  return script_id_;
120  }
122  return matrix_cell_;
123  }
// Returns the lower bound (min_xheight_) of the x-height range stored for
// this choice.
124  float min_xheight() const {
125  return min_xheight_;
126  }
// Returns the upper bound (max_xheight_) of the x-height range stored for
// this choice.
127  float max_xheight() const {
128  return max_xheight_;
129  }
// Returns the stored vertical shift (yshift_) of this choice.
130  float yshift() const {
131  return yshift_;
132  }
134  return classifier_;
135  }
136  bool IsAdapted() const {
137  return classifier_ == BCC_ADAPTED_CLASSIFIER;
138  }
139  bool IsClassified() const {
140  return classifier_ == BCC_STATIC_CLASSIFIER || classifier_ == BCC_ADAPTED_CLASSIFIER ||
141  classifier_ == BCC_SPECKLE_CLASSIFIER;
142  }
143 
// Simple setters: each one overwrites the corresponding member and does
// nothing else (no other field is adjusted to match).

// Replaces the unichar id of this choice.
144  void set_unichar_id(UNICHAR_ID newunichar_id) {
145  unichar_id_ = newunichar_id;
146  }
// Replaces the rating of this choice.
147  void set_rating(float newrat) {
148  rating_ = newrat;
149  }
// Replaces the certainty of this choice.
150  void set_certainty(float newrat) {
151  certainty_ = newrat;
152  }
// Replaces the script id of this choice.
153  void set_script(int newscript_id) {
154  script_id_ = newscript_id;
155  }
// Records the (col, row) cell that this choice occupies in matrix_cell_.
156  void set_matrix_cell(int col, int row) {
157  matrix_cell_.col = col;
158  matrix_cell_.row = row;
159  }
161  classifier_ = classifier;
162  }
// Returns a newly allocated (operator new) BLOB_CHOICE that has been assigned
// from *src using the copy assignment operator. src is dereferenced
// unconditionally, so it must not be null.
// NOTE(review): the caller presumably owns the returned pointer and must
// delete it - confirm against callers.
163  static BLOB_CHOICE *deep_copy(const BLOB_CHOICE *src) {
164  auto *choice = new BLOB_CHOICE;
165  *choice = *src;
166  return choice;
167  }
168  // Returns true if *this and other agree on the baseline and x-height
169  // to within some tolerance based on a given estimate of the x-height.
170  bool PosAndSizeAgree(const BLOB_CHOICE &other, float x_height, bool debug) const;
171 
172  void print(const UNICHARSET *unicharset) const {
173  tprintf("r%.2f c%.2f x[%g,%g]: %d %s", rating_, certainty_, min_xheight_, max_xheight_,
174  unichar_id_, (unicharset == nullptr) ? "" : unicharset->debug_str(unichar_id_).c_str());
175  }
// Prints the summary produced by print(nullptr) (so without any unichar
// text), then the script id, both font ids, the y-shift and the classifier
// value formatted as an integer, terminated by a newline.
176  void print_full() const {
177  print(nullptr);
178  tprintf(" script=%d, font1=%d, font2=%d, yshift=%g, classifier=%d\n", script_id_, fontinfo_id_,
179  fontinfo_id2_, yshift_, classifier_);
180  }
181  // Sort function for sorting BLOB_CHOICEs in increasing order of rating.
182  static int SortByRating(const void *p1, const void *p2) {
183  const BLOB_CHOICE *bc1 = *static_cast<const BLOB_CHOICE *const *>(p1);
184  const BLOB_CHOICE *bc2 = *static_cast<const BLOB_CHOICE *const *>(p2);
185  return (bc1->rating_ < bc2->rating_) ? -1 : 1;
186  }
187 
188 private:
189  // Copy assignment operator.
190  BLOB_CHOICE &operator=(const BLOB_CHOICE &other);
191 
192  UNICHAR_ID unichar_id_; // unichar id
193 #ifndef DISABLED_LEGACY_ENGINE
194  // Fonts and scores. Allowed to be empty.
195  std::vector<ScoredFont> fonts_;
196 #endif // ndef DISABLED_LEGACY_ENGINE
197  int16_t fontinfo_id_; // char font information
198  int16_t fontinfo_id2_; // 2nd choice font information
199  // Rating is the classifier distance weighted by the length of the outline
200  // in the blob. In terms of probability, classifier distance is -klog p such
201  // that the resulting distance is in the range [0, 1] and then
202  // rating = w (-k log p) where w is the weight for the length of the outline.
203  // Sums of ratings may be compared meaningfully for words of different
204  // segmentation.
205  float rating_; // size related
206  // Certainty is a number in [-20, 0] indicating the classifier certainty
207  // of the choice. In terms of probability, certainty = 20 (k log p) where
208  // k is defined as above to normalize -klog p to the range [0, 1].
209  float certainty_; // absolute
210  int script_id_;
211  // Holds the position of this choice in the ratings matrix.
212  // Used to locate the position in the matrix during path backtracking.
213  MATRIX_COORD matrix_cell_;
214  // X-height range (in image pixels) that this classification supports.
215  float min_xheight_;
216  float max_xheight_;
217  // yshift_ - The vertical distance (in image pixels) the character is
218  // shifted (up or down) from an acceptable y position.
219  float yshift_;
220  BlobChoiceClassifier classifier_; // What generated *this.
221 };
222 
223 // Make BLOB_CHOICE listable.
224 ELISTIZEH(BLOB_CHOICE)
225 
226 // Return the BLOB_CHOICE in bc_list matching a given unichar_id,
227 // or nullptr if there is no match.
228 BLOB_CHOICE *FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list);
229 
230 // Permuter codes used in WERD_CHOICEs.
232  NO_PERM, // 0
233  PUNC_PERM, // 1
245 
247 };
248 
249 // ScriptPos tells whether a character is subscript, superscript or normal.
251 
252 const char *ScriptPosToString(ScriptPos script_pos);
253 
255 public:
256  static const float kBadRating;
257  static const char *permuter_name(uint8_t permuter);
258 
// Creates an empty word bound to the given unicharset, with the per-unichar
// arrays sized for 8 entries (see init(unsigned)).
259  WERD_CHOICE(const UNICHARSET *unicharset) : unicharset_(unicharset) {
260  this->init(8);
261  }
// Creates an empty word bound to the given unicharset, with the per-unichar
// arrays sized for `reserved` entries.
// NOTE(review): `reserved` is an int but init() takes unsigned, so a negative
// value would be converted to a very large size - confirm callers never pass
// one.
262  WERD_CHOICE(const UNICHARSET *unicharset, int reserved) : unicharset_(unicharset) {
263  this->init(reserved);
264  }
// Creates a word from a string plus per-unichar lengths, rating, certainty
// and permuter by forwarding them to the five-argument init() overload.
265  WERD_CHOICE(const char *src_string, const char *src_lengths, float src_rating,
266  float src_certainty, uint8_t src_permuter, const UNICHARSET &unicharset)
267  : unicharset_(&unicharset) {
268  this->init(src_string, src_lengths, src_rating, src_certainty, src_permuter);
269  }
// Creates a word from a string only; defined out of line.
270  WERD_CHOICE(const char *src_string, const UNICHARSET &unicharset);
// Copy constructor: shares the source's unicharset pointer, sizes the arrays
// for word.length() entries, then copies the contents via operator=.
271  WERD_CHOICE(const WERD_CHOICE &word) : ELIST_LINK(word), unicharset_(word.unicharset_) {
272  this->init(word.length());
273  this->operator=(word);
274  }
276 
// Returns the unicharset pointer this word was constructed with.
277  const UNICHARSET *unicharset() const {
278  return unicharset_;
279  }
// Returns true if the word currently holds no unichars (length_ == 0).
280  bool empty() const {
281  return length_ == 0;
282  }
// Returns the number of unichars currently in the word.
283  inline unsigned length() const {
284  return length_;
285  }
// Returns the stored adjust factor.
286  float adjust_factor() const {
287  return adjust_factor_;
288  }
// Overwrites the stored adjust factor; no other field is changed.
289  void set_adjust_factor(float factor) {
290  adjust_factor_ = factor;
291  }
// Returns the underlying unichar id vector. The vector is resized to the
// reserved size by init()/double_the_size(), so its size() can exceed
// length(); only the first length() entries belong to the word.
292  inline const std::vector<UNICHAR_ID> &unichar_ids() const {
293  return unichar_ids_;
294  }
// Returns the unichar id at the given position. index must be less than
// length(); this is checked with assert.
295  inline UNICHAR_ID unichar_id(unsigned index) const {
296  assert(index < length_);
297  return unichar_ids_[index];
298  }
// Returns the state_ entry (number of blobs) at the given position.
// Unlike unichar_id(index), no bounds check is performed here, so the caller
// must supply a valid index.
299  inline unsigned state(unsigned index) const {
300  return state_[index];
301  }
302  ScriptPos BlobPosition(unsigned index) const {
303  if (index >= length_) {
304  return SP_NORMAL;
305  }
306  return script_pos_[index];
307  }
// Returns the word-level rating_.
308  inline float rating() const {
309  return rating_;
310  }
// Returns the word-level certainty_.
311  inline float certainty() const {
312  return certainty_;
313  }
// Returns the certainty stored for the unichar at index. No bounds check is
// performed, so the caller must supply a valid index.
314  inline float certainty(unsigned index) const {
315  return certainties_[index];
316  }
// Returns the stored minimum x-height.
317  inline float min_x_height() const {
318  return min_x_height_;
319  }
// Returns the stored maximum x-height.
320  inline float max_x_height() const {
321  return max_x_height_;
322  }
// Overwrites both x-height bounds; the values are stored as given without
// checking that min_height <= max_height.
323  inline void set_x_heights(float min_height, float max_height) {
324  min_x_height_ = min_height;
325  max_x_height_ = max_height;
326  }
// Returns the permuter code stored for this word.
327  inline uint8_t permuter() const {
328  return permuter_;
329  }
330  const char *permuter_name() const;
331  // Returns the BLOB_CHOICE_LIST corresponding to the given index in the word,
332  // taken from the appropriate cell in the ratings MATRIX.
333  // Borrowed pointer, so do not delete.
334  BLOB_CHOICE_LIST *blob_choices(unsigned index, MATRIX *ratings) const;
335 
336  // Returns the MATRIX_COORD corresponding to the location in the ratings
337  // MATRIX for the given index into the word.
338  MATRIX_COORD MatrixCoord(unsigned index) const;
339 
// Replaces only the unichar id at index; state, certainty, script position
// and the word-level rating/certainty are left as they are. index must be
// less than length(); this is checked with assert.
340  inline void set_unichar_id(UNICHAR_ID unichar_id, unsigned index) {
341  assert(index < length_);
342  unichar_ids_[index] = unichar_id;
343  }
// Returns the dangerous-ambiguity flag.
344  bool dangerous_ambig_found() const {
345  return dangerous_ambig_found_;
346  }
// Sets the dangerous-ambiguity flag.
347  void set_dangerous_ambig_found_(bool value) {
348  dangerous_ambig_found_ = value;
349  }
// Overwrites the word-level rating.
350  inline void set_rating(float new_val) {
351  rating_ = new_val;
352  }
// Overwrites the word-level certainty.
353  inline void set_certainty(float new_val) {
354  certainty_ = new_val;
355  }
// Overwrites the permuter code.
356  inline void set_permuter(uint8_t perm) {
357  permuter_ = perm;
358  }
// Sets the word length directly, without touching the per-unichar arrays.
// len must not exceed the reserved size; this is enforced with ASSERT_HOST.
359  // Note: this function should only be used if all the fields
360  // are populated manually with set_* functions (rather than
361  // (copy)constructors and append_* functions).
362  inline void set_length(unsigned len) {
363  ASSERT_HOST(reserved_ >= len);
364  length_ = len;
365  }
366 
368  inline void double_the_size() {
369  if (reserved_ > 0) {
370  reserved_ *= 2;
371  } else {
372  reserved_ = 1;
373  }
374  unichar_ids_.resize(reserved_);
375  script_pos_.resize(reserved_);
376  state_.resize(reserved_);
377  certainties_.resize(reserved_);
378  }
379 
382  inline void init(unsigned reserved) {
383  reserved_ = reserved;
384  if (reserved > 0) {
385  unichar_ids_.resize(reserved);
386  script_pos_.resize(reserved);
387  state_.resize(reserved);
388  certainties_.resize(reserved);
389  } else {
390  unichar_ids_.clear();
391  script_pos_.clear();
392  state_.clear();
393  certainties_.clear();
394  }
395  length_ = 0;
396  adjust_factor_ = 1.0f;
397  rating_ = 0.0;
398  certainty_ = FLT_MAX;
399  min_x_height_ = 0.0f;
400  max_x_height_ = FLT_MAX;
401  permuter_ = NO_PERM;
402  unichars_in_script_order_ = false; // Tesseract is strict left-to-right.
403  dangerous_ambig_found_ = false;
404  }
405 
411  void init(const char *src_string, const char *src_lengths, float src_rating, float src_certainty,
412  uint8_t src_permuter);
413 
415  inline void make_bad() {
416  length_ = 0;
417  rating_ = kBadRating;
418  certainty_ = -FLT_MAX;
419  }
420 
424  inline void append_unichar_id_space_allocated(UNICHAR_ID unichar_id, int blob_count, float rating,
425  float certainty) {
426  assert(reserved_ > length_);
427  length_++;
428  this->set_unichar_id(unichar_id, blob_count, rating, certainty, length_ - 1);
429  }
430 
431  void append_unichar_id(UNICHAR_ID unichar_id, int blob_count, float rating, float certainty);
432 
433  inline void set_unichar_id(UNICHAR_ID unichar_id, int blob_count, float rating, float certainty,
434  unsigned index) {
435  assert(index < length_);
436  unichar_ids_[index] = unichar_id;
437  state_[index] = blob_count;
438  certainties_[index] = certainty;
439  script_pos_[index] = SP_NORMAL;
440  rating_ += rating;
441  if (certainty < certainty_) {
442  certainty_ = certainty;
443  }
444  }
445  // Sets the entries for the given index from the BLOB_CHOICE, assuming
446  // unit fragment lengths, but setting the state for this index to blob_count.
447  void set_blob_choice(unsigned index, int blob_count, const BLOB_CHOICE *blob_choice);
448 
449  bool contains_unichar_id(UNICHAR_ID unichar_id) const;
450  void remove_unichar_ids(unsigned index, int num);
451  inline void remove_last_unichar_id() {
452  --length_;
453  }
454  inline void remove_unichar_id(unsigned index) {
455  this->remove_unichar_ids(index, 1);
456  }
457  bool has_rtl_unichar_id() const;
458  void reverse_and_mirror_unichar_ids();
459 
460  // Returns the half-open interval of unichar_id indices [start, end) which
461  // enclose the core portion of this word -- the part after stripping
462  // punctuation from the left and right.
463  void punct_stripped(unsigned *start_core, unsigned *end_core) const;
464 
465  // Returns the indices [start, end) containing the core of the word, stripped
466  // of any superscript digits on either side. (i.e., the non-footnote part
467  // of the word). There is no guarantee that the output range is non-empty.
468  void GetNonSuperscriptSpan(int *start, int *end) const;
469 
470  // Return a copy of this WERD_CHOICE with the choices [start, end).
471  // The result is useful only for checking against a dictionary.
472  WERD_CHOICE shallow_copy(unsigned start, unsigned end) const;
473 
474  void string_and_lengths(std::string *word_str, std::string *word_lengths_str) const;
475  std::string debug_string() const {
476  std::string word_str;
477  for (unsigned i = 0; i < length_; ++i) {
478  word_str += unicharset_->debug_str(unichar_ids_[i]);
479  word_str += " ";
480  }
481  return word_str;
482  }
483  // Returns true if any unichar_id in the word is a non-space-delimited char.
485  for (unsigned i = 0; i < length_; ++i) {
486  if (!unicharset_->IsSpaceDelimited(unichar_ids_[i])) {
487  return true;
488  }
489  }
490  return false;
491  }
492  // Returns true if the word is all spaces.
493  bool IsAllSpaces() const {
494  for (unsigned i = 0; i < length_; ++i) {
495  if (unichar_ids_[i] != UNICHAR_SPACE) {
496  return false;
497  }
498  }
499  return true;
500  }
501 
502  // Call this to override the default (strict left to right graphemes)
503  // with the fact that some engine produces a "reading order" set of
504  // Graphemes for each word.
// Stores the flag and returns the value that was just stored (the result of
// the assignment), not the previous value.
505  bool set_unichars_in_script_order(bool in_script_order) {
506  return unichars_in_script_order_ = in_script_order;
507  }
508 
510  return unichars_in_script_order_;
511  }
512 
// The three accessors below all call string_and_lengths() to refill BOTH
// member strings (unichar_string_ and unichar_lengths_) on every call, then
// return a reference to one of those members. The referenced contents are
// therefore rewritten by any later call to one of these accessors on the same
// object. The const overloads can do this because the members are mutable.

513  // Returns a UTF-8 string equivalent to the current choice
514  // of UNICHAR IDs.
515  std::string &unichar_string() {
516  this->string_and_lengths(&unichar_string_, &unichar_lengths_);
517  return unichar_string_;
518  }
519 
520  // Returns a UTF-8 string equivalent to the current choice
521  // of UNICHAR IDs.
522  const std::string &unichar_string() const {
523  this->string_and_lengths(&unichar_string_, &unichar_lengths_);
524  return unichar_string_;
525  }
526 
527  // Returns the lengths, one byte each, representing the number of bytes
528  // required in the unichar_string for each UNICHAR_ID.
529  const std::string &unichar_lengths() const {
530  this->string_and_lengths(&unichar_string_, &unichar_lengths_);
531  return unichar_lengths_;
532  }
533 
534  // Sets up the script_pos_ member using the blobs_list to get the bln
535  // bounding boxes, *this to get the unichars, and this->unicharset
536  // to get the target positions. If small_caps is true, sub/super are not
537  // considered, but dropcaps are.
538  // NOTE: blobs_list should be the chopped_word blobs. (Fully segmented.)
539  void SetScriptPositions(bool small_caps, TWERD *word, int debug = 0);
540  // Sets all the script_pos_ positions to the given position.
541  void SetAllScriptPositions(ScriptPos position);
542 
543  static ScriptPos ScriptPositionOf(bool print_debug, const UNICHARSET &unicharset,
544  const TBOX &blob_box, UNICHAR_ID unichar_id);
545 
546  // Returns the "dominant" script ID for the word. By "dominant", the script
547  // must account for at least half the characters. Otherwise, it returns 0.
548  // Note that for Japanese, Hiragana and Katakana are simply treated as Han.
549  int GetTopScriptID() const;
550 
551  // Fixes the state_ for a chop at the given blob_position.
552  void UpdateStateForSplit(int blob_position);
553 
554  // Returns the sum of all the state elements, being the total number of blobs.
555  unsigned TotalOfStates() const;
556 
557  void print() const {
558  this->print("");
559  }
560  void print(const char *msg) const;
561  // Prints the segmentation state with an introductory message.
562  void print_state(const char *msg) const;
563 
564  // Displays the segmentation state of *this (if not the same as the last
565  // one displayed) and waits for a click in the window.
566  void DisplaySegmentation(TWERD *word);
567 
568  WERD_CHOICE &operator+=( // concatenate
569  const WERD_CHOICE &second); // second on first
570 
571  WERD_CHOICE &operator=(const WERD_CHOICE &source);
572 
573 private:
574  const UNICHARSET *unicharset_;
575  // TODO(rays) Perhaps replace the multiple arrays with an array of structs?
576  // unichar_ids_ is an array of classifier "results" that make up a word.
577  // For each unichar_ids_[i], script_pos_[i] has the sub/super/normal position
578  // of each unichar_id.
579  // state_[i] indicates the number of blobs in WERD_RES::chopped_word that
580  // were put together to make the classification results in the ith position
581  // in unichar_ids_, and certainties_[i] is the certainty of the choice that
582  // was used in this word.
583  // == Change from before ==
584  // Previously there was fragment_lengths_ that allowed a word to be
585  // artificially composed of multiple fragment results. Since the new
586  // segmentation search doesn't do fragments, treatment of fragments has
587  // been moved to a lower level, augmenting the ratings matrix with the
588  // combined fragments, and allowing the language-model/segmentation-search
589  // to deal with only the combined unichar_ids.
590  std::vector<UNICHAR_ID> unichar_ids_; // unichar ids that represent the text of the word
591  std::vector<ScriptPos> script_pos_; // Normal/Sub/Superscript of each unichar.
592  std::vector<int> state_; // Number of blobs in each unichar.
593  std::vector<float> certainties_; // Certainty of each unichar.
594  unsigned reserved_; // size of the above arrays
595  unsigned length_; // word length
596  // Factor that was used to adjust the rating.
597  float adjust_factor_;
598  // Rating is the sum of the ratings of the individual blobs in the word.
599  float rating_; // size related
600  // certainty is the min (worst) certainty of the individual blobs in the word.
601  float certainty_; // absolute
602  // xheight computed from the result, or 0 if inconsistent.
603  float min_x_height_;
604  float max_x_height_;
605  uint8_t permuter_; // permuter code
606 
607  // Normally, the ratings_ matrix represents the recognition results in order
608  // from left-to-right. However, some engines (say Cube) may return
609  // recognition results in the order of the script's major reading direction
610  // (for Arabic, that is right-to-left).
611  bool unichars_in_script_order_;
612  // True if NoDangerousAmbig found an ambiguity.
613  bool dangerous_ambig_found_;
614 
615  // The following variables are populated and passed by reference any
616  // time unichar_string() or unichar_lengths() are called.
617  mutable std::string unichar_string_;
618  mutable std::string unichar_lengths_;
619 };
620 
621 // Make WERD_CHOICE listable.
622 ELISTIZEH(WERD_CHOICE)
623 using BLOB_CHOICE_LIST_VECTOR = std::vector<BLOB_CHOICE_LIST *>;
624 
625 // Utilities for comparing WERD_CHOICEs
626 
627 bool EqualIgnoringCaseAndTerminalPunct(const WERD_CHOICE &word1, const WERD_CHOICE &word2);
628 
629 // Utilities for debug printing.
630 void print_ratings_list(const char *msg, // intro message
631  BLOB_CHOICE_LIST *ratings, // list of results
632  const UNICHARSET &current_unicharset // unicharset that can be used
633  // for id-to-unichar conversion
634 );
635 
636 } // namespace tesseract
637 
638 #endif
#define ELISTIZEH(CLASSNAME)
Definition: elst.h:803
#define ASSERT_HOST(x)
Definition: errcode.h:59
void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings, const UNICHARSET &current_unicharset)
Definition: ratngs.cpp:804
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
@ SP_SUBSCRIPT
Definition: ratngs.h:250
@ SP_DROPCAP
Definition: ratngs.h:250
@ SP_NORMAL
Definition: ratngs.h:250
@ SP_SUPERSCRIPT
Definition: ratngs.h:250
ICOORD & operator+=(ICOORD &op1, const ICOORD &op2)
Definition: points.h:372
int UNICHAR_ID
Definition: unichar.h:36
@ UNICHAR_SPACE
Definition: unicharset.h:36
const char * ScriptPosToString(enum ScriptPos script_pos)
Definition: ratngs.cpp:193
BLOB_CHOICE * FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list)
Definition: ratngs.cpp:177
BlobChoiceClassifier
Definition: ratngs.h:48
@ BCC_AMBIG
Definition: ratngs.h:52
@ BCC_FAKE
Definition: ratngs.h:53
@ BCC_SPECKLE_CLASSIFIER
Definition: ratngs.h:51
@ BCC_STATIC_CLASSIFIER
Definition: ratngs.h:49
@ BCC_ADAPTED_CLASSIFIER
Definition: ratngs.h:50
PermuterType
Definition: ratngs.h:231
@ UPPER_CASE_PERM
Definition: ratngs.h:236
@ NGRAM_PERM
Definition: ratngs.h:237
@ LOWER_CASE_PERM
Definition: ratngs.h:235
@ SYSTEM_DAWG_PERM
Definition: ratngs.h:240
@ TOP_CHOICE_PERM
Definition: ratngs.h:234
@ NUMBER_PERM
Definition: ratngs.h:238
@ COMPOUND_PERM
Definition: ratngs.h:244
@ NO_PERM
Definition: ratngs.h:232
@ NUM_PERMUTER_TYPES
Definition: ratngs.h:246
@ PUNC_PERM
Definition: ratngs.h:233
@ USER_DAWG_PERM
Definition: ratngs.h:242
@ USER_PATTERN_PERM
Definition: ratngs.h:239
@ DOC_DAWG_PERM
Definition: ratngs.h:241
@ FREQ_DAWG_PERM
Definition: ratngs.h:243
bool EqualIgnoringCaseAndTerminalPunct(const WERD_CHOICE &word1, const WERD_CHOICE &word2)
Definition: ratngs.cpp:773
std::vector< BLOB_CHOICE_LIST * > BLOB_CHOICE_LIST_VECTOR
Definition: ratngs.h:623
int16_t fontinfo_id2() const
Definition: ratngs.h:93
void set_certainty(float newrat)
Definition: ratngs.h:150
void set_unichar_id(UNICHAR_ID newunichar_id)
Definition: ratngs.h:144
float certainty() const
Definition: ratngs.h:87
UNICHAR_ID unichar_id() const
Definition: ratngs.h:81
int script_id() const
Definition: ratngs.h:118
void set_classifier(BlobChoiceClassifier classifier)
Definition: ratngs.h:160
void print_full() const
Definition: ratngs.h:176
void print(const UNICHARSET *unicharset) const
Definition: ratngs.h:172
void set_matrix_cell(int col, int row)
Definition: ratngs.h:156
void set_fonts(const std::vector< ScoredFont > &fonts)
Definition: ratngs.h:100
static int SortByRating(const void *p1, const void *p2)
Definition: ratngs.h:182
const std::vector< ScoredFont > & fonts() const
Definition: ratngs.h:97
void set_script(int newscript_id)
Definition: ratngs.h:153
float min_xheight() const
Definition: ratngs.h:124
float yshift() const
Definition: ratngs.h:130
float max_xheight() const
Definition: ratngs.h:127
BlobChoiceClassifier classifier() const
Definition: ratngs.h:133
static BLOB_CHOICE * deep_copy(const BLOB_CHOICE *src)
Definition: ratngs.h:163
const MATRIX_COORD & matrix_cell()
Definition: ratngs.h:121
bool IsClassified() const
Definition: ratngs.h:139
bool IsAdapted() const
Definition: ratngs.h:136
int16_t fontinfo_id() const
Definition: ratngs.h:90
bool PosAndSizeAgree(const BLOB_CHOICE &other, float x_height, bool debug) const
Definition: ratngs.cpp:152
void set_rating(float newrat)
Definition: ratngs.h:147
float rating() const
Definition: ratngs.h:84
float max_x_height() const
Definition: ratngs.h:320
std::string debug_string() const
Definition: ratngs.h:475
float certainty() const
Definition: ratngs.h:311
bool set_unichars_in_script_order(bool in_script_order)
Definition: ratngs.h:505
float certainty(unsigned index) const
Definition: ratngs.h:314
void set_dangerous_ambig_found_(bool value)
Definition: ratngs.h:347
void remove_unichar_id(unsigned index)
Definition: ratngs.h:454
void set_x_heights(float min_height, float max_height)
Definition: ratngs.h:323
void set_unichar_id(UNICHAR_ID unichar_id, unsigned index)
Definition: ratngs.h:340
WERD_CHOICE(const UNICHARSET *unicharset)
Definition: ratngs.h:259
bool unichars_in_script_order() const
Definition: ratngs.h:509
const std::vector< UNICHAR_ID > & unichar_ids() const
Definition: ratngs.h:292
UNICHAR_ID unichar_id(unsigned index) const
Definition: ratngs.h:295
bool empty() const
Definition: ratngs.h:280
uint8_t permuter() const
Definition: ratngs.h:327
static const float kBadRating
Definition: ratngs.h:256
WERD_CHOICE(const UNICHARSET *unicharset, int reserved)
Definition: ratngs.h:262
void make_bad()
Set the fields in this choice to be default (bad) values.
Definition: ratngs.h:415
bool dangerous_ambig_found() const
Definition: ratngs.h:344
const std::string & unichar_string() const
Definition: ratngs.h:522
WERD_CHOICE(const char *src_string, const char *src_lengths, float src_rating, float src_certainty, uint8_t src_permuter, const UNICHARSET &unicharset)
Definition: ratngs.h:265
void set_certainty(float new_val)
Definition: ratngs.h:353
void set_length(unsigned len)
Definition: ratngs.h:362
WERD_CHOICE(const WERD_CHOICE &word)
Definition: ratngs.h:271
bool ContainsAnyNonSpaceDelimited() const
Definition: ratngs.h:484
void append_unichar_id_space_allocated(UNICHAR_ID unichar_id, int blob_count, float rating, float certainty)
Definition: ratngs.h:424
void init(unsigned reserved)
Definition: ratngs.h:382
unsigned state(unsigned index) const
Definition: ratngs.h:299
void double_the_size()
Make more space in unichar_id_ and fragment_lengths_ arrays.
Definition: ratngs.h:368
void set_permuter(uint8_t perm)
Definition: ratngs.h:356
const std::string & unichar_lengths() const
Definition: ratngs.h:529
float min_x_height() const
Definition: ratngs.h:317
const UNICHARSET * unicharset() const
Definition: ratngs.h:277
unsigned length() const
Definition: ratngs.h:283
void remove_last_unichar_id()
Definition: ratngs.h:451
void set_adjust_factor(float factor)
Definition: ratngs.h:289
void print() const
Definition: ratngs.h:557
float rating() const
Definition: ratngs.h:308
float adjust_factor() const
Definition: ratngs.h:286
void set_unichar_id(UNICHAR_ID unichar_id, int blob_count, float rating, float certainty, unsigned index)
Definition: ratngs.h:433
std::string & unichar_string()
Definition: ratngs.h:515
void set_rating(float new_val)
Definition: ratngs.h:350
ScriptPos BlobPosition(unsigned index) const
Definition: ratngs.h:302
bool IsAllSpaces() const
Definition: ratngs.h:493
std::string debug_str(UNICHAR_ID id) const
Definition: unicharset.cpp:331
#define TESS_API
Definition: export.h:34