tesseract  5.0.0
tesseract::Classify Class Reference

#include <classify.h>

Inheritance diagram for tesseract::Classify:
tesseract::CCStruct tesseract::CCUtil tesseract::Wordrec tesseract::Tesseract

Public Member Functions

 Classify ()
 
 ~Classify () override
 
virtual Dict & getDict ()
 
const ShapeTable * shape_table () const
 
void SetStaticClassifier (ShapeClassifier *static_classifier)
 
void AddLargeSpeckleTo (int blob_length, BLOB_CHOICE_LIST *choices)
 
bool LargeSpeckle (const TBLOB &blob)
 
int GetFontinfoId (ADAPT_CLASS_STRUCT *Class, uint8_t ConfigId)
 
int PruneClasses (const INT_TEMPLATES_STRUCT *int_templates, int num_features, int keep_this, const INT_FEATURE_STRUCT *features, const uint8_t *normalization_factors, const uint16_t *expected_num_features, std::vector< CP_RESULT_STRUCT > *results)
 
void ReadNewCutoffs (TFile *fp, uint16_t *Cutoffs)
 
void PrintAdaptedTemplates (FILE *File, ADAPT_TEMPLATES_STRUCT *Templates)
 
void WriteAdaptedTemplates (FILE *File, ADAPT_TEMPLATES_STRUCT *Templates)
 
ADAPT_TEMPLATES_STRUCT * ReadAdaptedTemplates (TFile *File)
 
void ConvertProto (PROTO_STRUCT *Proto, int ProtoId, INT_CLASS_STRUCT *Class)
 
INT_TEMPLATES_STRUCT * CreateIntTemplates (CLASSES FloatProtos, const UNICHARSET &target_unicharset)
 
void LearnWord (const char *fontname, WERD_RES *word)
 
void LearnPieces (const char *fontname, int start, int length, float threshold, CharSegmentationType segmentation, const char *correct_text, WERD_RES *word)
 
void InitAdaptiveClassifier (TessdataManager *mgr)
 
void InitAdaptedClass (TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, ADAPT_CLASS_STRUCT *Class, ADAPT_TEMPLATES_STRUCT *Templates)
 
void AmbigClassifier (const std::vector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, const TBLOB *blob, INT_TEMPLATES_STRUCT *templates, ADAPT_CLASS_STRUCT **classes, UNICHAR_ID *ambiguities, ADAPT_RESULTS *results)
 
void MasterMatcher (INT_TEMPLATES_STRUCT *templates, int16_t num_features, const INT_FEATURE_STRUCT *features, const uint8_t *norm_factors, ADAPT_CLASS_STRUCT **classes, int debug, int matcher_multiplier, const TBOX &blob_box, const std::vector< CP_RESULT_STRUCT > &results, ADAPT_RESULTS *final_results)
 
void ExpandShapesAndApplyCorrections (ADAPT_CLASS_STRUCT **classes, bool debug, int class_id, int bottom, int top, float cp_rating, int blob_length, int matcher_multiplier, const uint8_t *cn_factors, UnicharRating *int_result, ADAPT_RESULTS *final_results)
 
double ComputeCorrectedRating (bool debug, int unichar_id, double cp_rating, double im_rating, int feature_misses, int bottom, int top, int blob_length, int matcher_multiplier, const uint8_t *cn_factors)
 
void ConvertMatchesToChoices (const DENORM &denorm, const TBOX &box, ADAPT_RESULTS *Results, BLOB_CHOICE_LIST *Choices)
 
void AddNewResult (const UnicharRating &new_result, ADAPT_RESULTS *results)
 
int GetAdaptiveFeatures (TBLOB *Blob, INT_FEATURE_ARRAY IntFeatures, FEATURE_SET *FloatFeatures)
 
void DebugAdaptiveClassifier (TBLOB *Blob, ADAPT_RESULTS *Results)
 
PROTO_ID MakeNewTempProtos (FEATURE_SET Features, int NumBadFeat, FEATURE_ID BadFeat[], INT_CLASS_STRUCT *IClass, ADAPT_CLASS_STRUCT *Class, BIT_VECTOR TempProtoMask)
 
int MakeNewTemporaryConfig (ADAPT_TEMPLATES_STRUCT *Templates, CLASS_ID ClassId, int FontinfoId, int NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_SET FloatFeatures)
 
void MakePermanent (ADAPT_TEMPLATES_STRUCT *Templates, CLASS_ID ClassId, int ConfigId, TBLOB *Blob)
 
void PrintAdaptiveMatchResults (const ADAPT_RESULTS &results)
 
void RemoveExtraPuncs (ADAPT_RESULTS *Results)
 
void RemoveBadMatches (ADAPT_RESULTS *Results)
 
void SetAdaptiveThreshold (float Threshold)
 
void ShowBestMatchFor (int shape_id, const INT_FEATURE_STRUCT *features, int num_features)
 
std::string ClassIDToDebugStr (const INT_TEMPLATES_STRUCT *templates, int class_id, int config_id) const
 
int ClassAndConfigIDToFontOrShapeID (int class_id, int int_result_config) const
 
int ShapeIDToClassID (int shape_id) const
 
UNICHAR_ID * BaselineClassifier (TBLOB *Blob, const std::vector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, ADAPT_TEMPLATES_STRUCT *Templates, ADAPT_RESULTS *Results)
 
int CharNormClassifier (TBLOB *blob, const TrainingSample &sample, ADAPT_RESULTS *adapt_results)
 
int CharNormTrainingSample (bool pruner_only, int keep_this, const TrainingSample &sample, std::vector< UnicharRating > *results)
 
UNICHAR_ID * GetAmbiguities (TBLOB *Blob, CLASS_ID CorrectClass)
 
void DoAdaptiveMatch (TBLOB *Blob, ADAPT_RESULTS *Results)
 
void AdaptToChar (TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, float Threshold, ADAPT_TEMPLATES_STRUCT *adaptive_templates)
 
void DisplayAdaptedChar (TBLOB *blob, INT_CLASS_STRUCT *int_class)
 
bool AdaptableWord (WERD_RES *word)
 
void EndAdaptiveClassifier ()
 
void SettupPass1 ()
 
void SettupPass2 ()
 
void AdaptiveClassifier (TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
 
void ClassifyAsNoise (ADAPT_RESULTS *Results)
 
void ResetAdaptiveClassifierInternal ()
 
void SwitchAdaptiveClassifier ()
 
void StartBackupAdaptiveClassifier ()
 
int GetCharNormFeature (const INT_FX_RESULT_STRUCT &fx_info, INT_TEMPLATES_STRUCT *templates, uint8_t *pruner_norm_array, uint8_t *char_norm_array)
 
void ComputeCharNormArrays (FEATURE_STRUCT *norm_feature, INT_TEMPLATES_STRUCT *templates, uint8_t *char_norm_array, uint8_t *pruner_array)
 
bool TempConfigReliable (CLASS_ID class_id, const TEMP_CONFIG_STRUCT *config)
 
void UpdateAmbigsGroup (CLASS_ID class_id, TBLOB *Blob)
 
bool AdaptiveClassifierIsFull () const
 
bool AdaptiveClassifierIsEmpty () const
 
bool LooksLikeGarbage (TBLOB *blob)
 
void RefreshDebugWindow (ScrollView **win, const char *msg, int y_offset, const TBOX &wbox)
 
void ClearCharNormArray (uint8_t *char_norm_array)
 
void ComputeIntCharNormArray (const FEATURE_STRUCT &norm_feature, uint8_t *char_norm_array)
 
void ComputeIntFeatures (FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures)
 
INT_TEMPLATES_STRUCT * ReadIntTemplates (TFile *fp)
 
void WriteIntTemplates (FILE *File, INT_TEMPLATES_STRUCT *Templates, const UNICHARSET &target_unicharset)
 
CLASS_ID GetClassToDebug (const char *Prompt, bool *adaptive_on, bool *pretrained_on, int *shape_id)
 
void ShowMatchDisplay ()
 
UnicityTable< FontInfo > & get_fontinfo_table ()
 
const UnicityTable< FontInfo > & get_fontinfo_table () const
 
UnicityTable< FontSet > & get_fontset_table ()
 
void NormalizeOutlines (LIST Outlines, float *XScale, float *YScale)
 
FEATURE_SET ExtractOutlineFeatures (TBLOB *Blob)
 
FEATURE_SET ExtractPicoFeatures (TBLOB *Blob)
 
FEATURE_SET ExtractIntCNFeatures (const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
 
FEATURE_SET ExtractIntGeoFeatures (const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
 
void LearnBlob (const std::string &fontname, TBLOB *Blob, const DENORM &cn_denorm, const INT_FX_RESULT_STRUCT &fx_info, const char *blob_text)
 
bool WriteTRFile (const char *filename)
 
 BOOL_VAR_H (allow_blob_division)
 
 BOOL_VAR_H (prioritize_division)
 
 BOOL_VAR_H (classify_enable_learning)
 
 INT_VAR_H (classify_debug_level)
 
 INT_VAR_H (classify_norm_method)
 
 double_VAR_H (classify_char_norm_range)
 
 double_VAR_H (classify_max_rating_ratio)
 
 double_VAR_H (classify_max_certainty_margin)
 
 BOOL_VAR_H (tess_cn_matching)
 
 BOOL_VAR_H (tess_bn_matching)
 
 BOOL_VAR_H (classify_enable_adaptive_matcher)
 
 BOOL_VAR_H (classify_use_pre_adapted_templates)
 
 BOOL_VAR_H (classify_save_adapted_templates)
 
 BOOL_VAR_H (classify_enable_adaptive_debugger)
 
 BOOL_VAR_H (classify_nonlinear_norm)
 
 INT_VAR_H (matcher_debug_level)
 
 INT_VAR_H (matcher_debug_flags)
 
 INT_VAR_H (classify_learning_debug_level)
 
 double_VAR_H (matcher_good_threshold)
 
 double_VAR_H (matcher_reliable_adaptive_result)
 
 double_VAR_H (matcher_perfect_threshold)
 
 double_VAR_H (matcher_bad_match_pad)
 
 double_VAR_H (matcher_rating_margin)
 
 double_VAR_H (matcher_avg_noise_size)
 
 INT_VAR_H (matcher_permanent_classes_min)
 
 INT_VAR_H (matcher_min_examples_for_prototyping)
 
 INT_VAR_H (matcher_sufficient_examples_for_prototyping)
 
 double_VAR_H (matcher_clustering_max_angle_delta)
 
 double_VAR_H (classify_misfit_junk_penalty)
 
 double_VAR_H (rating_scale)
 
 double_VAR_H (certainty_scale)
 
 double_VAR_H (tessedit_class_miss_scale)
 
 double_VAR_H (classify_adapted_pruning_factor)
 
 double_VAR_H (classify_adapted_pruning_threshold)
 
 INT_VAR_H (classify_adapt_proto_threshold)
 
 INT_VAR_H (classify_adapt_feature_threshold)
 
 BOOL_VAR_H (disable_character_fragments)
 
 double_VAR_H (classify_character_fragments_garbage_certainty_threshold)
 
 BOOL_VAR_H (classify_debug_character_fragments)
 
 BOOL_VAR_H (matcher_debug_separate_windows)
 
 STRING_VAR_H (classify_learn_debug_str)
 
 INT_VAR_H (classify_class_pruner_threshold)
 
 INT_VAR_H (classify_class_pruner_multiplier)
 
 INT_VAR_H (classify_cp_cutoff_strength)
 
 INT_VAR_H (classify_integer_matcher_multiplier)
 
 BOOL_VAR_H (classify_bln_numeric_mode)
 
 double_VAR_H (speckle_large_max_size)
 
 double_VAR_H (speckle_rating_penalty)
 
NormEvidenceOf

Return the new type of evidence number corresponding to this normalization adjustment. The equation that represents the transform is: 1 / (1 + (NormAdj / midpoint) ^ curl)

float ComputeNormMatch (CLASS_ID ClassId, const FEATURE_STRUCT &feature, bool DebugMatch)
 
void FreeNormProtos ()
 
NORM_PROTOS * ReadNormProtos (TFile *fp)
 
- Public Member Functions inherited from tesseract::CCStruct
 CCStruct ()=default
 
 ~CCStruct () override
 
- Public Member Functions inherited from tesseract::CCUtil
 CCUtil ()
 
virtual ~CCUtil ()
 
void main_setup (const std::string &argv0, const std::string &basename)
 CCUtil::main_setup - set location of tessdata and name of image. More...
 
ParamsVectors * params ()
 
 INT_VAR_H (ambigs_debug_level)
 
 BOOL_VAR_H (use_ambigs_for_adaption)
 

Static Public Member Functions

static void SetupBLCNDenorms (const TBLOB &blob, bool nonlinear_norm, DENORM *bl_denorm, DENORM *cn_denorm, INT_FX_RESULT_STRUCT *fx_info)
 
static void ExtractFeatures (const TBLOB &blob, bool nonlinear_norm, std::vector< INT_FEATURE_STRUCT > *bl_features, std::vector< INT_FEATURE_STRUCT > *cn_features, INT_FX_RESULT_STRUCT *results, std::vector< int > *outline_cn_counts)
 

Public Attributes

INT_TEMPLATES_STRUCT * PreTrainedTemplates = nullptr
 
ADAPT_TEMPLATES_STRUCT * AdaptedTemplates = nullptr
 
ADAPT_TEMPLATES_STRUCT * BackupAdaptedTemplates = nullptr
 
BIT_VECTOR AllProtosOn = nullptr
 
BIT_VECTOR AllConfigsOn = nullptr
 
BIT_VECTOR AllConfigsOff = nullptr
 
BIT_VECTOR TempProtoMask = nullptr
 
NORM_PROTOS * NormProtos = nullptr
 
UnicityTable< FontInfo > fontinfo_table_
 
UnicityTable< FontSet > fontset_table_
 
bool EnableLearning = true
 
- Public Attributes inherited from tesseract::CCUtil
std::string datadir
 
std::string imagebasename
 
std::string lang
 
std::string language_data_path_prefix
 
UNICHARSET unicharset
 
UnicharAmbigs unichar_ambigs
 
std::string imagefile
 
std::string directory
 

Protected Attributes

IntegerMatcher im_
 
FEATURE_DEFS_STRUCT feature_defs_
 
ShapeTable * shape_table_ = nullptr
 

Additional Inherited Members

- Static Public Attributes inherited from tesseract::CCStruct
static const double kDescenderFraction = 0.25
 
static const double kXHeightFraction = 0.5
 
static const double kAscenderFraction = 0.25
 
static const double kXHeightCapRatio
 

Detailed Description

Definition at line 94 of file classify.h.

Constructor & Destructor Documentation

◆ Classify()

tesseract::Classify::Classify ( )

Definition at line 60 of file classify.cpp.

61  : BOOL_MEMBER(allow_blob_division, true, "Use divisible blobs chopping", this->params())
62  , BOOL_MEMBER(prioritize_division, false, "Prioritize blob division over chopping",
63  this->params())
64  , BOOL_MEMBER(classify_enable_learning, true, "Enable adaptive classifier", this->params())
65  , INT_MEMBER(classify_debug_level, 0, "Classify debug level", this->params())
66  , INT_MEMBER(classify_norm_method, character, "Normalization Method ...", this->params())
67  , double_MEMBER(classify_char_norm_range, 0.2, "Character Normalization Range ...",
68  this->params())
69  , double_MEMBER(classify_max_rating_ratio, 1.5, "Veto ratio between classifier ratings",
70  this->params())
71  , double_MEMBER(classify_max_certainty_margin, 5.5,
72  "Veto difference between classifier certainties", this->params())
73  , BOOL_MEMBER(tess_cn_matching, 0, "Character Normalized Matching", this->params())
74  , BOOL_MEMBER(tess_bn_matching, 0, "Baseline Normalized Matching", this->params())
75  , BOOL_MEMBER(classify_enable_adaptive_matcher, 1, "Enable adaptive classifier", this->params())
76  , BOOL_MEMBER(classify_use_pre_adapted_templates, 0, "Use pre-adapted classifier templates",
77  this->params())
78  , BOOL_MEMBER(classify_save_adapted_templates, 0, "Save adapted templates to a file",
79  this->params())
80  , BOOL_MEMBER(classify_enable_adaptive_debugger, 0, "Enable match debugger", this->params())
81  , BOOL_MEMBER(classify_nonlinear_norm, 0, "Non-linear stroke-density normalization",
82  this->params())
83  , INT_MEMBER(matcher_debug_level, 0, "Matcher Debug Level", this->params())
84  , INT_MEMBER(matcher_debug_flags, 0, "Matcher Debug Flags", this->params())
85  , INT_MEMBER(classify_learning_debug_level, 0, "Learning Debug Level: ", this->params())
86  , double_MEMBER(matcher_good_threshold, 0.125, "Good Match (0-1)", this->params())
87  , double_MEMBER(matcher_reliable_adaptive_result, 0.0, "Great Match (0-1)", this->params())
88  , double_MEMBER(matcher_perfect_threshold, 0.02, "Perfect Match (0-1)", this->params())
89  , double_MEMBER(matcher_bad_match_pad, 0.15, "Bad Match Pad (0-1)", this->params())
90  , double_MEMBER(matcher_rating_margin, 0.1, "New template margin (0-1)", this->params())
91  , double_MEMBER(matcher_avg_noise_size, 12.0, "Avg. noise blob length", this->params())
92  , INT_MEMBER(matcher_permanent_classes_min, 1, "Min # of permanent classes", this->params())
93  , INT_MEMBER(matcher_min_examples_for_prototyping, 3, "Reliable Config Threshold",
94  this->params())
95  , INT_MEMBER(matcher_sufficient_examples_for_prototyping, 5,
96  "Enable adaption even if the ambiguities have not been seen", this->params())
97  , double_MEMBER(matcher_clustering_max_angle_delta, 0.015,
98  "Maximum angle delta for prototype clustering", this->params())
99  , double_MEMBER(classify_misfit_junk_penalty, 0.0,
100  "Penalty to apply when a non-alnum is vertically out of "
101  "its expected textline position",
102  this->params())
103  , double_MEMBER(rating_scale, 1.5, "Rating scaling factor", this->params())
104  , double_MEMBER(certainty_scale, 20.0, "Certainty scaling factor", this->params())
105  , double_MEMBER(tessedit_class_miss_scale, 0.00390625, "Scale factor for features not used",
106  this->params())
107  , double_MEMBER(classify_adapted_pruning_factor, 2.5,
108  "Prune poor adapted results this much worse than best result", this->params())
109  , double_MEMBER(classify_adapted_pruning_threshold, -1.0,
110  "Threshold at which classify_adapted_pruning_factor starts", this->params())
111  , INT_MEMBER(classify_adapt_proto_threshold, 230,
112  "Threshold for good protos during adaptive 0-255", this->params())
113  , INT_MEMBER(classify_adapt_feature_threshold, 230,
114  "Threshold for good features during adaptive 0-255", this->params())
115  , BOOL_MEMBER(disable_character_fragments, true,
116  "Do not include character fragments in the"
117  " results of the classifier",
118  this->params())
119  , double_MEMBER(classify_character_fragments_garbage_certainty_threshold, -3.0,
120  "Exclude fragments that do not look like whole"
121  " characters from training and adaption",
122  this->params())
123  , BOOL_MEMBER(classify_debug_character_fragments, false,
124  "Bring up graphical debugging windows for fragments training", this->params())
125  , BOOL_MEMBER(matcher_debug_separate_windows, false,
126  "Use two different windows for debugging the matching: "
127  "One for the protos and one for the features.",
128  this->params())
129  , STRING_MEMBER(classify_learn_debug_str, "", "Class str to debug learning", this->params())
130  , INT_MEMBER(classify_class_pruner_threshold, 229, "Class Pruner Threshold 0-255",
131  this->params())
132  , INT_MEMBER(classify_class_pruner_multiplier, 15,
133  "Class Pruner Multiplier 0-255: ", this->params())
134  , INT_MEMBER(classify_cp_cutoff_strength, 7,
135  "Class Pruner CutoffStrength: ", this->params())
136  , INT_MEMBER(classify_integer_matcher_multiplier, 10,
137  "Integer Matcher Multiplier 0-255: ", this->params())
138  , BOOL_MEMBER(classify_bln_numeric_mode, 0, "Assume the input is numbers [0-9].",
139  this->params())
140  , double_MEMBER(speckle_large_max_size, 0.30, "Max large speckle size", this->params())
141  , double_MEMBER(speckle_rating_penalty, 10.0, "Penalty to add to worst rating for noise",
142  this->params())
143  , im_(&classify_debug_level)
144  , dict_(this) {
145  using namespace std::placeholders; // for _1, _2
146  fontinfo_table_.set_clear_callback(std::bind(FontInfoDeleteCallback, _1));
147 
148  InitFeatureDefs(&feature_defs_);
149 }
#define INT_MEMBER(name, val, comment, vec)
Definition: params.h:368
#define double_MEMBER(name, val, comment, vec)
Definition: params.h:374
#define STRING_MEMBER(name, val, comment, vec)
Definition: params.h:372
#define BOOL_MEMBER(name, val, comment, vec)
Definition: params.h:370
#define classify_enable_adaptive_matcher
Definition: adaptmatch.cpp:78
void FontInfoDeleteCallback(FontInfo f)
Definition: fontinfo.cpp:129
@ character
Definition: mfoutline.h:53
void InitFeatureDefs(FEATURE_DEFS_STRUCT *featuredefs)
Definition: featdefs.cpp:87
ParamsVectors * params()
Definition: ccutil.h:53
IntegerMatcher im_
Definition: classify.h:446
FEATURE_DEFS_STRUCT feature_defs_
Definition: classify.h:447
UnicityTable< FontInfo > fontinfo_table_
Definition: classify.h:435

◆ ~Classify()

tesseract::Classify::~Classify ( )
override

Definition at line 151 of file classify.cpp.

151  {
152  EndAdaptiveClassifier();
153 #ifndef GRAPHICS_DISABLED
154  delete learn_debug_win_;
155  delete learn_fragmented_word_debug_win_;
156  delete learn_fragments_debug_win_;
157 #endif
158 }
void EndAdaptiveClassifier()
Definition: adaptmatch.cpp:464

Member Function Documentation

◆ AdaptableWord()

bool tesseract::Classify::AdaptableWord ( WERD_RES * word)

Return true if the specified word is acceptable for adaptation.

Globals: none

Parameters
word: current word
Returns
true or false

Definition at line 811 of file adaptmatch.cpp.

811  {
812  if (word->best_choice == nullptr) {
813  return false;
814  }
815  auto BestChoiceLength = word->best_choice->length();
816  float adaptable_score = getDict().segment_penalty_dict_case_ok + ADAPTABLE_WERD_ADJUSTMENT;
817  return // rules that apply in general - simplest to compute first
818  BestChoiceLength > 0 && BestChoiceLength == word->rebuild_word->NumBlobs() &&
819  BestChoiceLength <= MAX_ADAPTABLE_WERD_SIZE &&
820  // This basically ensures that the word is at least a dictionary match
821  // (freq word, user word, system dawg word, etc).
822  // Since all the other adjustments will make adjust factor higher
823  // than adaptable_score=1.1+0.05=1.15
824  // Since these are other flags that ensure that the word is dict word,
825  // this check could be at times redundant.
826  word->best_choice->adjust_factor() <= adaptable_score &&
827  // Make sure that alternative choices are not dictionary words.
828  word->AlternativeChoiceAdjustmentsWorseThan(adaptable_score);
829 }
#define MAX_ADAPTABLE_WERD_SIZE
Definition: adaptmatch.cpp:85
#define ADAPTABLE_WERD_ADJUSTMENT
Definition: adaptmatch.cpp:87
virtual Dict & getDict()
Definition: classify.h:98

◆ AdaptiveClassifier()

void tesseract::Classify::AdaptiveClassifier ( TBLOB * Blob,
BLOB_CHOICE_LIST *  Choices 
)

This routine calls the adaptive matcher which returns (in an array) the class id of each class matched.

It also returns the number of classes matched. For each class matched it places the best rating found for that class into the Ratings array.

Bad matches are then removed so that they don't need to be sorted. The remaining good matches are then sorted and converted to choices.

This routine also performs some simple speckle filtering.

Parameters
Blob: blob to be classified
[out] Choices: List of choices found by the adaptive matcher. Filled on return with the choices found by the class pruner and the ratings therefrom. Also contains the detailed results of the integer matcher.

Definition at line 202 of file adaptmatch.cpp.

202  {
203  assert(Choices != nullptr);
204  auto *Results = new ADAPT_RESULTS;
205  Results->Initialize();
206 
207  ASSERT_HOST(AdaptedTemplates != nullptr);
208 
209  DoAdaptiveMatch(Blob, Results);
210 
211  RemoveBadMatches(Results);
212  std::sort(Results->match.begin(), Results->match.end(), SortDescendingRating);
213  RemoveExtraPuncs(Results);
214  Results->ComputeBest();
215  ConvertMatchesToChoices(Blob->denorm(), Blob->bounding_box(), Results, Choices);
216 
217  // TODO(rays) Move to before ConvertMatchesToChoices!
218  if (LargeSpeckle(*Blob) || Choices->empty()) {
219  AddLargeSpeckleTo(Results->BlobLength, Choices);
220  }
221 
222  if (matcher_debug_level >= 1) {
223  tprintf("AD Matches = ");
224  PrintAdaptiveMatchResults(*Results);
225  }
226 
227 #ifndef GRAPHICS_DISABLED
228  if (classify_enable_adaptive_debugger) {
229  DebugAdaptiveClassifier(Blob, Results);
230  }
231 #endif
232 
233  delete Results;
234 } /* AdaptiveClassifier */
#define ASSERT_HOST(x)
Definition: errcode.h:59
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
void RemoveBadMatches(ADAPT_RESULTS *Results)
ADAPT_TEMPLATES_STRUCT * AdaptedTemplates
Definition: classify.h:421
bool LargeSpeckle(const TBLOB &blob)
Definition: classify.cpp:191
void DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results)
void RemoveExtraPuncs(ADAPT_RESULTS *Results)
void ConvertMatchesToChoices(const DENORM &denorm, const TBOX &box, ADAPT_RESULTS *Results, BLOB_CHOICE_LIST *Choices)
void DebugAdaptiveClassifier(TBLOB *Blob, ADAPT_RESULTS *Results)
void AddLargeSpeckleTo(int blob_length, BLOB_CHOICE_LIST *choices)
Definition: classify.cpp:170
void PrintAdaptiveMatchResults(const ADAPT_RESULTS &results)

◆ AdaptiveClassifierIsEmpty()

bool tesseract::Classify::AdaptiveClassifierIsEmpty ( ) const
inline

Definition at line 268 of file classify.h.

268  {
269  return AdaptedTemplates->NumPermClasses == 0;
270  }

◆ AdaptiveClassifierIsFull()

bool tesseract::Classify::AdaptiveClassifierIsFull ( ) const
inline

Definition at line 265 of file classify.h.

265  {
266  return NumAdaptationsFailed > 0;
267  }

◆ AdaptToChar()

void tesseract::Classify::AdaptToChar ( TBLOB * Blob,
CLASS_ID  ClassId,
int  FontinfoId,
float  Threshold,
ADAPT_TEMPLATES_STRUCT * adaptive_templates 
)
Parameters
Blob: blob to add to templates for ClassId
ClassId: class to add blob to
FontinfoId: font information from pre-trained templates
Threshold: minimum match rating to existing template
adaptive_templates: current set of adapted templates

Globals:

  • AllProtosOn dummy mask to match against all protos
  • AllConfigsOn dummy mask to match against all configs

Definition at line 843 of file adaptmatch.cpp.

844  {
845  int NumFeatures;
846  INT_FEATURE_ARRAY IntFeatures;
847  UnicharRating int_result;
848  INT_CLASS_STRUCT *IClass;
849  ADAPT_CLASS_STRUCT *Class;
850  TEMP_CONFIG_STRUCT *TempConfig;
851  FEATURE_SET FloatFeatures;
852  int NewTempConfigId;
853 
854  if (!LegalClassId(ClassId)) {
855  return;
856  }
857 
858  int_result.unichar_id = ClassId;
859  Class = adaptive_templates->Class[ClassId];
860  assert(Class != nullptr);
861  if (IsEmptyAdaptedClass(Class)) {
862  InitAdaptedClass(Blob, ClassId, FontinfoId, Class, adaptive_templates);
863  } else {
864  IClass = ClassForClassId(adaptive_templates->Templates, ClassId);
865 
866  NumFeatures = GetAdaptiveFeatures(Blob, IntFeatures, &FloatFeatures);
867  if (NumFeatures <= 0) {
868  return; // Features already freed by GetAdaptiveFeatures.
869  }
870 
871  // Only match configs with the matching font.
872  BIT_VECTOR MatchingFontConfigs = NewBitVector(MAX_NUM_PROTOS);
873  for (int cfg = 0; cfg < IClass->NumConfigs; ++cfg) {
874  if (GetFontinfoId(Class, cfg) == FontinfoId) {
875  SET_BIT(MatchingFontConfigs, cfg);
876  } else {
877  reset_bit(MatchingFontConfigs, cfg);
878  }
879  }
880  im_.Match(IClass, AllProtosOn, MatchingFontConfigs, NumFeatures, IntFeatures, &int_result,
881  classify_adapt_feature_threshold, NO_DEBUG, matcher_debug_separate_windows);
882  FreeBitVector(MatchingFontConfigs);
883 
884  SetAdaptiveThreshold(Threshold);
885 
886  if (1.0f - int_result.rating <= Threshold) {
887  if (ConfigIsPermanent(Class, int_result.config)) {
888  if (classify_learning_debug_level >= 1) {
889  tprintf("Found good match to perm config %d = %4.1f%%.\n", int_result.config,
890  int_result.rating * 100.0);
891  }
892  delete FloatFeatures;
893  return;
894  }
895 
896  TempConfig = TempConfigFor(Class, int_result.config);
897  IncreaseConfidence(TempConfig);
898  if (TempConfig->NumTimesSeen > Class->MaxNumTimesSeen) {
899  Class->MaxNumTimesSeen = TempConfig->NumTimesSeen;
900  }
901  if (classify_learning_debug_level >= 1) {
902  tprintf("Increasing reliability of temp config %d to %d.\n", int_result.config,
903  TempConfig->NumTimesSeen);
904  }
905 
906  if (TempConfigReliable(ClassId, TempConfig)) {
907  MakePermanent(adaptive_templates, ClassId, int_result.config, Blob);
908  UpdateAmbigsGroup(ClassId, Blob);
909  }
910  } else {
911  if (classify_learning_debug_level >= 1) {
912  tprintf("Found poor match to temp config %d = %4.1f%%.\n", int_result.config,
913  int_result.rating * 100.0);
914 #ifndef GRAPHICS_DISABLED
915  if (classify_learning_debug_level > 2) {
916  DisplayAdaptedChar(Blob, IClass);
917  }
918 #endif
919  }
920  NewTempConfigId = MakeNewTemporaryConfig(adaptive_templates, ClassId, FontinfoId, NumFeatures,
921  IntFeatures, FloatFeatures);
922  if (NewTempConfigId >= 0 &&
923  TempConfigReliable(ClassId, TempConfigFor(Class, NewTempConfigId))) {
924  MakePermanent(adaptive_templates, ClassId, NewTempConfigId, Blob);
925  UpdateAmbigsGroup(ClassId, Blob);
926  }
927 
928 #ifndef GRAPHICS_DISABLED
929  if (classify_learning_debug_level > 1) {
930  DisplayAdaptedChar(Blob, IClass);
931  }
932 #endif
933  }
934  delete FloatFeatures;
935  }
936 } /* AdaptToChar */
#define IsEmptyAdaptedClass(Class)
Definition: adaptive.h:83
#define ConfigIsPermanent(Class, ConfigId)
Definition: adaptive.h:85
#define IncreaseConfidence(TempConfig)
Definition: adaptive.h:95
#define TempConfigFor(Class, ConfigId)
Definition: adaptive.h:91
#define NO_DEBUG
Definition: adaptmatch.cpp:84
#define MAX_NUM_PROTOS
Definition: intproto.h:48
#define ClassForClassId(T, c)
Definition: intproto.h:156
#define LegalClassId(c)
Definition: intproto.h:154
uint32_t * BIT_VECTOR
Definition: bitvec.h:28
#define reset_bit(array, bit)
Definition: bitvec.h:57
#define SET_BIT(array, bit)
Definition: bitvec.h:55
FEATURE_SET_STRUCT * FEATURE_SET
Definition: ocrfeatures.h:87
INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]
Definition: intproto.h:137
BIT_VECTOR AllProtosOn
Definition: classify.h:428
void DisplayAdaptedChar(TBLOB *blob, INT_CLASS_STRUCT *int_class)
Definition: adaptmatch.cpp:940
int GetAdaptiveFeatures(TBLOB *Blob, INT_FEATURE_ARRAY IntFeatures, FEATURE_SET *FloatFeatures)
Definition: adaptmatch.cpp:778
void UpdateAmbigsGroup(CLASS_ID class_id, TBLOB *Blob)
void InitAdaptedClass(TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, ADAPT_CLASS_STRUCT *Class, ADAPT_TEMPLATES_STRUCT *Templates)
Definition: adaptmatch.cpp:686
bool TempConfigReliable(CLASS_ID class_id, const TEMP_CONFIG_STRUCT *config)
int MakeNewTemporaryConfig(ADAPT_TEMPLATES_STRUCT *Templates, CLASS_ID ClassId, int FontinfoId, int NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_SET FloatFeatures)
void SetAdaptiveThreshold(float Threshold)
int GetFontinfoId(ADAPT_CLASS_STRUCT *Class, uint8_t ConfigId)
Definition: adaptive.cpp:118
void MakePermanent(ADAPT_TEMPLATES_STRUCT *Templates, CLASS_ID ClassId, int ConfigId, TBLOB *Blob)
void Match(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, const INT_FEATURE_STRUCT *Features, tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
Definition: intmatcher.cpp:482

◆ AddLargeSpeckleTo()

void tesseract::Classify::AddLargeSpeckleTo ( int  blob_length,
BLOB_CHOICE_LIST *  choices 
)

Definition at line 170 of file classify.cpp.

170  {
171  BLOB_CHOICE_IT bc_it(choices);
172  // If there is no classifier result, we will use the worst possible certainty
173  // and corresponding rating.
174  float certainty = -getDict().certainty_scale;
175  float rating = rating_scale * blob_length;
176  if (!choices->empty() && blob_length > 0) {
177  bc_it.move_to_last();
178  BLOB_CHOICE *worst_choice = bc_it.data();
179  // Add speckle_rating_penalty to worst rating, matching old value.
180  rating = worst_choice->rating() + speckle_rating_penalty;
181  // Compute the rating to correspond to the certainty. (Used to be kept
182  // the same, but that messes up the language model search.)
183  certainty = -rating * getDict().certainty_scale / (rating_scale * blob_length);
184  }
185  auto *blob_choice = new BLOB_CHOICE(UNICHAR_SPACE, rating, certainty, -1, 0.0f, FLT_MAX, 0,
186  BCC_SPECKLE_CLASSIFIER);
187  bc_it.add_to_end(blob_choice);
188 }
@ UNICHAR_SPACE
Definition: unicharset.h:36
@ BCC_SPECKLE_CLASSIFIER
Definition: ratngs.h:51

◆ AddNewResult()

void tesseract::Classify::AddNewResult ( const UnicharRating & new_result,
ADAPT_RESULTS * results 
)

This routine adds the result of a classification into Results. If the new rating is much worse than the current best rating, it is not entered into results because it would end up being stripped later anyway. If the new rating is better than the old rating for the class, it replaces the old rating. If this is the first rating for the class, the class is added to the list of matched classes in Results. If the new rating is better than the best so far, it becomes the best so far.

Globals:

  • #matcher_bad_match_pad defines limits of an acceptable match
Parameters
new_result: new result to add
[out] results: results to add new result to

Definition at line 986 of file adaptmatch.cpp.

986  {
987  auto old_match = FindScoredUnichar(new_result.unichar_id, *results);
988 
989  if (new_result.rating + matcher_bad_match_pad < results->best_rating ||
990  (old_match < results->match.size() &&
991  new_result.rating <= results->match[old_match].rating)) {
992  return; // New one not good enough.
993  }
994 
995  if (!unicharset.get_fragment(new_result.unichar_id)) {
996  results->HasNonfragment = true;
997  }
998 
999  if (old_match < results->match.size()) {
1000  results->match[old_match].rating = new_result.rating;
1001  } else {
1002  results->match.push_back(new_result);
1003  }
1004 
1005  if (new_result.rating > results->best_rating &&
1006  // Ensure that fragments do not affect best rating, class and config.
1007  // This is needed so that at least one non-fragmented character is
1008  // always present in the results.
1009  // TODO(daria): verify that this helps accuracy and does not
1010  // hurt performance.
1011  !unicharset.get_fragment(new_result.unichar_id)) {
1012  results->best_match_index = old_match;
1013  results->best_rating = new_result.rating;
1014  results->best_unichar_id = new_result.unichar_id;
1015  }
1016 } /* AddNewResult */
UNICHARSET unicharset
Definition: ccutil.h:61
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
Definition: unicharset.h:769

◆ AmbigClassifier()

void tesseract::Classify::AmbigClassifier ( const std::vector< INT_FEATURE_STRUCT > &  int_features,
const INT_FX_RESULT_STRUCT & fx_info,
const TBLOB * blob,
INT_TEMPLATES_STRUCT * templates,
ADAPT_CLASS_STRUCT **  classes,
UNICHAR_ID * ambiguities,
ADAPT_RESULTS * results 
)

This routine is identical to CharNormClassifier() except that it does no class pruning. It simply matches the unknown blob against the classes listed in Ambiguities.

Globals:

Parameters
blob: blob to be classified
templates: built-in templates to classify against
classes: adapted class templates
ambiguities: array of unichar id's to match against
[out] results: place to put match results
int_features
fx_info

Definition at line 1037 of file adaptmatch.cpp.

1040  {
1041  if (int_features.empty()) {
1042  return;
1043  }
1044  auto *CharNormArray = new uint8_t[unicharset.size()];
1045  UnicharRating int_result;
1046 
1047  results->BlobLength = GetCharNormFeature(fx_info, templates, nullptr, CharNormArray);
1048  bool debug = matcher_debug_level >= 2 || classify_debug_level > 1;
1049  if (debug) {
1050  tprintf("AM Matches = ");
1051  }
1052 
1053  int top = blob->bounding_box().top();
1054  int bottom = blob->bounding_box().bottom();
1055  while (*ambiguities >= 0) {
1056  CLASS_ID class_id = *ambiguities;
1057 
1058  int_result.unichar_id = class_id;
1059  im_.Match(ClassForClassId(templates, class_id), AllProtosOn, AllConfigsOn, int_features.size(),
1060  &int_features[0], &int_result, classify_adapt_feature_threshold, NO_DEBUG,
1061  matcher_debug_separate_windows);
1062 
1063  ExpandShapesAndApplyCorrections(nullptr, debug, class_id, bottom, top, 0, results->BlobLength,
1064  classify_integer_matcher_multiplier, CharNormArray, &int_result,
1065  results);
1066  ambiguities++;
1067  }
1068  delete[] CharNormArray;
1069 } /* AmbigClassifier */
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:34
size_t size() const
Definition: unicharset.h:355
int GetCharNormFeature(const INT_FX_RESULT_STRUCT &fx_info, INT_TEMPLATES_STRUCT *templates, uint8_t *pruner_norm_array, uint8_t *char_norm_array)
void ExpandShapesAndApplyCorrections(ADAPT_CLASS_STRUCT **classes, bool debug, int class_id, int bottom, int top, float cp_rating, int blob_length, int matcher_multiplier, const uint8_t *cn_factors, UnicharRating *int_result, ADAPT_RESULTS *final_results)
BIT_VECTOR AllConfigsOn
Definition: classify.h:429

◆ BaselineClassifier()

UNICHAR_ID * tesseract::Classify::BaselineClassifier ( TBLOB *  Blob,
const std::vector< INT_FEATURE_STRUCT > &  int_features,
const INT_FX_RESULT_STRUCT &  fx_info,
ADAPT_TEMPLATES_STRUCT *  Templates,
ADAPT_RESULTS *  Results 
)

This routine extracts baseline normalized features from the unknown character and matches them against the specified set of templates. The classes which match are added to Results.

Globals:

  • BaselineCutoffs expected num features for each class
Parameters
Blob: blob to be classified
Templates: current set of adapted templates
Results: place to put match results
int_features
fx_info
Returns
Array of possible ambiguous chars that should be checked.

Definition at line 1224 of file adaptmatch.cpp.

1227  {
1228  if (int_features.empty()) {
1229  return nullptr;
1230  }
1231  auto *CharNormArray = new uint8_t[unicharset.size()];
1232  ClearCharNormArray(CharNormArray);
1233 
1234  Results->BlobLength = IntCastRounded(fx_info.Length / kStandardFeatureLength);
1235  PruneClasses(Templates->Templates, int_features.size(), -1, &int_features[0], CharNormArray,
1236  BaselineCutoffs, &Results->CPResults);
1237 
1238  if (matcher_debug_level >= 2 || classify_debug_level > 1) {
1239  tprintf("BL Matches = ");
1240  }
1241 
1242  MasterMatcher(Templates->Templates, int_features.size(), &int_features[0], CharNormArray,
1243  Templates->Class, matcher_debug_flags, 0, Blob->bounding_box(), Results->CPResults,
1244  Results);
1245 
1246  delete[] CharNormArray;
1247  CLASS_ID ClassId = Results->best_unichar_id;
1248  if (ClassId == INVALID_UNICHAR_ID || Results->best_match_index < 0) {
1249  return nullptr;
1250  }
1251 
1252  return Templates->Class[ClassId]
1253  ->Config[Results->match[Results->best_match_index].config]
1254  .Perm->Ambigs;
1255 } /* BaselineClassifier */
const double kStandardFeatureLength
Definition: intfx.h:44
int IntCastRounded(double x)
Definition: helpers.h:175
int PruneClasses(const INT_TEMPLATES_STRUCT *int_templates, int num_features, int keep_this, const INT_FEATURE_STRUCT *features, const uint8_t *normalization_factors, const uint16_t *expected_num_features, std::vector< CP_RESULT_STRUCT > *results)
Definition: intmatcher.cpp:427
void ClearCharNormArray(uint8_t *char_norm_array)
Definition: float2int.cpp:41
void MasterMatcher(INT_TEMPLATES_STRUCT *templates, int16_t num_features, const INT_FEATURE_STRUCT *features, const uint8_t *norm_factors, ADAPT_CLASS_STRUCT **classes, int debug, int matcher_multiplier, const TBOX &blob_box, const std::vector< CP_RESULT_STRUCT > &results, ADAPT_RESULTS *final_results)

◆ BOOL_VAR_H() [1/14]

tesseract::Classify::BOOL_VAR_H ( allow_blob_division  )

◆ BOOL_VAR_H() [2/14]

tesseract::Classify::BOOL_VAR_H ( classify_bln_numeric_mode  )

◆ BOOL_VAR_H() [3/14]

tesseract::Classify::BOOL_VAR_H ( classify_debug_character_fragments  )

◆ BOOL_VAR_H() [4/14]

tesseract::Classify::BOOL_VAR_H ( classify_enable_adaptive_debugger  )

◆ BOOL_VAR_H() [5/14]

tesseract::Classify::BOOL_VAR_H ( classify_enable_adaptive_matcher  )

◆ BOOL_VAR_H() [6/14]

tesseract::Classify::BOOL_VAR_H ( classify_enable_learning  )

◆ BOOL_VAR_H() [7/14]

tesseract::Classify::BOOL_VAR_H ( classify_nonlinear_norm  )

◆ BOOL_VAR_H() [8/14]

tesseract::Classify::BOOL_VAR_H ( classify_save_adapted_templates  )

◆ BOOL_VAR_H() [9/14]

tesseract::Classify::BOOL_VAR_H ( classify_use_pre_adapted_templates  )

◆ BOOL_VAR_H() [10/14]

tesseract::Classify::BOOL_VAR_H ( disable_character_fragments  )

◆ BOOL_VAR_H() [11/14]

tesseract::Classify::BOOL_VAR_H ( matcher_debug_separate_windows  )

◆ BOOL_VAR_H() [12/14]

tesseract::Classify::BOOL_VAR_H ( prioritize_division  )

◆ BOOL_VAR_H() [13/14]

tesseract::Classify::BOOL_VAR_H ( tess_bn_matching  )

◆ BOOL_VAR_H() [14/14]

tesseract::Classify::BOOL_VAR_H ( tess_cn_matching  )

◆ CharNormClassifier()

int tesseract::Classify::CharNormClassifier ( TBLOB *  blob,
const TrainingSample &  sample,
ADAPT_RESULTS *  adapt_results 
)

This routine extracts character normalized features from the unknown character and matches them against the specified set of templates. The classes which match are added to Results.

Parameters
blob: blob to be classified
sample: templates to classify unknown against
adapt_results: place to put match results

Globals:

  • CharNormCutoffs expected num features for each class
  • AllProtosOn mask that enables all protos
  • AllConfigsOn mask that enables all configs

Definition at line 1273 of file adaptmatch.cpp.

1274  {
1275  // This is the length that is used for scaling ratings vs certainty.
1276  adapt_results->BlobLength = IntCastRounded(sample.outline_length() / kStandardFeatureLength);
1277  std::vector<UnicharRating> unichar_results;
1278  static_classifier_->UnicharClassifySample(sample, blob->denorm().pix(), 0, -1, &unichar_results);
1279  // Convert results to the format used internally by AdaptiveClassifier.
1280  for (auto &r : unichar_results) {
1281  AddNewResult(r, adapt_results);
1282  }
1283  return sample.num_features();
1284 } /* CharNormClassifier */
void AddNewResult(const UnicharRating &new_result, ADAPT_RESULTS *results)
Definition: adaptmatch.cpp:986
virtual int UnicharClassifySample(const TrainingSample &sample, Image page_pix, int debug, UNICHAR_ID keep_this, std::vector< UnicharRating > *results)

◆ CharNormTrainingSample()

int tesseract::Classify::CharNormTrainingSample ( bool  pruner_only,
int  keep_this,
const TrainingSample &  sample,
std::vector< UnicharRating > *  results 
)

Definition at line 1288 of file adaptmatch.cpp.

1289  {
1290  results->clear();
1291  std::unique_ptr<ADAPT_RESULTS> adapt_results(new ADAPT_RESULTS());
1292  adapt_results->Initialize();
1293  // Compute the bounding box of the features.
1294  uint32_t num_features = sample.num_features();
1295  // Only the top and bottom of the blob_box are used by MasterMatcher, so
1296  // fabricate right and left using top and bottom.
1297  TBOX blob_box(sample.geo_feature(GeoBottom), sample.geo_feature(GeoBottom),
1298  sample.geo_feature(GeoTop), sample.geo_feature(GeoTop));
1299  // Compute the char_norm_array from the saved cn_feature.
1300  FEATURE norm_feature = sample.GetCNFeature();
1301  std::vector<uint8_t> char_norm_array(unicharset.size());
1302  auto num_pruner_classes = std::max(static_cast<unsigned>(unicharset.size()), PreTrainedTemplates->NumClasses);
1303  std::vector<uint8_t> pruner_norm_array(num_pruner_classes);
1304  adapt_results->BlobLength = static_cast<int>(ActualOutlineLength(norm_feature) * 20 + 0.5f);
1305  ComputeCharNormArrays(norm_feature, PreTrainedTemplates, &char_norm_array[0], &pruner_norm_array[0]);
1306 
1307  PruneClasses(PreTrainedTemplates, num_features, keep_this, sample.features(), &pruner_norm_array[0],
1308  shape_table_ != nullptr ? &shapetable_cutoffs_[0] : CharNormCutoffs,
1309  &adapt_results->CPResults);
1310  if (keep_this >= 0) {
1311  adapt_results->CPResults[0].Class = keep_this;
1312  adapt_results->CPResults.resize(1);
1313  }
1314  if (pruner_only) {
1315  // Convert pruner results to output format.
1316  for (auto &it : adapt_results->CPResults) {
1317  int class_id = it.Class;
1318  results->push_back(UnicharRating(class_id, 1.0f - it.Rating));
1319  }
1320  } else {
1321  MasterMatcher(PreTrainedTemplates, num_features, sample.features(), &char_norm_array[0], nullptr,
1322  matcher_debug_flags, classify_integer_matcher_multiplier, blob_box,
1323  adapt_results->CPResults, adapt_results.get());
1324  // Convert master matcher results to output format.
1325  for (auto &i : adapt_results->match) {
1326  results->push_back(i);
1327  }
1328  if (results->size() > 1) {
1329  std::sort(results->begin(), results->end(), SortDescendingRating);
1330  }
1331  }
1332  return num_features;
1333 } /* CharNormTrainingSample */
@ TBOX
FEATURE_STRUCT * FEATURE
Definition: ocrfeatures.h:68
@ GeoTop
Definition: picofeat.h:37
@ GeoBottom
Definition: picofeat.h:36
float ActualOutlineLength(FEATURE Feature)
Definition: normfeat.cpp:27
ShapeTable * shape_table_
Definition: classify.h:452
INT_TEMPLATES_STRUCT * PreTrainedTemplates
Definition: classify.h:420
void ComputeCharNormArrays(FEATURE_STRUCT *norm_feature, INT_TEMPLATES_STRUCT *templates, uint8_t *char_norm_array, uint8_t *pruner_array)

◆ ClassAndConfigIDToFontOrShapeID()

int tesseract::Classify::ClassAndConfigIDToFontOrShapeID ( int  class_id,
int  int_result_config 
) const

Definition at line 2109 of file adaptmatch.cpp.

2109  {
2110  int font_set_id = PreTrainedTemplates->Class[class_id]->font_set_id;
2111  // Older inttemps have no font_ids.
2112  if (font_set_id < 0) {
2113  return kBlankFontinfoId;
2114  }
2115  const FontSet &fs = fontset_table_.at(font_set_id);
2116  return fs.at(int_result_config);
2117 }
std::vector< int > FontSet
Definition: fontinfo.h:154
const T & at(int id) const
Return the object from an id.
Definition: unicity_table.h:56
UnicityTable< FontSet > fontset_table_
Definition: classify.h:443
INT_CLASS_STRUCT * Class[MAX_NUM_CLASSES]
Definition: intproto.h:111

◆ ClassIDToDebugStr()

std::string tesseract::Classify::ClassIDToDebugStr ( const INT_TEMPLATES_STRUCT *  templates,
int  class_id,
int  config_id 
) const

Definition at line 2096 of file adaptmatch.cpp.

2097  {
2098  std::string class_string;
2099  if (templates == PreTrainedTemplates && shape_table_ != nullptr) {
2100  int shape_id = ClassAndConfigIDToFontOrShapeID(class_id, config_id);
2101  class_string = shape_table_->DebugStr(shape_id);
2102  } else {
2103  class_string = unicharset.debug_str(class_id);
2104  }
2105  return class_string;
2106 }
std::string debug_str(UNICHAR_ID id) const
Definition: unicharset.cpp:331
int ClassAndConfigIDToFontOrShapeID(int class_id, int int_result_config) const
std::string DebugStr(unsigned shape_id) const
Definition: shapetable.cpp:292

◆ ClassifyAsNoise()

void tesseract::Classify::ClassifyAsNoise ( ADAPT_RESULTS *  results )

This routine computes a rating which reflects the likelihood that the blob being classified is a noise blob. NOTE: assumes that the blob length has already been computed and placed into Results.

Parameters
results: results to add noise classification to

Globals:

  • matcher_avg_noise_size avg. length of a noise blob

Definition at line 1347 of file adaptmatch.cpp.

1347  {
1348  float rating = results->BlobLength / matcher_avg_noise_size;
1349  rating *= rating;
1350  rating /= 1 + rating;
1351 
1352  AddNewResult(UnicharRating(UNICHAR_SPACE, 1.0f - rating), results);
1353 } /* ClassifyAsNoise */

◆ ClearCharNormArray()

void tesseract::Classify::ClearCharNormArray ( uint8_t *  char_norm_array)

For each class in the unicharset, clears the corresponding entry in char_norm_array. char_norm_array is indexed by unichar_id.

Globals:

  • none
Parameters
char_norm_array: array to be cleared

Definition at line 41 of file float2int.cpp.

41  {
42  memset(char_norm_array, 0, sizeof(*char_norm_array) * unicharset.size());
43 } /* ClearCharNormArray */

◆ ComputeCharNormArrays()

void tesseract::Classify::ComputeCharNormArrays ( FEATURE_STRUCT *  norm_feature,
INT_TEMPLATES_STRUCT *  templates,
uint8_t *  char_norm_array,
uint8_t *  pruner_array 
)

Definition at line 1629 of file adaptmatch.cpp.

1630  {
1631  ComputeIntCharNormArray(*norm_feature, char_norm_array);
1632  //if (pruner_array != nullptr) {
1633  if (shape_table_ == nullptr) {
1634  ComputeIntCharNormArray(*norm_feature, pruner_array);
1635  } else {
1636  memset(&pruner_array[0], UINT8_MAX, templates->NumClasses * sizeof(pruner_array[0]));
1637  // Each entry in the pruner norm array is the MIN of all the entries of
1638  // the corresponding unichars in the CharNormArray.
1639  for (unsigned id = 0; id < templates->NumClasses; ++id) {
1640  int font_set_id = templates->Class[id]->font_set_id;
1641  const FontSet &fs = fontset_table_.at(font_set_id);
1642  for (auto f : fs) {
1643  const Shape &shape = shape_table_->GetShape(f);
1644  for (int c = 0; c < shape.size(); ++c) {
1645  if (char_norm_array[shape[c].unichar_id] < pruner_array[id]) {
1646  pruner_array[id] = char_norm_array[shape[c].unichar_id];
1647  }
1648  }
1649  }
1650  }
1651  }
1652  //}
1653  delete norm_feature;
1654 }
void ComputeIntCharNormArray(const FEATURE_STRUCT &norm_feature, uint8_t *char_norm_array)
Definition: float2int.cpp:58
const Shape & GetShape(unsigned shape_id) const
Definition: shapetable.h:292

◆ ComputeCorrectedRating()

double tesseract::Classify::ComputeCorrectedRating ( bool  debug,
int  unichar_id,
double  cp_rating,
double  im_rating,
int  feature_misses,
int  bottom,
int  top,
int  blob_length,
int  matcher_multiplier,
const uint8_t *  cn_factors 
)

Definition at line 1171 of file adaptmatch.cpp.

1174  {
1175  // Compute class feature corrections.
1176  double cn_corrected = im_.ApplyCNCorrection(1.0 - im_rating, blob_length, cn_factors[unichar_id],
1177  matcher_multiplier);
1178  double miss_penalty = tessedit_class_miss_scale * feature_misses;
1179  double vertical_penalty = 0.0;
1180  // Penalize non-alnums for being vertical misfits.
1181  if (!unicharset.get_isalpha(unichar_id) && !unicharset.get_isdigit(unichar_id) &&
1182  cn_factors[unichar_id] != 0 && classify_misfit_junk_penalty > 0.0) {
1183  int min_bottom, max_bottom, min_top, max_top;
1184  unicharset.get_top_bottom(unichar_id, &min_bottom, &max_bottom, &min_top, &max_top);
1185  if (debug) {
1186  tprintf("top=%d, vs [%d, %d], bottom=%d, vs [%d, %d]\n", top, min_top, max_top, bottom,
1187  min_bottom, max_bottom);
1188  }
1189  if (top < min_top || top > max_top || bottom < min_bottom || bottom > max_bottom) {
1190  vertical_penalty = classify_misfit_junk_penalty;
1191  }
1192  }
1193  double result = 1.0 - (cn_corrected + miss_penalty + vertical_penalty);
1194  if (result < WORST_POSSIBLE_RATING) {
1195  result = WORST_POSSIBLE_RATING;
1196  }
1197  if (debug) {
1198  tprintf("%s: %2.1f%%(CP%2.1f, IM%2.1f + CN%.2f(%d) + MP%2.1f + VP%2.1f)\n",
1199  unicharset.id_to_unichar(unichar_id), result * 100.0, cp_rating * 100.0,
1200  (1.0 - im_rating) * 100.0, (cn_corrected - (1.0 - im_rating)) * 100.0,
1201  cn_factors[unichar_id], miss_penalty * 100.0, vertical_penalty * 100.0);
1202  }
1203  return result;
1204 }
#define WORST_POSSIBLE_RATING
Definition: adaptmatch.cpp:91
bool get_isalpha(UNICHAR_ID unichar_id) const
Definition: unicharset.h:497
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:279
void get_top_bottom(UNICHAR_ID unichar_id, int *min_bottom, int *max_bottom, int *min_top, int *max_top) const
Definition: unicharset.h:586
bool get_isdigit(UNICHAR_ID unichar_id) const
Definition: unicharset.h:524
float ApplyCNCorrection(float rating, int blob_length, int normalization_factor, int matcher_multiplier)

◆ ComputeIntCharNormArray()

void tesseract::Classify::ComputeIntCharNormArray ( const FEATURE_STRUCT &  norm_feature,
uint8_t *  char_norm_array 
)

For each class in unicharset, computes the match between norm_feature and the normalization protos for that class. Converts this number to the range from 0 - 255 and stores it into char_norm_array. CharNormArray is indexed by unichar_id.

Globals:

  • PreTrainedTemplates current set of built-in templates
Parameters
norm_feature: character normalization feature
[out] char_norm_array: place to put results of size unicharset.size()

Definition at line 58 of file float2int.cpp.

59  {
60  for (unsigned i = 0; i < unicharset.size(); i++) {
61  if (i < PreTrainedTemplates->NumClasses) {
62  int norm_adjust =
63  static_cast<int>(INT_CHAR_NORM_RANGE * ComputeNormMatch(i, norm_feature, false));
64  char_norm_array[i] = ClipToRange(norm_adjust, 0, MAX_INT_CHAR_NORM);
65  } else {
66  // Classes with no templates (eg. ambigs & ligatures) default
67  // to worst match.
68  char_norm_array[i] = MAX_INT_CHAR_NORM;
69  }
70  }
71 } /* ComputeIntCharNormArray */
#define MAX_INT_CHAR_NORM
Definition: float2int.cpp:27
#define INT_CHAR_NORM_RANGE
Definition: intproto.h:117
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:110
float ComputeNormMatch(CLASS_ID ClassId, const FEATURE_STRUCT &feature, bool DebugMatch)
Definition: normmatch.cpp:94

◆ ComputeIntFeatures()

void tesseract::Classify::ComputeIntFeatures ( FEATURE_SET  Features,
INT_FEATURE_ARRAY  IntFeatures 
)

This routine converts each floating point pico-feature in Features into integer format and saves it into IntFeatures.

Globals:

  • none
Parameters
Features: floating point pico-features to be converted
[out] IntFeatures: array to put converted features into

Definition at line 85 of file float2int.cpp.

85  {
86  float YShift;
87 
88  if (classify_norm_method == baseline) {
89  YShift = BASELINE_Y_SHIFT;
90  } else {
91  YShift = Y_SHIFT;
92  }
93 
94  for (int Fid = 0; Fid < Features->NumFeatures; Fid++) {
95  FEATURE Feature = Features->Features[Fid];
96 
97  IntFeatures[Fid].X = Bucket8For(Feature->Params[PicoFeatX], X_SHIFT, INT_FEAT_RANGE);
98  IntFeatures[Fid].Y = Bucket8For(Feature->Params[PicoFeatY], YShift, INT_FEAT_RANGE);
99  IntFeatures[Fid].Theta =
100  CircBucketFor(Feature->Params[PicoFeatDir], ANGLE_SHIFT, INT_FEAT_RANGE);
101  IntFeatures[Fid].CP_misses = 0;
102  }
103 } /* ComputeIntFeatures */
#define BASELINE_Y_SHIFT
Definition: float2int.h:28
#define INT_FEAT_RANGE
Definition: float2int.h:27
#define ANGLE_SHIFT
Definition: intproto.h:40
#define X_SHIFT
Definition: intproto.h:41
#define Y_SHIFT
Definition: intproto.h:42
uint8_t Bucket8For(float param, float offset, int num_buckets)
Definition: intproto.cpp:385
@ PicoFeatDir
Definition: picofeat.h:43
@ PicoFeatX
Definition: picofeat.h:43
@ PicoFeatY
Definition: picofeat.h:43
@ baseline
Definition: mfoutline.h:53
uint8_t CircBucketFor(float param, float offset, int num_buckets)
Definition: intproto.cpp:399

◆ ComputeNormMatch()

float tesseract::Classify::ComputeNormMatch ( CLASS_ID  ClassId,
const FEATURE_STRUCT &  feature,
bool  DebugMatch 
)

This routine compares Features against each character normalization proto for ClassId and returns the match rating of the best match.

Parameters
ClassId: id of class to match against
feature: character normalization feature
DebugMatch: controls dump of debug info

Globals: NormProtos character normalization prototypes

Returns
Best match rating for Feature against protos of ClassId.

Definition at line 94 of file normmatch.cpp.

94  {
95  if (ClassId >= NormProtos->NumProtos) {
96  ClassId = NO_CLASS;
97  }
98 
99  /* handle requests for classification as noise */
100  if (ClassId == NO_CLASS) {
101  /* kludge - clean up constants and make into control knobs later */
102  float Match = (feature.Params[CharNormLength] * feature.Params[CharNormLength] * 500.0f +
103  feature.Params[CharNormRx] * feature.Params[CharNormRx] * 8000.0f +
104  feature.Params[CharNormRy] * feature.Params[CharNormRy] * 8000.0f);
105  return (1.0f - NormEvidenceOf(Match));
106  }
107 
108  float BestMatch = FLT_MAX;
109  LIST Protos = NormProtos->Protos[ClassId];
110 
111  if (DebugMatch) {
112  tprintf("\nChar norm for class %s\n", unicharset.id_to_unichar(ClassId));
113  }
114 
115  int ProtoId = 0;
116  iterate(Protos) {
117  auto Proto = reinterpret_cast<PROTOTYPE *>(Protos->first_node());
118  float Delta = feature.Params[CharNormY] - Proto->Mean[CharNormY];
119  float Match = Delta * Delta * Proto->Weight.Elliptical[CharNormY];
120  if (DebugMatch) {
121  tprintf("YMiddle: Proto=%g, Delta=%g, Var=%g, Dist=%g\n", Proto->Mean[CharNormY], Delta,
122  Proto->Weight.Elliptical[CharNormY], Match);
123  }
124  Delta = feature.Params[CharNormRx] - Proto->Mean[CharNormRx];
125  Match += Delta * Delta * Proto->Weight.Elliptical[CharNormRx];
126  if (DebugMatch) {
127  tprintf("Height: Proto=%g, Delta=%g, Var=%g, Dist=%g\n", Proto->Mean[CharNormRx], Delta,
128  Proto->Weight.Elliptical[CharNormRx], Match);
129  }
130  // Ry is width! See intfx.cpp.
131  Delta = feature.Params[CharNormRy] - Proto->Mean[CharNormRy];
132  if (DebugMatch) {
133  tprintf("Width: Proto=%g, Delta=%g, Var=%g\n", Proto->Mean[CharNormRy], Delta,
134  Proto->Weight.Elliptical[CharNormRy]);
135  }
136  Delta = Delta * Delta * Proto->Weight.Elliptical[CharNormRy];
137  Delta *= kWidthErrorWeighting;
138  Match += Delta;
139  if (DebugMatch) {
140  tprintf("Total Dist=%g, scaled=%g, sigmoid=%g, penalty=%g\n", Match,
141  Match / classify_norm_adj_midpoint, NormEvidenceOf(Match),
142  256 * (1 - NormEvidenceOf(Match)));
143  }
144 
145  if (Match < BestMatch) {
146  BestMatch = Match;
147  }
148 
149  ProtoId++;
150  }
151  return 1.0 - NormEvidenceOf(BestMatch);
152 } /* ComputeNormMatch */
#define iterate(l)
Definition: oldlist.h:91
#define NO_CLASS
Definition: matchdefs.h:35
list_rec * LIST
Definition: oldlist.h:125
const double kWidthErrorWeighting
Definition: normmatch.cpp:76
double classify_norm_adj_midpoint
Definition: normmatch.cpp:73
@ CharNormLength
Definition: normfeat.h:30
@ CharNormRy
Definition: normfeat.h:30
@ CharNormY
Definition: normfeat.h:30
@ CharNormRx
Definition: normfeat.h:30
NORM_PROTOS * NormProtos
Definition: classify.h:433
std::vector< LIST > Protos
Definition: normmatch.cpp:41

◆ ConvertMatchesToChoices()

void tesseract::Classify::ConvertMatchesToChoices ( const DENORM &  denorm,
const TBOX &  box,
ADAPT_RESULTS *  Results,
BLOB_CHOICE_LIST *  Choices 
)

The function converts the given match ratings to the list of blob choices with ratings and certainties (used by the context checkers). If character fragments are present in the results, this function also makes sure that there is at least one non-fragmented classification included. For each classification result check the unicharset for "definite" ambiguities and modify the resulting Choices accordingly.

Definition at line 1361 of file adaptmatch.cpp.

1362  {
1363  assert(Choices != nullptr);
1364  float Rating;
1365  float Certainty;
1366  BLOB_CHOICE_IT temp_it;
1367  bool contains_nonfrag = false;
1368  temp_it.set_to_list(Choices);
1369  int choices_length = 0;
1370  // With no shape_table_ maintain the previous MAX_MATCHES as the maximum
1371  // number of returned results, but with a shape_table_ we want to have room
1372  // for at least the biggest shape (which might contain hundreds of Indic
1373  // grapheme fragments) and more, so use double the size of the biggest shape
1374  // if that is more than the default.
1375  int max_matches = MAX_MATCHES;
1376  if (shape_table_ != nullptr) {
1377  max_matches = shape_table_->MaxNumUnichars() * 2;
1378  if (max_matches < MAX_MATCHES) {
1379  max_matches = MAX_MATCHES;
1380  }
1381  }
1382 
1383  float best_certainty = -FLT_MAX;
1384  for (auto &it : Results->match) {
1385  const UnicharRating &result = it;
1386  bool adapted = result.adapted;
1387  bool current_is_frag = (unicharset.get_fragment(result.unichar_id) != nullptr);
1388  if (temp_it.length() + 1 == max_matches && !contains_nonfrag && current_is_frag) {
1389  continue; // look for a non-fragmented character to fill the
1390  // last spot in Choices if only fragments are present
1391  }
1392  // BlobLength can never be legally 0, this means recognition failed.
1393  // But we must return a classification result because some invoking
1394  // functions (chopper/permuter) do not anticipate a null blob choice.
1395  // So we need to assign a poor, but not infinitely bad score.
1396  if (Results->BlobLength == 0) {
1397  Certainty = -20;
1398  Rating = 100; // should be -certainty * real_blob_length
1399  } else {
1400  Rating = Certainty = (1.0f - result.rating);
1401  Rating *= rating_scale * Results->BlobLength;
1402  Certainty *= -(getDict().certainty_scale);
1403  }
1404  // Adapted results, by their very nature, should have good certainty.
1405  // Those that don't are at best misleading, and often lead to errors,
1406  // so don't accept adapted results that are too far behind the best result,
1407  // whether adapted or static.
1408  // TODO(rays) find some way of automatically tuning these constants.
1409  if (Certainty > best_certainty) {
1410  best_certainty = std::min(Certainty, static_cast<float>(classify_adapted_pruning_threshold));
1411  } else if (adapted && Certainty / classify_adapted_pruning_factor < best_certainty) {
1412  continue; // Don't accept bad adapted results.
1413  }
1414 
1415  float min_xheight, max_xheight, yshift;
1416  denorm.XHeightRange(result.unichar_id, unicharset, box, &min_xheight, &max_xheight, &yshift);
1417  auto *choice = new BLOB_CHOICE(
1418  result.unichar_id, Rating, Certainty, unicharset.get_script(result.unichar_id), min_xheight,
1419  max_xheight, yshift, adapted ? BCC_ADAPTED_CLASSIFIER : BCC_STATIC_CLASSIFIER);
1420  choice->set_fonts(result.fonts);
1421  temp_it.add_to_end(choice);
1422  contains_nonfrag |= !current_is_frag; // update contains_nonfrag
1423  choices_length++;
1424  if (choices_length >= max_matches) {
1425  break;
1426  }
1427  }
1428  Results->match.resize(choices_length);
1429 } // ConvertMatchesToChoices
#define MAX_MATCHES
Definition: adaptmatch.cpp:82
@ BCC_STATIC_CLASSIFIER
Definition: ratngs.h:49
@ BCC_ADAPTED_CLASSIFIER
Definition: ratngs.h:50
int get_script(UNICHAR_ID unichar_id) const
Definition: unicharset.h:682
int MaxNumUnichars() const
Definition: shapetable.cpp:472

◆ ConvertProto()

void tesseract::Classify::ConvertProto ( PROTO_STRUCT *  Proto,
int  ProtoId,
INT_CLASS_STRUCT *  Class 
)

This routine converts Proto to integer format and installs it as ProtoId in Class.

Parameters
Proto: floating-pt proto to be converted to integer format
ProtoId: id of proto
Class: integer class to add converted proto to

Definition at line 452 of file intproto.cpp.

452  {
453  assert(ProtoId < Class->NumProtos);
454 
455  INT_PROTO_STRUCT *P = ProtoForProtoId(Class, ProtoId);
456 
457  float Param = Proto->A * 128;
458  P->A = TruncateParam(Param, -128, 127);
459 
460  Param = -Proto->B * 256;
461  P->B = TruncateParam(Param, 0, 255);
462 
463  Param = Proto->C * 128;
464  P->C = TruncateParam(Param, -128, 127);
465 
466  Param = Proto->Angle * 256;
467  if (Param < 0 || Param >= 256) {
468  P->Angle = 0;
469  } else {
470  P->Angle = static_cast<uint8_t>(Param);
471  }
472 
473  /* round proto length to nearest integer number of pico-features */
474  Param = (Proto->Length / GetPicoFeatureLength()) + 0.5;
475  Class->ProtoLengths[ProtoId] = TruncateParam(Param, 1, 255);
476  if (classify_learning_debug_level >= 2) {
477  tprintf("Converted ffeat to (A=%d,B=%d,C=%d,L=%d)", P->A, P->B, P->C,
478  Class->ProtoLengths[ProtoId]);
479  }
480 } /* ConvertProto */
#define ProtoForProtoId(C, P)
Definition: intproto.h:148
#define GetPicoFeatureLength()
Definition: picofeat.h:56

◆ CreateIntTemplates()

INT_TEMPLATES_STRUCT * tesseract::Classify::CreateIntTemplates ( CLASSES  FloatProtos,
const UNICHARSET &  target_unicharset 
)

This routine converts from the old floating point format to the new integer format.

Parameters
FloatProtos: prototypes in old floating pt format
target_unicharset: the UNICHARSET to use
Returns
New set of training templates in integer format.
Note
Globals: none

Definition at line 490 of file intproto.cpp.

491  {
492  CLASS_TYPE FClass;
493  INT_CLASS_STRUCT *IClass;
494  int ProtoId;
495  int ConfigId;
496 
497  auto IntTemplates = new INT_TEMPLATES_STRUCT;
498 
499  for (unsigned ClassId = 0; ClassId < target_unicharset.size(); ClassId++) {
500  FClass = &(FloatProtos[ClassId]);
501  if (FClass->NumProtos == 0 && FClass->NumConfigs == 0 &&
502  strcmp(target_unicharset.id_to_unichar(ClassId), " ") != 0) {
503  tprintf("Warning: no protos/configs for %s in CreateIntTemplates()\n",
504  target_unicharset.id_to_unichar(ClassId));
505  }
506  assert(UnusedClassIdIn(IntTemplates, ClassId));
507  IClass = new INT_CLASS_STRUCT(FClass->NumProtos, FClass->NumConfigs);
508  FontSet fs{FClass->font_set.size()};
509  for (unsigned i = 0; i < fs.size(); ++i) {
510  fs[i] = FClass->font_set.at(i);
511  }
512  IClass->font_set_id = this->fontset_table_.push_back(fs);
513  AddIntClass(IntTemplates, ClassId, IClass);
514 
515  for (ProtoId = 0; ProtoId < FClass->NumProtos; ProtoId++) {
516  AddIntProto(IClass);
517  ConvertProto(ProtoIn(FClass, ProtoId), ProtoId, IClass);
518  AddProtoToProtoPruner(ProtoIn(FClass, ProtoId), ProtoId, IClass,
519  classify_learning_debug_level >= 2);
520  AddProtoToClassPruner(ProtoIn(FClass, ProtoId), ClassId, IntTemplates);
521  }
522 
523  for (ConfigId = 0; ConfigId < FClass->NumConfigs; ConfigId++) {
524  AddIntConfig(IClass);
525  ConvertConfig(FClass->Configurations[ConfigId], ConfigId, IClass);
526  }
527  }
528  return (IntTemplates);
529 } /* CreateIntTemplates */
#define UnusedClassIdIn(T, c)
Definition: intproto.h:155
#define ProtoIn(Class, Pid)
Definition: protos.h:70
void AddIntClass(INT_TEMPLATES_STRUCT *Templates, CLASS_ID ClassId, INT_CLASS_STRUCT *Class)
Definition: intproto.cpp:220
void AddProtoToProtoPruner(PROTO_STRUCT *Proto, int ProtoId, INT_CLASS_STRUCT *Class, bool debug)
Definition: intproto.cpp:344
void ConvertConfig(BIT_VECTOR Config, int ConfigId, INT_CLASS_STRUCT *Class)
Definition: intproto.cpp:430
CLASS_STRUCT * CLASS_TYPE
Definition: protos.h:49
void AddProtoToClassPruner(PROTO_STRUCT *Proto, CLASS_ID ClassId, INT_TEMPLATES_STRUCT *Templates)
Definition: intproto.cpp:306
int AddIntConfig(INT_CLASS_STRUCT *Class)
Definition: intproto.cpp:250
int AddIntProto(INT_CLASS_STRUCT *Class)
Definition: intproto.cpp:270
int push_back(T object)
Add an element in the table.
Definition: unicity_table.h:73
void ConvertProto(PROTO_STRUCT *Proto, int ProtoId, INT_CLASS_STRUCT *Class)
Definition: intproto.cpp:452

◆ DebugAdaptiveClassifier()

void tesseract::Classify::DebugAdaptiveClassifier ( TBLOB * blob,
ADAPT_RESULTS * Results
)
Parameters
blob: blob whose classification is being debugged
Results: results of match being debugged

Globals: none

Definition at line 1440 of file adaptmatch.cpp.

1440  {
1441  if (static_classifier_ == nullptr) {
1442  return;
1443  }
1444  INT_FX_RESULT_STRUCT fx_info;
1445  std::vector<INT_FEATURE_STRUCT> bl_features;
1446  TrainingSample *sample = BlobToTrainingSample(*blob, false, &fx_info, &bl_features);
1447  if (sample == nullptr) {
1448  return;
1449  }
1450  static_classifier_->DebugDisplay(*sample, blob->denorm().pix(), Results->best_unichar_id);
1451 } /* DebugAdaptiveClassifier */
TrainingSample * BlobToTrainingSample(const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, std::vector< INT_FEATURE_STRUCT > *bl_features)
Definition: intfx.cpp:79
void DebugDisplay(const TrainingSample &sample, Image page_pix, UNICHAR_ID unichar_id)

◆ DisplayAdaptedChar()

void tesseract::Classify::DisplayAdaptedChar ( TBLOB blob,
INT_CLASS_STRUCT int_class 
)

Definition at line 940 of file adaptmatch.cpp.

940  {
941  INT_FX_RESULT_STRUCT fx_info;
942  std::vector<INT_FEATURE_STRUCT> bl_features;
943  TrainingSample *sample =
944  BlobToTrainingSample(*blob, classify_nonlinear_norm, &fx_info, &bl_features);
945  if (sample == nullptr) {
946  return;
947  }
948 
949  UnicharRating int_result;
950  im_.Match(int_class, AllProtosOn, AllConfigsOn, bl_features.size(), &bl_features[0], &int_result,
951  classify_adapt_feature_threshold, NO_DEBUG, matcher_debug_separate_windows);
952  tprintf("Best match to temp config %d = %4.1f%%.\n", int_result.config,
953  int_result.rating * 100.0);
954  if (classify_learning_debug_level >= 2) {
955  uint32_t ConfigMask;
956  ConfigMask = 1 << int_result.config;
958  im_.Match(int_class, AllProtosOn, static_cast<BIT_VECTOR>(&ConfigMask), bl_features.size(),
959  &bl_features[0], &int_result, classify_adapt_feature_threshold, 6 | 0x19,
960  matcher_debug_separate_windows);
962  }
963 
964  delete sample;
965 }
void UpdateMatchDisplay()
Definition: intproto.cpp:413

◆ DoAdaptiveMatch()

void tesseract::Classify::DoAdaptiveMatch ( TBLOB * Blob,
ADAPT_RESULTS * Results
)

This routine performs an adaptive classification. If we have not yet adapted to enough classes, a simple classification to the pre-trained templates is performed. Otherwise, we match the blob against the adapted templates. If the adapted templates do not match well, we try a match against the pre-trained templates. If an adapted template match is found, we do a match to any pre-trained templates which could be ambiguous. The results from all of these classifications are merged together into Results.

Parameters
Blob: blob to be classified
Results: place to put match results

Globals:

  • PreTrainedTemplates: built-in training templates
  • AdaptedTemplates: templates adapted for this page
  • matcher_reliable_adaptive_result: rating limit for a great match

Definition at line 1474 of file adaptmatch.cpp.

1474  {
1475  UNICHAR_ID *Ambiguities;
1476 
1477  INT_FX_RESULT_STRUCT fx_info;
1478  std::vector<INT_FEATURE_STRUCT> bl_features;
1479  TrainingSample *sample =
1480  BlobToTrainingSample(*Blob, classify_nonlinear_norm, &fx_info, &bl_features);
1481  if (sample == nullptr) {
1482  return;
1483  }
1484 
1485  // TODO: With LSTM, static_classifier_ is nullptr.
1486  // Return to avoid crash in CharNormClassifier.
1487  if (static_classifier_ == nullptr) {
1488  delete sample;
1489  return;
1490  }
1491 
1492  if (AdaptedTemplates->NumPermClasses < matcher_permanent_classes_min || tess_cn_matching) {
1493  CharNormClassifier(Blob, *sample, Results);
1494  } else {
1495  Ambiguities = BaselineClassifier(Blob, bl_features, fx_info, AdaptedTemplates, Results);
1496  if ((!Results->match.empty() &&
1497  MarginalMatch(Results->best_rating, matcher_reliable_adaptive_result) &&
1498  !tess_bn_matching) ||
1499  Results->match.empty()) {
1500  CharNormClassifier(Blob, *sample, Results);
1501  } else if (Ambiguities && *Ambiguities >= 0 && !tess_bn_matching) {
1502  AmbigClassifier(bl_features, fx_info, Blob, PreTrainedTemplates, AdaptedTemplates->Class,
1503  Ambiguities, Results);
1504  }
1505  }
1506 
1507  // Force the blob to be classified as noise
1508  // if the results contain only fragments.
1509  // TODO(daria): verify that this is better than
1510  // just adding a nullptr classification.
1511  if (!Results->HasNonfragment || Results->match.empty()) {
1512  ClassifyAsNoise(Results);
1513  }
1514  delete sample;
1515 } /* DoAdaptiveMatch */
bool MarginalMatch(float confidence, float matcher_great_threshold)
Definition: adaptmatch.cpp:142
int UNICHAR_ID
Definition: unichar.h:36
ADAPT_CLASS_STRUCT * Class[MAX_NUM_CLASSES]
Definition: adaptive.h:75
UNICHAR_ID * BaselineClassifier(TBLOB *Blob, const std::vector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, ADAPT_TEMPLATES_STRUCT *Templates, ADAPT_RESULTS *Results)
int CharNormClassifier(TBLOB *blob, const TrainingSample &sample, ADAPT_RESULTS *adapt_results)
void ClassifyAsNoise(ADAPT_RESULTS *Results)
void AmbigClassifier(const std::vector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, const TBLOB *blob, INT_TEMPLATES_STRUCT *templates, ADAPT_CLASS_STRUCT **classes, UNICHAR_ID *ambiguities, ADAPT_RESULTS *results)

◆ double_VAR_H() [1/19]

tesseract::Classify::double_VAR_H ( certainty_scale  )

◆ double_VAR_H() [2/19]

tesseract::Classify::double_VAR_H ( classify_adapted_pruning_factor  )

◆ double_VAR_H() [3/19]

tesseract::Classify::double_VAR_H ( classify_adapted_pruning_threshold  )

◆ double_VAR_H() [4/19]

tesseract::Classify::double_VAR_H ( classify_char_norm_range  )

◆ double_VAR_H() [5/19]

tesseract::Classify::double_VAR_H ( classify_character_fragments_garbage_certainty_threshold  )

◆ double_VAR_H() [6/19]

tesseract::Classify::double_VAR_H ( classify_max_certainty_margin  )

◆ double_VAR_H() [7/19]

tesseract::Classify::double_VAR_H ( classify_max_rating_ratio  )

◆ double_VAR_H() [8/19]

tesseract::Classify::double_VAR_H ( classify_misfit_junk_penalty  )

◆ double_VAR_H() [9/19]

tesseract::Classify::double_VAR_H ( matcher_avg_noise_size  )

◆ double_VAR_H() [10/19]

tesseract::Classify::double_VAR_H ( matcher_bad_match_pad  )

◆ double_VAR_H() [11/19]

tesseract::Classify::double_VAR_H ( matcher_clustering_max_angle_delta  )

◆ double_VAR_H() [12/19]

tesseract::Classify::double_VAR_H ( matcher_good_threshold  )

◆ double_VAR_H() [13/19]

tesseract::Classify::double_VAR_H ( matcher_perfect_threshold  )

◆ double_VAR_H() [14/19]

tesseract::Classify::double_VAR_H ( matcher_rating_margin  )

◆ double_VAR_H() [15/19]

tesseract::Classify::double_VAR_H ( matcher_reliable_adaptive_result  )

◆ double_VAR_H() [16/19]

tesseract::Classify::double_VAR_H ( rating_scale  )

◆ double_VAR_H() [17/19]

tesseract::Classify::double_VAR_H ( speckle_large_max_size  )

◆ double_VAR_H() [18/19]

tesseract::Classify::double_VAR_H ( speckle_rating_penalty  )

◆ double_VAR_H() [19/19]

tesseract::Classify::double_VAR_H ( tessedit_class_miss_scale  )

◆ EndAdaptiveClassifier()

void tesseract::Classify::EndAdaptiveClassifier ( )

This routine performs cleanup operations on the adaptive classifier. It should be called before the program is terminated. Its main function is to save the adapted templates to a file.

Globals:

Definition at line 464 of file adaptmatch.cpp.

464  {
465  std::string Filename;
466  FILE *File;
467 
469  classify_save_adapted_templates) {
470  Filename = imagefile + ADAPT_TEMPLATE_SUFFIX;
471  File = fopen(Filename.c_str(), "wb");
472  if (File == nullptr) {
473  tprintf("Unable to save adapted templates to %s!\n", Filename.c_str());
474  } else {
475  tprintf("\nSaving adapted templates to %s ...", Filename.c_str());
476  fflush(stdout);
478  tprintf("\n");
479  fclose(File);
480  }
481  }
482 
483  delete AdaptedTemplates;
484  AdaptedTemplates = nullptr;
485  delete BackupAdaptedTemplates;
486  BackupAdaptedTemplates = nullptr;
487 
488  if (PreTrainedTemplates != nullptr) {
489  delete PreTrainedTemplates;
490  PreTrainedTemplates = nullptr;
491  }
493  FreeNormProtos();
494  if (AllProtosOn != nullptr) {
495  FreeBitVector(AllProtosOn);
496  FreeBitVector(AllConfigsOn);
497  FreeBitVector(AllConfigsOff);
498  FreeBitVector(TempProtoMask);
499  AllProtosOn = nullptr;
500  AllConfigsOn = nullptr;
501  AllConfigsOff = nullptr;
502  TempProtoMask = nullptr;
503  }
504  delete shape_table_;
505  shape_table_ = nullptr;
506  delete static_classifier_;
507  static_classifier_ = nullptr;
508 } /* EndAdaptiveClassifier */
#define ADAPT_TEMPLATE_SUFFIX
Definition: adaptmatch.cpp:80
std::string imagefile
Definition: ccutil.h:65
BIT_VECTOR TempProtoMask
Definition: classify.h:431
void WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES_STRUCT *Templates)
Definition: adaptive.cpp:345
BIT_VECTOR AllConfigsOff
Definition: classify.h:430
ADAPT_TEMPLATES_STRUCT * BackupAdaptedTemplates
Definition: classify.h:425
void EndDangerousAmbigs()
Definition: stopper.cpp:358

◆ ExpandShapesAndApplyCorrections()

void tesseract::Classify::ExpandShapesAndApplyCorrections ( ADAPT_CLASS_STRUCT **  classes,
bool  debug,
int  class_id,
int  bottom,
int  top,
float  cp_rating,
int  blob_length,
int  matcher_multiplier,
const uint8_t *  cn_factors,
UnicharRating int_result,
ADAPT_RESULTS final_results 
)

Definition at line 1102 of file adaptmatch.cpp.

1106  {
1107  if (classes != nullptr) {
1108  // Adapted result. Convert configs to fontinfo_ids.
1109  int_result->adapted = true;
1110  for (auto &font : int_result->fonts) {
1111  font.fontinfo_id = GetFontinfoId(classes[class_id], font.fontinfo_id);
1112  }
1113  } else {
1114  // Pre-trained result. Map fonts using font_sets_.
1115  int_result->adapted = false;
1116  for (auto &font : int_result->fonts) {
1117  font.fontinfo_id = ClassAndConfigIDToFontOrShapeID(class_id, font.fontinfo_id);
1118  }
1119  if (shape_table_ != nullptr) {
1120  // Two possible cases:
1121  // 1. Flat shapetable. All unichar-ids of the shapes referenced by
1122  // int_result->fonts are the same. In this case build a new vector of
1123  // mapped fonts and replace the fonts in int_result.
1124  // 2. Multi-unichar shapetable. Variable unichars in the shapes referenced
1125  // by int_result. In this case, build a vector of UnicharRating to
1126  // gather together different font-ids for each unichar. Also covers case1.
1127  std::vector<UnicharRating> mapped_results;
1128  for (auto &f : int_result->fonts) {
1129  int shape_id = f.fontinfo_id;
1130  const Shape &shape = shape_table_->GetShape(shape_id);
1131  for (int c = 0; c < shape.size(); ++c) {
1132  int unichar_id = shape[c].unichar_id;
1133  if (!unicharset.get_enabled(unichar_id)) {
1134  continue;
1135  }
1136  // Find the mapped_result for unichar_id.
1137  unsigned r = 0;
1138  for (r = 0; r < mapped_results.size() && mapped_results[r].unichar_id != unichar_id;
1139  ++r) {
1140  }
1141  if (r == mapped_results.size()) {
1142  mapped_results.push_back(*int_result);
1143  mapped_results[r].unichar_id = unichar_id;
1144  mapped_results[r].fonts.clear();
1145  }
1146  for (int font_id : shape[c].font_ids) {
1147  mapped_results[r].fonts.emplace_back(font_id, f.score);
1148  }
1149  }
1150  }
1151  for (auto &m : mapped_results) {
1152  m.rating = ComputeCorrectedRating(
1153  debug, m.unichar_id, cp_rating, int_result->rating,
1154  int_result->feature_misses, bottom, top, blob_length, matcher_multiplier, cn_factors);
1155  AddNewResult(m, final_results);
1156  }
1157  return;
1158  }
1159  }
1160  if (unicharset.get_enabled(class_id)) {
1161  int_result->rating = ComputeCorrectedRating(debug, class_id, cp_rating, int_result->rating,
1162  int_result->feature_misses, bottom, top,
1163  blob_length, matcher_multiplier, cn_factors);
1164  AddNewResult(*int_result, final_results);
1165  }
1166 }
bool get_enabled(UNICHAR_ID unichar_id) const
Definition: unicharset.h:912
double ComputeCorrectedRating(bool debug, int unichar_id, double cp_rating, double im_rating, int feature_misses, int bottom, int top, int blob_length, int matcher_multiplier, const uint8_t *cn_factors)

◆ ExtractFeatures()

void tesseract::Classify::ExtractFeatures ( const TBLOB blob,
bool  nonlinear_norm,
std::vector< INT_FEATURE_STRUCT > *  bl_features,
std::vector< INT_FEATURE_STRUCT > *  cn_features,
INT_FX_RESULT_STRUCT results,
std::vector< int > *  outline_cn_counts 
)
static

Definition at line 436 of file intfx.cpp.

440  {
441  DENORM bl_denorm, cn_denorm;
442  tesseract::Classify::SetupBLCNDenorms(blob, nonlinear_norm, &bl_denorm, &cn_denorm, results);
443  if (outline_cn_counts != nullptr) {
444  outline_cn_counts->clear();
445  }
446  // Iterate the outlines.
447  for (TESSLINE *ol = blob.outlines; ol != nullptr; ol = ol->next) {
448  // Iterate the polygon.
449  EDGEPT *loop_pt = ol->FindBestStartPt();
450  EDGEPT *pt = loop_pt;
451  if (pt == nullptr) {
452  continue;
453  }
454  do {
455  if (pt->IsHidden()) {
456  continue;
457  }
458  // Find a run of equal src_outline.
459  EDGEPT *last_pt = pt;
460  do {
461  last_pt = last_pt->next;
462  } while (last_pt != loop_pt && !last_pt->IsHidden() &&
463  last_pt->src_outline == pt->src_outline);
464  last_pt = last_pt->prev;
465  // Until the adaptive classifier can be weaned off polygon segments,
466  // we have to force extraction from the polygon for the bl_features.
467  ExtractFeaturesFromRun(pt, last_pt, bl_denorm, kStandardFeatureLength, true, bl_features);
468  ExtractFeaturesFromRun(pt, last_pt, cn_denorm, kStandardFeatureLength, false, cn_features);
469  pt = last_pt;
470  } while ((pt = pt->next) != loop_pt);
471  if (outline_cn_counts != nullptr) {
472  outline_cn_counts->push_back(cn_features->size());
473  }
474  }
475  results->NumBL = bl_features->size();
476  results->NumCN = cn_features->size();
477  results->YBottom = blob.bounding_box().bottom();
478  results->YTop = blob.bounding_box().top();
479  results->Width = blob.bounding_box().width();
480 }
static void SetupBLCNDenorms(const TBLOB &blob, bool nonlinear_norm, DENORM *bl_denorm, DENORM *cn_denorm, INT_FX_RESULT_STRUCT *fx_info)
Definition: intfx.cpp:129

◆ ExtractIntCNFeatures()

FEATURE_SET tesseract::Classify::ExtractIntCNFeatures ( const TBLOB blob,
const INT_FX_RESULT_STRUCT fx_info 
)
Parameters
blob: blob to extract features from
fx_info
Returns
Integer character-normalized features for blob.

Definition at line 204 of file picofeat.cpp.

204  {
205  INT_FX_RESULT_STRUCT local_fx_info(fx_info);
206  std::vector<INT_FEATURE_STRUCT> bl_features;
207  tesseract::TrainingSample *sample =
208  tesseract::BlobToTrainingSample(blob, false, &local_fx_info, &bl_features);
209  if (sample == nullptr) {
210  return nullptr;
211  }
212 
213  uint32_t num_features = sample->num_features();
214  const INT_FEATURE_STRUCT *features = sample->features();
215  auto feature_set = new FEATURE_SET_STRUCT(num_features);
216  for (uint32_t f = 0; f < num_features; ++f) {
217  auto feature = new FEATURE_STRUCT(&IntFeatDesc);
218  feature->Params[IntX] = features[f].X;
219  feature->Params[IntY] = features[f].Y;
220  feature->Params[IntDir] = features[f].Theta;
221  AddFeature(feature_set, feature);
222  }
223  delete sample;
224 
225  return feature_set;
226 } /* ExtractIntCNFeatures */
const FEATURE_DESC_STRUCT IntFeatDesc
bool AddFeature(FEATURE_SET FeatureSet, FEATURE Feature)
Definition: ocrfeatures.cpp:39
@ IntDir
Definition: picofeat.h:31
uint32_t num_features() const
const INT_FEATURE_STRUCT * features() const

◆ ExtractIntGeoFeatures()

FEATURE_SET tesseract::Classify::ExtractIntGeoFeatures ( const TBLOB blob,
const INT_FX_RESULT_STRUCT fx_info 
)
Parameters
blob: blob to extract features from
fx_info
Returns
Geometric (top/bottom/width) features for blob.

Definition at line 234 of file picofeat.cpp.

235  {
236  INT_FX_RESULT_STRUCT local_fx_info(fx_info);
237  std::vector<INT_FEATURE_STRUCT> bl_features;
238  tesseract::TrainingSample *sample =
239  tesseract::BlobToTrainingSample(blob, false, &local_fx_info, &bl_features);
240  if (sample == nullptr) {
241  return nullptr;
242  }
243 
244  auto feature_set = new FEATURE_SET_STRUCT(1);
245  auto feature = new FEATURE_STRUCT(&IntFeatDesc);
246 
247  feature->Params[GeoBottom] = sample->geo_feature(GeoBottom);
248  feature->Params[GeoTop] = sample->geo_feature(GeoTop);
249  feature->Params[GeoWidth] = sample->geo_feature(GeoWidth);
250  AddFeature(feature_set, feature);
251  delete sample;
252 
253  return feature_set;
254 } /* ExtractIntGeoFeatures */
@ GeoWidth
Definition: picofeat.h:38
int geo_feature(int index) const

◆ ExtractOutlineFeatures()

FEATURE_SET tesseract::Classify::ExtractOutlineFeatures ( TBLOB Blob)

Convert each segment in the outline to a feature and return the features.

Parameters
Blob: blob to extract outline-features from
Returns
Outline-features for Blob.
Note
Globals: none

Definition at line 40 of file outfeat.cpp.

40  {
41  auto FeatureSet = new FEATURE_SET_STRUCT(MAX_OUTLINE_FEATURES);
42  if (Blob == nullptr) {
43  return (FeatureSet);
44  }
45 
46  auto Outlines = ConvertBlob(Blob);
47 
48  float XScale, YScale;
49  NormalizeOutlines(Outlines, &XScale, &YScale);
50  auto RemainingOutlines = Outlines;
51  iterate(RemainingOutlines) {
52  auto Outline = static_cast<MFOUTLINE>(RemainingOutlines->first_node());
53  ConvertToOutlineFeatures(Outline, FeatureSet);
54  }
55  if (classify_norm_method == baseline) {
56  NormalizeOutlineX(FeatureSet);
57  }
58  FreeOutlines(Outlines);
59  return (FeatureSet);
60 } /* ExtractOutlineFeatures */
#define MAX_OUTLINE_FEATURES
Definition: outfeat.h:34
LIST MFOUTLINE
Definition: mfoutline.h:28
void FreeOutlines(LIST Outlines)
Definition: mfoutline.cpp:151
void NormalizeOutlineX(FEATURE_SET FeatureSet)
Definition: outfeat.cpp:134
void ConvertToOutlineFeatures(MFOUTLINE Outline, FEATURE_SET FeatureSet)
Definition: outfeat.cpp:97
LIST ConvertBlob(TBLOB *blob)
Definition: mfoutline.cpp:34
void NormalizeOutlines(LIST Outlines, float *XScale, float *YScale)
Definition: mfoutline.cpp:249

◆ ExtractPicoFeatures()

FEATURE_SET tesseract::Classify::ExtractPicoFeatures ( TBLOB Blob)

Operation: Dummy for now.

Globals:

  • classify_norm_method: normalization method currently specified

Parameters
Blob: blob to extract pico-features from
Returns
Pico-features for Blob.

Definition at line 60 of file picofeat.cpp.

60  {
61  auto FeatureSet = new FEATURE_SET_STRUCT(MAX_PICO_FEATURES);
62  auto Outlines = ConvertBlob(Blob);
63  float XScale, YScale;
64  NormalizeOutlines(Outlines, &XScale, &YScale);
65  auto RemainingOutlines = Outlines;
66  iterate(RemainingOutlines) {
67  auto Outline = static_cast<MFOUTLINE>(RemainingOutlines->first_node());
68  ConvertToPicoFeatures2(Outline, FeatureSet);
69  }
70  if (classify_norm_method == baseline) {
71  NormalizePicoX(FeatureSet);
72  }
73  FreeOutlines(Outlines);
74  return (FeatureSet);
75 
76 } /* ExtractPicoFeatures */
#define MAX_PICO_FEATURES
Definition: picofeat.h:45
void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet)
Definition: picofeat.cpp:144
void NormalizePicoX(FEATURE_SET FeatureSet)
Definition: picofeat.cpp:181

◆ FreeNormProtos()

void tesseract::Classify::FreeNormProtos ( )

Definition at line 154 of file normmatch.cpp.

154  {
155  if (NormProtos != nullptr) {
156  for (int i = 0; i < NormProtos->NumProtos; i++) {
158  }
159  delete[] NormProtos->ParamDesc;
160  delete NormProtos;
161  NormProtos = nullptr;
162  }
163 }
void FreeProtoList(LIST *ProtoList)
Definition: cluster.cpp:1598
PARAM_DESC * ParamDesc
Definition: normmatch.cpp:40

◆ get_fontinfo_table() [1/2]

UnicityTable<FontInfo>& tesseract::Classify::get_fontinfo_table ( )
inline

Definition at line 324 of file classify.h.

324  {
325  return fontinfo_table_;
326  }

◆ get_fontinfo_table() [2/2]

const UnicityTable<FontInfo>& tesseract::Classify::get_fontinfo_table ( ) const
inline

Definition at line 327 of file classify.h.

327  {
328  return fontinfo_table_;
329  }

◆ get_fontset_table()

UnicityTable<FontSet>& tesseract::Classify::get_fontset_table ( )
inline

Definition at line 330 of file classify.h.

330  {
331  return fontset_table_;
332  }

◆ GetAdaptiveFeatures()

int tesseract::Classify::GetAdaptiveFeatures ( TBLOB Blob,
INT_FEATURE_ARRAY  IntFeatures,
FEATURE_SET FloatFeatures 
)

This routine sets up the feature extractor to extract baseline normalized pico-features.

The extracted pico-features are converted to integer form and placed in IntFeatures. The original floating-pt. features are returned in FloatFeatures.

Globals: none

Parameters
Blob: blob to extract features from
[out] IntFeatures: array to fill with integer features
[out] FloatFeatures: place to return actual floating-pt features
Returns
Number of pico-features returned (0 if an error occurred)

Definition at line 778 of file adaptmatch.cpp.

779  {
780  FEATURE_SET Features;
781  int NumFeatures;
782 
783  classify_norm_method.set_value(baseline);
784  Features = ExtractPicoFeatures(Blob);
785 
786  NumFeatures = Features->NumFeatures;
787  if (NumFeatures == 0 || NumFeatures > UNLIKELY_NUM_FEAT) {
788  delete Features;
789  return 0;
790  }
791 
792  ComputeIntFeatures(Features, IntFeatures);
793  *FloatFeatures = Features;
794 
795  return NumFeatures;
796 } /* GetAdaptiveFeatures */
#define UNLIKELY_NUM_FEAT
Definition: adaptmatch.cpp:83
FEATURE_SET ExtractPicoFeatures(TBLOB *Blob)
Definition: picofeat.cpp:60
void ComputeIntFeatures(FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures)
Definition: float2int.cpp:85

◆ GetAmbiguities()

UNICHAR_ID * tesseract::Classify::GetAmbiguities ( TBLOB Blob,
CLASS_ID  CorrectClass 
)

This routine matches blob to the built-in templates to find out if there are any classes other than the correct class which are potential ambiguities.

Parameters
Blob: blob to get classification ambiguities for
CorrectClass: correct class for Blob

Globals:

  • CurrentRatings used by qsort compare routine
  • PreTrainedTemplates built-in templates
Returns
String containing all possible ambiguous classes.

Definition at line 1532 of file adaptmatch.cpp.

1532  {
1533  auto *Results = new ADAPT_RESULTS();
1534  UNICHAR_ID *Ambiguities;
1535 
1536  Results->Initialize();
1537  INT_FX_RESULT_STRUCT fx_info;
1538  std::vector<INT_FEATURE_STRUCT> bl_features;
1539  TrainingSample *sample =
1540  BlobToTrainingSample(*Blob, classify_nonlinear_norm, &fx_info, &bl_features);
1541  if (sample == nullptr) {
1542  delete Results;
1543  return nullptr;
1544  }
1545 
1546  CharNormClassifier(Blob, *sample, Results);
1547  delete sample;
1548  RemoveBadMatches(Results);
1549  std::sort(Results->match.begin(), Results->match.end(), SortDescendingRating);
1550 
1551  /* copy the class id's into an string of ambiguities - don't copy if
1552  the correct class is the only class id matched */
1553  Ambiguities = new UNICHAR_ID[Results->match.size() + 1];
1554  if (Results->match.size() > 1 ||
1555  (Results->match.size() == 1 && Results->match[0].unichar_id != CorrectClass)) {
1556  unsigned i;
1557  for (i = 0; i < Results->match.size(); i++) {
1558  Ambiguities[i] = Results->match[i].unichar_id;
1559  }
1560  Ambiguities[i] = -1;
1561  } else {
1562  Ambiguities[0] = -1;
1563  }
1564 
1565  delete Results;
1566  return Ambiguities;
1567 } /* GetAmbiguities */

◆ GetCharNormFeature()

int tesseract::Classify::GetCharNormFeature ( const INT_FX_RESULT_STRUCT fx_info,
INT_TEMPLATES_STRUCT templates,
uint8_t *  pruner_norm_array,
uint8_t *  char_norm_array 
)

This routine calls the integer (Hardware) feature extractor if it has not been called before for this blob.

The results from the feature extractor are placed into globals so that they can be used in other routines without re-extracting the features.

It then copies the char norm features into the IntFeatures array provided by the caller.

Parameters
templates: used to compute char norm adjustments
pruner_norm_array: array of factors from blob normalization process
char_norm_array: array to fill with dummy char norm adjustments
fx_info: feature-extraction results for the blob (Ymean, Length, Rx and Ry are read)
Globals: (none listed)
Returns
Number of features extracted or 0 if an error occurred.

Definition at line 1613 of file adaptmatch.cpp.

1614  {
1615  auto norm_feature = new FEATURE_STRUCT(&CharNormDesc);
1616  float baseline = kBlnBaselineOffset;
1617  float scale = MF_SCALE_FACTOR;
1618  norm_feature->Params[CharNormY] = (fx_info.Ymean - baseline) * scale;
1619  norm_feature->Params[CharNormLength] = fx_info.Length * scale / LENGTH_COMPRESSION;
1620  norm_feature->Params[CharNormRx] = fx_info.Rx * scale;
1621  norm_feature->Params[CharNormRy] = fx_info.Ry * scale;
1622  // Deletes norm_feature.
1623  ComputeCharNormArrays(norm_feature, templates, char_norm_array, pruner_norm_array);
1624  return IntCastRounded(fx_info.Length / kStandardFeatureLength);
1625 } /* GetCharNormFeature */
#define LENGTH_COMPRESSION
Definition: normfeat.h:26
const float MF_SCALE_FACTOR
Definition: mfoutline.h:61
const FEATURE_DESC_STRUCT CharNormDesc
const int kBlnBaselineOffset
Definition: normalis.h:34

◆ GetClassToDebug()

CLASS_ID tesseract::Classify::GetClassToDebug ( const char *  Prompt,
bool *  adaptive_on,
bool *  pretrained_on,
int *  shape_id 
)

This routine prompts the user with Prompt and waits for the user to enter something in the debug window.

Parameters
Prompt: prompt to print while waiting for input from window
adaptive_on
pretrained_on
shape_id
Returns
Character entered in the debug window.
Note
Globals: none

Definition at line 1163 of file intproto.cpp.

1164  {
1165  tprintf("%s\n", Prompt);
1166  SVEvent *ev;
1167  SVEventType ev_type;
1168  int unichar_id = INVALID_UNICHAR_ID;
1169  // Wait until a click or popup event.
1170  do {
1171  ev = IntMatchWindow->AwaitEvent(SVET_ANY);
1172  ev_type = ev->type;
1173  if (ev_type == SVET_POPUP) {
1174  if (ev->command_id == IDA_SHAPE_INDEX) {
1175  if (shape_table_ != nullptr) {
1176  *shape_id = atoi(ev->parameter);
1177  *adaptive_on = false;
1178  *pretrained_on = true;
1179  if (*shape_id >= 0 && static_cast<unsigned>(*shape_id) < shape_table_->NumShapes()) {
1180  int font_id;
1181  shape_table_->GetFirstUnicharAndFont(*shape_id, &unichar_id, &font_id);
1182  tprintf("Shape %d, first unichar=%d, font=%d\n", *shape_id, unichar_id, font_id);
1183  return unichar_id;
1184  }
1185  tprintf("Shape index '%s' not found in shape table\n", ev->parameter);
1186  } else {
1187  tprintf("No shape table loaded!\n");
1188  }
1189  } else {
1190  if (unicharset.contains_unichar(ev->parameter)) {
1191  unichar_id = unicharset.unichar_to_id(ev->parameter);
1192  if (ev->command_id == IDA_ADAPTIVE) {
1193  *adaptive_on = true;
1194  *pretrained_on = false;
1195  *shape_id = -1;
1196  } else if (ev->command_id == IDA_STATIC) {
1197  *adaptive_on = false;
1198  *pretrained_on = true;
1199  } else {
1200  *adaptive_on = true;
1201  *pretrained_on = true;
1202  }
1203  if (ev->command_id == IDA_ADAPTIVE || shape_table_ == nullptr) {
1204  *shape_id = -1;
1205  return unichar_id;
1206  }
1207  for (unsigned s = 0; s < shape_table_->NumShapes(); ++s) {
1208  if (shape_table_->GetShape(s).ContainsUnichar(unichar_id)) {
1209  tprintf("%s\n", shape_table_->DebugStr(s).c_str());
1210  }
1211  }
1212  } else {
1213  tprintf("Char class '%s' not found in unicharset", ev->parameter);
1214  }
1215  }
1216  }
1217  delete ev;
1218  } while (ev_type != SVET_CLICK);
1219  return 0;
1220 } /* GetClassToDebug */
@ SVET_POPUP
Definition: scrollview.h:61
@ SVET_CLICK
Definition: scrollview.h:55
@ IDA_SHAPE_INDEX
Definition: intproto.h:139
@ IDA_ADAPTIVE
Definition: intproto.h:139
@ IDA_STATIC
Definition: intproto.h:139
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:695
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:186
bool ContainsUnichar(int unichar_id) const
Definition: shapetable.cpp:150
unsigned NumShapes() const
Definition: shapetable.h:248
void GetFirstUnicharAndFont(unsigned shape_id, int *unichar_id, int *font_id) const
Definition: shapetable.cpp:420
SVEventType type
Definition: scrollview.h:73
SVEvent * AwaitEvent(SVEventType type)
Definition: scrollview.cpp:445

◆ getDict()

virtual Dict& tesseract::Classify::getDict ( )
inline virtual

Reimplemented in tesseract::Tesseract.

Definition at line 98 of file classify.h.

98  {
99  return dict_;
100  }

◆ GetFontinfoId()

int tesseract::Classify::GetFontinfoId ( ADAPT_CLASS_STRUCT * Class,
uint8_t  ConfigId 
)

Definition at line 118 of file adaptive.cpp.

118  {
119  return (ConfigIsPermanent(Class, ConfigId) ? PermConfigFor(Class, ConfigId)->FontinfoId
120  : TempConfigFor(Class, ConfigId)->FontinfoId);
121 }
#define PermConfigFor(Class, ConfigId)
Definition: adaptive.h:93

◆ InitAdaptedClass()

void tesseract::Classify::InitAdaptedClass ( TBLOB * Blob,
CLASS_ID  ClassId,
int  FontinfoId,
ADAPT_CLASS_STRUCT * Class,
ADAPT_TEMPLATES_STRUCT * Templates
)

This routine creates a new adapted class and uses Blob as the model for the first config in that class.

Parameters
Blob: blob to model new class after
ClassId: id of the class to be initialized
FontinfoId: font information inferred from pre-trained templates
Class: adapted class to be initialized
Templates: adapted templates to add new class to

Globals:

Definition at line 686 of file adaptmatch.cpp.

687  {
688  FEATURE_SET Features;
689  int Fid, Pid;
690  FEATURE Feature;
691  int NumFeatures;
692  PROTO_STRUCT *Proto;
693  INT_CLASS_STRUCT *IClass;
694  TEMP_CONFIG_STRUCT *Config;
695 
696  classify_norm_method.set_value(baseline);
697  Features = ExtractOutlineFeatures(Blob);
698  NumFeatures = Features->NumFeatures;
699  if (NumFeatures > UNLIKELY_NUM_FEAT || NumFeatures <= 0) {
700  delete Features;
701  return;
702  }
703 
704  Config = new TEMP_CONFIG_STRUCT(NumFeatures - 1, FontinfoId);
705  TempConfigFor(Class, 0) = Config;
706 
707  /* this is a kludge to construct cutoffs for adapted templates */
708  if (Templates == AdaptedTemplates) {
709  BaselineCutoffs[ClassId] = CharNormCutoffs[ClassId];
710  }
711 
712  IClass = ClassForClassId(Templates->Templates, ClassId);
713 
714  for (Fid = 0; Fid < Features->NumFeatures; Fid++) {
715  Pid = AddIntProto(IClass);
716  assert(Pid != NO_PROTO);
717 
718  Feature = Features->Features[Fid];
719  auto TempProto = new TEMP_PROTO_STRUCT;
720  Proto = &(TempProto->Proto);
721 
722  /* compute proto params - NOTE that Y_DIM_OFFSET must be used because
723  ConvertProto assumes that the Y dimension varies from -0.5 to 0.5
724  instead of the -0.25 to 0.75 used in baseline normalization */
725  Proto->Angle = Feature->Params[OutlineFeatDir];
726  Proto->X = Feature->Params[OutlineFeatX];
727  Proto->Y = Feature->Params[OutlineFeatY] - Y_DIM_OFFSET;
728  Proto->Length = Feature->Params[OutlineFeatLength];
729  FillABC(Proto);
730 
731  TempProto->ProtoId = Pid;
732  SET_BIT(Config->Protos, Pid);
733 
734  ConvertProto(Proto, Pid, IClass);
735  AddProtoToProtoPruner(Proto, Pid, IClass, classify_learning_debug_level >= 2);
736 
737  Class->TempProtos = push(Class->TempProtos, TempProto);
738  }
739  delete Features;
740 
741  AddIntConfig(IClass);
742  ConvertConfig(AllProtosOn, 0, IClass);
743 
744  if (classify_learning_debug_level >= 1) {
745  tprintf("Added new class '%s' with class id %d and %d protos.\n",
746  unicharset.id_to_unichar(ClassId), ClassId, NumFeatures);
747 #ifndef GRAPHICS_DISABLED
748  if (classify_learning_debug_level > 1) {
749  DisplayAdaptedChar(Blob, IClass);
750  }
751 #endif
752  }
753 
754  if (IsEmptyAdaptedClass(Class)) {
755  (Templates->NumNonEmptyClasses)++;
756  }
757 } /* InitAdaptedClass */
#define Y_DIM_OFFSET
Definition: adaptmatch.cpp:89
#define NO_PROTO
Definition: matchdefs.h:41
@ OutlineFeatLength
Definition: outfeat.h:30
@ OutlineFeatY
Definition: outfeat.h:29
@ OutlineFeatX
Definition: outfeat.h:28
@ OutlineFeatDir
Definition: outfeat.h:31
CLUSTERCONFIG Config
LIST push(LIST list, void *element)
Definition: oldlist.cpp:178
void FillABC(PROTO_STRUCT *Proto)
Definition: protos.cpp:103
FEATURE_SET ExtractOutlineFeatures(TBLOB *Blob)
Definition: outfeat.cpp:40

◆ InitAdaptiveClassifier()

void tesseract::Classify::InitAdaptiveClassifier ( TessdataManager mgr)

This routine reads in the training information needed by the adaptive classifier and saves it into global variables.

Parameters:
load_pre_trained_templates: Indicates whether the pre-trained templates (inttemp, normproto and pffmtable components) should be loaded. Should only be set to true if the necessary classifier components are present in the [lang].traineddata file.
Note: this parameter name is stale. The current signature takes mgr, the TessdataManager the components are read from; they are loaded only when language_data_path_prefix is non-empty and mgr is not null.

Globals:
BuiltInTemplatesFile: file to get built-in templates from
BuiltInCutoffsFile: file to get the average number of features per class from
classify_use_pre_adapted_templates: enables use of pre-adapted templates

Definition at line 527 of file adaptmatch.cpp.

527  {
529  return;
530  }
531  if (AllProtosOn != nullptr) {
532  EndAdaptiveClassifier(); // Don't leak with multiple inits.
533  }
534 
535  // If there is no language_data_path_prefix, the classifier will be
536  // adaptive only.
537  if (language_data_path_prefix.length() > 0 && mgr != nullptr) {
538  TFile fp;
539  ASSERT_HOST(mgr->GetComponent(TESSDATA_INTTEMP, &fp));
541 
542  if (mgr->GetComponent(TESSDATA_SHAPE_TABLE, &fp)) {
543  shape_table_ = new ShapeTable(unicharset);
544  if (!shape_table_->DeSerialize(&fp)) {
545  tprintf("Error loading shape table!\n");
546  delete shape_table_;
547  shape_table_ = nullptr;
548  }
549  }
550 
551  ASSERT_HOST(mgr->GetComponent(TESSDATA_PFFMTABLE, &fp));
552  ReadNewCutoffs(&fp, CharNormCutoffs);
553 
554  ASSERT_HOST(mgr->GetComponent(TESSDATA_NORMPROTO, &fp));
555  NormProtos = ReadNormProtos(&fp);
556  static_classifier_ = new TessClassifier(false, this);
557  }
558 
559  InitIntegerFX();
560 
561  AllProtosOn = NewBitVector(MAX_NUM_PROTOS);
562  AllConfigsOn = NewBitVector(MAX_NUM_CONFIGS);
563  AllConfigsOff = NewBitVector(MAX_NUM_CONFIGS);
564  TempProtoMask = NewBitVector(MAX_NUM_PROTOS);
565  set_all_bits(AllProtosOn, WordsInVectorOfSize(MAX_NUM_PROTOS));
566  set_all_bits(AllConfigsOn, WordsInVectorOfSize(MAX_NUM_CONFIGS));
567  zero_all_bits(AllConfigsOff, WordsInVectorOfSize(MAX_NUM_CONFIGS));
568 
569  for (uint16_t &BaselineCutoff : BaselineCutoffs) {
570  BaselineCutoff = 0;
571  }
572 
573  if (classify_use_pre_adapted_templates) {
574  TFile fp;
575  std::string Filename = imagefile;
576  Filename += ADAPT_TEMPLATE_SUFFIX;
577  if (!fp.Open(Filename.c_str(), nullptr)) {
578  AdaptedTemplates = new ADAPT_TEMPLATES_STRUCT(unicharset);
579  } else {
580  tprintf("\nReading pre-adapted templates from %s ...\n", Filename.c_str());
581  fflush(stdout);
583  tprintf("\n");
585 
586  for (unsigned i = 0; i < AdaptedTemplates->Templates->NumClasses; i++) {
587  BaselineCutoffs[i] = CharNormCutoffs[i];
588  }
589  }
590  } else {
591  delete AdaptedTemplates;
592  AdaptedTemplates = new ADAPT_TEMPLATES_STRUCT(unicharset);
593  }
594 } /* InitAdaptiveClassifier */
#define MAX_NUM_CONFIGS
Definition: intproto.h:47
@ TESSDATA_SHAPE_TABLE
void InitIntegerFX()
Definition: intfx.cpp:54
std::string language_data_path_prefix
Definition: ccutil.h:60
INT_TEMPLATES_STRUCT * Templates
Definition: adaptive.h:72
INT_TEMPLATES_STRUCT * ReadIntTemplates(TFile *fp)
Definition: intproto.cpp:627
ADAPT_TEMPLATES_STRUCT * ReadAdaptedTemplates(TFile *File)
Definition: adaptive.cpp:235
NORM_PROTOS * ReadNormProtos(TFile *fp)
Definition: normmatch.cpp:173
void ReadNewCutoffs(TFile *fp, uint16_t *Cutoffs)
Definition: cutoffs.cpp:41
void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES_STRUCT *Templates)
Definition: adaptive.cpp:153
bool DeSerialize(TFile *fp)
Definition: shapetable.cpp:255

◆ INT_VAR_H() [1/14]

tesseract::Classify::INT_VAR_H ( classify_adapt_feature_threshold  )

◆ INT_VAR_H() [2/14]

tesseract::Classify::INT_VAR_H ( classify_adapt_proto_threshold  )

◆ INT_VAR_H() [3/14]

tesseract::Classify::INT_VAR_H ( classify_class_pruner_multiplier  )

◆ INT_VAR_H() [4/14]

tesseract::Classify::INT_VAR_H ( classify_class_pruner_threshold  )

◆ INT_VAR_H() [5/14]

tesseract::Classify::INT_VAR_H ( classify_cp_cutoff_strength  )

◆ INT_VAR_H() [6/14]

tesseract::Classify::INT_VAR_H ( classify_debug_level  )

◆ INT_VAR_H() [7/14]

tesseract::Classify::INT_VAR_H ( classify_integer_matcher_multiplier  )

◆ INT_VAR_H() [8/14]

tesseract::Classify::INT_VAR_H ( classify_learning_debug_level  )

◆ INT_VAR_H() [9/14]

tesseract::Classify::INT_VAR_H ( classify_norm_method  )

◆ INT_VAR_H() [10/14]

tesseract::Classify::INT_VAR_H ( matcher_debug_flags  )

◆ INT_VAR_H() [11/14]

tesseract::Classify::INT_VAR_H ( matcher_debug_level  )

◆ INT_VAR_H() [12/14]

tesseract::Classify::INT_VAR_H ( matcher_min_examples_for_prototyping  )

◆ INT_VAR_H() [13/14]

tesseract::Classify::INT_VAR_H ( matcher_permanent_classes_min  )

◆ INT_VAR_H() [14/14]

tesseract::Classify::INT_VAR_H ( matcher_sufficient_examples_for_prototyping  )

◆ LargeSpeckle()

bool tesseract::Classify::LargeSpeckle ( const TBLOB blob)

Definition at line 191 of file classify.cpp.

191  {
192  double speckle_size = kBlnXHeight * speckle_large_max_size;
193  TBOX bbox = blob.bounding_box();
194  return bbox.width() < speckle_size && bbox.height() < speckle_size;
195 }
const int kBlnXHeight
Definition: normalis.h:33

◆ LearnBlob()

void tesseract::Classify::LearnBlob ( const std::string &  fontname,
TBLOB Blob,
const DENORM cn_denorm,
const INT_FX_RESULT_STRUCT fx_info,
const char *  blob_text 
)

Definition at line 35 of file blobclass.cpp.

36  {
37  std::unique_ptr<CHAR_DESC_STRUCT> CharDesc(new CHAR_DESC_STRUCT(feature_defs_));
38  CharDesc->FeatureSets[0] = ExtractMicros(blob, cn_denorm);
39  CharDesc->FeatureSets[1] = ExtractCharNormFeatures(fx_info);
40  CharDesc->FeatureSets[2] = ExtractIntCNFeatures(*blob, fx_info);
41  CharDesc->FeatureSets[3] = ExtractIntGeoFeatures(*blob, fx_info);
42 
43  if (ValidCharDescription(feature_defs_, CharDesc.get())) {
44  // Label the features with a class name and font name.
45  tr_file_data_ += "\n";
46  tr_file_data_ += fontname;
47  tr_file_data_ += " ";
48  tr_file_data_ += blob_text;
49  tr_file_data_ += "\n";
50 
51  // write micro-features to file and clean up
52  WriteCharDescription(feature_defs_, CharDesc.get(), tr_file_data_);
53  } else {
54  tprintf("Blob learned was invalid!\n");
55  }
56 } // LearnBlob
bool ValidCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, CHAR_DESC_STRUCT *CharDesc)
Definition: featdefs.cpp:131
void WriteCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, CHAR_DESC_STRUCT *CharDesc, std::string &str)
Definition: featdefs.cpp:109
FEATURE_SET ExtractMicros(TBLOB *Blob, const DENORM &cn_denorm)
Definition: mf.cpp:41
FEATURE_SET ExtractCharNormFeatures(const INT_FX_RESULT_STRUCT &fx_info)
Definition: normfeat.cpp:56
FEATURE_SET ExtractIntCNFeatures(const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
Definition: picofeat.cpp:204
FEATURE_SET ExtractIntGeoFeatures(const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
Definition: picofeat.cpp:234

◆ LearnPieces()

void tesseract::Classify::LearnPieces ( const char *  fontname,
int  start,
int  length,
float  threshold,
CharSegmentationType  segmentation,
const char *  correct_text,
WERD_RES word 
)

Definition at line 385 of file adaptmatch.cpp.

387  {
388  // TODO(daria) Remove/modify this if/when we want
389  // to train and/or adapt to n-grams.
390  if (segmentation != CST_WHOLE && (segmentation != CST_FRAGMENT || disable_character_fragments)) {
391  return;
392  }
393 
394  if (length > 1) {
395  SEAM::JoinPieces(word->seam_array, word->chopped_word->blobs, start, start + length - 1);
396  }
397  TBLOB *blob = word->chopped_word->blobs[start];
398  // Rotate the blob if needed for classification.
399  TBLOB *rotated_blob = blob->ClassifyNormalizeIfNeeded();
400  if (rotated_blob == nullptr) {
401  rotated_blob = blob;
402  }
403 
404 #ifndef GRAPHICS_DISABLED
405  // Draw debug windows showing the blob that is being learned if needed.
406  if (strcmp(classify_learn_debug_str.c_str(), correct_text) == 0) {
407  RefreshDebugWindow(&learn_debug_win_, "LearnPieces", 600, word->chopped_word->bounding_box());
408  rotated_blob->plot(learn_debug_win_, ScrollView::GREEN, ScrollView::BROWN);
409  learn_debug_win_->Update();
410  learn_debug_win_->Wait();
411  }
412  if (classify_debug_character_fragments && segmentation == CST_FRAGMENT) {
413  ASSERT_HOST(learn_fragments_debug_win_ != nullptr); // set up in LearnWord
414  blob->plot(learn_fragments_debug_win_, ScrollView::BLUE, ScrollView::BROWN);
415  learn_fragments_debug_win_->Update();
416  }
417 #endif // !GRAPHICS_DISABLED
418 
419  if (fontname != nullptr) {
420  classify_norm_method.set_value(character); // force char norm spc 30/11/93
421  tess_bn_matching.set_value(false); // turn it off
422  tess_cn_matching.set_value(false);
423  DENORM bl_denorm, cn_denorm;
424  INT_FX_RESULT_STRUCT fx_info;
425  SetupBLCNDenorms(*rotated_blob, classify_nonlinear_norm, &bl_denorm, &cn_denorm, &fx_info);
426  LearnBlob(fontname, rotated_blob, cn_denorm, fx_info, correct_text);
427  } else if (unicharset.contains_unichar(correct_text)) {
428  UNICHAR_ID class_id = unicharset.unichar_to_id(correct_text);
429  int font_id = word->fontinfo != nullptr ? fontinfo_table_.get_index(*word->fontinfo) : 0;
430  if (classify_learning_debug_level >= 1) {
431  tprintf("Adapting to char = %s, thr= %g font_id= %d\n", unicharset.id_to_unichar(class_id),
432  threshold, font_id);
433  }
434  // fontname is nullptr on this branch, i.e. we are doing recognition
435  // (as opposed to training), so we must have already set word fonts.
436  AdaptToChar(rotated_blob, class_id, font_id, threshold, AdaptedTemplates);
437  if (BackupAdaptedTemplates != nullptr) {
438  // Adapt the backup templates too. They will be used if the primary gets
439  // too full.
440  AdaptToChar(rotated_blob, class_id, font_id, threshold, BackupAdaptedTemplates);
441  }
442  } else if (classify_debug_level >= 1) {
443  tprintf("Can't adapt to %s not in unicharset\n", correct_text);
444  }
445  if (rotated_blob != blob) {
446  delete rotated_blob;
447  }
448 
449  SEAM::BreakPieces(word->seam_array, word->chopped_word->blobs, start, start + length - 1);
450 } // LearnPieces.
@ CST_WHOLE
Definition: classify.h:89
@ CST_FRAGMENT
Definition: classify.h:88
static void JoinPieces(const std::vector< SEAM * > &seams, const std::vector< TBLOB * > &blobs, int first, int last)
Definition: seam.cpp:204
static void BreakPieces(const std::vector< SEAM * > &seams, const std::vector< TBLOB * > &blobs, int first, int last)
Definition: seam.cpp:181
void LearnBlob(const std::string &fontname, TBLOB *Blob, const DENORM &cn_denorm, const INT_FX_RESULT_STRUCT &fx_info, const char *blob_text)
Definition: blobclass.cpp:35
void RefreshDebugWindow(ScrollView **win, const char *msg, int y_offset, const TBOX &wbox)
Definition: adaptmatch.cpp:240
void AdaptToChar(TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, float Threshold, ADAPT_TEMPLATES_STRUCT *adaptive_templates)
Definition: adaptmatch.cpp:843
static void Update()
Definition: scrollview.cpp:713

◆ LearnWord()

void tesseract::Classify::LearnWord ( const char *  fontname,
WERD_RES word 
)

Definition at line 262 of file adaptmatch.cpp.

262  {
263  int word_len = word->correct_text.size();
264  if (word_len == 0) {
265  return;
266  }
267 
268  float *thresholds = nullptr;
269  if (fontname == nullptr) {
270  // Adaption mode.
271  if (!EnableLearning || word->best_choice == nullptr) {
272  return; // Can't or won't adapt.
273  }
274 
275  if (classify_learning_debug_level >= 1) {
276  tprintf("\n\nAdapting to word = %s\n", word->best_choice->debug_string().c_str());
277  }
278  thresholds = new float[word_len];
279  word->ComputeAdaptionThresholds(certainty_scale, matcher_perfect_threshold,
280  matcher_good_threshold, matcher_rating_margin, thresholds);
281  }
282  int start_blob = 0;
283 
284 #ifndef GRAPHICS_DISABLED
285  if (classify_debug_character_fragments) {
286  if (learn_fragmented_word_debug_win_ != nullptr) {
287  learn_fragmented_word_debug_win_->Wait();
288  }
289  RefreshDebugWindow(&learn_fragments_debug_win_, "LearnPieces", 400,
290  word->chopped_word->bounding_box());
291  RefreshDebugWindow(&learn_fragmented_word_debug_win_, "LearnWord", 200,
292  word->chopped_word->bounding_box());
293  word->chopped_word->plot(learn_fragmented_word_debug_win_);
295  }
296 #endif // !GRAPHICS_DISABLED
297 
298  for (int ch = 0; ch < word_len; ++ch) {
299  if (classify_debug_character_fragments) {
300  tprintf("\nLearning %s\n", word->correct_text[ch].c_str());
301  }
302  if (word->correct_text[ch].length() > 0) {
303  float threshold = thresholds != nullptr ? thresholds[ch] : 0.0f;
304 
305  LearnPieces(fontname, start_blob, word->best_state[ch], threshold, CST_WHOLE,
306  word->correct_text[ch].c_str(), word);
307 
308  if (word->best_state[ch] > 1 && !disable_character_fragments) {
309  // Check that the character breaks into meaningful fragments
310  // that each match a whole character with at least
311  // classify_character_fragments_garbage_certainty_threshold
312  bool garbage = false;
313  int frag;
314  for (frag = 0; frag < word->best_state[ch]; ++frag) {
315  TBLOB *frag_blob = word->chopped_word->blobs[start_blob + frag];
316  if (classify_character_fragments_garbage_certainty_threshold < 0) {
317  garbage |= LooksLikeGarbage(frag_blob);
318  }
319  }
320  // Learn the fragments.
321  if (!garbage) {
322  bool pieces_all_natural = word->PiecesAllNatural(start_blob, word->best_state[ch]);
323  if (pieces_all_natural || !prioritize_division) {
324  for (frag = 0; frag < word->best_state[ch]; ++frag) {
325  std::vector<std::string> tokens = split(word->correct_text[ch], ' ');
326 
327  tokens[0] = CHAR_FRAGMENT::to_string(tokens[0].c_str(), frag, word->best_state[ch],
328  pieces_all_natural);
329 
330  std::string full_string;
331  for (unsigned i = 0; i < tokens.size(); i++) {
332  full_string += tokens[i];
333  if (i != tokens.size() - 1) {
334  full_string += ' ';
335  }
336  }
337  LearnPieces(fontname, start_blob + frag, 1, threshold, CST_FRAGMENT,
338  full_string.c_str(), word);
339  }
340  }
341  }
342  }
343 
344  // TODO(rays): re-enable this part of the code when we switch to the
345  // new classifier that needs to see examples of garbage.
346  /*
347 if (word->best_state[ch] > 1) {
348  // If the next blob is good, make junk with the rightmost fragment.
349  if (ch + 1 < word_len && word->correct_text[ch + 1].length() > 0) {
350  LearnPieces(fontname, start_blob + word->best_state[ch] - 1,
351  word->best_state[ch + 1] + 1,
352  threshold, CST_IMPROPER, INVALID_UNICHAR, word);
353  }
354  // If the previous blob is good, make junk with the leftmost fragment.
355  if (ch > 0 && word->correct_text[ch - 1].length() > 0) {
356  LearnPieces(fontname, start_blob - word->best_state[ch - 1],
357  word->best_state[ch - 1] + 1,
358  threshold, CST_IMPROPER, INVALID_UNICHAR, word);
359  }
360 }
361 // If the next blob is good, make a join with it.
362 if (ch + 1 < word_len && word->correct_text[ch + 1].length() > 0) {
363  std::string joined_text = word->correct_text[ch];
364  joined_text += word->correct_text[ch + 1];
365  LearnPieces(fontname, start_blob,
366  word->best_state[ch] + word->best_state[ch + 1],
367  threshold, CST_NGRAM, joined_text.c_str(), word);
368 }
369 */
370  }
371  start_blob += word->best_state[ch];
372  }
373  delete[] thresholds;
374 } // LearnWord.
const std::vector< std::string > split(const std::string &s, char c)
Definition: helpers.h:41
std::string to_string() const
Definition: unicharset.h:91
bool LooksLikeGarbage(TBLOB *blob)
void LearnPieces(const char *fontname, int start, int length, float threshold, CharSegmentationType segmentation, const char *correct_text, WERD_RES *word)
Definition: adaptmatch.cpp:385

◆ LooksLikeGarbage()

bool tesseract::Classify::LooksLikeGarbage ( TBLOB blob)

Definition at line 1571 of file adaptmatch.cpp.

1571  {
1572  auto *ratings = new BLOB_CHOICE_LIST();
1573  AdaptiveClassifier(blob, ratings);
1574  BLOB_CHOICE_IT ratings_it(ratings);
1575  const UNICHARSET &unicharset = getDict().getUnicharset();
1576  if (classify_debug_character_fragments) {
1577  print_ratings_list("======================\nLooksLikeGarbage() got ", ratings, unicharset);
1578  }
1579  for (ratings_it.mark_cycle_pt(); !ratings_it.cycled_list(); ratings_it.forward()) {
1580  if (unicharset.get_fragment(ratings_it.data()->unichar_id()) != nullptr) {
1581  continue;
1582  }
1583  float certainty = ratings_it.data()->certainty();
1584  delete ratings;
1585  return certainty < classify_character_fragments_garbage_certainty_threshold;
1586  }
1587  delete ratings;
1588  return true; // no whole characters in ratings
1589 }
void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings, const UNICHARSET &current_unicharset)
Definition: ratngs.cpp:804
void AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
Definition: adaptmatch.cpp:202
const UNICHARSET & getUnicharset() const
Definition: dict.h:104

◆ MakeNewTemporaryConfig()

int tesseract::Classify::MakeNewTemporaryConfig ( ADAPT_TEMPLATES_STRUCT Templates,
CLASS_ID  ClassId,
int  FontinfoId,
int  NumFeatures,
INT_FEATURE_ARRAY  Features,
FEATURE_SET  FloatFeatures 
)
Parameters
Templates: adapted templates to add new config to
ClassId: class id to associate with new config
FontinfoId: font information inferred from pre-trained templates
NumFeatures: number of features in Features
Features: features describing model for new config
FloatFeatures: floating-pt representation of features
Returns
The id of the new config created, a negative integer in case of error.

Definition at line 1669 of file adaptmatch.cpp.

1671  {
1672  INT_CLASS_STRUCT *IClass;
1673  ADAPT_CLASS_STRUCT *Class;
1674  PROTO_ID OldProtos[MAX_NUM_PROTOS];
1675  FEATURE_ID BadFeatures[MAX_NUM_INT_FEATURES];
1676  int NumOldProtos;
1677  int NumBadFeatures;
1678  int MaxProtoId, OldMaxProtoId;
1679  int MaskSize;
1680  int ConfigId;
1681  int i;
1682  int debug_level = NO_DEBUG;
1683 
1684  if (classify_learning_debug_level >= 3) {
1686  }
1687 
1688  IClass = ClassForClassId(Templates->Templates, ClassId);
1689  Class = Templates->Class[ClassId];
1690 
1691  if (IClass->NumConfigs >= MAX_NUM_CONFIGS) {
1692  ++NumAdaptationsFailed;
1693  if (classify_learning_debug_level >= 1) {
1694  tprintf("Cannot make new temporary config: maximum number exceeded.\n");
1695  }
1696  return -1;
1697  }
1698 
1699  OldMaxProtoId = IClass->NumProtos - 1;
1700 
1701  NumOldProtos = im_.FindGoodProtos(IClass, AllProtosOn, AllConfigsOff, NumFeatures, Features,
1702  OldProtos, classify_adapt_proto_threshold, debug_level);
1703 
1704  MaskSize = WordsInVectorOfSize(MAX_NUM_PROTOS);
1705  zero_all_bits(TempProtoMask, MaskSize);
1706  for (i = 0; i < NumOldProtos; i++) {
1707  SET_BIT(TempProtoMask, OldProtos[i]);
1708  }
1709 
1710  NumBadFeatures = im_.FindBadFeatures(IClass, TempProtoMask, AllConfigsOn, NumFeatures, Features,
1711  BadFeatures, classify_adapt_feature_threshold, debug_level);
1712 
1713  MaxProtoId =
1714  MakeNewTempProtos(FloatFeatures, NumBadFeatures, BadFeatures, IClass, Class, TempProtoMask);
1715  if (MaxProtoId == NO_PROTO) {
1716  ++NumAdaptationsFailed;
1717  if (classify_learning_debug_level >= 1) {
1718  tprintf("Cannot make new temp protos: maximum number exceeded.\n");
1719  }
1720  return -1;
1721  }
1722 
1723  ConfigId = AddIntConfig(IClass);
1724  ConvertConfig(TempProtoMask, ConfigId, IClass);
1725  auto Config = new TEMP_CONFIG_STRUCT(MaxProtoId, FontinfoId);
1726  TempConfigFor(Class, ConfigId) = Config;
1727  copy_all_bits(TempProtoMask, Config->Protos, Config->ProtoVectorSize);
1728 
1729  if (classify_learning_debug_level >= 1) {
1730  tprintf(
1731  "Making new temp config %d fontinfo id %d"
1732  " using %d old and %d new protos.\n",
1733  ConfigId, Config->FontinfoId, NumOldProtos, MaxProtoId - OldMaxProtoId);
1734  }
1735 
1736  return ConfigId;
1737 } /* MakeNewTemporaryConfig */
#define MAX_NUM_INT_FEATURES
Definition: intproto.h:116
#define PRINT_MATCH_SUMMARY
Definition: intproto.h:165
#define PRINT_PROTO_MATCHES
Definition: intproto.h:169
#define PRINT_FEATURE_MATCHES
Definition: intproto.h:168
int16_t PROTO_ID
Definition: matchdefs.h:40
uint8_t FEATURE_ID
Definition: matchdefs.h:46
PROTO_ID MakeNewTempProtos(FEATURE_SET Features, int NumBadFeat, FEATURE_ID BadFeat[], INT_CLASS_STRUCT *IClass, ADAPT_CLASS_STRUCT *Class, BIT_VECTOR TempProtoMask)
int FindBadFeatures(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_ID *FeatureArray, int AdaptFeatureThreshold, int Debug)
Definition: intmatcher.cpp:619
int FindGoodProtos(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, INT_FEATURE_ARRAY Features, PROTO_ID *ProtoArray, int AdaptProtoThreshold, int Debug)
Definition: intmatcher.cpp:555

◆ MakeNewTempProtos()

PROTO_ID tesseract::Classify::MakeNewTempProtos ( FEATURE_SET  Features,
int  NumBadFeat,
FEATURE_ID  BadFeat[],
INT_CLASS_STRUCT IClass,
ADAPT_CLASS_STRUCT Class,
BIT_VECTOR  TempProtoMask 
)

This routine finds sets of sequential bad features that all have the same angle and converts each set into a new temporary proto. The temp proto is added to the proto pruner for IClass, pushed onto the list of temp protos in Class, and added to TempProtoMask.

Parameters
Features: floating-pt features describing new character
NumBadFeat: number of bad features to turn into protos
BadFeat: feature id's of bad features
IClass: integer class templates to add new protos to
Class: adapted class templates to add new protos to
TempProtoMask: proto mask to add new protos to

Globals: none

Returns
Max proto id in class after all protos have been added.

Definition at line 1758 of file adaptmatch.cpp.

1760  {
1761  FEATURE_ID *ProtoStart;
1762  FEATURE_ID *ProtoEnd;
1763  FEATURE_ID *LastBad;
1764  PROTO_STRUCT *Proto;
1765  FEATURE F1, F2;
1766  float X1, X2, Y1, Y2;
1767  float A1, A2, AngleDelta;
1768  float SegmentLength;
1769  PROTO_ID Pid;
1770 
1771  for (ProtoStart = BadFeat, LastBad = ProtoStart + NumBadFeat; ProtoStart < LastBad;
1772  ProtoStart = ProtoEnd) {
1773  F1 = Features->Features[*ProtoStart];
1774  X1 = F1->Params[PicoFeatX];
1775  Y1 = F1->Params[PicoFeatY];
1776  A1 = F1->Params[PicoFeatDir];
1777 
1778  for (ProtoEnd = ProtoStart + 1, SegmentLength = GetPicoFeatureLength(); ProtoEnd < LastBad;
1779  ProtoEnd++, SegmentLength += GetPicoFeatureLength()) {
1780  F2 = Features->Features[*ProtoEnd];
1781  X2 = F2->Params[PicoFeatX];
1782  Y2 = F2->Params[PicoFeatY];
1783  A2 = F2->Params[PicoFeatDir];
1784 
1785  AngleDelta = std::fabs(A1 - A2);
1786  if (AngleDelta > 0.5f) {
1787  AngleDelta = 1 - AngleDelta;
1788  }
1789 
1790  if (AngleDelta > matcher_clustering_max_angle_delta || std::fabs(X1 - X2) > SegmentLength ||
1791  std::fabs(Y1 - Y2) > SegmentLength) {
1792  break;
1793  }
1794  }
1795 
1796  F2 = Features->Features[*(ProtoEnd - 1)];
1797  X2 = F2->Params[PicoFeatX];
1798  Y2 = F2->Params[PicoFeatY];
1799  A2 = F2->Params[PicoFeatDir];
1800 
1801  Pid = AddIntProto(IClass);
1802  if (Pid == NO_PROTO) {
1803  return (NO_PROTO);
1804  }
1805 
1806  auto TempProto = new TEMP_PROTO_STRUCT;
1807  Proto = &(TempProto->Proto);
1808 
1809  /* compute proto params - NOTE that Y_DIM_OFFSET must be used because
1810  ConvertProto assumes that the Y dimension varies from -0.5 to 0.5
1811  instead of the -0.25 to 0.75 used in baseline normalization */
1812  Proto->Length = SegmentLength;
1813  Proto->Angle = A1;
1814  Proto->X = (X1 + X2) / 2;
1815  Proto->Y = (Y1 + Y2) / 2 - Y_DIM_OFFSET;
1816  FillABC(Proto);
1817 
1818  TempProto->ProtoId = Pid;
1819  SET_BIT(TempProtoMask, Pid);
1820 
1821  ConvertProto(Proto, Pid, IClass);
1822  AddProtoToProtoPruner(Proto, Pid, IClass, classify_learning_debug_level >= 2);
1823 
1824  Class->TempProtos = push(Class->TempProtos, TempProto);
1825  }
1826  return IClass->NumProtos - 1;
1827 } /* MakeNewTempProtos */

◆ MakePermanent()

void tesseract::Classify::MakePermanent ( ADAPT_TEMPLATES_STRUCT Templates,
CLASS_ID  ClassId,
int  ConfigId,
TBLOB Blob 
)
Parameters
Templates: current set of adaptive templates
ClassId: class containing config to be made permanent
ConfigId: config to be made permanent
Blob: current blob being adapted to

Globals: none

Definition at line 1839 of file adaptmatch.cpp.

1840  {
1841  UNICHAR_ID *Ambigs;
1842  PROTO_KEY ProtoKey;
1843 
1844  auto Class = Templates->Class[ClassId];
1845  auto Config = TempConfigFor(Class, ConfigId);
1846 
1847  MakeConfigPermanent(Class, ConfigId);
1848  if (Class->NumPermConfigs == 0) {
1849  Templates->NumPermClasses++;
1850  }
1851  Class->NumPermConfigs++;
1852 
1853  // Initialize permanent config.
1854  Ambigs = GetAmbiguities(Blob, ClassId);
1855  auto Perm = new PERM_CONFIG_STRUCT;
1856  Perm->Ambigs = Ambigs;
1857  Perm->FontinfoId = Config->FontinfoId;
1858 
1859  // Free memory associated with temporary config (since ADAPTED_CONFIG
1860  // is a union we need to clean up before we record permanent config).
1861  ProtoKey.Templates = Templates;
1862  ProtoKey.ClassId = ClassId;
1863  ProtoKey.ConfigId = ConfigId;
1864  Class->TempProtos = delete_d(Class->TempProtos, &ProtoKey, MakeTempProtoPerm);
1865  delete Config;
1866 
1867  // Record permanent config.
1868  PermConfigFor(Class, ConfigId) = Perm;
1869 
1870  if (classify_learning_debug_level >= 1) {
1871  tprintf(
1872  "Making config %d for %s (ClassId %d) permanent:"
1873  " fontinfo id %d, ambiguities '",
1874  ConfigId, getDict().getUnicharset().debug_str(ClassId).c_str(), ClassId,
1875  PermConfigFor(Class, ConfigId)->FontinfoId);
1876  for (UNICHAR_ID *AmbigsPointer = Ambigs; *AmbigsPointer >= 0; ++AmbigsPointer) {
1877  tprintf("%s", unicharset.id_to_unichar(*AmbigsPointer));
1878  }
1879  tprintf("'.\n");
1880  }
1881 } /* MakePermanent */
#define MakeConfigPermanent(Class, ConfigId)
Definition: adaptive.h:87
LIST delete_d(LIST list, void *key, int_compare is_equal)
Definition: oldlist.cpp:88
int MakeTempProtoPerm(void *item1, void *item2)
UNICHAR_ID * GetAmbiguities(TBLOB *Blob, CLASS_ID CorrectClass)

◆ MasterMatcher()

void tesseract::Classify::MasterMatcher ( INT_TEMPLATES_STRUCT templates,
int16_t  num_features,
const INT_FEATURE_STRUCT features,
const uint8_t *  norm_factors,
ADAPT_CLASS_STRUCT **  classes,
int  debug,
int  matcher_multiplier,
const TBOX blob_box,
const std::vector< CP_RESULT_STRUCT > &  results,
ADAPT_RESULTS final_results 
)

Factored-out calls to IntegerMatcher based on class pruner results. Returns integer matcher results inside CLASS_PRUNER_RESULTS structure.

Definition at line 1074 of file adaptmatch.cpp.

1078  {
1079  int top = blob_box.top();
1080  int bottom = blob_box.bottom();
1081  UnicharRating int_result;
1082  for (auto result : results) {
1083  CLASS_ID class_id = result.Class;
1084  BIT_VECTOR protos = classes != nullptr ? classes[class_id]->PermProtos : AllProtosOn;
1085  BIT_VECTOR configs = classes != nullptr ? classes[class_id]->PermConfigs : AllConfigsOn;
1086 
1087  int_result.unichar_id = class_id;
1088  im_.Match(ClassForClassId(templates, class_id), protos, configs, num_features, features,
1089  &int_result, classify_adapt_feature_threshold, debug, matcher_debug_separate_windows);
1090  bool is_debug = matcher_debug_level >= 2 || classify_debug_level > 1;
1091  ExpandShapesAndApplyCorrections(classes, is_debug, class_id, bottom, top, result.Rating,
1092  final_results->BlobLength, matcher_multiplier, norm_factors,
1093  &int_result, final_results);
1094  }
1095 }

◆ NormalizeOutlines()

void tesseract::Classify::NormalizeOutlines ( LIST  Outlines,
float *  XScale,
float *  YScale 
)

This routine normalizes every outline in Outlines according to the currently selected normalization method. It also returns the scale factors that it used to do this scaling. The scale factors returned represent the x and y sizes in the normalized coordinate system that correspond to 1 pixel in the original coordinate system. Outlines are changed and XScale and YScale are updated.

Globals:

  • classify_norm_method method being used for normalization
  • classify_char_norm_range map radius of gyration to this value
Parameters
Outlines: list of outlines to be normalized
XScale: x-direction scale factor used by routine
YScale: y-direction scale factor used by routine

Definition at line 249 of file mfoutline.cpp.

249  {
250  MFOUTLINE Outline;
251 
252  switch (classify_norm_method) {
253  case character:
254  ASSERT_HOST(!"How did NormalizeOutlines get called in character mode?");
255  break;
256 
257  case baseline:
258  iterate(Outlines) {
259  Outline = static_cast<MFOUTLINE>(Outlines->first_node());
260  NormalizeOutline(Outline, 0.0);
261  }
262  *XScale = *YScale = MF_SCALE_FACTOR;
263  break;
264  }
265 } /* NormalizeOutlines */
void NormalizeOutline(MFOUTLINE Outline, float XOrigin)
Definition: mfoutline.cpp:218

◆ PrintAdaptedTemplates()

void tesseract::Classify::PrintAdaptedTemplates ( FILE *  File,
ADAPT_TEMPLATES_STRUCT Templates 
)

This routine prints a summary of the adapted templates in Templates to File.

Parameters
File: open text file to print Templates to
Templates: adapted templates to print to File
Note
Globals: none

Definition at line 153 of file adaptive.cpp.

153  {
154  INT_CLASS_STRUCT *IClass;
155  ADAPT_CLASS_STRUCT *AClass;
156 
157  fprintf(File, "\n\nSUMMARY OF ADAPTED TEMPLATES:\n\n");
158  fprintf(File, "Num classes = %d; Num permanent classes = %d\n\n", Templates->NumNonEmptyClasses,
159  Templates->NumPermClasses);
160  fprintf(File, " Id NC NPC NP NPP\n");
161  fprintf(File, "------------------------\n");
162 
163  for (unsigned i = 0; i < (Templates->Templates)->NumClasses; i++) {
164  IClass = Templates->Templates->Class[i];
165  AClass = Templates->Class[i];
166  if (!IsEmptyAdaptedClass(AClass)) {
167  fprintf(File, "%5u %s %3d %3d %3d %3zd\n", i, unicharset.id_to_unichar(i), IClass->NumConfigs,
168  AClass->NumPermConfigs, IClass->NumProtos,
169  IClass->NumProtos - AClass->TempProtos->size());
170  }
171  }
172  fprintf(File, "\n");
173 
174 } /* PrintAdaptedTemplates */

◆ PrintAdaptiveMatchResults()

void tesseract::Classify::PrintAdaptiveMatchResults ( const ADAPT_RESULTS results)

This routine prints the matches in results using tprintf (there is no File parameter).

Parameters
results: match results to print

Globals: none

Definition at line 1922 of file adaptmatch.cpp.

1922  {
1923  for (auto &it : results.match) {
1924  tprintf("%s ", unicharset.debug_str(it.unichar_id).c_str());
1925  it.Print();
1926  }
1927 } /* PrintAdaptiveMatchResults */

◆ PruneClasses()

int tesseract::Classify::PruneClasses ( const INT_TEMPLATES_STRUCT int_templates,
int  num_features,
int  keep_this,
const INT_FEATURE_STRUCT features,
const uint8_t *  normalization_factors,
const uint16_t *  expected_num_features,
std::vector< CP_RESULT_STRUCT > *  results 
)

Runs the class pruner from int_templates on the given features, returning the number of classes output in results.

Parameters
int_templates: Class pruner tables
num_features: Number of features in blob
features: Array of features
normalization_factors: Array of fudge factors from blob normalization process (by CLASS_INDEX)
expected_num_features: Array of expected number of features for each class (by CLASS_INDEX)
results: Sorted array of pruned classes. Must be an array of size at least int_templates->NumClasses.
keep_this: (no description given; forwarded unchanged to ClassPruner::PruneAndSort)

Definition at line 427 of file intmatcher.cpp.

431  {
432  ClassPruner pruner(int_templates->NumClasses);
433  // Compute initial match scores for all classes.
434  pruner.ComputeScores(int_templates, num_features, features);
435  // Adjust match scores for number of expected features.
436  pruner.AdjustForExpectedNumFeatures(expected_num_features, classify_cp_cutoff_strength);
437  // Apply disabled classes in unicharset - only works without a shape_table.
438  if (shape_table_ == nullptr) {
439  pruner.DisableDisabledClasses(unicharset);
440  }
441  // If fragments are disabled, remove them, also only without a shape table.
442  if (disable_character_fragments && shape_table_ == nullptr) {
443  pruner.DisableFragments(unicharset);
444  }
445 
446  // If we have good x-heights, apply the given normalization factors.
447  if (normalization_factors != nullptr) {
448  pruner.NormalizeForXheight(classify_class_pruner_multiplier, normalization_factors);
449  } else {
450  pruner.NoNormalization();
451  }
452  // Do the actual pruning and sort the short-list.
453  pruner.PruneAndSort(classify_class_pruner_threshold, keep_this, shape_table_ == nullptr,
454  unicharset);
455 
456  if (classify_debug_level > 2) {
457  pruner.DebugMatch(*this, int_templates, features);
458  }
459  if (classify_debug_level > 1) {
460  pruner.SummarizeResult(*this, int_templates, expected_num_features,
461  classify_class_pruner_multiplier, normalization_factors);
462  }
463  // Convert to the expected output format.
464  return pruner.SetupResults(results);
465 }

◆ ReadAdaptedTemplates()

ADAPT_TEMPLATES_STRUCT * tesseract::Classify::ReadAdaptedTemplates ( TFile fp)

Read a set of adapted templates from file and return a ptr to the templates.

Parameters
fp: open text file to read adapted templates from
Returns
Ptr to adapted templates read from file.
Note
Globals: none

Definition at line 235 of file adaptive.cpp.

235  {
236  auto Templates = new ADAPT_TEMPLATES_STRUCT;
237 
238  /* first read the high level adaptive template struct */
239  fp->FRead(Templates, sizeof(ADAPT_TEMPLATES_STRUCT), 1);
240 
241  /* then read in the basic integer templates */
242  Templates->Templates = ReadIntTemplates(fp);
243 
244  /* then read in the adaptive info for each class */
245  for (unsigned i = 0; i < (Templates->Templates)->NumClasses; i++) {
246  Templates->Class[i] = ReadAdaptedClass(fp);
247  }
248  return (Templates);
249 
250 } /* ReadAdaptedTemplates */
ADAPT_CLASS_STRUCT * ReadAdaptedClass(TFile *fp)
Definition: adaptive.cpp:186

◆ ReadIntTemplates()

INT_TEMPLATES_STRUCT * tesseract::Classify::ReadIntTemplates ( TFile fp)

This routine reads a set of integer templates from fp. fp must already be open and must be in the correct binary format.

Parameters
fp: open file to read templates from
Returns
Pointer to integer templates read from fp.
Note
Globals: none

Definition at line 627 of file intproto.cpp.

627  {
628  int j, w, x, y, z;
629  INT_TEMPLATES_STRUCT *Templates;
630  CLASS_PRUNER_STRUCT *Pruner;
631  INT_CLASS_STRUCT *Class;
632 
633  /* variables for conversion from older inttemp formats */
634  int b, bit_number, last_cp_bit_number, new_b, new_i, new_w;
635  CLASS_ID class_id, max_class_id;
636  std::vector<CLASS_ID> ClassIdFor(MAX_NUM_CLASSES);
637  std::vector<CLASS_PRUNER_STRUCT *> TempClassPruner(MAX_NUM_CLASS_PRUNERS);
638  uint32_t SetBitsForMask = // word with NUM_BITS_PER_CLASS
639  (1 << NUM_BITS_PER_CLASS) - 1; // set starting at bit 0
640  uint32_t Mask, NewMask, ClassBits;
641  unsigned MaxNumConfigs = MAX_NUM_CONFIGS;
642  unsigned WerdsPerConfigVec = WERDS_PER_CONFIG_VEC;
643 
644  /* first read the high level template struct */
645  Templates = new INT_TEMPLATES_STRUCT;
646  // Read Templates in parts for 64 bit compatibility.
647  uint32_t unicharset_size;
648  if (fp->FReadEndian(&unicharset_size, sizeof(unicharset_size), 1) != 1) {
649  tprintf("Bad read of inttemp!\n");
650  }
651  int32_t version_id = 0;
652  if (fp->FReadEndian(&version_id, sizeof(version_id), 1) != 1 ||
653  fp->FReadEndian(&Templates->NumClassPruners, sizeof(Templates->NumClassPruners), 1) != 1) {
654  tprintf("Bad read of inttemp!\n");
655  }
656  if (version_id < 0) {
657  // This file has a version id!
658  version_id = -version_id;
659  if (fp->FReadEndian(&Templates->NumClasses, sizeof(Templates->NumClasses), 1) != 1) {
660  tprintf("Bad read of inttemp!\n");
661  }
662  } else {
663  Templates->NumClasses = version_id;
664  }
665 
666  if (version_id < 3) {
667  MaxNumConfigs = OLD_MAX_NUM_CONFIGS;
668  WerdsPerConfigVec = OLD_WERDS_PER_CONFIG_VEC;
669  }
670 
671  if (version_id < 2) {
672  std::vector<int16_t> IndexFor(MAX_NUM_CLASSES);
673  if (fp->FReadEndian(&IndexFor[0], sizeof(IndexFor[0]), unicharset_size) != unicharset_size) {
674  tprintf("Bad read of inttemp!\n");
675  }
676  if (fp->FReadEndian(&ClassIdFor[0], sizeof(ClassIdFor[0]), Templates->NumClasses) !=
677  Templates->NumClasses) {
678  tprintf("Bad read of inttemp!\n");
679  }
680  }
681 
682  /* then read in the class pruners */
683  const unsigned kNumBuckets = NUM_CP_BUCKETS * NUM_CP_BUCKETS * NUM_CP_BUCKETS * WERDS_PER_CP_VECTOR;
684  for (unsigned i = 0; i < Templates->NumClassPruners; i++) {
685  Pruner = new CLASS_PRUNER_STRUCT;
686  if (fp->FReadEndian(Pruner, sizeof(Pruner->p[0][0][0][0]), kNumBuckets) != kNumBuckets) {
687  tprintf("Bad read of inttemp!\n");
688  }
689  if (version_id < 2) {
690  TempClassPruner[i] = Pruner;
691  } else {
692  Templates->ClassPruners[i] = Pruner;
693  }
694  }
695 
696  /* fix class pruners if they came from an old version of inttemp */
697  if (version_id < 2) {
698  // Allocate enough class pruners to cover all the class ids.
699  max_class_id = 0;
700  for (unsigned i = 0; i < Templates->NumClasses; i++) {
701  if (ClassIdFor[i] > max_class_id) {
702  max_class_id = ClassIdFor[i];
703  }
704  }
705  for (int i = 0; i <= CPrunerIdFor(max_class_id); i++) {
706  Templates->ClassPruners[i] = new CLASS_PRUNER_STRUCT;
707  memset(Templates->ClassPruners[i], 0, sizeof(CLASS_PRUNER_STRUCT));
708  }
709  // Convert class pruners from the old format (indexed by class index)
710  // to the new format (indexed by class id).
711  last_cp_bit_number = NUM_BITS_PER_CLASS * Templates->NumClasses - 1;
712  for (unsigned i = 0; i < Templates->NumClassPruners; i++) {
713  for (x = 0; x < NUM_CP_BUCKETS; x++) {
714  for (y = 0; y < NUM_CP_BUCKETS; y++) {
715  for (z = 0; z < NUM_CP_BUCKETS; z++) {
716  for (w = 0; w < WERDS_PER_CP_VECTOR; w++) {
717  if (TempClassPruner[i]->p[x][y][z][w] == 0) {
718  continue;
719  }
720  for (b = 0; b < BITS_PER_WERD; b += NUM_BITS_PER_CLASS) {
721  bit_number = i * BITS_PER_CP_VECTOR + w * BITS_PER_WERD + b;
722  if (bit_number > last_cp_bit_number) {
723  break; // the rest of the bits in this word are not used
724  }
725  class_id = ClassIdFor[bit_number / NUM_BITS_PER_CLASS];
726  // Single out NUM_BITS_PER_CLASS bits relating to class_id.
727  Mask = SetBitsForMask << b;
728  ClassBits = TempClassPruner[i]->p[x][y][z][w] & Mask;
729  // Move these bits to the new position in which they should
730  // appear (indexed corresponding to the class_id).
731  new_i = CPrunerIdFor(class_id);
732  new_w = CPrunerWordIndexFor(class_id);
733  new_b = CPrunerBitIndexFor(class_id) * NUM_BITS_PER_CLASS;
734  if (new_b > b) {
735  ClassBits <<= (new_b - b);
736  } else {
737  ClassBits >>= (b - new_b);
738  }
739  // Copy bits relating to class_id to the correct position
740  // in Templates->ClassPruner.
741  NewMask = SetBitsForMask << new_b;
742  Templates->ClassPruners[new_i]->p[x][y][z][new_w] &= ~NewMask;
743  Templates->ClassPruners[new_i]->p[x][y][z][new_w] |= ClassBits;
744  }
745  }
746  }
747  }
748  }
749  }
750  for (unsigned i = 0; i < Templates->NumClassPruners; i++) {
751  delete TempClassPruner[i];
752  }
753  }
754 
755  /* then read in each class */
756  for (unsigned i = 0; i < Templates->NumClasses; i++) {
757  /* first read in the high level struct for the class */
758  Class = new INT_CLASS_STRUCT;
759  if (fp->FReadEndian(&Class->NumProtos, sizeof(Class->NumProtos), 1) != 1 ||
760  fp->FRead(&Class->NumProtoSets, sizeof(Class->NumProtoSets), 1) != 1 ||
761  fp->FRead(&Class->NumConfigs, sizeof(Class->NumConfigs), 1) != 1) {
762  tprintf("Bad read of inttemp!\n");
763  }
764  if (version_id == 0) {
765  // Only version 0 writes 5 pointless pointers to the file.
766  for (j = 0; j < 5; ++j) {
767  int32_t junk;
768  if (fp->FRead(&junk, sizeof(junk), 1) != 1) {
769  tprintf("Bad read of inttemp!\n");
770  }
771  }
772  }
773  unsigned num_configs = version_id < 4 ? MaxNumConfigs : Class->NumConfigs;
774  ASSERT_HOST(num_configs <= MaxNumConfigs);
775  if (fp->FReadEndian(Class->ConfigLengths, sizeof(uint16_t), num_configs) != num_configs) {
776  tprintf("Bad read of inttemp!\n");
777  }
778  if (version_id < 2) {
779  ClassForClassId(Templates, ClassIdFor[i]) = Class;
780  } else {
781  ClassForClassId(Templates, i) = Class;
782  }
783 
784  /* then read in the proto lengths */
785  Class->ProtoLengths.clear();
786  if (MaxNumIntProtosIn(Class) > 0) {
787  Class->ProtoLengths.resize(MaxNumIntProtosIn(Class));
788  if (fp->FRead(&Class->ProtoLengths[0], sizeof(uint8_t), MaxNumIntProtosIn(Class)) !=
789  MaxNumIntProtosIn(Class)) {
790  tprintf("Bad read of inttemp!\n");
791  }
792  }
793 
794  /* then read in the proto sets */
795  for (j = 0; j < Class->NumProtoSets; j++) {
796  auto ProtoSet = new PROTO_SET_STRUCT;
797  unsigned num_buckets = NUM_PP_PARAMS * NUM_PP_BUCKETS * WERDS_PER_PP_VECTOR;
798  if (fp->FReadEndian(&ProtoSet->ProtoPruner, sizeof(ProtoSet->ProtoPruner[0][0][0]),
799  num_buckets) != num_buckets) {
800  tprintf("Bad read of inttemp!\n");
801  }
802  for (x = 0; x < PROTOS_PER_PROTO_SET; x++) {
803  if (fp->FRead(&ProtoSet->Protos[x].A, sizeof(ProtoSet->Protos[x].A), 1) != 1 ||
804  fp->FRead(&ProtoSet->Protos[x].B, sizeof(ProtoSet->Protos[x].B), 1) != 1 ||
805  fp->FRead(&ProtoSet->Protos[x].C, sizeof(ProtoSet->Protos[x].C), 1) != 1 ||
806  fp->FRead(&ProtoSet->Protos[x].Angle, sizeof(ProtoSet->Protos[x].Angle), 1) != 1) {
807  tprintf("Bad read of inttemp!\n");
808  }
809  if (fp->FReadEndian(&ProtoSet->Protos[x].Configs, sizeof(ProtoSet->Protos[x].Configs[0]),
810  WerdsPerConfigVec) != WerdsPerConfigVec) {
811  tprintf("Bad read of inttemp!\n");
812  }
813  }
814  Class->ProtoSets[j] = ProtoSet;
815  }
816  if (version_id < 4) {
817  Class->font_set_id = -1;
818  } else {
819  fp->FReadEndian(&Class->font_set_id, sizeof(Class->font_set_id), 1);
820  }
821  }
822 
823  if (version_id < 2) {
824  /* add an empty nullptr class with class id 0 */
825  assert(UnusedClassIdIn(Templates, 0));
826  ClassForClassId(Templates, 0) = new INT_CLASS_STRUCT(1, 1);
827  ClassForClassId(Templates, 0)->font_set_id = -1;
828  Templates->NumClasses++;
829  /* make sure the classes are contiguous */
830  for (unsigned i = 0; i < MAX_NUM_CLASSES; i++) {
831  if (i < Templates->NumClasses) {
832  if (ClassForClassId(Templates, i) == nullptr) {
833  fprintf(stderr, "Non-contiguous class ids in inttemp\n");
834  exit(1);
835  }
836  } else {
837  if (ClassForClassId(Templates, i) != nullptr) {
838  fprintf(stderr, "Class id %u exceeds NumClassesIn (Templates) %u\n", i,
839  Templates->NumClasses);
840  exit(1);
841  }
842  }
843  }
844  }
845  if (version_id >= 4) {
846  using namespace std::placeholders; // for _1, _2
847  this->fontinfo_table_.read(fp, std::bind(read_info, _1, _2));
848  if (version_id >= 5) {
849  this->fontinfo_table_.read(fp, std::bind(read_spacing_info, _1, _2));
850  }
851  this->fontset_table_.read(fp, [](auto *f, auto *fs) { return f->DeSerialize(*fs); } );
852  }
853 
854  return (Templates);
855 } /* ReadIntTemplates */
#define OLD_MAX_NUM_CONFIGS
Definition: intproto.cpp:95
#define OLD_WERDS_PER_CONFIG_VEC
Definition: intproto.cpp:96
#define BITS_PER_CP_VECTOR
Definition: intproto.h:59
#define MaxNumIntProtosIn(C)
Definition: intproto.h:145
#define NUM_PP_PARAMS
Definition: intproto.h:51
#define WERDS_PER_PP_VECTOR
Definition: intproto.h:62
#define BITS_PER_WERD
Definition: intproto.h:45
#define WERDS_PER_CONFIG_VEC
Definition: intproto.h:65
#define CPrunerWordIndexFor(c)
Definition: intproto.h:160
#define CPrunerIdFor(c)
Definition: intproto.h:158
#define CPrunerBitIndexFor(c)
Definition: intproto.h:161
#define NUM_CP_BUCKETS
Definition: intproto.h:53
#define MAX_NUM_CLASS_PRUNERS
Definition: intproto.h:60
#define WERDS_PER_CP_VECTOR
Definition: intproto.h:61
#define PROTOS_PER_PROTO_SET
Definition: intproto.h:49
#define NUM_PP_BUCKETS
Definition: intproto.h:52
#define NUM_BITS_PER_CLASS
Definition: intproto.h:55
#define MAX_NUM_CLASSES
Definition: matchdefs.h:31
bool read_info(TFile *f, FontInfo *fi)
Definition: fontinfo.cpp:143
bool read_spacing_info(TFile *f, FontInfo *fi)
Definition: fontinfo.cpp:163
bool read(tesseract::TFile *f, std::function< bool(tesseract::TFile *, T *)> cb)

◆ ReadNewCutoffs()

void tesseract::Classify::ReadNewCutoffs ( TFile fp,
uint16_t *  Cutoffs 
)

Read all of the class-id/cutoff pairs from the already-open file fp and insert them into the Cutoffs array. Cutoffs are indexed in the array by class id. Unused entries in the array are set to an arbitrarily high cutoff value (MAX_CUTOFF).

Parameters
fp: file containing cutoff definitions
Cutoffs: array to put cutoffs into

Definition at line 41 of file cutoffs.cpp.

41  {
42  int Cutoff;
43 
44  if (shape_table_ != nullptr) {
45  if (!fp->DeSerialize(shapetable_cutoffs_)) {
46  tprintf("Error during read of shapetable pffmtable!\n");
47  }
48  }
49  for (int i = 0; i < MAX_NUM_CLASSES; i++) {
50  Cutoffs[i] = MAX_CUTOFF;
51  }
52 
53  const int kMaxLineSize = 100;
54  char line[kMaxLineSize];
55  while (fp->FGets(line, kMaxLineSize) != nullptr) {
56  std::string Class;
57  CLASS_ID ClassId;
58  std::istringstream stream(line);
59  stream.imbue(std::locale::classic());
60  stream >> Class >> Cutoff;
61  if (stream.fail()) {
62  break;
63  }
64  if (Class.compare("NULL") == 0) {
65  ClassId = unicharset.unichar_to_id(" ");
66  } else {
67  ClassId = unicharset.unichar_to_id(Class.c_str());
68  }
69  ASSERT_HOST(ClassId >= 0 && ClassId < MAX_NUM_CLASSES);
70  Cutoffs[ClassId] = Cutoff;
71  }
72 }
#define MAX_CUTOFF
Definition: cutoffs.cpp:30

◆ ReadNormProtos()

NORM_PROTOS * tesseract::Classify::ReadNormProtos ( TFile fp)

This routine allocates a new data structure to hold a set of character normalization protos. It then fills in the data structure by reading from the specified File.

Parameters
fp: open text file to read normalization protos from

Globals: none
Returns
Character normalization protos.

Definition at line 173 of file normmatch.cpp.

173  {
174  char unichar[2 * UNICHAR_LEN + 1];
175  UNICHAR_ID unichar_id;
176  LIST Protos;
177  int NumProtos;
178 
179  /* allocate and initialization data structure */
180  auto NormProtos = new NORM_PROTOS(unicharset.size());
181 
182  /* read file header and save in data structure */
185 
186  /* read protos for each class into a separate list */
187  const int kMaxLineSize = 100;
188  char line[kMaxLineSize];
189  while (fp->FGets(line, kMaxLineSize) != nullptr) {
190  std::istringstream stream(line);
191  stream.imbue(std::locale::classic());
192  stream >> unichar >> NumProtos;
193  if (stream.fail()) {
194  continue;
195  }
196  if (unicharset.contains_unichar(unichar)) {
197  unichar_id = unicharset.unichar_to_id(unichar);
198  Protos = NormProtos->Protos[unichar_id];
199  for (int i = 0; i < NumProtos; i++) {
200  Protos = push_last(Protos, ReadPrototype(fp, NormProtos->NumParams));
201  }
202  NormProtos->Protos[unichar_id] = Protos;
203  } else {
204  tprintf("Error: unichar %s in normproto file is not in unichar set.\n", unichar);
205  for (int i = 0; i < NumProtos; i++) {
207  }
208  }
209  }
210  return NormProtos;
211 } /* ReadNormProtos */
#define UNICHAR_LEN
Definition: unichar.h:33
uint16_t ReadSampleSize(TFile *fp)
Definition: clusttool.cpp:114
void FreePrototype(void *arg)
Definition: cluster.cpp:1609
LIST push_last(LIST list, void *item)
Definition: oldlist.cpp:192
PROTOTYPE * ReadPrototype(TFile *fp, uint16_t N)
Definition: clusttool.cpp:168
PARAM_DESC * ReadParamDesc(TFile *fp, uint16_t N)
Definition: clusttool.cpp:134

◆ RefreshDebugWindow()

void tesseract::Classify::RefreshDebugWindow ( ScrollView **  win,
const char *  msg,
int  y_offset,
const TBOX wbox 
)

Definition at line 240 of file adaptmatch.cpp.

241  {
242  const int kSampleSpaceWidth = 500;
243  if (*win == nullptr) {
244  *win = new ScrollView(msg, 100, y_offset, kSampleSpaceWidth * 2, 200, kSampleSpaceWidth * 2,
245  200, true);
246  }
247  (*win)->Clear();
248  (*win)->Pen(64, 64, 64);
249  (*win)->Line(-kSampleSpaceWidth, kBlnBaselineOffset, kSampleSpaceWidth, kBlnBaselineOffset);
250  (*win)->Line(-kSampleSpaceWidth, kBlnXHeight + kBlnBaselineOffset, kSampleSpaceWidth,
252  (*win)->ZoomToRectangle(wbox.left(), wbox.top(), wbox.right(), wbox.bottom());
253 }

◆ RemoveBadMatches()

void tesseract::Classify::RemoveBadMatches ( ADAPT_RESULTS Results)

This routine steps through each match in Results and removes it from the match list if its rating is worse than the best rating by more than a pad, i.e. lower than best_rating - matcher_bad_match_pad. In other words, all good matches get moved to the front of the match array, which is then truncated to just those matches.

Parameters
Results: contains matches to be filtered

Globals:

  • matcher_bad_match_pad defines a "bad match"

Definition at line 1942 of file adaptmatch.cpp.

1942  {
1943  unsigned Next, NextGood;
1944  float BadMatchThreshold;
1945  static const char *romans = "i v x I V X";
1946  BadMatchThreshold = Results->best_rating - matcher_bad_match_pad;
1947 
1948  if (classify_bln_numeric_mode) {
1949  UNICHAR_ID unichar_id_one =
1951  UNICHAR_ID unichar_id_zero =
1953  float scored_one = ScoredUnichar(unichar_id_one, *Results);
1954  float scored_zero = ScoredUnichar(unichar_id_zero, *Results);
1955 
1956  for (Next = NextGood = 0; Next < Results->match.size(); Next++) {
1957  const UnicharRating &match = Results->match[Next];
1958  if (match.rating >= BadMatchThreshold) {
1959  if (!unicharset.get_isalpha(match.unichar_id) ||
1960  strstr(romans, unicharset.id_to_unichar(match.unichar_id)) != nullptr) {
1961  } else if (unicharset.eq(match.unichar_id, "l") && scored_one < BadMatchThreshold) {
1962  Results->match[Next].unichar_id = unichar_id_one;
1963  } else if (unicharset.eq(match.unichar_id, "O") && scored_zero < BadMatchThreshold) {
1964  Results->match[Next].unichar_id = unichar_id_zero;
1965  } else {
1966  Results->match[Next].unichar_id = INVALID_UNICHAR_ID; // Don't copy.
1967  }
1968  if (Results->match[Next].unichar_id != INVALID_UNICHAR_ID) {
1969  if (NextGood == Next) {
1970  ++NextGood;
1971  } else {
1972  Results->match[NextGood++] = Results->match[Next];
1973  }
1974  }
1975  }
1976  }
1977  } else {
1978  for (Next = NextGood = 0; Next < Results->match.size(); Next++) {
1979  if (Results->match[Next].rating >= BadMatchThreshold) {
1980  if (NextGood == Next) {
1981  ++NextGood;
1982  } else {
1983  Results->match[NextGood++] = Results->match[Next];
1984  }
1985  }
1986  }
1987  }
1988  Results->match.resize(NextGood);
1989 } /* RemoveBadMatches */
bool eq(UNICHAR_ID unichar_id, const char *const unichar_repr) const
Definition: unicharset.cpp:713

◆ RemoveExtraPuncs()

void tesseract::Classify::RemoveExtraPuncs ( ADAPT_RESULTS Results)

This routine discards extra digits or punctuation from the results. We keep only the top 2 punctuation answers and the top 1 digit answer if present.

Parameters
Results: contains matches to be filtered

Definition at line 1999 of file adaptmatch.cpp.

1999  {
2000  unsigned Next, NextGood;
2001  int punc_count; /*no of garbage characters */
2002  int digit_count;
2003  /*garbage characters */
2004  static char punc_chars[] = ". , ; : / ` ~ ' - = \\ | \" ! _ ^";
2005  static char digit_chars[] = "0 1 2 3 4 5 6 7 8 9";
2006 
2007  punc_count = 0;
2008  digit_count = 0;
2009  for (Next = NextGood = 0; Next < Results->match.size(); Next++) {
2010  const UnicharRating &match = Results->match[Next];
2011  bool keep = true;
2012  if (strstr(punc_chars, unicharset.id_to_unichar(match.unichar_id)) != nullptr) {
2013  if (punc_count >= 2) {
2014  keep = false;
2015  }
2016  punc_count++;
2017  } else {
2018  if (strstr(digit_chars, unicharset.id_to_unichar(match.unichar_id)) != nullptr) {
2019  if (digit_count >= 1) {
2020  keep = false;
2021  }
2022  digit_count++;
2023  }
2024  }
2025  if (keep) {
2026  if (NextGood == Next) {
2027  ++NextGood;
2028  } else {
2029  Results->match[NextGood++] = match;
2030  }
2031  }
2032  }
2033  Results->match.resize(NextGood);
2034 } /* RemoveExtraPuncs */

◆ ResetAdaptiveClassifierInternal()

void tesseract::Classify::ResetAdaptiveClassifierInternal ( )

Definition at line 596 of file adaptmatch.cpp.

596  {
597  if (classify_learning_debug_level > 0) {
598  tprintf("Resetting adaptive classifier (NumAdaptationsFailed=%d)\n", NumAdaptationsFailed);
599  }
600  delete AdaptedTemplates;
601  AdaptedTemplates = new ADAPT_TEMPLATES_STRUCT(unicharset);
602  delete BackupAdaptedTemplates;
603  BackupAdaptedTemplates = nullptr;
604  NumAdaptationsFailed = 0;
605 }

◆ SetAdaptiveThreshold()

void tesseract::Classify::SetAdaptiveThreshold ( float  Threshold)

This routine resets the internal thresholds inside the integer matcher to correspond to the specified threshold.

Parameters
Threshold: threshold for creating new templates

Globals:

  • matcher_good_threshold default good match rating

Definition at line 2047 of file adaptmatch.cpp.

2047  {
2048  Threshold = (Threshold == matcher_good_threshold) ? 0.9f : (1 - Threshold);
2049  classify_adapt_proto_threshold.set_value(ClipToRange<int>(255 * Threshold, 0, 255));
2050  classify_adapt_feature_threshold.set_value(ClipToRange<int>(255 * Threshold, 0, 255));
2051 } /* SetAdaptiveThreshold */

◆ SetStaticClassifier()

void tesseract::Classify::SetStaticClassifier ( ShapeClassifier static_classifier)

Definition at line 162 of file classify.cpp.

162  {
163  delete static_classifier_;
164  static_classifier_ = static_classifier;
165 }

◆ SettupPass1()

void tesseract::Classify::SettupPass1 ( )

This routine prepares the adaptive matcher for the start of the first pass. Learning is enabled (unless it is disabled for the whole program).

Note
This is somewhat redundant: it simply says that if learning is enabled then it will remain enabled on the first pass, and if it is disabled then it will remain disabled. It is only put here to make it very clear that learning is controlled directly by the global setting of EnableLearning.

Globals:

Definition at line 647 of file adaptmatch.cpp.

647  {
648  EnableLearning = classify_enable_learning;
649 
651 
652 } /* SettupPass1 */
void SettupStopperPass1()
Sets up stopper variables in preparation for the first pass.
Definition: stopper.cpp:362

◆ SettupPass2()

void tesseract::Classify::SettupPass2 ( )

This routine prepares the adaptive matcher for the start of the second pass. Further learning is disabled.

Globals:

Definition at line 663 of file adaptmatch.cpp.

663  {
664  EnableLearning = false;
666 
667 } /* SettupPass2 */
void SettupStopperPass2()
Sets up stopper variables in preparation for the second pass.
Definition: stopper.cpp:366

◆ SetupBLCNDenorms()

void tesseract::Classify::SetupBLCNDenorms ( const TBLOB blob,
bool  nonlinear_norm,
DENORM bl_denorm,
DENORM cn_denorm,
INT_FX_RESULT_STRUCT fx_info 
)
static

Definition at line 129 of file intfx.cpp.

130  {
131  // Compute 1st and 2nd moments of the original outline.
132  FCOORD center, second_moments;
133  int length = blob.ComputeMoments(&center, &second_moments);
134  if (fx_info != nullptr) {
135  fx_info->Length = length;
136  fx_info->Rx = IntCastRounded(second_moments.y());
137  fx_info->Ry = IntCastRounded(second_moments.x());
138 
139  fx_info->Xmean = IntCastRounded(center.x());
140  fx_info->Ymean = IntCastRounded(center.y());
141  }
142  // Setup the denorm for Baseline normalization.
143  bl_denorm->SetupNormalization(nullptr, nullptr, &blob.denorm(), center.x(), 128.0f, 1.0f, 1.0f,
144  128.0f, 128.0f);
145  // Setup the denorm for character normalization.
146  if (nonlinear_norm) {
147  std::vector<std::vector<int>> x_coords;
148  std::vector<std::vector<int>> y_coords;
149  TBOX box;
150  blob.GetPreciseBoundingBox(&box);
151  box.pad(1, 1);
152  blob.GetEdgeCoords(box, x_coords, y_coords);
153  cn_denorm->SetupNonLinear(&blob.denorm(), box, UINT8_MAX, UINT8_MAX, 0.0f, 0.0f, x_coords,
154  y_coords);
155  } else {
156  cn_denorm->SetupNormalization(nullptr, nullptr, &blob.denorm(), center.x(), center.y(),
157  51.2f / second_moments.x(), 51.2f / second_moments.y(), 128.0f,
158  128.0f);
159  }
160 }

◆ shape_table()

const ShapeTable* tesseract::Classify::shape_table ( ) const
inline

Definition at line 102 of file classify.h.

102  {
103  return shape_table_;
104  }

◆ ShapeIDToClassID()

int tesseract::Classify::ShapeIDToClassID ( int  shape_id) const

Definition at line 2121 of file adaptmatch.cpp.

2121  {
2122  for (unsigned id = 0; id < PreTrainedTemplates->NumClasses; ++id) {
2123  int font_set_id = PreTrainedTemplates->Class[id]->font_set_id;
2124  ASSERT_HOST(font_set_id >= 0);
2125  const FontSet &fs = fontset_table_.at(font_set_id);
2126  for (auto f : fs) {
2127  if (f == shape_id) {
2128  return id;
2129  }
2130  }
2131  }
2132  tprintf("Shape %d not found\n", shape_id);
2133  return -1;
2134 }

◆ ShowBestMatchFor()

void tesseract::Classify::ShowBestMatchFor ( int  shape_id,
const INT_FEATURE_STRUCT features,
int  num_features 
)

This routine displays debug information for the best config of the given shape_id for the given set of features.

Parameters
shape_id: classifier id to work with
features: features of the unknown character
num_features: Number of features in the features array.

Definition at line 2065 of file adaptmatch.cpp.

2066  {
2067  uint32_t config_mask;
2068  if (UnusedClassIdIn(PreTrainedTemplates, shape_id)) {
2069  tprintf("No built-in templates for class/shape %d\n", shape_id);
2070  return;
2071  }
2072  if (num_features <= 0) {
2073  tprintf("Illegal blob (char norm features)!\n");
2074  return;
2075  }
2076  UnicharRating cn_result;
2077  classify_norm_method.set_value(character);
2079  features, &cn_result, classify_adapt_feature_threshold, NO_DEBUG,
2080  matcher_debug_separate_windows);
2081  tprintf("\n");
2082  config_mask = 1 << cn_result.config;
2083 
2084  tprintf("Static Shape ID: %d\n", shape_id);
2085  ShowMatchDisplay();
2086  im_.Match(ClassForClassId(PreTrainedTemplates, shape_id), AllProtosOn, &config_mask, num_features,
2087  features, &cn_result, classify_adapt_feature_threshold, matcher_debug_flags,
2088  matcher_debug_separate_windows);
2090 } /* ShowBestMatchFor */

◆ ShowMatchDisplay()

void tesseract::Classify::ShowMatchDisplay ( )

This routine sends the shapes in the global display lists to the match debugger window.

Globals:

  • FeatureShapes display list containing feature matches
  • ProtoShapes display list containing proto matches

Definition at line 866 of file intproto.cpp.

866  {
868  if (ProtoDisplayWindow) {
869  ProtoDisplayWindow->Clear();
870  }
871  if (FeatureDisplayWindow) {
872  FeatureDisplayWindow->Clear();
873  }
874  ClearFeatureSpaceWindow(static_cast<NORM_METHOD>(static_cast<int>(classify_norm_method)),
875  IntMatchWindow);
877  if (ProtoDisplayWindow) {
878  ProtoDisplayWindow->ZoomToRectangle(INT_MIN_X, INT_MIN_Y, INT_MAX_X, INT_MAX_Y);
879  }
880  if (FeatureDisplayWindow) {
881  FeatureDisplayWindow->ZoomToRectangle(INT_MIN_X, INT_MIN_Y, INT_MAX_X, INT_MAX_Y);
882  }
883 } /* ShowMatchDisplay */
#define INT_MAX_Y
Definition: intproto.cpp:64
#define INT_MIN_Y
Definition: intproto.cpp:62
#define INT_MIN_X
Definition: intproto.cpp:61
#define INT_MAX_X
Definition: intproto.cpp:63
void InitIntMatchWindowIfReqd()
Definition: intproto.cpp:1587
void ClearFeatureSpaceWindow(NORM_METHOD norm_method, ScrollView *window)
Definition: intproto.cpp:887
void void ZoomToRectangle(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:759

◆ StartBackupAdaptiveClassifier()

void tesseract::Classify::StartBackupAdaptiveClassifier ( )

Definition at line 625 of file adaptmatch.cpp.

625  {
626  delete BackupAdaptedTemplates;
627  BackupAdaptedTemplates = new ADAPT_TEMPLATES_STRUCT(unicharset);
628 }

◆ STRING_VAR_H()

tesseract::Classify::STRING_VAR_H ( classify_learn_debug_str  )

◆ SwitchAdaptiveClassifier()

void tesseract::Classify::SwitchAdaptiveClassifier ( )

Definition at line 609 of file adaptmatch.cpp.

609  {
610  if (BackupAdaptedTemplates == nullptr) {
612  return;
613  }
614  if (classify_learning_debug_level > 0) {
615  tprintf("Switch to backup adaptive classifier (NumAdaptationsFailed=%d)\n",
616  NumAdaptationsFailed);
617  }
618  delete AdaptedTemplates;
620  BackupAdaptedTemplates = nullptr;
621  NumAdaptationsFailed = 0;
622 }
void ResetAdaptiveClassifierInternal()
Definition: adaptmatch.cpp:596

◆ TempConfigReliable()

bool tesseract::Classify::TempConfigReliable ( CLASS_ID  class_id,
const TEMP_CONFIG_STRUCT config 
)

Definition at line 2138 of file adaptmatch.cpp.

2138  {
2139  if (classify_learning_debug_level >= 1) {
2140  tprintf("NumTimesSeen for config of %s is %d\n",
2141  getDict().getUnicharset().debug_str(class_id).c_str(), config->NumTimesSeen);
2142  }
2143  if (config->NumTimesSeen >= matcher_sufficient_examples_for_prototyping) {
2144  return true;
2145  } else if (config->NumTimesSeen < matcher_min_examples_for_prototyping) {
2146  return false;
2147  } else if (use_ambigs_for_adaption) {
2148  // Go through the ambigs vector and see whether we have already seen
2149  // enough times all the characters represented by the ambigs vector.
2150  const UnicharIdVector *ambigs = getDict().getUnicharAmbigs().AmbigsForAdaption(class_id);
2151  int ambigs_size = (ambigs == nullptr) ? 0 : ambigs->size();
2152  for (int ambig = 0; ambig < ambigs_size; ++ambig) {
2153  ADAPT_CLASS_STRUCT *ambig_class = AdaptedTemplates->Class[(*ambigs)[ambig]];
2154  assert(ambig_class != nullptr);
2155  if (ambig_class->NumPermConfigs == 0 &&
2156  ambig_class->MaxNumTimesSeen < matcher_min_examples_for_prototyping) {
2157  if (classify_learning_debug_level >= 1) {
2158  tprintf(
2159  "Ambig %s has not been seen enough times,"
2160  " not making config for %s permanent\n",
2161  getDict().getUnicharset().debug_str((*ambigs)[ambig]).c_str(),
2162  getDict().getUnicharset().debug_str(class_id).c_str());
2163  }
2164  return false;
2165  }
2166  }
2167  }
2168  return true;
2169 }
std::vector< UNICHAR_ID > UnicharIdVector
Definition: ambigs.h:38
const UnicharIdVector * AmbigsForAdaption(UNICHAR_ID unichar_id) const
Definition: ambigs.h:198
const UnicharAmbigs & getUnicharAmbigs() const
Definition: dict.h:111

◆ UpdateAmbigsGroup()

void tesseract::Classify::UpdateAmbigsGroup ( CLASS_ID  class_id,
TBLOB *  Blob 
)

Definition at line 2171 of file adaptmatch.cpp.

2171  {
2172  const UnicharIdVector *ambigs = getDict().getUnicharAmbigs().ReverseAmbigsForAdaption(class_id);
2173  int ambigs_size = (ambigs == nullptr) ? 0 : ambigs->size();
2174  if (classify_learning_debug_level >= 1) {
2175  tprintf("Running UpdateAmbigsGroup for %s class_id=%d\n",
2176  getDict().getUnicharset().debug_str(class_id).c_str(), class_id);
2177  }
2178  for (int ambig = 0; ambig < ambigs_size; ++ambig) {
2179  CLASS_ID ambig_class_id = (*ambigs)[ambig];
2180  const ADAPT_CLASS_STRUCT *ambigs_class = AdaptedTemplates->Class[ambig_class_id];
2181  for (int cfg = 0; cfg < MAX_NUM_CONFIGS; ++cfg) {
2182  if (ConfigIsPermanent(ambigs_class, cfg)) {
2183  continue;
2184  }
2185  const TEMP_CONFIG_STRUCT *config = TempConfigFor(AdaptedTemplates->Class[ambig_class_id], cfg);
2186  if (config != nullptr && TempConfigReliable(ambig_class_id, config)) {
2187  if (classify_learning_debug_level >= 1) {
2188  tprintf("Making config %d of %s permanent\n", cfg,
2189  getDict().getUnicharset().debug_str(ambig_class_id).c_str());
2190  }
2191  MakePermanent(AdaptedTemplates, ambig_class_id, cfg, Blob);
2192  }
2193  }
2194  }
2195 }
const UnicharIdVector * ReverseAmbigsForAdaption(UNICHAR_ID unichar_id) const
Definition: ambigs.h:208

◆ WriteAdaptedTemplates()

void tesseract::Classify::WriteAdaptedTemplates ( FILE *  File,
ADAPT_TEMPLATES_STRUCT *  Templates 
)

This routine saves Templates to File in a binary format.

Parameters
File: open text file to write Templates to
Templates: set of adapted templates to write to File
Note
Globals: none

Definition at line 345 of file adaptive.cpp.

345  {
346  /* first write the high level adaptive template struct */
347  fwrite(Templates, sizeof(ADAPT_TEMPLATES_STRUCT), 1, File);
348 
349  /* then write out the basic integer templates */
350  WriteIntTemplates(File, Templates->Templates, unicharset);
351 
352  /* then write out the adaptive info for each class */
353  for (unsigned i = 0; i < (Templates->Templates)->NumClasses; i++) {
354  WriteAdaptedClass(File, Templates->Class[i], Templates->Templates->Class[i]->NumConfigs);
355  }
356 } /* WriteAdaptedTemplates */
void WriteAdaptedClass(FILE *File, ADAPT_CLASS_STRUCT *Class, int NumConfigs)
Definition: adaptive.cpp:307
void WriteIntTemplates(FILE *File, INT_TEMPLATES_STRUCT *Templates, const UNICHARSET &target_unicharset)
Definition: intproto.cpp:917

◆ WriteIntTemplates()

void tesseract::Classify::WriteIntTemplates ( FILE *  File,
INT_TEMPLATES_STRUCT *  Templates,
const UNICHARSET &  target_unicharset 
)

This routine writes Templates to File. The format is an efficient binary format. File must already be open for writing.

Parameters
File: open file to write templates to
Templates: templates to save into File
target_unicharset: the UNICHARSET to use

Definition at line 917 of file intproto.cpp.

918  {
919  INT_CLASS_STRUCT *Class;
920  auto unicharset_size = target_unicharset.size();
921  int version_id = -5; // When negated by the reader -1 becomes +1 etc.
922 
923  if (Templates->NumClasses != unicharset_size) {
924  tprintf(
925  "Warning: executing WriteIntTemplates() with %d classes in"
926  " Templates, while target_unicharset size is %zu\n",
927  Templates->NumClasses, unicharset_size);
928  }
929 
930  /* first write the high level template struct */
931  fwrite(&unicharset_size, sizeof(unicharset_size), 1, File);
932  fwrite(&version_id, sizeof(version_id), 1, File);
933  fwrite(&Templates->NumClassPruners, sizeof(Templates->NumClassPruners), 1, File);
934  fwrite(&Templates->NumClasses, sizeof(Templates->NumClasses), 1, File);
935 
936  /* then write out the class pruners */
937  for (unsigned i = 0; i < Templates->NumClassPruners; i++) {
938  fwrite(Templates->ClassPruners[i], sizeof(CLASS_PRUNER_STRUCT), 1, File);
939  }
940 
941  /* then write out each class */
942  for (unsigned i = 0; i < Templates->NumClasses; i++) {
943  Class = Templates->Class[i];
944 
945  /* first write out the high level struct for the class */
946  fwrite(&Class->NumProtos, sizeof(Class->NumProtos), 1, File);
947  fwrite(&Class->NumProtoSets, sizeof(Class->NumProtoSets), 1, File);
948  ASSERT_HOST(Class->NumConfigs == this->fontset_table_.at(Class->font_set_id).size());
949  fwrite(&Class->NumConfigs, sizeof(Class->NumConfigs), 1, File);
950  for (int j = 0; j < Class->NumConfigs; ++j) {
951  fwrite(&Class->ConfigLengths[j], sizeof(uint16_t), 1, File);
952  }
953 
954  /* then write out the proto lengths */
955  if (MaxNumIntProtosIn(Class) > 0) {
956  fwrite(&Class->ProtoLengths[0], sizeof(uint8_t), MaxNumIntProtosIn(Class), File);
957  }
958 
959  /* then write out the proto sets */
960  for (int j = 0; j < Class->NumProtoSets; j++) {
961  fwrite(Class->ProtoSets[j], sizeof(PROTO_SET_STRUCT), 1, File);
962  }
963 
964  /* then write the fonts info */
965  fwrite(&Class->font_set_id, sizeof(int), 1, File);
966  }
967 
968  /* Write the fonts info tables */
969  using namespace std::placeholders; // for _1, _2
970  this->fontinfo_table_.write(File, std::bind(write_info, _1, _2));
971  this->fontinfo_table_.write(File, std::bind(write_spacing_info, _1, _2));
972  this->fontset_table_.write(File, std::bind(write_set, _1, _2));
973 } /* WriteIntTemplates */
bool write_set(FILE *f, const FontSet &fs)
Definition: fontinfo.cpp:222
bool write_info(FILE *f, const FontInfo &fi)
Definition: fontinfo.cpp:157
bool write_spacing_info(FILE *f, const FontInfo &fi)
Definition: fontinfo.cpp:194
bool write(FILE *f, std::function< bool(FILE *, const T &)> cb) const

◆ WriteTRFile()

bool tesseract::Classify::WriteTRFile ( const char *  filename)

Definition at line 60 of file blobclass.cpp.

60  {
61  bool result = false;
62  std::string tr_filename = filename;
63  tr_filename += ".tr";
64  FILE *fp = fopen(tr_filename.c_str(), "wb");
65  if (fp) {
66  result = tesseract::Serialize(fp, &tr_file_data_[0], tr_file_data_.length());
67  fclose(fp);
68  }
69  tr_file_data_.resize(0);
70  return result;
71 }
bool Serialize(FILE *fp, const std::vector< T > &data)
Definition: helpers.h:251

Member Data Documentation

◆ AdaptedTemplates

ADAPT_TEMPLATES_STRUCT* tesseract::Classify::AdaptedTemplates = nullptr

Definition at line 421 of file classify.h.

◆ AllConfigsOff

BIT_VECTOR tesseract::Classify::AllConfigsOff = nullptr

Definition at line 430 of file classify.h.

◆ AllConfigsOn

BIT_VECTOR tesseract::Classify::AllConfigsOn = nullptr

Definition at line 429 of file classify.h.

◆ AllProtosOn

BIT_VECTOR tesseract::Classify::AllProtosOn = nullptr

Definition at line 428 of file classify.h.

◆ BackupAdaptedTemplates

ADAPT_TEMPLATES_STRUCT* tesseract::Classify::BackupAdaptedTemplates = nullptr

Definition at line 425 of file classify.h.

◆ EnableLearning

bool tesseract::Classify::EnableLearning = true

Definition at line 485 of file classify.h.

◆ feature_defs_

FEATURE_DEFS_STRUCT tesseract::Classify::feature_defs_
protected

Definition at line 447 of file classify.h.

◆ fontinfo_table_

UnicityTable<FontInfo> tesseract::Classify::fontinfo_table_

Definition at line 435 of file classify.h.

◆ fontset_table_

UnicityTable<FontSet> tesseract::Classify::fontset_table_

Definition at line 443 of file classify.h.

◆ im_

IntegerMatcher tesseract::Classify::im_
protected

Definition at line 446 of file classify.h.

◆ NormProtos

NORM_PROTOS* tesseract::Classify::NormProtos = nullptr

Definition at line 433 of file classify.h.

◆ PreTrainedTemplates

INT_TEMPLATES_STRUCT* tesseract::Classify::PreTrainedTemplates = nullptr

Definition at line 420 of file classify.h.

◆ shape_table_

ShapeTable* tesseract::Classify::shape_table_ = nullptr
protected

Definition at line 452 of file classify.h.

◆ TempProtoMask

BIT_VECTOR tesseract::Classify::TempProtoMask = nullptr

Definition at line 431 of file classify.h.


The documentation for this class was generated from the following files: