tesseract  5.0.0
tesseract::Wordrec Class Reference

#include <wordrec.h>

Inheritance diagram for tesseract::Wordrec:
tesseract::Classify tesseract::CCStruct tesseract::CCUtil tesseract::Tesseract

Public Member Functions

 BOOL_VAR_H (merge_fragments_in_matrix)
 
 BOOL_VAR_H (wordrec_enable_assoc)
 
 BOOL_VAR_H (force_word_assoc)
 
 INT_VAR_H (repair_unchopped_blobs)
 
 double_VAR_H (tessedit_certainty_threshold)
 
 INT_VAR_H (chop_debug)
 
 BOOL_VAR_H (chop_enable)
 
 BOOL_VAR_H (chop_vertical_creep)
 
 INT_VAR_H (chop_split_length)
 
 INT_VAR_H (chop_same_distance)
 
 INT_VAR_H (chop_min_outline_points)
 
 INT_VAR_H (chop_seam_pile_size)
 
 BOOL_VAR_H (chop_new_seam_pile)
 
 INT_VAR_H (chop_inside_angle)
 
 INT_VAR_H (chop_min_outline_area)
 
 double_VAR_H (chop_split_dist_knob)
 
 double_VAR_H (chop_overlap_knob)
 
 double_VAR_H (chop_center_knob)
 
 INT_VAR_H (chop_centered_maxwidth)
 
 double_VAR_H (chop_sharpness_knob)
 
 double_VAR_H (chop_width_change_knob)
 
 double_VAR_H (chop_ok_split)
 
 double_VAR_H (chop_good_split)
 
 INT_VAR_H (chop_x_y_weight)
 
 BOOL_VAR_H (assume_fixed_pitch_char_segment)
 
 INT_VAR_H (wordrec_debug_level)
 
 INT_VAR_H (wordrec_max_join_chunks)
 
 BOOL_VAR_H (wordrec_skip_no_truth_words)
 
 BOOL_VAR_H (wordrec_debug_blamer)
 
 BOOL_VAR_H (wordrec_run_blamer)
 
 INT_VAR_H (segsearch_debug_level)
 
 INT_VAR_H (segsearch_max_pain_points)
 
 INT_VAR_H (segsearch_max_futile_classifications)
 
 double_VAR_H (segsearch_max_char_wh_ratio)
 
 BOOL_VAR_H (save_alt_choices)
 
 Wordrec ()
 
 ~Wordrec () override=default
 
void SaveAltChoices (const LIST &best_choices, WERD_RES *word)
 
void FillLattice (const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
 
void CallFillLattice (const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
 
void SegSearch (WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
 
void InitialSegSearch (WERD_RES *word_res, LMPainPoints *pain_points, std::vector< SegSearchPending > *pending, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
 
void add_seam_to_queue (float new_priority, SEAM *new_seam, SeamQueue *seams)
 
void choose_best_seam (SeamQueue *seam_queue, const SPLIT *split, PRIORITY priority, SEAM **seam_result, TBLOB *blob, SeamPile *seam_pile)
 
void combine_seam (const SeamPile &seam_pile, const SEAM *seam, SeamQueue *seam_queue)
 
SEAM * pick_good_seam (TBLOB *blob)
 
void try_point_pairs (EDGEPT *points[MAX_NUM_POINTS], int16_t num_points, SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, TBLOB *blob)
 
void try_vertical_splits (EDGEPT *points[MAX_NUM_POINTS], int16_t num_points, EDGEPT_CLIST *new_points, SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, TBLOB *blob)
 
PRIORITY grade_split_length (SPLIT *split)
 
PRIORITY grade_sharpness (SPLIT *split)
 
bool near_point (EDGEPT *point, EDGEPT *line_pt_0, EDGEPT *line_pt_1, EDGEPT **near_pt)
 
virtual BLOB_CHOICE_LIST * classify_piece (const std::vector< SEAM * > &seams, int16_t start, int16_t end, const char *description, TWERD *word, BlamerBundle *blamer_bundle)
 
program_editup

Initialize all the things in the program that need to be initialized. init_permute determines whether to initialize the permute functions and Dawg models.

void program_editup (const std::string &textbase, TessdataManager *init_classifier, TessdataManager *init_dict)
 
cc_recog

Recognize a word.

void cc_recog (WERD_RES *word)
 
program_editdown

This function holds any necessary post processing for the Wise Owl program.

void program_editdown (int32_t elasped_time)
 
set_pass1

Get ready to do some pass 1 stuff.

void set_pass1 ()
 
set_pass2

Get ready to do some pass 2 stuff.

void set_pass2 ()
 
end_recog

Cleanup and exit the recog program.

int end_recog ()
 
call_matcher

Called from Tess with a blob in tess form. The blob may need rotating to the correct orientation for classification.

BLOB_CHOICE_LIST * call_matcher (TBLOB *blob)
 
dict_word()

Test the dictionaries, returning NO_PERM (0) if not found, or one of the PermuterType values if found, according to the dictionary.

int dict_word (const WERD_CHOICE &word)
 
classify_blob

Classify this blob if it is not already recorded in the match table. Attempt to recognize this blob as a character. The recognition rating for this blob will be stored as a part of the blob. This value will also be returned to the caller.

Parameters
blob: Current blob
string: The string to display in ScrollView
color: The colour to use when displayed with ScrollView
BLOB_CHOICE_LIST * classify_blob (TBLOB *blob, const char *string, ScrollView::Color color, BlamerBundle *blamer_bundle)
 
point_priority

Assign a priority to an edge point that might be used as part of a split. The argument should be of type EDGEPT.

PRIORITY point_priority (EDGEPT *point)
 
add_point_to_list

Add an edge point to a POINT_GROUP containing a list of other points.

void add_point_to_list (PointHeap *point_heap, EDGEPT *point)
 
bool is_inside_angle (EDGEPT *pt)
 
angle_change

Return the change in angle (degrees) of the line segments between points one and two, and two and three.

int angle_change (EDGEPT *point1, EDGEPT *point2, EDGEPT *point3)
 
pick_close_point

Choose the edge point that is closest to the critical point. This point may not be exactly vertical from the critical point.

EDGEPT * pick_close_point (EDGEPT *critical_point, EDGEPT *vertical_point, int *best_dist)
 
prioritize_points

Find a list of edge points from the outer outline of this blob. For each of these points assign a priority. Sort these points using a heap structure so that they can be visited in order.

void prioritize_points (TESSLINE *outline, PointHeap *points)
 
new_min_point

Found a new minimum point; try to decide whether to save it or not. Return the new value for the local minimum. If a point is saved then the local minimum is reset to nullptr.

void new_min_point (EDGEPT *local_min, PointHeap *points)
 
new_max_point

Found a new maximum point; try to decide whether to save it or not. Return the new value for the local maximum. If a point is saved then the local maximum is reset to nullptr.

void new_max_point (EDGEPT *local_max, PointHeap *points)
 
vertical_projection_point

For one point on the outline, find the corresponding point on the other side of the outline that is a likely projection for a split point. This is done by iterating through the edge points until the X value of the point being looked at is greater than the X value of the split point. Ensure that the point being returned is not right next to the split point. Return the edge point in *best_point as a result, and any points that were newly created are also saved on the new_points list.

void vertical_projection_point (EDGEPT *split_point, EDGEPT *target_point, EDGEPT **best_point, EDGEPT_CLIST *new_points)
 
attempt_blob_chop

Try to split this blob after this one. Check to make sure that it was successful.

SEAM * attempt_blob_chop (TWERD *word, TBLOB *blob, int32_t blob_number, bool italic_blob, const std::vector< SEAM * > &seams)
 
SEAM * chop_numbered_blob (TWERD *word, int32_t blob_number, bool italic_blob, const std::vector< SEAM * > &seams)
 
SEAM * chop_overlapping_blob (const std::vector< TBOX > &boxes, bool italic_blob, WERD_RES *word_res, unsigned *blob_number)
 
improve_one_blob

Finds the best place to chop, based on the worst blob, fixpt, or next to a fragment, according to the input. Returns the SEAM corresponding to the chop point, if any is found, and the index in the ratings_matrix of the chopped blob. Note that blob_choices is just a copy of the pointers in the leading diagonal of the ratings MATRIX. Although the blob is chopped, the returned SEAM is yet to be inserted into word->seam_array and the resulting blobs are unclassified, so this function can be used by ApplyBox as well as during recognition.

SEAM * improve_one_blob (const std::vector< BLOB_CHOICE * > &blob_choices, DANGERR *fixpt, bool split_next_to_fragment, bool italic_blob, WERD_RES *word, unsigned *blob_number)
 
chop_one_blob

Start with the current one-blob word and its classification. Find the worst blob and try to divide it up to improve the ratings. Used for testing chopper.

SEAM * chop_one_blob (const std::vector< TBOX > &boxes, const std::vector< BLOB_CHOICE * > &blob_choices, WERD_RES *word_res, unsigned *blob_number)
 
chop_word_main

Classify the blobs in this word and permute the results. Find the worst blob in the word and chop it up. Continue this process until a good answer has been found or all the blobs have been chopped up enough. The results are returned in the WERD_RES.

void chop_word_main (WERD_RES *word)
 
improve_by_chopping

Repeatedly chops the worst blob, classifying the new blobs, fixing up all the data, and incrementally runs the segmentation search until a good word is found, or no more chops can be found.

void improve_by_chopping (float rating_cert_scale, WERD_RES *word, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle, LMPainPoints *pain_points, std::vector< SegSearchPending > *pending)
 
int select_blob_to_split (const std::vector< BLOB_CHOICE * > &blob_choices, float rating_ceiling, bool split_next_to_fragment)
 
int select_blob_to_split_from_fixpt (DANGERR *fixpt)
 
- Public Member Functions inherited from tesseract::Classify
 Classify ()
 
 ~Classify () override
 
virtual Dict & getDict ()
 
const ShapeTable * shape_table () const
 
void SetStaticClassifier (ShapeClassifier *static_classifier)
 
void AddLargeSpeckleTo (int blob_length, BLOB_CHOICE_LIST *choices)
 
bool LargeSpeckle (const TBLOB &blob)
 
int GetFontinfoId (ADAPT_CLASS_STRUCT *Class, uint8_t ConfigId)
 
int PruneClasses (const INT_TEMPLATES_STRUCT *int_templates, int num_features, int keep_this, const INT_FEATURE_STRUCT *features, const uint8_t *normalization_factors, const uint16_t *expected_num_features, std::vector< CP_RESULT_STRUCT > *results)
 
void ReadNewCutoffs (TFile *fp, uint16_t *Cutoffs)
 
void PrintAdaptedTemplates (FILE *File, ADAPT_TEMPLATES_STRUCT *Templates)
 
void WriteAdaptedTemplates (FILE *File, ADAPT_TEMPLATES_STRUCT *Templates)
 
ADAPT_TEMPLATES_STRUCT * ReadAdaptedTemplates (TFile *File)
 
void ConvertProto (PROTO_STRUCT *Proto, int ProtoId, INT_CLASS_STRUCT *Class)
 
INT_TEMPLATES_STRUCT * CreateIntTemplates (CLASSES FloatProtos, const UNICHARSET &target_unicharset)
 
void LearnWord (const char *fontname, WERD_RES *word)
 
void LearnPieces (const char *fontname, int start, int length, float threshold, CharSegmentationType segmentation, const char *correct_text, WERD_RES *word)
 
void InitAdaptiveClassifier (TessdataManager *mgr)
 
void InitAdaptedClass (TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, ADAPT_CLASS_STRUCT *Class, ADAPT_TEMPLATES_STRUCT *Templates)
 
void AmbigClassifier (const std::vector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, const TBLOB *blob, INT_TEMPLATES_STRUCT *templates, ADAPT_CLASS_STRUCT **classes, UNICHAR_ID *ambiguities, ADAPT_RESULTS *results)
 
void MasterMatcher (INT_TEMPLATES_STRUCT *templates, int16_t num_features, const INT_FEATURE_STRUCT *features, const uint8_t *norm_factors, ADAPT_CLASS_STRUCT **classes, int debug, int matcher_multiplier, const TBOX &blob_box, const std::vector< CP_RESULT_STRUCT > &results, ADAPT_RESULTS *final_results)
 
void ExpandShapesAndApplyCorrections (ADAPT_CLASS_STRUCT **classes, bool debug, int class_id, int bottom, int top, float cp_rating, int blob_length, int matcher_multiplier, const uint8_t *cn_factors, UnicharRating *int_result, ADAPT_RESULTS *final_results)
 
double ComputeCorrectedRating (bool debug, int unichar_id, double cp_rating, double im_rating, int feature_misses, int bottom, int top, int blob_length, int matcher_multiplier, const uint8_t *cn_factors)
 
void ConvertMatchesToChoices (const DENORM &denorm, const TBOX &box, ADAPT_RESULTS *Results, BLOB_CHOICE_LIST *Choices)
 
void AddNewResult (const UnicharRating &new_result, ADAPT_RESULTS *results)
 
int GetAdaptiveFeatures (TBLOB *Blob, INT_FEATURE_ARRAY IntFeatures, FEATURE_SET *FloatFeatures)
 
void DebugAdaptiveClassifier (TBLOB *Blob, ADAPT_RESULTS *Results)
 
PROTO_ID MakeNewTempProtos (FEATURE_SET Features, int NumBadFeat, FEATURE_ID BadFeat[], INT_CLASS_STRUCT *IClass, ADAPT_CLASS_STRUCT *Class, BIT_VECTOR TempProtoMask)
 
int MakeNewTemporaryConfig (ADAPT_TEMPLATES_STRUCT *Templates, CLASS_ID ClassId, int FontinfoId, int NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_SET FloatFeatures)
 
void MakePermanent (ADAPT_TEMPLATES_STRUCT *Templates, CLASS_ID ClassId, int ConfigId, TBLOB *Blob)
 
void PrintAdaptiveMatchResults (const ADAPT_RESULTS &results)
 
void RemoveExtraPuncs (ADAPT_RESULTS *Results)
 
void RemoveBadMatches (ADAPT_RESULTS *Results)
 
void SetAdaptiveThreshold (float Threshold)
 
void ShowBestMatchFor (int shape_id, const INT_FEATURE_STRUCT *features, int num_features)
 
std::string ClassIDToDebugStr (const INT_TEMPLATES_STRUCT *templates, int class_id, int config_id) const
 
int ClassAndConfigIDToFontOrShapeID (int class_id, int int_result_config) const
 
int ShapeIDToClassID (int shape_id) const
 
UNICHAR_ID * BaselineClassifier (TBLOB *Blob, const std::vector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, ADAPT_TEMPLATES_STRUCT *Templates, ADAPT_RESULTS *Results)
 
int CharNormClassifier (TBLOB *blob, const TrainingSample &sample, ADAPT_RESULTS *adapt_results)
 
int CharNormTrainingSample (bool pruner_only, int keep_this, const TrainingSample &sample, std::vector< UnicharRating > *results)
 
UNICHAR_ID * GetAmbiguities (TBLOB *Blob, CLASS_ID CorrectClass)
 
void DoAdaptiveMatch (TBLOB *Blob, ADAPT_RESULTS *Results)
 
void AdaptToChar (TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, float Threshold, ADAPT_TEMPLATES_STRUCT *adaptive_templates)
 
void DisplayAdaptedChar (TBLOB *blob, INT_CLASS_STRUCT *int_class)
 
bool AdaptableWord (WERD_RES *word)
 
void EndAdaptiveClassifier ()
 
void SettupPass1 ()
 
void SettupPass2 ()
 
void AdaptiveClassifier (TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
 
void ClassifyAsNoise (ADAPT_RESULTS *Results)
 
void ResetAdaptiveClassifierInternal ()
 
void SwitchAdaptiveClassifier ()
 
void StartBackupAdaptiveClassifier ()
 
int GetCharNormFeature (const INT_FX_RESULT_STRUCT &fx_info, INT_TEMPLATES_STRUCT *templates, uint8_t *pruner_norm_array, uint8_t *char_norm_array)
 
void ComputeCharNormArrays (FEATURE_STRUCT *norm_feature, INT_TEMPLATES_STRUCT *templates, uint8_t *char_norm_array, uint8_t *pruner_array)
 
bool TempConfigReliable (CLASS_ID class_id, const TEMP_CONFIG_STRUCT *config)
 
void UpdateAmbigsGroup (CLASS_ID class_id, TBLOB *Blob)
 
bool AdaptiveClassifierIsFull () const
 
bool AdaptiveClassifierIsEmpty () const
 
bool LooksLikeGarbage (TBLOB *blob)
 
void RefreshDebugWindow (ScrollView **win, const char *msg, int y_offset, const TBOX &wbox)
 
void ClearCharNormArray (uint8_t *char_norm_array)
 
void ComputeIntCharNormArray (const FEATURE_STRUCT &norm_feature, uint8_t *char_norm_array)
 
void ComputeIntFeatures (FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures)
 
INT_TEMPLATES_STRUCT * ReadIntTemplates (TFile *fp)
 
void WriteIntTemplates (FILE *File, INT_TEMPLATES_STRUCT *Templates, const UNICHARSET &target_unicharset)
 
CLASS_ID GetClassToDebug (const char *Prompt, bool *adaptive_on, bool *pretrained_on, int *shape_id)
 
void ShowMatchDisplay ()
 
UnicityTable< FontInfo > & get_fontinfo_table ()
 
const UnicityTable< FontInfo > & get_fontinfo_table () const
 
UnicityTable< FontSet > & get_fontset_table ()
 
void NormalizeOutlines (LIST Outlines, float *XScale, float *YScale)
 
FEATURE_SET ExtractOutlineFeatures (TBLOB *Blob)
 
FEATURE_SET ExtractPicoFeatures (TBLOB *Blob)
 
FEATURE_SET ExtractIntCNFeatures (const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
 
FEATURE_SET ExtractIntGeoFeatures (const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
 
void LearnBlob (const std::string &fontname, TBLOB *Blob, const DENORM &cn_denorm, const INT_FX_RESULT_STRUCT &fx_info, const char *blob_text)
 
bool WriteTRFile (const char *filename)
 
 BOOL_VAR_H (allow_blob_division)
 
 BOOL_VAR_H (prioritize_division)
 
 BOOL_VAR_H (classify_enable_learning)
 
 INT_VAR_H (classify_debug_level)
 
 INT_VAR_H (classify_norm_method)
 
 double_VAR_H (classify_char_norm_range)
 
 double_VAR_H (classify_max_rating_ratio)
 
 double_VAR_H (classify_max_certainty_margin)
 
 BOOL_VAR_H (tess_cn_matching)
 
 BOOL_VAR_H (tess_bn_matching)
 
 BOOL_VAR_H (classify_enable_adaptive_matcher)
 
 BOOL_VAR_H (classify_use_pre_adapted_templates)
 
 BOOL_VAR_H (classify_save_adapted_templates)
 
 BOOL_VAR_H (classify_enable_adaptive_debugger)
 
 BOOL_VAR_H (classify_nonlinear_norm)
 
 INT_VAR_H (matcher_debug_level)
 
 INT_VAR_H (matcher_debug_flags)
 
 INT_VAR_H (classify_learning_debug_level)
 
 double_VAR_H (matcher_good_threshold)
 
 double_VAR_H (matcher_reliable_adaptive_result)
 
 double_VAR_H (matcher_perfect_threshold)
 
 double_VAR_H (matcher_bad_match_pad)
 
 double_VAR_H (matcher_rating_margin)
 
 double_VAR_H (matcher_avg_noise_size)
 
 INT_VAR_H (matcher_permanent_classes_min)
 
 INT_VAR_H (matcher_min_examples_for_prototyping)
 
 INT_VAR_H (matcher_sufficient_examples_for_prototyping)
 
 double_VAR_H (matcher_clustering_max_angle_delta)
 
 double_VAR_H (classify_misfit_junk_penalty)
 
 double_VAR_H (rating_scale)
 
 double_VAR_H (certainty_scale)
 
 double_VAR_H (tessedit_class_miss_scale)
 
 double_VAR_H (classify_adapted_pruning_factor)
 
 double_VAR_H (classify_adapted_pruning_threshold)
 
 INT_VAR_H (classify_adapt_proto_threshold)
 
 INT_VAR_H (classify_adapt_feature_threshold)
 
 BOOL_VAR_H (disable_character_fragments)
 
 double_VAR_H (classify_character_fragments_garbage_certainty_threshold)
 
 BOOL_VAR_H (classify_debug_character_fragments)
 
 BOOL_VAR_H (matcher_debug_separate_windows)
 
 STRING_VAR_H (classify_learn_debug_str)
 
 INT_VAR_H (classify_class_pruner_threshold)
 
 INT_VAR_H (classify_class_pruner_multiplier)
 
 INT_VAR_H (classify_cp_cutoff_strength)
 
 INT_VAR_H (classify_integer_matcher_multiplier)
 
 BOOL_VAR_H (classify_bln_numeric_mode)
 
 double_VAR_H (speckle_large_max_size)
 
 double_VAR_H (speckle_rating_penalty)
 
float ComputeNormMatch (CLASS_ID ClassId, const FEATURE_STRUCT &feature, bool DebugMatch)
 
void FreeNormProtos ()
 
NORM_PROTOS * ReadNormProtos (TFile *fp)
 
- Public Member Functions inherited from tesseract::CCStruct
 CCStruct ()=default
 
 ~CCStruct () override
 
- Public Member Functions inherited from tesseract::CCUtil
 CCUtil ()
 
virtual ~CCUtil ()
 
void main_setup (const std::string &argv0, const std::string &basename)
 CCUtil::main_setup - set location of tessdata and name of image. More...
 
ParamsVectors * params ()
 
 INT_VAR_H (ambigs_debug_level)
 
 BOOL_VAR_H (use_ambigs_for_adaption)
 

Public Attributes

std::unique_ptr< LanguageModel > language_model_
 
PRIORITY pass2_ok_split
 
WERD_CHOICE * prev_word_best_choice_
 
void(Wordrec::* fill_lattice_ )(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
 
- Public Attributes inherited from tesseract::Classify
INT_TEMPLATES_STRUCT * PreTrainedTemplates = nullptr
 
ADAPT_TEMPLATES_STRUCT * AdaptedTemplates = nullptr
 
ADAPT_TEMPLATES_STRUCT * BackupAdaptedTemplates = nullptr
 
BIT_VECTOR AllProtosOn = nullptr
 
BIT_VECTOR AllConfigsOn = nullptr
 
BIT_VECTOR AllConfigsOff = nullptr
 
BIT_VECTOR TempProtoMask = nullptr
 
NORM_PROTOS * NormProtos = nullptr
 
UnicityTable< FontInfo > fontinfo_table_
 
UnicityTable< FontSet > fontset_table_
 
bool EnableLearning = true
 
- Public Attributes inherited from tesseract::CCUtil
std::string datadir
 
std::string imagebasename
 
std::string lang
 
std::string language_data_path_prefix
 
UNICHARSET unicharset
 
UnicharAmbigs unichar_ambigs
 
std::string imagefile
 
std::string directory
 

Protected Member Functions

bool SegSearchDone (int num_futile_classifications)
 
void UpdateSegSearchNodes (float rating_cert_scale, int starting_col, std::vector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
 
void ProcessSegSearchPainPoint (float pain_point_priority, const MATRIX_COORD &pain_point, const char *pain_point_type, std::vector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle)
 
void ResetNGramSearch (WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, std::vector< SegSearchPending > &pending)
 
void InitBlamerForSegSearch (WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle, std::string &blamer_debug)
 

Additional Inherited Members

- Static Public Member Functions inherited from tesseract::Classify
static void SetupBLCNDenorms (const TBLOB &blob, bool nonlinear_norm, DENORM *bl_denorm, DENORM *cn_denorm, INT_FX_RESULT_STRUCT *fx_info)
 
static void ExtractFeatures (const TBLOB &blob, bool nonlinear_norm, std::vector< INT_FEATURE_STRUCT > *bl_features, std::vector< INT_FEATURE_STRUCT > *cn_features, INT_FX_RESULT_STRUCT *results, std::vector< int > *outline_cn_counts)
 
- Static Public Attributes inherited from tesseract::CCStruct
static const double kDescenderFraction = 0.25
 
static const double kXHeightFraction = 0.5
 
static const double kAscenderFraction = 0.25
 
static const double kXHeightCapRatio
 
- Protected Attributes inherited from tesseract::Classify
IntegerMatcher im_
 
FEATURE_DEFS_STRUCT feature_defs_
 
ShapeTable * shape_table_ = nullptr
 

Detailed Description

Definition at line 186 of file wordrec.h.

Constructor & Destructor Documentation

◆ Wordrec()

tesseract::Wordrec::Wordrec ( )

Definition at line 46 of file wordrec.cpp.

47  : // control parameters
48  BOOL_MEMBER(merge_fragments_in_matrix, true,
49  "Merge the fragments in the ratings matrix and delete them"
50  " after merging",
51  params())
52  , BOOL_MEMBER(wordrec_enable_assoc, true, "Associator Enable", params())
53  , BOOL_MEMBER(force_word_assoc, false,
54  "force associator to run regardless of what enable_assoc is."
55  " This is used for CJK where component grouping is necessary.",
56  params())
57  , INT_MEMBER(repair_unchopped_blobs, 1, "Fix blobs that aren't chopped", params())
58  , double_MEMBER(tessedit_certainty_threshold, -2.25, "Good blob limit", params())
59  , INT_MEMBER(chop_debug, 0, "Chop debug", params())
60  , BOOL_MEMBER(chop_enable, 1, "Chop enable", params())
61  , BOOL_MEMBER(chop_vertical_creep, 0, "Vertical creep", params())
62  , INT_MEMBER(chop_split_length, 10000, "Split Length", params())
63  , INT_MEMBER(chop_same_distance, 2, "Same distance", params())
64  , INT_MEMBER(chop_min_outline_points, 6, "Min Number of Points on Outline", params())
65  , INT_MEMBER(chop_seam_pile_size, 150, "Max number of seams in seam_pile", params())
66  , BOOL_MEMBER(chop_new_seam_pile, 1, "Use new seam_pile", params())
67  , INT_MEMBER(chop_inside_angle, -50, "Min Inside Angle Bend", params())
68  , INT_MEMBER(chop_min_outline_area, 2000, "Min Outline Area", params())
69  , double_MEMBER(chop_split_dist_knob, 0.5, "Split length adjustment", params())
70  , double_MEMBER(chop_overlap_knob, 0.9, "Split overlap adjustment", params())
71  , double_MEMBER(chop_center_knob, 0.15, "Split center adjustment", params())
72  , INT_MEMBER(chop_centered_maxwidth, 90,
73  "Width of (smaller) chopped blobs "
74  "above which we don't care that a chop is not near the center.",
75  params())
76  , double_MEMBER(chop_sharpness_knob, 0.06, "Split sharpness adjustment", params())
77  , double_MEMBER(chop_width_change_knob, 5.0, "Width change adjustment", params())
78  , double_MEMBER(chop_ok_split, 100.0, "OK split limit", params())
79  , double_MEMBER(chop_good_split, 50.0, "Good split limit", params())
80  , INT_MEMBER(chop_x_y_weight, 3, "X / Y length weight", params())
81  , BOOL_MEMBER(assume_fixed_pitch_char_segment, false,
82  "include fixed-pitch heuristics in char segmentation", params())
83  , INT_MEMBER(wordrec_debug_level, 0, "Debug level for wordrec", params())
84  , INT_MEMBER(wordrec_max_join_chunks, 4, "Max number of broken pieces to associate", params())
85  , BOOL_MEMBER(wordrec_skip_no_truth_words, false,
86  "Only run OCR for words that had truth recorded in BlamerBundle", params())
87  , BOOL_MEMBER(wordrec_debug_blamer, false, "Print blamer debug messages", params())
88  , BOOL_MEMBER(wordrec_run_blamer, false, "Try to set the blame for errors", params())
89  , INT_MEMBER(segsearch_debug_level, 0, "SegSearch debug level", params())
90  , INT_MEMBER(segsearch_max_pain_points, 2000,
91  "Maximum number of pain points stored in the queue", params())
92  , INT_MEMBER(segsearch_max_futile_classifications, 20,
93  "Maximum number of pain point classifications per chunk that"
94  " did not result in finding a better word choice.",
95  params())
96  , double_MEMBER(segsearch_max_char_wh_ratio, 2.0, "Maximum character width-to-height ratio",
97  params())
98  , BOOL_MEMBER(save_alt_choices, true,
99  "Save alternative paths found during chopping"
100  " and segmentation search",
101  params())
102  , pass2_ok_split(0.0f) {
103  prev_word_best_choice_ = nullptr;
104  language_model_ = std::make_unique<LanguageModel>(&get_fontinfo_table(), &(getDict()));
105  fill_lattice_ = nullptr;
106 }
#define INT_MEMBER(name, val, comment, vec)
Definition: params.h:368
#define double_MEMBER(name, val, comment, vec)
Definition: params.h:374
#define BOOL_MEMBER(name, val, comment, vec)
Definition: params.h:370
ParamsVectors * params()
Definition: ccutil.h:53
UnicityTable< FontInfo > & get_fontinfo_table()
Definition: classify.h:324
virtual Dict & getDict()
Definition: classify.h:98
void(Wordrec::* fill_lattice_)(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
Definition: wordrec.h:390
WERD_CHOICE * prev_word_best_choice_
Definition: wordrec.h:387
PRIORITY pass2_ok_split
Definition: wordrec.h:383
std::unique_ptr< LanguageModel > language_model_
Definition: wordrec.h:382

◆ ~Wordrec()

tesseract::Wordrec::~Wordrec ( )
override default

Member Function Documentation

◆ add_point_to_list()

void tesseract::Wordrec::add_point_to_list ( PointHeap * point_heap,
EDGEPT * point 
)

Definition at line 73 of file chop.cpp.

73  {
74  if (point_heap->size() < MAX_NUM_POINTS - 2) {
75  PointPair pair(point_priority(point), point);
76  point_heap->Push(&pair);
77  }
78 
79 #ifndef GRAPHICS_DISABLED
80  if (chop_debug > 2) {
81  mark_outline(point);
82  }
83 #endif
84 }
#define MAX_NUM_POINTS
Definition: chop.h:28
void mark_outline(EDGEPT *edgept)
Definition: plotedges.cpp:83
KDPairInc< float, EDGEPT * > PointPair
Definition: chop.h:31
PRIORITY point_priority(EDGEPT *point)
Definition: chop.cpp:64

◆ add_seam_to_queue()

void tesseract::Wordrec::add_seam_to_queue ( float  new_priority,
SEAM * new_seam,
SeamQueue * seams 
)

Definition at line 64 of file findseam.cpp.

64  {
65  if (new_seam == nullptr) {
66  return;
67  }
68  if (chop_debug) {
69  tprintf("Pushing new seam with priority %g :", new_priority);
70  new_seam->Print("seam: ");
71  }
72  if (seams->size() >= MAX_NUM_SEAMS) {
73  SeamPair old_pair(0, nullptr);
74  if (seams->PopWorst(&old_pair) && old_pair.key() <= new_priority) {
75  if (chop_debug) {
76  tprintf("Old seam staying with priority %g\n", old_pair.key());
77  }
78  delete new_seam;
79  seams->Push(&old_pair);
80  return;
81  } else if (chop_debug) {
82  tprintf("New seam with priority %g beats old worst seam with %g\n", new_priority,
83  old_pair.key());
84  }
85  }
86  SeamPair new_pair(new_priority, new_seam);
87  seams->Push(&new_pair);
88 }
#define MAX_NUM_SEAMS
Definition: findseam.cpp:47
KDPtrPairInc< float, SEAM > SeamPair
Definition: findseam.h:30
void tprintf(const char *format,...)
Definition: tprintf.cpp:41

◆ angle_change()

int tesseract::Wordrec::angle_change ( EDGEPT * point1,
EDGEPT * point2,
EDGEPT * point3 
)

Definition at line 98 of file chop.cpp.

98  {
99  VECTOR vector1;
100  VECTOR vector2;
101 
102  int angle;
103 
104  /* Compute angle */
105  vector1.x = point2->pos.x - point1->pos.x;
106  vector1.y = point2->pos.y - point1->pos.y;
107  vector2.x = point3->pos.x - point2->pos.x;
108  vector2.y = point3->pos.y - point2->pos.y;
109  /* Use cross product */
110  float length = std::sqrt(static_cast<float>(vector1.length()) * vector2.length());
111  if (static_cast<int>(length) == 0) {
112  return (0);
113  }
114  angle = static_cast<int>(floor(std::asin(vector1.cross(vector2) / length) / M_PI * 180.0 + 0.5));
115 
116  /* Use dot product */
117  if (vector1.dot(vector2) < 0) {
118  angle = 180 - angle;
119  }
120  /* Adjust angle */
121  if (angle > 180) {
122  angle -= 360;
123  }
124  if (angle <= -180) {
125  angle += 360;
126  }
127  return (angle);
128 }
TPOINT VECTOR
Definition: blobs.h:93
TDimension x
Definition: blobs.h:89

◆ attempt_blob_chop()

SEAM * tesseract::Wordrec::attempt_blob_chop ( TWERD * word,
TBLOB * blob,
int32_t  blob_number,
bool  italic_blob,
const std::vector< SEAM * > &  seams 
)

Definition at line 207 of file chopper.cpp.

208  {
209  if (repair_unchopped_blobs) {
210  preserve_outline_tree(blob->outlines);
211  }
212  TBLOB *other_blob = TBLOB::ShallowCopy(*blob); /* Make new blob */
213  // Insert it into the word.
214  word->blobs.insert(word->blobs.begin() + blob_number + 1, other_blob);
215 
216  SEAM *seam = nullptr;
217  if (prioritize_division) {
218  TPOINT location;
219  if (divisible_blob(blob, italic_blob, &location)) {
220  seam = new SEAM(0.0f, location);
221  }
222  }
223  if (seam == nullptr) {
224  seam = pick_good_seam(blob);
225  }
226  if (chop_debug) {
227  if (seam != nullptr) {
228  seam->Print("Good seam picked=");
229  } else {
230  tprintf("\n** no seam picked *** \n");
231  }
232  }
233  if (seam) {
234  seam->ApplySeam(italic_blob, blob, other_blob);
235  }
236 
237  seam = CheckSeam(chop_debug, blob_number, word, blob, other_blob, seams, seam);
238  if (seam == nullptr) {
239  if (repair_unchopped_blobs) {
240  restore_outline_tree(blob->outlines);
241  }
242  if (allow_blob_division && !prioritize_division) {
243  // If the blob can simply be divided into outlines, then do that.
244  TPOINT location;
245  if (divisible_blob(blob, italic_blob, &location)) {
246  other_blob = TBLOB::ShallowCopy(*blob); /* Make new blob */
247  word->blobs.insert(word->blobs.begin() + blob_number + 1, other_blob);
248  seam = new SEAM(0.0f, location);
249  seam->ApplySeam(italic_blob, blob, other_blob);
250  seam = CheckSeam(chop_debug, blob_number, word, blob, other_blob, seams, seam);
251  }
252  }
253  }
254  if (seam != nullptr) {
255  // Make sure this seam doesn't get chopped again.
256  seam->Finalize();
257  }
258  return seam;
259 }
@ TPOINT
bool divisible_blob(TBLOB *blob, bool italic_blob, TPOINT *location)
Definition: blobs.cpp:923
static TBLOB * ShallowCopy(const TBLOB &src)
Definition: blobs.cpp:342
SEAM * pick_good_seam(TBLOB *blob)
Definition: findseam.cpp:214

◆ BOOL_VAR_H() [1/11]

tesseract::Wordrec::BOOL_VAR_H ( assume_fixed_pitch_char_segment  )

◆ BOOL_VAR_H() [2/11]

tesseract::Wordrec::BOOL_VAR_H ( chop_enable  )

◆ BOOL_VAR_H() [3/11]

tesseract::Wordrec::BOOL_VAR_H ( chop_new_seam_pile  )

◆ BOOL_VAR_H() [4/11]

tesseract::Wordrec::BOOL_VAR_H ( chop_vertical_creep  )

◆ BOOL_VAR_H() [5/11]

tesseract::Wordrec::BOOL_VAR_H ( force_word_assoc  )

◆ BOOL_VAR_H() [6/11]

tesseract::Wordrec::BOOL_VAR_H ( merge_fragments_in_matrix  )

◆ BOOL_VAR_H() [7/11]

tesseract::Wordrec::BOOL_VAR_H ( save_alt_choices  )

◆ BOOL_VAR_H() [8/11]

tesseract::Wordrec::BOOL_VAR_H ( wordrec_debug_blamer  )

◆ BOOL_VAR_H() [9/11]

tesseract::Wordrec::BOOL_VAR_H ( wordrec_enable_assoc  )

◆ BOOL_VAR_H() [10/11]

tesseract::Wordrec::BOOL_VAR_H ( wordrec_run_blamer  )

◆ BOOL_VAR_H() [11/11]

tesseract::Wordrec::BOOL_VAR_H ( wordrec_skip_no_truth_words  )

◆ call_matcher()

BLOB_CHOICE_LIST * tesseract::Wordrec::call_matcher ( TBLOB * blob)

Definition at line 132 of file tface.cpp.

132  {
133  // Rotate the blob for classification if necessary.
134  TBLOB *rotated_blob = tessblob->ClassifyNormalizeIfNeeded();
135  if (rotated_blob == nullptr) {
136  rotated_blob = tessblob;
137  }
138  auto *ratings = new BLOB_CHOICE_LIST(); // matcher result
139  AdaptiveClassifier(rotated_blob, ratings);
140  if (rotated_blob != tessblob) {
141  delete rotated_blob;
142  }
143  return ratings;
144 }
void AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
Definition: adaptmatch.cpp:202

◆ CallFillLattice()

void tesseract::Wordrec::CallFillLattice ( const MATRIX &  ratings,
const WERD_CHOICE_LIST &  best_choices,
const UNICHARSET &  unicharset,
BlamerBundle *  blamer_bundle 
)
inline

Definition at line 240 of file wordrec.h.

241  {
242  (this->*fill_lattice_)(ratings, best_choices, unicharset, blamer_bundle);
243  }
UNICHARSET unicharset
Definition: ccutil.h:61

◆ cc_recog()

void tesseract::Wordrec::cc_recog ( WERD_RES * word)

Definition at line 119 of file tface.cpp.

119  {
120  getDict().reset_hyphen_vars(word->word->flag(W_EOL));
121  chop_word_main(word);
122  word->DebugWordChoices(getDict().stopper_debug_level >= 1, getDict().word_to_debug.c_str());
123  ASSERT_HOST(word->StatesAllValid());
124 }
#define ASSERT_HOST(x)
Definition: errcode.h:59
@ W_EOL
end of line
Definition: werd.h:35
void reset_hyphen_vars(bool last_word_on_line)
Definition: hyphen.cpp:27
void chop_word_main(WERD_RES *word)
Definition: chopper.cpp:385

◆ choose_best_seam()

void tesseract::Wordrec::choose_best_seam ( SeamQueue *  seam_queue,
const SPLIT *  split,
PRIORITY  priority,
SEAM **  seam_result,
TBLOB *  blob,
SeamPile *  seam_pile 
)

Definition at line 103 of file findseam.cpp.

104  {
105  SEAM *seam;
106  char str[80];
107  float my_priority;
108  /* Add seam of split */
109  my_priority = priority;
110  if (split != nullptr) {
111  TPOINT split_point = split->point1->pos;
112  split_point += split->point2->pos;
113  split_point /= 2;
114  seam = new SEAM(my_priority, split_point, *split);
115  if (chop_debug > 1) {
116  seam->Print("Partial priority ");
117  }
118  add_seam_to_queue(my_priority, seam, seam_queue);
119 
120  if (my_priority > chop_good_split) {
121  return;
122  }
123  }
124 
125  TBOX bbox = blob->bounding_box();
126  /* Queue loop */
127  while (!seam_queue->empty()) {
128  SeamPair seam_pair;
129  seam_queue->Pop(&seam_pair);
130  seam = seam_pair.extract_data();
131  /* Set full priority */
132  my_priority =
133  seam->FullPriority(bbox.left(), bbox.right(), chop_overlap_knob, chop_centered_maxwidth,
134  chop_center_knob, chop_width_change_knob);
135  if (chop_debug) {
136  sprintf(str, "Full my_priority %0.0f, ", my_priority);
137  seam->Print(str);
138  }
139 
140  if ((*seam_result == nullptr || (*seam_result)->priority() > my_priority) &&
141  my_priority < chop_ok_split) {
142  /* No crossing */
143  if (seam->IsHealthy(*blob, chop_min_outline_points, chop_min_outline_area)) {
144  delete *seam_result;
145  *seam_result = new SEAM(*seam);
146  (*seam_result)->set_priority(my_priority);
147  } else {
148  delete seam;
149  seam = nullptr;
150  my_priority = BAD_PRIORITY;
151  }
152  }
153 
154  if (my_priority < chop_good_split) {
155  delete seam;
156  return; /* Made good answer */
157  }
158 
159  if (seam) {
160  /* Combine with others */
161  if (seam_pile->size() < chop_seam_pile_size) {
162  combine_seam(*seam_pile, seam, seam_queue);
163  SeamDecPair pair(seam_pair.key(), seam);
164  seam_pile->Push(&pair);
165  } else if (chop_new_seam_pile && seam_pile->size() == chop_seam_pile_size &&
166  seam_pile->PeekTop().key() > seam_pair.key()) {
167  combine_seam(*seam_pile, seam, seam_queue);
168  SeamDecPair pair;
169  seam_pile->Pop(&pair); // pop the worst.
170  // Replace the seam in pair (deleting the old one) with
171  // the new seam and score, then push back into the heap.
172  pair.set_key(seam_pair.key());
173  pair.set_data(seam);
174  seam_pile->Push(&pair);
175  } else {
176  delete seam;
177  }
178  }
179 
180  my_priority = seam_queue->empty() ? NO_FULL_PRIORITY : seam_queue->PeekTop().key();
181  if ((my_priority > chop_ok_split) || (my_priority > chop_good_split && split)) {
182  return;
183  }
184  }
185 }
#define NO_FULL_PRIORITY
Definition: findseam.cpp:49
#define BAD_PRIORITY
Definition: findseam.cpp:51
@ TBOX
const std::vector< std::string > split(const std::string &s, char c)
Definition: helpers.h:41
KDPtrPairDec< float, SEAM > SeamDecPair
Definition: findseam.h:33
void add_seam_to_queue(float new_priority, SEAM *new_seam, SeamQueue *seams)
Definition: findseam.cpp:64
void combine_seam(const SeamPile &seam_pile, const SEAM *seam, SeamQueue *seam_queue)
Definition: findseam.cpp:194

◆ chop_numbered_blob()

SEAM * tesseract::Wordrec::chop_numbered_blob ( TWERD *  word,
int32_t  blob_number,
bool  italic_blob,
const std::vector< SEAM * > &  seams 
)

Definition at line 261 of file chopper.cpp.

262  {
263  return attempt_blob_chop(word, word->blobs[blob_number], blob_number, italic_blob, seams);
264 }
SEAM * attempt_blob_chop(TWERD *word, TBLOB *blob, int32_t blob_number, bool italic_blob, const std::vector< SEAM * > &seams)
Definition: chopper.cpp:207

◆ chop_one_blob()

SEAM * tesseract::Wordrec::chop_one_blob ( const std::vector< TBOX > &  boxes,
const std::vector< BLOB_CHOICE * > &  blob_choices,
WERD_RES *  word_res,
unsigned *  blob_number 
)

Definition at line 367 of file chopper.cpp.

369  {
370  if (prioritize_division) {
371  return chop_overlapping_blob(boxes, true, word_res, blob_number);
372  } else {
373  return improve_one_blob(blob_choices, nullptr, false, true, word_res, blob_number);
374  }
375 }
SEAM * improve_one_blob(const std::vector< BLOB_CHOICE * > &blob_choices, DANGERR *fixpt, bool split_next_to_fragment, bool italic_blob, WERD_RES *word, unsigned *blob_number)
Definition: chopper.cpp:320
SEAM * chop_overlapping_blob(const std::vector< TBOX > &boxes, bool italic_blob, WERD_RES *word_res, unsigned *blob_number)
Definition: chopper.cpp:266

◆ chop_overlapping_blob()

SEAM * tesseract::Wordrec::chop_overlapping_blob ( const std::vector< TBOX > &  boxes,
bool  italic_blob,
WERD_RES *  word_res,
unsigned *  blob_number 
)

Definition at line 266 of file chopper.cpp.

267  {
268  TWERD *word = word_res->chopped_word;
269  for (*blob_number = 0; *blob_number < word->NumBlobs(); ++*blob_number) {
270  TBLOB *blob = word->blobs[*blob_number];
271  TPOINT topleft, botright;
272  topleft.x = blob->bounding_box().left();
273  topleft.y = blob->bounding_box().top();
274  botright.x = blob->bounding_box().right();
275  botright.y = blob->bounding_box().bottom();
276 
277  TPOINT original_topleft, original_botright;
278  word_res->denorm.DenormTransform(nullptr, topleft, &original_topleft);
279  word_res->denorm.DenormTransform(nullptr, botright, &original_botright);
280 
281  TBOX original_box =
282  TBOX(original_topleft.x, original_botright.y, original_botright.x, original_topleft.y);
283 
284  bool almost_equal_box = false;
285  int num_overlap = 0;
286  for (auto boxe : boxes) {
287  if (original_box.overlap_fraction(boxe) > 0.125) {
288  num_overlap++;
289  }
290  if (original_box.almost_equal(boxe, 3)) {
291  almost_equal_box = true;
292  }
293  }
294 
295  TPOINT location;
296  if (divisible_blob(blob, italic_blob, &location) || (!almost_equal_box && num_overlap > 1)) {
297  SEAM *seam = attempt_blob_chop(word, blob, *blob_number, italic_blob, word_res->seam_array);
298  if (seam != nullptr) {
299  return seam;
300  }
301  }
302  }
303 
304  *blob_number = UINT_MAX;
305  return nullptr;
306 }

◆ chop_word_main()

void tesseract::Wordrec::chop_word_main ( WERD_RES * word)

Definition at line 385 of file chopper.cpp.

385  {
386  int num_blobs = word->chopped_word->NumBlobs();
387  if (word->ratings == nullptr) {
388  word->ratings = new MATRIX(num_blobs, wordrec_max_join_chunks);
389  }
390  if (word->ratings->get(0, 0) == nullptr) {
391  // Run initial classification.
392  for (int b = 0; b < num_blobs; ++b) {
393  BLOB_CHOICE_LIST *choices = classify_piece(
394  word->seam_array, b, b, "Initial:", word->chopped_word, word->blamer_bundle);
395  word->ratings->put(b, b, choices);
396  }
397  } else {
398  // Blobs have been pre-classified. Set matrix cell for all blob choices
399  for (int col = 0; col < word->ratings->dimension(); ++col) {
400  for (int row = col;
401  row < word->ratings->dimension() && row < col + word->ratings->bandwidth(); ++row) {
402  BLOB_CHOICE_LIST *choices = word->ratings->get(col, row);
403  if (choices != nullptr) {
404  BLOB_CHOICE_IT bc_it(choices);
405  for (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) {
406  bc_it.data()->set_matrix_cell(col, row);
407  }
408  }
409  }
410  }
411  }
412 
413  // Run Segmentation Search.
414  BestChoiceBundle best_choice_bundle(word->ratings->dimension());
415  SegSearch(word, &best_choice_bundle, word->blamer_bundle);
416 
417  if (word->best_choice == nullptr) {
418  // SegSearch found no valid paths, so just use the leading diagonal.
419  word->FakeWordFromRatings(TOP_CHOICE_PERM);
420  }
421  word->RebuildBestState();
422  // If we finished without a hyphen at the end of the word, let the next word
423  // be found in the dictionary.
424  if (word->word->flag(W_EOL) && !getDict().has_hyphen_end(*word->best_choice)) {
425  getDict().reset_hyphen_vars(true);
426  }
427 
428  if (word->blamer_bundle != nullptr && this->fill_lattice_ != nullptr) {
429  CallFillLattice(*word->ratings, word->best_choices, *word->uch_set, word->blamer_bundle);
430  }
431  if (wordrec_debug_level > 0) {
432  tprintf("Final Ratings Matrix:\n");
433  word->ratings->print(getDict().getUnicharset());
434  }
435  word->FilterWordChoices(getDict().stopper_debug_level);
436 }
@ TOP_CHOICE_PERM
Definition: ratngs.h:234
void CallFillLattice(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
Definition: wordrec.h:240
virtual BLOB_CHOICE_LIST * classify_piece(const std::vector< SEAM * > &seams, int16_t start, int16_t end, const char *description, TWERD *word, BlamerBundle *blamer_bundle)
Definition: pieces.cpp:49
void SegSearch(WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
Definition: segsearch.cpp:33

◆ classify_blob()

BLOB_CHOICE_LIST * tesseract::Wordrec::classify_blob ( TBLOB *  blob,
const char *  string,
ScrollView::Color  color,
BlamerBundle *  blamer_bundle 
)

Definition at line 52 of file wordclass.cpp.

53  {
54 #ifndef GRAPHICS_DISABLED
55  if (wordrec_display_all_blobs) {
56  display_blob(blob, color);
57  }
58 #endif
59  // TODO(rays) collapse with call_matcher and move all to wordrec.cpp.
60  BLOB_CHOICE_LIST *choices = call_matcher(blob);
61  // If a blob with the same bounding box as one of the truth character
62  // bounding boxes is not classified as the corresponding truth character
63  // blame character classifier for incorrect answer.
64  if (blamer_bundle != nullptr) {
65  blamer_bundle->BlameClassifier(getDict().getUnicharset(), blob->bounding_box(), *choices,
66  wordrec_debug_blamer);
67  }
68 #ifndef GRAPHICS_DISABLED
69  if (classify_debug_level && string) {
70  print_ratings_list(string, choices, getDict().getUnicharset());
71  }
72 
73  if (wordrec_blob_pause) {
74  blob_window->Wait();
75  }
76 #endif
77 
78  return choices;
79 }
ScrollView * blob_window
Definition: render.cpp:36
void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings, const UNICHARSET &current_unicharset)
Definition: ratngs.cpp:804
bool wordrec_display_all_blobs
Definition: render.cpp:41
bool wordrec_blob_pause
Definition: render.cpp:43
void display_blob(TBLOB *blob, ScrollView::Color color)
Definition: render.cpp:54
BLOB_CHOICE_LIST * call_matcher(TBLOB *blob)
Definition: tface.cpp:132

◆ classify_piece()

BLOB_CHOICE_LIST * tesseract::Wordrec::classify_piece ( const std::vector< SEAM * > &  seams,
int16_t  start,
int16_t  end,
const char *  description,
TWERD *  word,
BlamerBundle *  blamer_bundle 
)
virtual

Definition at line 49 of file pieces.cpp.

51  {
52  if (end > start) {
53  SEAM::JoinPieces(seams, word->blobs, start, end);
54  }
55  BLOB_CHOICE_LIST *choices =
56  classify_blob(word->blobs[start], description, ScrollView::WHITE, blamer_bundle);
57  // Set the matrix_cell_ entries in all the BLOB_CHOICES.
58  BLOB_CHOICE_IT bc_it(choices);
59  for (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) {
60  bc_it.data()->set_matrix_cell(start, end);
61  }
62 
63  if (end > start) {
64  SEAM::BreakPieces(seams, word->blobs, start, end);
65  }
66 
67  return (choices);
68 }
static void JoinPieces(const std::vector< SEAM * > &seams, const std::vector< TBLOB * > &blobs, int first, int last)
Definition: seam.cpp:204
static void BreakPieces(const std::vector< SEAM * > &seams, const std::vector< TBLOB * > &blobs, int first, int last)
Definition: seam.cpp:181
BLOB_CHOICE_LIST * classify_blob(TBLOB *blob, const char *string, ScrollView::Color color, BlamerBundle *blamer_bundle)
Definition: wordclass.cpp:52

◆ combine_seam()

void tesseract::Wordrec::combine_seam ( const SeamPile &  seam_pile,
const SEAM *  seam,
SeamQueue *  seam_queue 
)

Definition at line 194 of file findseam.cpp.

194  {
195  for (int x = 0; x < seam_pile.size(); ++x) {
196  const SEAM *this_one = seam_pile.get(x).data();
197  if (seam->CombineableWith(*this_one, SPLIT_CLOSENESS, chop_ok_split)) {
198  SEAM *new_one = new SEAM(*seam);
199  new_one->CombineWith(*this_one);
200  if (chop_debug > 1) {
201  new_one->Print("Combo priority ");
202  }
203  add_seam_to_queue(new_one->priority(), new_one, seam_queue);
204  }
205  }
206 }
#define SPLIT_CLOSENESS
Definition: findseam.cpp:45

◆ dict_word()

int tesseract::Wordrec::dict_word ( const WERD_CHOICE & word)

Definition at line 86 of file tface.cpp.

86  {
87  return getDict().valid_word(word);
88 }
int valid_word(const WERD_CHOICE &word, bool numbers_ok) const
Definition: dict.cpp:801

◆ double_VAR_H() [1/9]

tesseract::Wordrec::double_VAR_H ( chop_center_knob  )

◆ double_VAR_H() [2/9]

tesseract::Wordrec::double_VAR_H ( chop_good_split  )

◆ double_VAR_H() [3/9]

tesseract::Wordrec::double_VAR_H ( chop_ok_split  )

◆ double_VAR_H() [4/9]

tesseract::Wordrec::double_VAR_H ( chop_overlap_knob  )

◆ double_VAR_H() [5/9]

tesseract::Wordrec::double_VAR_H ( chop_sharpness_knob  )

◆ double_VAR_H() [6/9]

tesseract::Wordrec::double_VAR_H ( chop_split_dist_knob  )

◆ double_VAR_H() [7/9]

tesseract::Wordrec::double_VAR_H ( chop_width_change_knob  )

◆ double_VAR_H() [8/9]

tesseract::Wordrec::double_VAR_H ( segsearch_max_char_wh_ratio  )

◆ double_VAR_H() [9/9]

tesseract::Wordrec::double_VAR_H ( tessedit_certainty_threshold  )

◆ end_recog()

int tesseract::Wordrec::end_recog ( )

Definition at line 61 of file tface.cpp.

61  {
62  program_editdown(0);
63 
64  return (0);
65 }
void program_editdown(int32_t elasped_time)
Definition: tface.cpp:73

◆ FillLattice()

void tesseract::Wordrec::FillLattice ( const MATRIX &  ratings,
const WERD_CHOICE_LIST &  best_choices,
const UNICHARSET &  unicharset,
BlamerBundle *  blamer_bundle 
)

◆ grade_sharpness()

PRIORITY tesseract::Wordrec::grade_sharpness ( SPLIT * split)

Definition at line 67 of file gradechop.cpp.

67  {
68  PRIORITY grade;
69 
70  grade = point_priority(split->point1) + point_priority(split->point2);
71 
72  if (grade < -360.0) {
73  grade = 0;
74  } else {
75  grade += 360.0;
76  }
77 
78  grade *= chop_sharpness_knob; /* Values 0 to -360 */
79 
80  return (grade);
81 }
float PRIORITY
Definition: seam.h:31

◆ grade_split_length()

PRIORITY tesseract::Wordrec::grade_split_length ( SPLIT * split)

Definition at line 45 of file gradechop.cpp.

45  {
46  PRIORITY grade;
47  float split_length;
48 
49  split_length = split->point1->WeightedDistance(*split->point2, chop_x_y_weight);
50 
51  if (split_length <= 0) {
52  grade = 0;
53  } else {
54  grade = std::sqrt(split_length) * chop_split_dist_knob;
55  }
56 
57  return (std::max(0.0f, grade));
58 }

◆ improve_by_chopping()

void tesseract::Wordrec::improve_by_chopping ( float  rating_cert_scale,
WERD_RES *  word,
BestChoiceBundle *  best_choice_bundle,
BlamerBundle *  blamer_bundle,
LMPainPoints *  pain_points,
std::vector< SegSearchPending > *  pending 
)

Definition at line 445 of file chopper.cpp.

448  {
449  unsigned blob_number;
450  do { // improvement loop.
451  // Make a simple vector of BLOB_CHOICEs to make it easy to pick which
452  // one to chop.
453  std::vector<BLOB_CHOICE *> blob_choices;
454  int num_blobs = word->ratings->dimension();
455  for (int i = 0; i < num_blobs; ++i) {
456  BLOB_CHOICE_LIST *choices = word->ratings->get(i, i);
457  if (choices == nullptr || choices->empty()) {
458  blob_choices.push_back(nullptr);
459  } else {
460  BLOB_CHOICE_IT bc_it(choices);
461  blob_choices.push_back(bc_it.data());
462  }
463  }
464  SEAM *seam = improve_one_blob(blob_choices, &best_choice_bundle->fixpt, false, false, word,
465  &blob_number);
466  if (seam == nullptr) {
467  break;
468  }
469  // A chop has been made. We have to correct all the data structures to
470  // take into account the extra bottom-level blob.
471  // Put the seam into the seam_array and correct everything else on the
472  // word: ratings matrix (including matrix location in the BLOB_CHOICES),
473  // states in WERD_CHOICEs, and blob widths.
474  word->InsertSeam(blob_number, seam);
475  // Insert a new entry in the beam array.
476  best_choice_bundle->beam.insert(best_choice_bundle->beam.begin() + blob_number, new LanguageModelState);
477  // Fixpts are outdated, but will get recalculated.
478  best_choice_bundle->fixpt.clear();
479  // Remap existing pain points.
480  pain_points->RemapForSplit(blob_number);
481  // Insert a new pending at the chop point.
482  pending->insert(pending->begin() + blob_number, SegSearchPending());
483 
484  // Classify the two newly created blobs using ProcessSegSearchPainPoint,
485  // as that updates the pending correctly and adds new pain points.
486  MATRIX_COORD pain_point(blob_number, blob_number);
487  ProcessSegSearchPainPoint(0.0f, pain_point, "Chop1", pending, word, pain_points, blamer_bundle);
488  pain_point.col = blob_number + 1;
489  pain_point.row = blob_number + 1;
490  ProcessSegSearchPainPoint(0.0f, pain_point, "Chop2", pending, word, pain_points, blamer_bundle);
491  if (language_model_->language_model_ngram_on) {
492  // N-gram evaluation depends on the number of blobs in a chunk, so we
493  // have to re-evaluate everything in the word.
494  ResetNGramSearch(word, best_choice_bundle, *pending);
495  blob_number = 0;
496  }
497  // Run language model incrementally. (Except with the n-gram model on.)
498  UpdateSegSearchNodes(rating_cert_scale, blob_number, pending, word, pain_points,
499  best_choice_bundle, blamer_bundle);
500  } while (!language_model_->AcceptableChoiceFound() && word->ratings->dimension() < kMaxNumChunks);
501 
502  // If after running only the chopper best_choice is incorrect and no blame
503  // has been yet set, blame the classifier if best_choice is classifier's
504  // top choice and is a dictionary word (i.e. language model could not have
505  // helped). Otherwise blame the tradeoff between the classifier and
506  // the old language model (permuters).
507  if (word->blamer_bundle != nullptr &&
508  word->blamer_bundle->incorrect_result_reason() == IRR_CORRECT &&
509  !word->blamer_bundle->ChoiceIsCorrect(word->best_choice)) {
510  bool valid_permuter = word->best_choice != nullptr &&
511  Dict::valid_word_permuter(word->best_choice->permuter(), false);
512  word->blamer_bundle->BlameClassifierOrLangModel(word, getDict().getUnicharset(), valid_permuter,
513  wordrec_debug_blamer);
514  }
515 }
@ IRR_CORRECT
Definition: blamer.h:58
static bool valid_word_permuter(uint8_t perm, bool numbers_ok)
Check all the DAWGs to see if this word is in any of them.
Definition: dict.h:437
void UpdateSegSearchNodes(float rating_cert_scale, int starting_col, std::vector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
Definition: segsearch.cpp:162
void ProcessSegSearchPainPoint(float pain_point_priority, const MATRIX_COORD &pain_point, const char *pain_point_type, std::vector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle)
Definition: segsearch.cpp:222
void ResetNGramSearch(WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, std::vector< SegSearchPending > &pending)
Definition: segsearch.cpp:279

◆ improve_one_blob()

SEAM * tesseract::Wordrec::improve_one_blob ( const std::vector< BLOB_CHOICE * > &  blob_choices,
DANGERR *  fixpt,
bool  split_next_to_fragment,
bool  italic_blob,
WERD_RES *  word,
unsigned *  blob_number 
)

Definition at line 320 of file chopper.cpp.

322  {
323  float rating_ceiling = FLT_MAX;
324  SEAM *seam = nullptr;
325  do {
326  auto blob = select_blob_to_split_from_fixpt(fixpt);
327  if (chop_debug) {
328  tprintf("blob_number from fixpt = %d\n", blob);
329  }
330  bool split_point_from_dict = (blob != -1);
331  if (split_point_from_dict) {
332  fixpt->clear();
333  } else {
334  blob = select_blob_to_split(blob_choices, rating_ceiling, split_next_to_fragment);
335  }
336  if (chop_debug) {
337  tprintf("blob_number = %d\n", blob);
338  }
339  *blob_number = blob;
340  if (blob == -1) {
341  return nullptr;
342  }
343 
344  // TODO(rays) it may eventually help to allow italic_blob to be true,
345  seam = chop_numbered_blob(word->chopped_word, *blob_number, italic_blob, word->seam_array);
346  if (seam != nullptr) {
347  return seam; // Success!
348  }
349  if (blob_choices[*blob_number] == nullptr) {
350  return nullptr;
351  }
352  if (!split_point_from_dict) {
353  // We chopped the worst rated blob, try something else next time.
354  rating_ceiling = blob_choices[*blob_number]->rating();
355  }
356  } while (true);
357  return seam;
358 }
int select_blob_to_split(const std::vector< BLOB_CHOICE * > &blob_choices, float rating_ceiling, bool split_next_to_fragment)
Definition: chopper.cpp:523
SEAM * chop_numbered_blob(TWERD *word, int32_t blob_number, bool italic_blob, const std::vector< SEAM * > &seams)
Definition: chopper.cpp:261
int select_blob_to_split_from_fixpt(DANGERR *fixpt)
Definition: chopper.cpp:605

◆ InitBlamerForSegSearch()

void tesseract::Wordrec::InitBlamerForSegSearch ( WERD_RES *  word_res,
LMPainPoints *  pain_points,
BlamerBundle *  blamer_bundle,
std::string &  blamer_debug 
)
protected

Definition at line 296 of file segsearch.cpp.

297  {
298  pain_points->Clear(); // Clear pain points heap.
299  blamer_bundle->InitForSegSearch(word_res->best_choice, word_res->ratings, getDict().WildcardID(),
300  wordrec_debug_blamer, blamer_debug, pain_points,
301  segsearch_max_char_wh_ratio, word_res);
302 }

◆ InitialSegSearch()

void tesseract::Wordrec::InitialSegSearch ( WERD_RES *  word_res,
LMPainPoints *  pain_points,
std::vector< SegSearchPending > *  pending,
BestChoiceBundle *  best_choice_bundle,
BlamerBundle *  blamer_bundle 
)

Definition at line 120 of file segsearch.cpp.

122  {
123  if (segsearch_debug_level > 0) {
124  tprintf("Starting SegSearch on ratings matrix%s:\n",
125  wordrec_enable_assoc ? " (with assoc)" : "");
126  word_res->ratings->print(getDict().getUnicharset());
127  }
128 
129  pain_points->GenerateInitial(word_res);
130 
131  // Compute scaling factor that will help us recover blob outline length
132  // from classifier rating and certainty for the blob.
133  float rating_cert_scale = -1.0 * getDict().certainty_scale / rating_scale;
134 
135  language_model_->InitForWord(prev_word_best_choice_, assume_fixed_pitch_char_segment,
136  segsearch_max_char_wh_ratio, rating_cert_scale);
137 
138  // Initialize blamer-related information: map character boxes recorded in
139  // blamer_bundle->norm_truth_word to the corresponding i,j indices in the
140  // ratings matrix. We expect this step to succeed, since when running the
141  // chopper we checked that the correct chops are present.
142  if (blamer_bundle != nullptr) {
143  blamer_bundle->SetupCorrectSegmentation(word_res->chopped_word, wordrec_debug_blamer);
144  }
145 
146  // pending[col] tells whether there is update work to do to combine
147  // best_choice_bundle->beam[col - 1] with some BLOB_CHOICEs in matrix[col, *].
148  // As the language model state is updated, pending entries are modified to
149  // minimize duplication of work. It is important that during the update the
150  // children are considered in the non-decreasing order of their column, since
151  // this guarantees that all the parents would be up to date before an update
152  // of a child is done.
153  pending->clear();
154  pending->resize(word_res->ratings->dimension(), SegSearchPending());
155 
156  // Search the ratings matrix for the initial best path.
157  (*pending)[0].SetColumnClassified();
158  UpdateSegSearchNodes(rating_cert_scale, 0, pending, word_res, pain_points, best_choice_bundle,
159  blamer_bundle);
160 }

◆ INT_VAR_H() [1/15]

tesseract::Wordrec::INT_VAR_H ( chop_centered_maxwidth  )

◆ INT_VAR_H() [2/15]

tesseract::Wordrec::INT_VAR_H ( chop_debug  )

◆ INT_VAR_H() [3/15]

tesseract::Wordrec::INT_VAR_H ( chop_inside_angle  )

◆ INT_VAR_H() [4/15]

tesseract::Wordrec::INT_VAR_H ( chop_min_outline_area  )

◆ INT_VAR_H() [5/15]

tesseract::Wordrec::INT_VAR_H ( chop_min_outline_points  )

◆ INT_VAR_H() [6/15]

tesseract::Wordrec::INT_VAR_H ( chop_same_distance  )

◆ INT_VAR_H() [7/15]

tesseract::Wordrec::INT_VAR_H ( chop_seam_pile_size  )

◆ INT_VAR_H() [8/15]

tesseract::Wordrec::INT_VAR_H ( chop_split_length  )

◆ INT_VAR_H() [9/15]

tesseract::Wordrec::INT_VAR_H ( chop_x_y_weight  )

◆ INT_VAR_H() [10/15]

tesseract::Wordrec::INT_VAR_H ( repair_unchopped_blobs  )

◆ INT_VAR_H() [11/15]

tesseract::Wordrec::INT_VAR_H ( segsearch_debug_level  )

◆ INT_VAR_H() [12/15]

tesseract::Wordrec::INT_VAR_H ( segsearch_max_futile_classifications  )

◆ INT_VAR_H() [13/15]

tesseract::Wordrec::INT_VAR_H ( segsearch_max_pain_points  )

◆ INT_VAR_H() [14/15]

tesseract::Wordrec::INT_VAR_H ( wordrec_debug_level  )

◆ INT_VAR_H() [15/15]

tesseract::Wordrec::INT_VAR_H ( wordrec_max_join_chunks  )

◆ is_inside_angle()

bool tesseract::Wordrec::is_inside_angle ( EDGEPT * pt)

Definition at line 88 of file chop.cpp.

88  {
89  return angle_change(pt->prev, pt, pt->next) < chop_inside_angle;
90 }
int angle_change(EDGEPT *point1, EDGEPT *point2, EDGEPT *point3)
Definition: chop.cpp:98

◆ near_point()

bool tesseract::Wordrec::near_point ( EDGEPT *  point,
EDGEPT *  line_pt_0,
EDGEPT *  line_pt_1,
EDGEPT **  near_pt 
)

Definition at line 36 of file outlines.cpp.

36  {
37  TPOINT p;
38 
39  float slope;
40  float intercept;
41 
42  float x0 = line_pt_0->pos.x;
43  float x1 = line_pt_1->pos.x;
44  float y0 = line_pt_0->pos.y;
45  float y1 = line_pt_1->pos.y;
46 
47  if (x0 == x1) {
48  /* Handle vertical line */
49  p.x = static_cast<int16_t>(x0);
50  p.y = point->pos.y;
51  } else {
52  /* Slope and intercept */
53  slope = (y0 - y1) / (x0 - x1);
54  intercept = y1 - x1 * slope;
55 
56  /* Find perpendicular */
57  p.x = static_cast<int16_t>((point->pos.x + (point->pos.y - intercept) * slope) /
58  (slope * slope + 1));
59  p.y = static_cast<int16_t>(slope * p.x + intercept);
60  }
61 
62  if (is_on_line(p, line_pt_0->pos, line_pt_1->pos) && (!same_point(p, line_pt_0->pos)) &&
63  (!same_point(p, line_pt_1->pos))) {
64  /* Intersection on line */
65  *near_pt = make_edgept(p.x, p.y, line_pt_1, line_pt_0);
66  return true;
67  } else { /* Intersection not on line */
68  *near_pt = closest(point, line_pt_0, line_pt_1);
69  return false;
70  }
71 }
#define same_point(p1, p2)
Definition: outlines.h:44
#define is_on_line(p, p0, p1)
Definition: outlines.h:103
#define closest(test_p, p1, p2)
Definition: outlines.h:63
EDGEPT * make_edgept(TDimension x, TDimension y, EDGEPT *next, EDGEPT *prev)
Definition: split.cpp:138

◆ new_max_point()

void tesseract::Wordrec::new_max_point ( EDGEPT *  local_max,
PointHeap *  points 
)

Definition at line 249 of file chop.cpp.

249  {
250  int16_t dir;
251 
252  dir = direction(local_max);
253 
254  if (dir > 0) {
255  add_point_to_list(points, local_max);
256  return;
257  }
258 
259  if (dir == 0 && point_priority(local_max) < 0) {
260  add_point_to_list(points, local_max);
261  return;
262  }
263 }
void add_point_to_list(PointHeap *point_heap, EDGEPT *point)
Definition: chop.cpp:73

◆ new_min_point()

void tesseract::Wordrec::new_min_point ( EDGEPT *  local_min,
PointHeap *  points 
)

Definition at line 226 of file chop.cpp.

226  {
227  int16_t dir;
228 
229  dir = direction(local_min);
230 
231  if (dir < 0) {
232  add_point_to_list(points, local_min);
233  return;
234  }
235 
236  if (dir == 0 && point_priority(local_min) < 0) {
237  add_point_to_list(points, local_min);
238  return;
239  }
240 }

◆ pick_close_point()

EDGEPT * tesseract::Wordrec::pick_close_point ( EDGEPT *  critical_point,
EDGEPT *  vertical_point,
int *  best_dist 
)

Definition at line 136 of file chop.cpp.

136  {
137  EDGEPT *best_point = nullptr;
138  int this_distance;
139  bool found_better;
140 
141  do {
142  found_better = false;
143 
144  this_distance = edgept_dist(critical_point, vertical_point);
145  if (this_distance <= *best_dist) {
146  if (!(same_point(critical_point->pos, vertical_point->pos) ||
147  same_point(critical_point->pos, vertical_point->next->pos) ||
148  (best_point && same_point(best_point->pos, vertical_point->pos)) ||
149  is_exterior_point(critical_point, vertical_point))) {
150  *best_dist = this_distance;
151  best_point = vertical_point;
152  if (chop_vertical_creep) {
153  found_better = true;
154  }
155  }
156  }
157  vertical_point = vertical_point->next;
158  } while (found_better == true);
159 
160  return (best_point);
161 }
#define edgept_dist(p1, p2)
Definition: outlines.h:74
#define is_exterior_point(edge, point)
Definition: outlines.h:83

◆ pick_good_seam()

SEAM * tesseract::Wordrec::pick_good_seam ( TBLOB * blob)

Definition at line 214 of file findseam.cpp.

214  {
215  SeamPile seam_pile(chop_seam_pile_size);
216  EDGEPT *points[MAX_NUM_POINTS];
217  EDGEPT_CLIST new_points;
218  SEAM *seam = nullptr;
219  TESSLINE *outline;
220  int16_t num_points = 0;
221 
222 #ifndef GRAPHICS_DISABLED
223  if (chop_debug > 2) {
224  wordrec_display_splits.set_value(true);
225  }
226 
227  draw_blob_edges(blob);
228 #endif
229 
230  PointHeap point_heap(MAX_NUM_POINTS);
231  for (outline = blob->outlines; outline; outline = outline->next) {
232  prioritize_points(outline, &point_heap);
233  }
234 
235  while (!point_heap.empty() && num_points < MAX_NUM_POINTS) {
236  points[num_points++] = point_heap.PeekTop().data();
237  point_heap.Pop(nullptr);
238  }
239 
240  /* Initialize queue */
241  SeamQueue seam_queue(MAX_NUM_SEAMS);
242 
243  try_point_pairs(points, num_points, &seam_queue, &seam_pile, &seam, blob);
244  try_vertical_splits(points, num_points, &new_points, &seam_queue, &seam_pile, &seam, blob);
245 
246  if (seam == nullptr) {
247  choose_best_seam(&seam_queue, nullptr, BAD_PRIORITY, &seam, blob, &seam_pile);
248  } else if (seam->priority() > chop_good_split) {
249  choose_best_seam(&seam_queue, nullptr, seam->priority(), &seam, blob, &seam_pile);
250  }
251 
252  EDGEPT_C_IT it(&new_points);
253  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
254  EDGEPT *inserted_point = it.data();
255  if (seam == nullptr || !seam->UsesPoint(inserted_point)) {
256  for (outline = blob->outlines; outline; outline = outline->next) {
257  if (outline->loop == inserted_point) {
258  outline->loop = outline->loop->next;
259  }
260  }
261  remove_edgept(inserted_point);
262  }
263  }
264 
265  if (seam) {
266  if (seam->priority() > chop_ok_split) {
267  delete seam;
268  seam = nullptr;
269  }
270 #ifndef GRAPHICS_DISABLED
271  else if (wordrec_display_splits) {
272  seam->Mark(edge_window);
273  if (chop_debug > 2) {
274  edge_window->Update();
275  edge_window->Wait();
276  }
277  }
278 #endif
279  }
280 
281  if (chop_debug) {
282  wordrec_display_splits.set_value(false);
283  }
284 
285  return (seam);
286 }
GenericHeap< SeamPair > SeamQueue
Definition: findseam.h:31
void remove_edgept(EDGEPT *point)
Definition: split.cpp:199
GenericHeap< PointPair > PointHeap
Definition: chop.h:32
ScrollView * edge_window
Definition: plotedges.cpp:37
bool wordrec_display_splits
Definition: split.cpp:41
GenericHeap< SeamDecPair > SeamPile
Definition: findseam.h:34
void draw_blob_edges(TBLOB *blob)
Definition: plotedges.cpp:67
static void Update()
Definition: scrollview.cpp:713
void prioritize_points(TESSLINE *outline, PointHeap *points)
Definition: chop.cpp:170
void try_point_pairs(EDGEPT *points[MAX_NUM_POINTS], int16_t num_points, SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, TBLOB *blob)
Definition: findseam.cpp:295
void try_vertical_splits(EDGEPT *points[MAX_NUM_POINTS], int16_t num_points, EDGEPT_CLIST *new_points, SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, TBLOB *blob)
Definition: findseam.cpp:327
void choose_best_seam(SeamQueue *seam_queue, const SPLIT *split, PRIORITY priority, SEAM **seam_result, TBLOB *blob, SeamPile *seam_pile)
Definition: findseam.cpp:103

◆ point_priority()

PRIORITY tesseract::Wordrec::point_priority ( EDGEPT *  point)

Definition at line 64 of file chop.cpp.

64  {
65  return static_cast<PRIORITY>(angle_change(point->prev, point, point->next));
66 }

◆ prioritize_points()

void tesseract::Wordrec::prioritize_points ( TESSLINE *  outline,
PointHeap *  points 
)

Definition at line 170 of file chop.cpp.

170  {
171  EDGEPT *this_point;
172  EDGEPT *local_min = nullptr;
173  EDGEPT *local_max = nullptr;
174 
175  this_point = outline->loop;
176  local_min = this_point;
177  local_max = this_point;
178  do {
179  if (this_point->vec.y < 0) {
180  /* Look for minima */
181  if (local_max != nullptr) {
182  new_max_point(local_max, points);
183  } else if (is_inside_angle(this_point)) {
184  add_point_to_list(points, this_point);
185  }
186  local_max = nullptr;
187  local_min = this_point->next;
188  } else if (this_point->vec.y > 0) {
189  /* Look for maxima */
190  if (local_min != nullptr) {
191  new_min_point(local_min, points);
192  } else if (is_inside_angle(this_point)) {
193  add_point_to_list(points, this_point);
194  }
195  local_min = nullptr;
196  local_max = this_point->next;
197  } else {
198  /* Flat area */
199  if (local_max != nullptr) {
200  if (local_max->prev->vec.y != 0) {
201  new_max_point(local_max, points);
202  }
203  local_max = this_point->next;
204  local_min = nullptr;
205  } else {
206  if (local_min->prev->vec.y != 0) {
207  new_min_point(local_min, points);
208  }
209  local_min = this_point->next;
210  local_max = nullptr;
211  }
212  }
213 
214  /* Next point */
215  this_point = this_point->next;
216  } while (this_point != outline->loop);
217 }
bool is_inside_angle(EDGEPT *pt)
Definition: chop.cpp:88
void new_min_point(EDGEPT *local_min, PointHeap *points)
Definition: chop.cpp:226
void new_max_point(EDGEPT *local_max, PointHeap *points)
Definition: chop.cpp:249

◆ ProcessSegSearchPainPoint()

void tesseract::Wordrec::ProcessSegSearchPainPoint ( float  pain_point_priority,
const MATRIX_COORD &  pain_point,
const char *  pain_point_type,
std::vector< SegSearchPending > *  pending,
WERD_RES *  word_res,
LMPainPoints *  pain_points,
BlamerBundle *  blamer_bundle 
)
protected

Definition at line 222 of file segsearch.cpp.

226  {
227  if (segsearch_debug_level > 0) {
228  tprintf("Classifying pain point %s priority=%.4f, col=%d, row=%d\n", pain_point_type,
229  pain_point_priority, pain_point.col, pain_point.row);
230  }
231  ASSERT_HOST(pain_points != nullptr);
232  MATRIX *ratings = word_res->ratings;
233  // Classify blob [pain_point.col pain_point.row]
234  if (!pain_point.Valid(*ratings)) {
235  ratings->IncreaseBandSize(pain_point.row + 1 - pain_point.col);
236  }
237  ASSERT_HOST(pain_point.Valid(*ratings));
238  BLOB_CHOICE_LIST *classified =
239  classify_piece(word_res->seam_array, pain_point.col, pain_point.row, pain_point_type,
240  word_res->chopped_word, blamer_bundle);
241  BLOB_CHOICE_LIST *lst = ratings->get(pain_point.col, pain_point.row);
242  if (lst == nullptr) {
243  ratings->put(pain_point.col, pain_point.row, classified);
244  } else {
245  // We can not delete old BLOB_CHOICEs, since they might contain
246  // ViterbiStateEntries that are parents of other "active" entries.
247  // Thus if the matrix cell already contains classifications we add
248  // the new ones to the beginning of the list.
249  BLOB_CHOICE_IT it(lst);
250  it.add_list_before(classified);
251  delete classified; // safe to delete, since empty after add_list_before()
252  classified = nullptr;
253  }
254 
255  if (segsearch_debug_level > 0) {
256  print_ratings_list("Updated ratings matrix with a new entry:",
257  ratings->get(pain_point.col, pain_point.row), getDict().getUnicharset());
258  ratings->print(getDict().getUnicharset());
259  }
260 
261  // Insert initial "pain points" to join the newly classified blob
262  // with its left and right neighbors.
263  if (classified != nullptr && !classified->empty()) {
264  if (pain_point.col > 0) {
265  pain_points->GeneratePainPoint(pain_point.col - 1, pain_point.row, LM_PPTYPE_SHAPE, 0.0, true,
266  segsearch_max_char_wh_ratio, word_res);
267  }
268  if (pain_point.row + 1 < ratings->dimension()) {
269  pain_points->GeneratePainPoint(pain_point.col, pain_point.row + 1, LM_PPTYPE_SHAPE, 0.0, true,
270  segsearch_max_char_wh_ratio, word_res);
271  }
272  }
273  (*pending)[pain_point.col].SetBlobClassified(pain_point.row);
274 }

◆ program_editdown()

void tesseract::Wordrec::program_editdown ( int32_t  elasped_time)

Definition at line 73 of file tface.cpp.

73  {
74 #ifndef DISABLED_LEGACY_ENGINE
75  EndAdaptiveClassifier();
76 #endif // ndef DISABLED_LEGACY_ENGINE
77  getDict().End();
78 }
void EndAdaptiveClassifier()
Definition: adaptmatch.cpp:464
void End()
Definition: dict.cpp:379

◆ program_editup()

void tesseract::Wordrec::program_editup ( const std::string &  textbase,
TessdataManager *  init_classifier,
TessdataManager *  init_dict 
)

Definition at line 39 of file tface.cpp.

40  {
41  if (!textbase.empty()) {
42  imagefile = textbase;
43  }
44 #ifndef DISABLED_LEGACY_ENGINE
45  InitFeatureDefs(&feature_defs_);
46  InitAdaptiveClassifier(init_classifier);
47  if (init_dict) {
48  getDict().SetupForLoad(Dict::GlobalDawgCache());
49  getDict().Load(lang, init_dict);
50  getDict().FinishLoad();
51  }
52  pass2_ok_split = chop_ok_split;
53 #endif // ndef DISABLED_LEGACY_ENGINE
54 }
void InitFeatureDefs(FEATURE_DEFS_STRUCT *featuredefs)
Definition: featdefs.cpp:87
std::string imagefile
Definition: ccutil.h:65
std::string lang
Definition: ccutil.h:59
FEATURE_DEFS_STRUCT feature_defs_
Definition: classify.h:447
void InitAdaptiveClassifier(TessdataManager *mgr)
Definition: adaptmatch.cpp:527
static DawgCache * GlobalDawgCache()
Definition: dict.cpp:172
void SetupForLoad(DawgCache *dawg_cache)
Definition: dict.cpp:180
bool FinishLoad()
Definition: dict.cpp:357
void Load(const std::string &lang, TessdataManager *data_file)
Definition: dict.cpp:200

◆ ResetNGramSearch()

void tesseract::Wordrec::ResetNGramSearch ( WERD_RES *  word_res,
BestChoiceBundle *  best_choice_bundle,
std::vector< SegSearchPending > &  pending 
)
protected

Definition at line 279 of file segsearch.cpp.

280  {
281  // TODO(rays) More refactoring required here.
282  // Delete existing viterbi states.
283  for (auto &col : best_choice_bundle->beam) {
284  col->Clear();
285  }
286  // Reset best_choice_bundle.
287  word_res->ClearWordChoices();
288  best_choice_bundle->best_vse = nullptr;
289  // Clear out all existing pendings and add a new one for the first column.
290  pending[0].SetColumnClassified();
291  for (auto &data : pending) {
292  data.Clear();
293  }
294 }

◆ SaveAltChoices()

void tesseract::Wordrec::SaveAltChoices ( const LIST &  best_choices,
WERD_RES *  word 
)

◆ SegSearch()

void tesseract::Wordrec::SegSearch ( WERD_RES *  word_res,
BestChoiceBundle *  best_choice_bundle,
BlamerBundle *  blamer_bundle 
)

Definition at line 33 of file segsearch.cpp.

34  {
35  LMPainPoints pain_points(segsearch_max_pain_points, segsearch_max_char_wh_ratio,
36  assume_fixed_pitch_char_segment, &getDict(), segsearch_debug_level);
37  // Compute scaling factor that will help us recover blob outline length
38  // from classifier rating and certainty for the blob.
39  float rating_cert_scale = -1.0 * getDict().certainty_scale / rating_scale;
40  std::vector<SegSearchPending> pending;
41  InitialSegSearch(word_res, &pain_points, &pending, best_choice_bundle, blamer_bundle);
42 
43  if (!SegSearchDone(0)) { // find a better choice
44  if (chop_enable && word_res->chopped_word != nullptr) {
45  improve_by_chopping(rating_cert_scale, word_res, best_choice_bundle, blamer_bundle,
46  &pain_points, &pending);
47  }
48  if (chop_debug) {
49  SEAM::PrintSeams("Final seam list:", word_res->seam_array);
50  }
51 
52  if (blamer_bundle != nullptr && !blamer_bundle->ChoiceIsCorrect(word_res->best_choice)) {
53  blamer_bundle->SetChopperBlame(word_res, wordrec_debug_blamer);
54  }
55  }
56  // Keep trying to find a better path by fixing the "pain points".
57 
58  MATRIX_COORD pain_point;
59  float pain_point_priority;
60  int num_futile_classifications = 0;
61  std::string blamer_debug;
62  while (wordrec_enable_assoc &&
63  (!SegSearchDone(num_futile_classifications) ||
64  (blamer_bundle != nullptr && blamer_bundle->GuidedSegsearchStillGoing()))) {
65  // Get the next valid "pain point".
66  bool found_nothing = true;
67  LMPainPointsType pp_type;
68  while ((pp_type = pain_points.Deque(&pain_point, &pain_point_priority)) != LM_PPTYPE_NUM) {
69  if (!pain_point.Valid(*word_res->ratings)) {
70  word_res->ratings->IncreaseBandSize(pain_point.row - pain_point.col + 1);
71  }
72  if (pain_point.Valid(*word_res->ratings) &&
73  !word_res->ratings->Classified(pain_point.col, pain_point.row, getDict().WildcardID())) {
74  found_nothing = false;
75  break;
76  }
77  }
78  if (found_nothing) {
79  if (segsearch_debug_level > 0) {
80  tprintf("Pain points queue is empty\n");
81  }
82  break;
83  }
84  ProcessSegSearchPainPoint(pain_point_priority, pain_point,
85  LMPainPoints::PainPointDescription(pp_type), &pending, word_res,
86  &pain_points, blamer_bundle);
87 
88  UpdateSegSearchNodes(rating_cert_scale, pain_point.col, &pending, word_res, &pain_points,
89  best_choice_bundle, blamer_bundle);
90  if (!best_choice_bundle->updated) {
91  ++num_futile_classifications;
92  }
93 
94  if (segsearch_debug_level > 0) {
95  tprintf("num_futile_classifications %d\n", num_futile_classifications);
96  }
97 
98  best_choice_bundle->updated = false; // reset updated
99 
100  // See if it's time to terminate SegSearch or time for starting a guided
101  // search for the true path to find the blame for the incorrect best_choice.
102  if (SegSearchDone(num_futile_classifications) && blamer_bundle != nullptr &&
103  blamer_bundle->GuidedSegsearchNeeded(word_res->best_choice)) {
104  InitBlamerForSegSearch(word_res, &pain_points, blamer_bundle, blamer_debug);
105  }
106  } // end while loop exploring alternative paths
107  if (blamer_bundle != nullptr) {
108  blamer_bundle->FinishSegSearch(word_res->best_choice, wordrec_debug_blamer, blamer_debug);
109  }
110 
111  if (segsearch_debug_level > 0) {
112  tprintf("Done with SegSearch (AcceptableChoiceFound: %d)\n",
113  language_model_->AcceptableChoiceFound());
114  }
115 }
static void PrintSeams(const char *label, const std::vector< SEAM * > &seams)
Definition: seam.cpp:158
static const char * PainPointDescription(LMPainPointsType type)
void improve_by_chopping(float rating_cert_scale, WERD_RES *word, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle, LMPainPoints *pain_points, std::vector< SegSearchPending > *pending)
Definition: chopper.cpp:445
void InitBlamerForSegSearch(WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle, std::string &blamer_debug)
Definition: segsearch.cpp:296
void InitialSegSearch(WERD_RES *word_res, LMPainPoints *pain_points, std::vector< SegSearchPending > *pending, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
Definition: segsearch.cpp:120
bool SegSearchDone(int num_futile_classifications)
Definition: wordrec.h:394

◆ SegSearchDone()

bool tesseract::Wordrec::SegSearchDone ( int  num_futile_classifications)
inline protected

Definition at line 394 of file wordrec.h.

394  {
395  return (language_model_->AcceptableChoiceFound() ||
396  num_futile_classifications >= segsearch_max_futile_classifications);
397  }

◆ select_blob_to_split()

int tesseract::Wordrec::select_blob_to_split ( const std::vector< BLOB_CHOICE * > &  blob_choices,
float  rating_ceiling,
bool  split_next_to_fragment 
)

Definition at line 523 of file chopper.cpp.

524  {
525  BLOB_CHOICE *blob_choice;
526  float worst = -FLT_MAX;
527  int worst_index = -1;
528  float worst_near_fragment = -FLT_MAX;
529  int worst_index_near_fragment = -1;
530  std::vector<const CHAR_FRAGMENT *> fragments;
531 
532  if (chop_debug) {
533  if (rating_ceiling < FLT_MAX) {
534  tprintf("rating_ceiling = %8.4f\n", rating_ceiling);
535  } else {
536  tprintf("rating_ceiling = No Limit\n");
537  }
538  }
539 
540  if (split_next_to_fragment && blob_choices.size() > 0) {
541  fragments.resize(blob_choices.size());
542  if (blob_choices[0] != nullptr) {
543  fragments[0] = getDict().getUnicharset().get_fragment(blob_choices[0]->unichar_id());
544  } else {
545  fragments[0] = nullptr;
546  }
547  }
548 
549  for (unsigned x = 0; x < blob_choices.size(); ++x) {
550  if (blob_choices[x] == nullptr) {
551  return x;
552  } else {
553  blob_choice = blob_choices[x];
554  // Populate fragments for the following position.
555  if (split_next_to_fragment && x + 1 < blob_choices.size()) {
556  if (blob_choices[x + 1] != nullptr) {
557  fragments[x + 1] =
558  getDict().getUnicharset().get_fragment(blob_choices[x + 1]->unichar_id());
559  } else {
560  fragments[x + 1] = nullptr;
561  }
562  }
563  if (blob_choice->rating() < rating_ceiling &&
564  blob_choice->certainty() < tessedit_certainty_threshold) {
565  // Update worst and worst_index.
566  if (blob_choice->rating() > worst) {
567  worst_index = x;
568  worst = blob_choice->rating();
569  }
570  if (split_next_to_fragment) {
571  // Update worst_near_fragment and worst_index_near_fragment.
572  bool expand_following_fragment =
573  (x + 1 < blob_choices.size() && fragments[x + 1] != nullptr &&
574  !fragments[x + 1]->is_beginning());
575  bool expand_preceding_fragment =
576  (x > 0 && fragments[x - 1] != nullptr && !fragments[x - 1]->is_ending());
577  if ((expand_following_fragment || expand_preceding_fragment) &&
578  blob_choice->rating() > worst_near_fragment) {
579  worst_index_near_fragment = x;
580  worst_near_fragment = blob_choice->rating();
581  if (chop_debug) {
582  tprintf(
583  "worst_index_near_fragment=%d"
584  " expand_following_fragment=%d"
585  " expand_preceding_fragment=%d\n",
586  worst_index_near_fragment, expand_following_fragment, expand_preceding_fragment);
587  }
588  }
589  }
590  }
591  }
592  }
593  // TODO(daria): maybe a threshold of badness for
594  // worst_near_fragment would be useful.
595  return worst_index_near_fragment != -1 ? worst_index_near_fragment : worst_index;
596 }
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
Definition: unicharset.h:769
const UNICHARSET & getUnicharset() const
Definition: dict.h:104

◆ select_blob_to_split_from_fixpt()

int tesseract::Wordrec::select_blob_to_split_from_fixpt ( DANGERR *  fixpt)

Definition at line 605 of file chopper.cpp.

605  {
606  if (!fixpt) {
607  return -1;
608  }
609  for (auto &i : *fixpt) {
610  if (i.begin + 1 == i.end && i.dangerous && i.correct_is_ngram) {
611  return i.begin;
612  }
613  }
614  return -1;
615 }

◆ set_pass1()

void tesseract::Wordrec::set_pass1 ( )

Definition at line 97 of file tface.cpp.

97  {
98  chop_ok_split.set_value(70.0);
99  language_model_->getParamsModel().SetPass(ParamsModel::PTRAIN_PASS1);
100  SettupPass1();
101 }

◆ set_pass2()

void tesseract::Wordrec::set_pass2 ( )

Definition at line 108 of file tface.cpp.

108  {
109  chop_ok_split.set_value(pass2_ok_split);
110  language_model_->getParamsModel().SetPass(ParamsModel::PTRAIN_PASS2);
111  SettupPass2();
112 }

◆ try_point_pairs()

void tesseract::Wordrec::try_point_pairs ( EDGEPT *  points[MAX_NUM_POINTS],
int16_t  num_points,
SeamQueue *  seam_queue,
SeamPile *  seam_pile,
SEAM **  seam,
TBLOB *  blob 
)

Definition at line 295 of file findseam.cpp.

297  {
298  int16_t x;
299  int16_t y;
300  PRIORITY priority;
301 
302  for (x = 0; x < num_points; x++) {
303  for (y = x + 1; y < num_points; y++) {
304  if (points[y] &&
305  points[x]->WeightedDistance(*points[y], chop_x_y_weight) < chop_split_length &&
306  points[x] != points[y]->next && points[y] != points[x]->next &&
307  !is_exterior_point(points[x], points[y]) && !is_exterior_point(points[y], points[x])) {
308  SPLIT split(points[x], points[y]);
309  priority = partial_split_priority(&split);
310 
311  choose_best_seam(seam_queue, &split, priority, seam, blob, seam_pile);
312  }
313  }
314  }
315 }
#define partial_split_priority(split)
Definition: findseam.cpp:40

◆ try_vertical_splits()

void tesseract::Wordrec::try_vertical_splits ( EDGEPT *  points[MAX_NUM_POINTS],
int16_t  num_points,
EDGEPT_CLIST *  new_points,
SeamQueue *  seam_queue,
SeamPile *  seam_pile,
SEAM **  seam,
TBLOB *  blob 
)

Definition at line 327 of file findseam.cpp.

329  {
330  EDGEPT *vertical_point = nullptr;
331  int16_t x;
332  PRIORITY priority;
333  TESSLINE *outline;
334 
335  for (x = 0; x < num_points; x++) {
336  vertical_point = nullptr;
337  for (outline = blob->outlines; outline; outline = outline->next) {
338  vertical_projection_point(points[x], outline->loop, &vertical_point, new_points);
339  }
340 
341  if (vertical_point && points[x] != vertical_point->next && vertical_point != points[x]->next &&
342  points[x]->WeightedDistance(*vertical_point, chop_x_y_weight) < chop_split_length) {
343  SPLIT split(points[x], vertical_point);
344  priority = partial_split_priority(&split);
345  choose_best_seam(seam_queue, &split, priority, seam, blob, seam_pile);
346  }
347  }
348 }
void vertical_projection_point(EDGEPT *split_point, EDGEPT *target_point, EDGEPT **best_point, EDGEPT_CLIST *new_points)
Definition: chop.cpp:277

◆ UpdateSegSearchNodes()

void tesseract::Wordrec::UpdateSegSearchNodes ( float  rating_cert_scale,
int  starting_col,
std::vector< SegSearchPending > *  pending,
WERD_RES *  word_res,
LMPainPoints *  pain_points,
BestChoiceBundle *  best_choice_bundle,
BlamerBundle *  blamer_bundle 
)
protected

Definition at line 162 of file segsearch.cpp.

165  {
166  MATRIX *ratings = word_res->ratings;
167  ASSERT_HOST(static_cast<unsigned>(ratings->dimension()) == pending->size());
168  ASSERT_HOST(static_cast<unsigned>(ratings->dimension()) == best_choice_bundle->beam.size());
169  for (int col = starting_col; col < ratings->dimension(); ++col) {
170  if (!(*pending)[col].WorkToDo()) {
171  continue;
172  }
173  int first_row = col;
174  int last_row = std::min(ratings->dimension() - 1, col + ratings->bandwidth() - 1);
175  if ((*pending)[col].SingleRow() >= 0) {
176  first_row = last_row = (*pending)[col].SingleRow();
177  }
178  if (segsearch_debug_level > 0) {
179  tprintf("\n\nUpdateSegSearchNodes: col=%d, rows=[%d,%d], alljust=%d\n", col, first_row,
180  last_row, (*pending)[col].IsRowJustClassified(INT32_MAX));
181  }
182  // Iterate over the pending list for this column.
183  for (int row = first_row; row <= last_row; ++row) {
184  // Update language model state of this child+parent pair.
185  BLOB_CHOICE_LIST *current_node = ratings->get(col, row);
186  LanguageModelState *parent_node = col == 0 ? nullptr : best_choice_bundle->beam[col - 1];
187  if (current_node != nullptr &&
188  language_model_->UpdateState((*pending)[col].IsRowJustClassified(row), col, row,
189  current_node, parent_node, pain_points, word_res,
190  best_choice_bundle, blamer_bundle) &&
191  row + 1 < ratings->dimension()) {
192  // Since the language model state of this entry changed, process all
193  // the child column.
194  (*pending)[row + 1].RevisitWholeColumn();
195  if (segsearch_debug_level > 0) {
196  tprintf("Added child col=%d to pending\n", row + 1);
197  }
198  } // end if UpdateState.
199  } // end for row.
200  } // end for col.
201  if (best_choice_bundle->best_vse != nullptr) {
202  ASSERT_HOST(word_res->StatesAllValid());
203  if (best_choice_bundle->best_vse->updated) {
204  pain_points->GenerateFromPath(rating_cert_scale, best_choice_bundle->best_vse, word_res);
205  if (!best_choice_bundle->fixpt.empty()) {
206  pain_points->GenerateFromAmbigs(best_choice_bundle->fixpt, best_choice_bundle->best_vse,
207  word_res);
208  }
209  }
210  }
211  // The segsearch is completed. Reset all updated flags on all VSEs and reset
212  // all pendings.
213  for (unsigned col = 0; col < pending->size(); ++col) {
214  (*pending)[col].Clear();
215  ViterbiStateEntry_IT vse_it(&best_choice_bundle->beam[col]->viterbi_state_entries);
216  for (vse_it.mark_cycle_pt(); !vse_it.cycled_list(); vse_it.forward()) {
217  vse_it.data()->updated = false;
218  }
219  }
220 }

◆ vertical_projection_point()

void tesseract::Wordrec::vertical_projection_point ( EDGEPT *  split_point,
EDGEPT *  target_point,
EDGEPT **  best_point,
EDGEPT_CLIST *  new_points 
)

Definition at line 277 of file chop.cpp.

278  {
279  EDGEPT *p; /* Iterator */
280  EDGEPT *this_edgept; /* Iterator */
281  EDGEPT_C_IT new_point_it(new_points);
282  int x = split_point->pos.x; /* X value of vertical */
283  int best_dist = LARGE_DISTANCE; /* Best point found */
284 
285  if (*best_point != nullptr) {
286  best_dist = edgept_dist(split_point, *best_point);
287  }
288 
289  p = target_point;
290  /* Look at each edge point */
291  do {
292  if (((p->pos.x <= x && x <= p->next->pos.x) || (p->next->pos.x <= x && x <= p->pos.x)) &&
293  !same_point(split_point->pos, p->pos) && !same_point(split_point->pos, p->next->pos) &&
294  !p->IsChopPt() && (*best_point == nullptr || !same_point((*best_point)->pos, p->pos))) {
295  if (near_point(split_point, p, p->next, &this_edgept)) {
296  new_point_it.add_before_then_move(this_edgept);
297  }
298 
299  if (*best_point == nullptr) {
300  best_dist = edgept_dist(split_point, this_edgept);
301  }
302 
303  this_edgept = pick_close_point(split_point, this_edgept, &best_dist);
304  if (this_edgept) {
305  *best_point = this_edgept;
306  }
307  }
308 
309  p = p->next;
310  } while (p != target_point);
311 }
#define LARGE_DISTANCE
Definition: outlines.h:31
bool near_point(EDGEPT *point, EDGEPT *line_pt_0, EDGEPT *line_pt_1, EDGEPT **near_pt)
Definition: outlines.cpp:36
EDGEPT * pick_close_point(EDGEPT *critical_point, EDGEPT *vertical_point, int *best_dist)
Definition: chop.cpp:136

Member Data Documentation

◆ fill_lattice_

void(Wordrec::* tesseract::Wordrec::fill_lattice_) (const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)

Definition at line 390 of file wordrec.h.

◆ language_model_

std::unique_ptr<LanguageModel> tesseract::Wordrec::language_model_

Definition at line 382 of file wordrec.h.

◆ pass2_ok_split

PRIORITY tesseract::Wordrec::pass2_ok_split

Definition at line 383 of file wordrec.h.

◆ prev_word_best_choice_

WERD_CHOICE* tesseract::Wordrec::prev_word_best_choice_

Definition at line 387 of file wordrec.h.


The documentation for this class was generated from the following files: