tesseract  5.0.0
lm_state.cpp
Go to the documentation of this file.
1 // File: lm_state.cpp
3 // Description: Structures and functionality for capturing the state of
4 // segmentation search guided by the language model.
5 // Author: Rika Antonova
6 //
7 // (C) Copyright 2012, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
#include "lm_state.h"

#include <cfloat> // FLT_MAX
21 
22 namespace tesseract {
23 
24 void ViterbiStateEntry::Print(const char *msg) const {
25  tprintf("%s ViterbiStateEntry", msg);
26  if (updated) {
27  tprintf("(NEW)");
28  }
29  if (this->debug_str != nullptr) {
30  tprintf(" str=%s", this->debug_str->c_str());
31  }
32  tprintf(" with ratings_sum=%.4f length=%d cost=%.6f", this->ratings_sum, this->length,
33  this->cost);
34  if (this->top_choice_flags) {
35  tprintf(" top_choice_flags=0x%x", this->top_choice_flags);
36  }
37  if (!this->Consistent()) {
38  tprintf(" inconsistent=(punc %d case %d chartype %d script %d font %d)",
40  this->consistency_info.NumInconsistentCase(),
41  this->consistency_info.NumInconsistentChartype(),
42  this->consistency_info.inconsistent_script, this->consistency_info.inconsistent_font);
43  }
44  if (this->dawg_info) {
45  tprintf(" permuter=%d", this->dawg_info->permuter);
46  }
47  if (this->ngram_info) {
48  tprintf(" ngram_cl_cost=%g context=%s ngram pruned=%d",
49  this->ngram_info->ngram_and_classifier_cost, this->ngram_info->context.c_str(),
50  this->ngram_info->pruned);
51  }
52  if (this->associate_stats.shape_cost > 0.0f) {
53  tprintf(" shape_cost=%g", this->associate_stats.shape_cost);
54  }
55  tprintf(" %s", XHeightConsistencyEnumName[this->consistency_info.xht_decision]);
56 
57  tprintf("\n");
58 }
59 
62  viterbi_state_entries.clear();
66 }
67 
68 void LanguageModelState::Print(const char *msg) {
69  tprintf("%s VSEs (max_cost=%g prn_len=%d tot_len=%d):\n", msg,
72  ViterbiStateEntry_IT vit(&viterbi_state_entries);
73  for (vit.mark_cycle_pt(); !vit.cycled_list(); vit.forward()) {
74  vit.data()->Print("");
75  }
76 }
77 
78 } // namespace tesseract
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
XHeightConsistencyEnum xht_decision
float ngram_and_classifier_cost
-[ ln(P_classifier(path)) + scale_factor * ln(P_ngram_model(path)) ]
Definition: lm_state.h:87
LanguageModelDawgInfo * dawg_info
Definition: lm_state.h:170
AssociateStats associate_stats
character widths/gaps/seams
Definition: lm_state.h:192
int length
number of characters on the path
Definition: lm_state.h:189
void Print(const char *msg) const
Definition: lm_state.cpp:24
LanguageModelNgramInfo * ngram_info
Definition: lm_state.h:174
LanguageModelFlagsType top_choice_flags
Definition: lm_state.h:196
float ratings_sum
sum of ratings of the characters on the path
Definition: lm_state.h:186
bool updated
set to true if the entry has just been created/updated
Definition: lm_state.h:198
LMConsistencyInfo consistency_info
path consistency info
Definition: lm_state.h:191
float viterbi_state_entries_prunable_max_cost
Definition: lm_state.h:220
void Print(const char *msg)
Definition: lm_state.cpp:68
int viterbi_state_entries_length
Total number of entries in viterbi_state_entries.
Definition: lm_state.h:222
int viterbi_state_entries_prunable_length
Number and max cost of prunable paths in viterbi_state_entries.
Definition: lm_state.h:219
ViterbiStateEntry_LIST viterbi_state_entries
Storage for the Viterbi state.
Definition: lm_state.h:217
void Clear()
Clears the viterbi search state back to its initial conditions.
Definition: lm_state.cpp:61