tesseract  5.0.0
tesseract::BlamerBundle Struct Reference

#include <blamer.h>

Public Member Functions

 BlamerBundle ()
 
 BlamerBundle (const BlamerBundle &other)
 
 ~BlamerBundle ()
 
std::string TruthString () const
 
IncorrectResultReason incorrect_result_reason () const
 
bool NoTruth () const
 
bool HasDebugInfo () const
 
const std::string & debug () const
 
const std::string & misadaption_debug () const
 
void UpdateBestRating (float rating)
 
int correct_segmentation_length () const
 
bool MatrixPositionCorrect (int index, const MATRIX_COORD &coord)
 
void set_best_choice_is_dict_and_top_choice (bool value)
 
const char * lattice_data () const
 
int lattice_size () const
 
void set_lattice_data (const char *data, int size)
 
const tesseract::ParamsTrainingBundle & params_training_bundle () const
 
void AddHypothesis (const tesseract::ParamsTrainingHypothesis &hypo)
 
void SetWordTruth (const UNICHARSET &unicharset, const char *truth_str, const TBOX &word_box)
 
void SetSymbolTruth (const UNICHARSET &unicharset, const char *char_str, const TBOX &char_box)
 
void SetRejectedTruth ()
 
bool ChoiceIsCorrect (const WERD_CHOICE *word_choice) const
 
void ClearResults ()
 
void CopyTruth (const BlamerBundle &other)
 
void CopyResults (const BlamerBundle &other)
 
const char * IncorrectReason () const
 
void FillDebugString (const std::string &msg, const WERD_CHOICE *choice, std::string &debug)
 
void SetupNormTruthWord (const DENORM &denorm)
 
void SplitBundle (int word1_right, int word2_left, bool debug, BlamerBundle *bundle1, BlamerBundle *bundle2) const
 
void JoinBlames (const BlamerBundle &bundle1, const BlamerBundle &bundle2, bool debug)
 
void BlameClassifier (const UNICHARSET &unicharset, const TBOX &blob_box, const BLOB_CHOICE_LIST &choices, bool debug)
 
void SetChopperBlame (const WERD_RES *word, bool debug)
 
void BlameClassifierOrLangModel (const WERD_RES *word, const UNICHARSET &unicharset, bool valid_permuter, bool debug)
 
void SetupCorrectSegmentation (const TWERD *word, bool debug)
 
bool GuidedSegsearchNeeded (const WERD_CHOICE *best_choice) const
 
void InitForSegSearch (const WERD_CHOICE *best_choice, MATRIX *ratings, UNICHAR_ID wildcard_id, bool debug, std::string &debug_str, tesseract::LMPainPoints *pain_points, double max_char_wh_ratio, WERD_RES *word_res)
 
bool GuidedSegsearchStillGoing () const
 
void FinishSegSearch (const WERD_CHOICE *best_choice, bool debug, std::string &debug_str)
 
void SetMisAdaptionDebug (const WERD_CHOICE *best_choice, bool debug)
 

Static Public Member Functions

static const char * IncorrectReasonName (IncorrectResultReason irr)
 
static void LastChanceBlame (bool debug, WERD_RES *word)
 

Detailed Description

Definition at line 107 of file blamer.h.

Constructor & Destructor Documentation

◆ BlamerBundle() [1/2]

tesseract::BlamerBundle::BlamerBundle ( )
inline

Definition at line 109 of file blamer.h.

110  : truth_has_char_boxes_(false)
111  , incorrect_result_reason_(IRR_CORRECT)
112  , lattice_data_(nullptr) {
113  ClearResults();
114  }
@ IRR_CORRECT
Definition: blamer.h:58

◆ BlamerBundle() [2/2]

tesseract::BlamerBundle::BlamerBundle ( const BlamerBundle &  other)
inline

Definition at line 115 of file blamer.h.

115  {
116  this->CopyTruth(other);
117  this->CopyResults(other);
118  }
void CopyTruth(const BlamerBundle &other)
Definition: blamer.h:214
void CopyResults(const BlamerBundle &other)
Definition: blamer.h:220

◆ ~BlamerBundle()

tesseract::BlamerBundle::~BlamerBundle ( )
inline

Definition at line 119 of file blamer.h.

119  {
120  delete[] lattice_data_;
121  }

Member Function Documentation

◆ AddHypothesis()

void tesseract::BlamerBundle::AddHypothesis ( const tesseract::ParamsTrainingHypothesis &  hypo)
inline

Definition at line 180 of file blamer.h.

180  {
181  params_training_bundle_.AddHypothesis(hypo);
182  }
ParamsTrainingHypothesis & AddHypothesis(const ParamsTrainingHypothesis &other)

◆ BlameClassifier()

void tesseract::BlamerBundle::BlameClassifier ( const UNICHARSET &  unicharset,
const TBOX &  blob_box,
const BLOB_CHOICE_LIST &  choices,
bool  debug 
)

Definition at line 260 of file blamer.cpp.

261  {
262  if (!truth_has_char_boxes_ || incorrect_result_reason_ != IRR_CORRECT) {
263  return; // Nothing to do here.
264  }
265 
266  for (unsigned b = 0; b < norm_truth_word_.length(); ++b) {
267  const TBOX &truth_box = norm_truth_word_.BlobBox(b);
268  // Note that we are more strict on the bounding box boundaries here
269  // than in other places (chopper, segmentation search), since we do
270  // not have the ability to check the previous and next bounding box.
271  if (blob_box.x_almost_equal(truth_box, norm_box_tolerance_ / 2)) {
272  bool found = false;
273  bool incorrect_adapted = false;
274  UNICHAR_ID incorrect_adapted_id = INVALID_UNICHAR_ID;
275  const char *truth_str = truth_text_[b].c_str();
276  // We promise not to modify the list or its contents, using a
277  // const BLOB_CHOICE* below.
278  BLOB_CHOICE_IT choices_it(const_cast<BLOB_CHOICE_LIST *>(&choices));
279  for (choices_it.mark_cycle_pt(); !choices_it.cycled_list(); choices_it.forward()) {
280  const BLOB_CHOICE *choice = choices_it.data();
281  if (strcmp(truth_str, unicharset.get_normed_unichar(choice->unichar_id())) == 0) {
282  found = true;
283  break;
284  } else if (choice->IsAdapted()) {
285  incorrect_adapted = true;
286  incorrect_adapted_id = choice->unichar_id();
287  }
288  } // end choices_it for loop
289  if (!found) {
290  std::string debug_str = "unichar ";
291  debug_str += truth_str;
292  debug_str += " not found in classification list";
293  SetBlame(IRR_CLASSIFIER, debug_str, nullptr, debug);
294  } else if (incorrect_adapted) {
295  std::string debug_str = "better rating for adapted ";
296  debug_str += unicharset.id_to_unichar(incorrect_adapted_id);
297  debug_str += " than for correct ";
298  debug_str += truth_str;
299  SetBlame(IRR_ADAPTION, debug_str, nullptr, debug);
300  }
301  break;
302  }
303  } // end iterating over blamer_bundle->norm_truth_word
304 }
@ TBOX
@ IRR_CLASSIFIER
Definition: blamer.h:63
@ IRR_ADAPTION
Definition: blamer.h:93
int UNICHAR_ID
Definition: unichar.h:36
const std::string & debug() const
Definition: blamer.h:140
const TBOX & BlobBox(unsigned index) const
Definition: boxword.h:84
unsigned length() const
Definition: boxword.h:81

◆ BlameClassifierOrLangModel()

void tesseract::BlamerBundle::BlameClassifierOrLangModel ( const WERD_RES *  word,
const UNICHARSET &  unicharset,
bool  valid_permuter,
bool  debug 
)

Definition at line 363 of file blamer.cpp.

364  {
365  if (valid_permuter) {
366  // Find out whether best choice is a top choice.
367  best_choice_is_dict_and_top_choice_ = true;
368  for (unsigned i = 0; i < word->best_choice->length(); ++i) {
369  BLOB_CHOICE_IT blob_choice_it(word->GetBlobChoices(i));
370  ASSERT_HOST(!blob_choice_it.empty());
371  BLOB_CHOICE *first_choice = nullptr;
372  for (blob_choice_it.mark_cycle_pt(); !blob_choice_it.cycled_list();
373  blob_choice_it.forward()) { // find first non-fragment choice
374  if (!(unicharset.get_fragment(blob_choice_it.data()->unichar_id()))) {
375  first_choice = blob_choice_it.data();
376  break;
377  }
378  }
379  ASSERT_HOST(first_choice != nullptr);
380  if (first_choice->unichar_id() != word->best_choice->unichar_id(i)) {
381  best_choice_is_dict_and_top_choice_ = false;
382  break;
383  }
384  }
385  }
386  std::string debug_str;
387  if (best_choice_is_dict_and_top_choice_) {
388  debug_str = "Best choice is: incorrect, top choice, dictionary word";
389  debug_str += " with permuter ";
390  debug_str += word->best_choice->permuter_name();
391  } else {
392  debug_str = "Classifier/Old LM tradeoff is to blame";
393  }
394  SetBlame(best_choice_is_dict_and_top_choice_ ? IRR_CLASSIFIER : IRR_CLASS_OLD_LM_TRADEOFF,
395  debug_str, word->best_choice, debug);
396 }
#define ASSERT_HOST(x)
Definition: errcode.h:59
@ IRR_CLASS_OLD_LM_TRADEOFF
Definition: blamer.h:90

◆ ChoiceIsCorrect()

bool tesseract::BlamerBundle::ChoiceIsCorrect ( const WERD_CHOICE *  word_choice) const

Definition at line 116 of file blamer.cpp.

116  {
117  if (word_choice == nullptr) {
118  return false;
119  }
120  const UNICHARSET *uni_set = word_choice->unicharset();
121  std::string normed_choice_str;
122  for (unsigned i = 0; i < word_choice->length(); ++i) {
123  normed_choice_str += uni_set->get_normed_unichar(word_choice->unichar_id(i));
124  }
125  std::string truth_str = TruthString();
126  return truth_str == normed_choice_str;
127 }
std::string TruthString() const
Definition: blamer.h:124

◆ ClearResults()

void tesseract::BlamerBundle::ClearResults ( )
inline

Definition at line 198 of file blamer.h.

198  {
199  norm_truth_word_.DeleteAllBoxes();
200  norm_box_tolerance_ = 0;
201  if (!NoTruth()) {
202  incorrect_result_reason_ = IRR_CORRECT;
203  }
204  debug_ = "";
205  segsearch_is_looking_for_blame_ = false;
206  best_correctly_segmented_rating_ = WERD_CHOICE::kBadRating;
207  correct_segmentation_cols_.clear();
208  correct_segmentation_rows_.clear();
209  best_choice_is_dict_and_top_choice_ = false;
210  delete[] lattice_data_;
211  lattice_data_ = nullptr;
212  lattice_size_ = 0;
213  }
bool NoTruth() const
Definition: blamer.h:134
void DeleteAllBoxes()
Definition: boxword.cpp:184
static const float kBadRating
Definition: ratngs.h:256

◆ CopyResults()

void tesseract::BlamerBundle::CopyResults ( const BlamerBundle &  other)
inline

Definition at line 220 of file blamer.h.

220  {
221  norm_truth_word_ = other.norm_truth_word_;
222  norm_box_tolerance_ = other.norm_box_tolerance_;
223  incorrect_result_reason_ = other.incorrect_result_reason_;
224  segsearch_is_looking_for_blame_ = other.segsearch_is_looking_for_blame_;
225  best_correctly_segmented_rating_ = other.best_correctly_segmented_rating_;
226  correct_segmentation_cols_ = other.correct_segmentation_cols_;
227  correct_segmentation_rows_ = other.correct_segmentation_rows_;
228  best_choice_is_dict_and_top_choice_ = other.best_choice_is_dict_and_top_choice_;
229  if (other.lattice_data_ != nullptr) {
230  lattice_data_ = new char[other.lattice_size_];
231  memcpy(lattice_data_, other.lattice_data_, other.lattice_size_);
232  lattice_size_ = other.lattice_size_;
233  } else {
234  lattice_data_ = nullptr;
235  }
236  }

◆ CopyTruth()

void tesseract::BlamerBundle::CopyTruth ( const BlamerBundle &  other)
inline

Definition at line 214 of file blamer.h.

214  {
215  truth_has_char_boxes_ = other.truth_has_char_boxes_;
216  truth_word_ = other.truth_word_;
217  truth_text_ = other.truth_text_;
218  incorrect_result_reason_ = (other.NoTruth() ? other.incorrect_result_reason_ : IRR_CORRECT);
219  }

◆ correct_segmentation_length()

int tesseract::BlamerBundle::correct_segmentation_length ( ) const
inline

Definition at line 151 of file blamer.h.

151  {
152  return correct_segmentation_cols_.size();
153  }

◆ debug()

const std::string& tesseract::BlamerBundle::debug ( ) const
inline

Definition at line 140 of file blamer.h.

140  {
141  return debug_;
142  }

◆ FillDebugString()

void tesseract::BlamerBundle::FillDebugString ( const std::string &  msg,
const WERD_CHOICE *  choice,
std::string &  debug 
)

Definition at line 129 of file blamer.cpp.

129  {
130  debug += "Truth ";
131  for (auto &text : this->truth_text_) {
132  debug += text;
133  }
134  if (!this->truth_has_char_boxes_) {
135  debug += " (no char boxes)";
136  }
137  if (choice != nullptr) {
138  debug += " Choice ";
139  std::string choice_str;
140  choice->string_and_lengths(&choice_str, nullptr);
141  debug += choice_str;
142  }
143  if (msg.length() > 0) {
144  debug += "\n";
145  debug += msg;
146  }
147  debug += "\n";
148 }

◆ FinishSegSearch()

void tesseract::BlamerBundle::FinishSegSearch ( const WERD_CHOICE *  best_choice,
bool  debug,
std::string &  debug_str 
)

Definition at line 503 of file blamer.cpp.

503  {
504  // If we are still looking for blame (i.e. best_choice is incorrect, but a
505  // path representing the correct segmentation could be constructed), we can
506  // blame segmentation search pain point prioritization if the rating of the
507  // path corresponding to the correct segmentation is better than that of
508  // best_choice (i.e. language model would have done the correct thing, but
509  // because of poor pain point prioritization the correct segmentation was
510  // never explored). Otherwise we blame the tradeoff between the language model
511  // and the classifier, since even after exploring the path corresponding to
512  // the correct segmentation incorrect best_choice would have been chosen.
513  // One special case when we blame the classifier instead is when best choice
514  // is incorrect, but it is a dictionary word and it classifier's top choice.
515  if (segsearch_is_looking_for_blame_) {
516  segsearch_is_looking_for_blame_ = false;
517  if (best_choice_is_dict_and_top_choice_) {
518  debug_str = "Best choice is: incorrect, top choice, dictionary word";
519  debug_str += " with permuter ";
520  debug_str += best_choice->permuter_name();
521  SetBlame(IRR_CLASSIFIER, debug_str, best_choice, debug);
522  } else if (best_correctly_segmented_rating_ < best_choice->rating()) {
523  debug_str += "Correct segmentation state was not explored";
524  SetBlame(IRR_SEGSEARCH_PP, debug_str, best_choice, debug);
525  } else {
526  if (best_correctly_segmented_rating_ >= WERD_CHOICE::kBadRating) {
527  debug_str += "Correct segmentation paths were pruned by LM\n";
528  } else {
529  debug_str += "Best correct segmentation rating " +
530  std::to_string(best_correctly_segmented_rating_);
531  debug_str += " vs. best choice rating " + std::to_string(best_choice->rating());
532  }
533  SetBlame(IRR_CLASS_LM_TRADEOFF, debug_str, best_choice, debug);
534  }
535  }
536 }
@ IRR_SEGSEARCH_PP
Definition: blamer.h:86
@ IRR_CLASS_LM_TRADEOFF
Definition: blamer.h:73

◆ GuidedSegsearchNeeded()

bool tesseract::BlamerBundle::GuidedSegsearchNeeded ( const WERD_CHOICE *  best_choice) const

Definition at line 461 of file blamer.cpp.

461  {
462  return incorrect_result_reason_ == IRR_CORRECT && !segsearch_is_looking_for_blame_ &&
463  truth_has_char_boxes_ && !ChoiceIsCorrect(best_choice);
464 }
bool ChoiceIsCorrect(const WERD_CHOICE *word_choice) const
Definition: blamer.cpp:116

◆ GuidedSegsearchStillGoing()

bool tesseract::BlamerBundle::GuidedSegsearchStillGoing ( ) const

Definition at line 498 of file blamer.cpp.

498  {
499  return segsearch_is_looking_for_blame_;
500 }

◆ HasDebugInfo()

bool tesseract::BlamerBundle::HasDebugInfo ( ) const
inline

Definition at line 137 of file blamer.h.

137  {
138  return debug_.length() > 0 || misadaption_debug_.length() > 0;
139  }

◆ incorrect_result_reason()

IncorrectResultReason tesseract::BlamerBundle::incorrect_result_reason ( ) const
inline

Definition at line 131 of file blamer.h.

131  {
132  return incorrect_result_reason_;
133  }

◆ IncorrectReason()

const char * tesseract::BlamerBundle::IncorrectReason ( ) const

Definition at line 60 of file blamer.cpp.

60  {
61  return kIncorrectResultReasonNames[incorrect_result_reason_];
62 }
const char *const kIncorrectResultReasonNames[]
Definition: blamer.cpp:51

◆ IncorrectReasonName()

const char * tesseract::BlamerBundle::IncorrectReasonName ( IncorrectResultReason  irr)
static

Definition at line 56 of file blamer.cpp.

56  {
57  return kIncorrectResultReasonNames[irr];
58 }

◆ InitForSegSearch()

void tesseract::BlamerBundle::InitForSegSearch ( const WERD_CHOICE *  best_choice,
MATRIX *  ratings,
UNICHAR_ID  wildcard_id,
bool  debug,
std::string &  debug_str,
tesseract::LMPainPoints *  pain_points,
double  max_char_wh_ratio,
WERD_RES *  word_res 
)

Definition at line 468 of file blamer.cpp.

471  {
472  segsearch_is_looking_for_blame_ = true;
473  if (debug) {
474  tprintf("segsearch starting to look for blame\n");
475  }
476  // Fill pain points for any unclassifed blob corresponding to the
477  // correct segmentation state.
478  debug_str += "Correct segmentation:\n";
479  for (unsigned idx = 0; idx < correct_segmentation_cols_.size(); ++idx) {
480  debug_str += "col=" + std::to_string(correct_segmentation_cols_[idx]);
481  debug_str += " row=" + std::to_string(correct_segmentation_rows_[idx]);
482  debug_str += "\n";
483  if (!ratings->Classified(correct_segmentation_cols_[idx], correct_segmentation_rows_[idx],
484  wildcard_id) &&
485  !pain_points->GeneratePainPoint(
486  correct_segmentation_cols_[idx], correct_segmentation_rows_[idx],
487  tesseract::LM_PPTYPE_BLAMER, 0.0, false, max_char_wh_ratio, word_res)) {
488  segsearch_is_looking_for_blame_ = false;
489  debug_str += "\nFailed to insert pain point\n";
490  SetBlame(IRR_SEGSEARCH_HEUR, debug_str, best_choice, debug);
491  break;
492  }
493  } // end for blamer_bundle->correct_segmentation_cols/rows
494 }
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
@ IRR_SEGSEARCH_HEUR
Definition: blamer.h:80
bool GeneratePainPoint(int col, int row, LMPainPointsType pp_type, float special_priority, bool ok_to_extend, float max_char_wh_ratio, WERD_RES *word_res)

◆ JoinBlames()

void tesseract::BlamerBundle::JoinBlames ( const BlamerBundle &  bundle1,
const BlamerBundle &  bundle2,
bool  debug 
)

Definition at line 226 of file blamer.cpp.

227  {
228  std::string debug_str;
229  IncorrectResultReason irr = incorrect_result_reason_;
230  if (irr != IRR_NO_TRUTH_SPLIT) {
231  debug_str = "";
232  }
233  if (bundle1.incorrect_result_reason_ != IRR_CORRECT &&
234  bundle1.incorrect_result_reason_ != IRR_NO_TRUTH &&
235  bundle1.incorrect_result_reason_ != IRR_NO_TRUTH_SPLIT) {
236  debug_str += "Blame from part 1: ";
237  debug_str += bundle1.debug_;
238  irr = bundle1.incorrect_result_reason_;
239  }
240  if (bundle2.incorrect_result_reason_ != IRR_CORRECT &&
241  bundle2.incorrect_result_reason_ != IRR_NO_TRUTH &&
242  bundle2.incorrect_result_reason_ != IRR_NO_TRUTH_SPLIT) {
243  debug_str += "Blame from part 2: ";
244  debug_str += bundle2.debug_;
245  if (irr == IRR_CORRECT) {
246  irr = bundle2.incorrect_result_reason_;
247  } else if (irr != bundle2.incorrect_result_reason_) {
248  irr = IRR_UNKNOWN;
249  }
250  }
251  incorrect_result_reason_ = irr;
252  if (irr != IRR_CORRECT && irr != IRR_NO_TRUTH) {
253  SetBlame(irr, debug_str, nullptr, debug);
254  }
255 }
IncorrectResultReason
Definition: blamer.h:56
@ IRR_UNKNOWN
Definition: blamer.h:101
@ IRR_NO_TRUTH
Definition: blamer.h:98
@ IRR_NO_TRUTH_SPLIT
Definition: blamer.h:95

◆ LastChanceBlame()

void tesseract::BlamerBundle::LastChanceBlame ( bool  debug,
WERD_RES *  word 
)
static

Definition at line 540 of file blamer.cpp.

540  {
541  if (word->blamer_bundle == nullptr) {
542  word->blamer_bundle = new BlamerBundle();
543  word->blamer_bundle->SetBlame(IRR_PAGE_LAYOUT, "LastChanceBlame", word->best_choice, debug);
544  } else if (word->blamer_bundle->incorrect_result_reason_ == IRR_NO_TRUTH) {
545  word->blamer_bundle->SetBlame(IRR_NO_TRUTH, "Rejected truth", word->best_choice, debug);
546  } else {
547  bool correct = word->blamer_bundle->ChoiceIsCorrect(word->best_choice);
548  IncorrectResultReason irr = word->blamer_bundle->incorrect_result_reason_;
549  if (irr == IRR_CORRECT && !correct) {
550  std::string debug_str = "Choice is incorrect after recognition";
551  word->blamer_bundle->SetBlame(IRR_UNKNOWN, debug_str, word->best_choice, debug);
552  } else if (irr != IRR_CORRECT && correct) {
553  if (debug) {
554  tprintf("Corrected %s\n", word->blamer_bundle->debug_.c_str());
555  }
556  word->blamer_bundle->incorrect_result_reason_ = IRR_CORRECT;
557  word->blamer_bundle->debug_ = "";
558  }
559  }
560 }
@ IRR_PAGE_LAYOUT
Definition: blamer.h:77

◆ lattice_data()

const char* tesseract::BlamerBundle::lattice_data ( ) const
inline

Definition at line 163 of file blamer.h.

163  {
164  return lattice_data_;
165  }

◆ lattice_size()

int tesseract::BlamerBundle::lattice_size ( ) const
inline

Definition at line 166 of file blamer.h.

166  {
167  return lattice_size_; // size of lattice_data in bytes
168  }

◆ MatrixPositionCorrect()

bool tesseract::BlamerBundle::MatrixPositionCorrect ( int  index,
const MATRIX_COORD coord 
)
inline

Definition at line 156 of file blamer.h.

156  {
157  return correct_segmentation_cols_[index] == coord.col &&
158  correct_segmentation_rows_[index] == coord.row;
159  }

◆ misadaption_debug()

const std::string& tesseract::BlamerBundle::misadaption_debug ( ) const
inline

Definition at line 143 of file blamer.h.

143  {
144  return misadaption_debug_;
145  }

◆ NoTruth()

bool tesseract::BlamerBundle::NoTruth ( ) const
inline

Definition at line 134 of file blamer.h.

134  {
135  return incorrect_result_reason_ == IRR_NO_TRUTH || incorrect_result_reason_ == IRR_PAGE_LAYOUT;
136  }

◆ params_training_bundle()

const tesseract::ParamsTrainingBundle& tesseract::BlamerBundle::params_training_bundle ( ) const
inline

Definition at line 176 of file blamer.h.

176  {
177  return params_training_bundle_;
178  }

◆ set_best_choice_is_dict_and_top_choice()

void tesseract::BlamerBundle::set_best_choice_is_dict_and_top_choice ( bool  value)
inline

Definition at line 160 of file blamer.h.

160  {
161  best_choice_is_dict_and_top_choice_ = value;
162  }

◆ set_lattice_data()

void tesseract::BlamerBundle::set_lattice_data ( const char *  data,
int  size 
)
inline

Definition at line 169 of file blamer.h.

169  {
170  lattice_size_ = size;
171  delete[] lattice_data_;
172  lattice_data_ = new char[lattice_size_];
173  memcpy(lattice_data_, data, lattice_size_);
174  }

◆ SetChopperBlame()

void tesseract::BlamerBundle::SetChopperBlame ( const WERD_RES *  word,
bool  debug 
)

Definition at line 309 of file blamer.cpp.

309  {
310  if (NoTruth() || !truth_has_char_boxes_ || word->chopped_word->blobs.empty()) {
311  return;
312  }
313  bool missing_chop = false;
314  int num_blobs = word->chopped_word->blobs.size();
315  unsigned box_index = 0;
316  int blob_index = 0;
317  int16_t truth_x = -1;
318  while (box_index < truth_word_.length() && blob_index < num_blobs) {
319  truth_x = norm_truth_word_.BlobBox(box_index).right();
320  TBLOB *curr_blob = word->chopped_word->blobs[blob_index];
321  if (curr_blob->bounding_box().right() < truth_x - norm_box_tolerance_) {
322  ++blob_index;
323  continue; // encountered an extra chop, keep looking
324  } else if (curr_blob->bounding_box().right() > truth_x + norm_box_tolerance_) {
325  missing_chop = true;
326  break;
327  } else {
328  ++blob_index;
329  }
330  }
331  if (missing_chop || box_index < norm_truth_word_.length()) {
332  std::string debug_str;
333  if (missing_chop) {
334  debug_str += "Detected missing chop (tolerance=" + std::to_string(norm_box_tolerance_);
335  debug_str += ") at Bounding Box=";
336  TBLOB *curr_blob = word->chopped_word->blobs[blob_index];
337  curr_blob->bounding_box().print_to_str(debug_str);
338  debug_str += "\nNo chop for truth at x=" + std::to_string(truth_x);
339  } else {
340  debug_str += "Missing chops for last " + std::to_string(norm_truth_word_.length() - box_index);
341  debug_str += " truth box(es)";
342  }
343  debug_str += "\nMaximally chopped word boxes:\n";
344  for (blob_index = 0; blob_index < num_blobs; ++blob_index) {
345  TBLOB *curr_blob = word->chopped_word->blobs[blob_index];
346  curr_blob->bounding_box().print_to_str(debug_str);
347  debug_str += '\n';
348  }
349  debug_str += "Truth bounding boxes:\n";
350  for (box_index = 0; box_index < norm_truth_word_.length(); ++box_index) {
351  norm_truth_word_.BlobBox(box_index).print_to_str(debug_str);
352  debug_str += '\n';
353  }
354  SetBlame(IRR_CHOPPER, debug_str, word->best_choice, debug);
355  }
356 }
@ IRR_CHOPPER
Definition: blamer.h:66
void print_to_str(std::string &str) const
Definition: rect.cpp:177
TDimension right() const
Definition: rect.h:89

◆ SetMisAdaptionDebug()

void tesseract::BlamerBundle::SetMisAdaptionDebug ( const WERD_CHOICE *  best_choice,
bool  debug 
)

Definition at line 564 of file blamer.cpp.

564  {
565  if (incorrect_result_reason_ != IRR_NO_TRUTH && !ChoiceIsCorrect(best_choice)) {
566  misadaption_debug_ = "misadapt to word (";
567  misadaption_debug_ += best_choice->permuter_name();
568  misadaption_debug_ += "): ";
569  FillDebugString("", best_choice, misadaption_debug_);
570  if (debug) {
571  tprintf("%s\n", misadaption_debug_.c_str());
572  }
573  }
574 }
void FillDebugString(const std::string &msg, const WERD_CHOICE *choice, std::string &debug)
Definition: blamer.cpp:129

◆ SetRejectedTruth()

void tesseract::BlamerBundle::SetRejectedTruth ( )

Definition at line 110 of file blamer.cpp.

110  {
111  incorrect_result_reason_ = IRR_NO_TRUTH;
112  truth_has_char_boxes_ = false;
113 }

◆ SetSymbolTruth()

void tesseract::BlamerBundle::SetSymbolTruth ( const UNICHARSET &  unicharset,
const char *  char_str,
const TBOX &  char_box 
)

Definition at line 88 of file blamer.cpp.

89  {
90  std::string symbol_str(char_str);
91  UNICHAR_ID id = unicharset.unichar_to_id(char_str);
92  if (id != INVALID_UNICHAR_ID) {
93  std::string normed_uch(unicharset.get_normed_unichar(id));
94  if (normed_uch.length() > 0) {
95  symbol_str = normed_uch;
96  }
97  }
98  int length = truth_word_.length();
99  truth_text_.push_back(symbol_str);
100  truth_word_.InsertBox(length, char_box);
101  if (length == 0) {
102  truth_has_char_boxes_ = true;
103  } else if (truth_word_.BlobBox(length - 1) == char_box) {
104  truth_has_char_boxes_ = false;
105  }
106 }
void InsertBox(unsigned index, const TBOX &box)
Definition: boxword.cpp:157

◆ SetupCorrectSegmentation()

void tesseract::BlamerBundle::SetupCorrectSegmentation ( const TWERD *  word,
bool  debug 
)

Definition at line 399 of file blamer.cpp.

399  {
400 #ifndef DISABLED_LEGACY_ENGINE
401  params_training_bundle_.StartHypothesisList();
402 #endif // ndef DISABLED_LEGACY_ENGINE
403  if (incorrect_result_reason_ != IRR_CORRECT || !truth_has_char_boxes_) {
404  return; // Nothing to do here.
405  }
406 
407  std::string debug_str = "Blamer computing correct_segmentation_cols\n";
408  int curr_box_col = 0;
409  int next_box_col = 0;
410  int num_blobs = word->NumBlobs();
411  if (num_blobs == 0) {
412  return; // No blobs to play with.
413  }
414  int blob_index = 0;
415  int16_t next_box_x = word->blobs[blob_index]->bounding_box().right();
416  for (unsigned truth_idx = 0; blob_index < num_blobs && truth_idx < norm_truth_word_.length();
417  ++blob_index) {
418  ++next_box_col;
419  int16_t curr_box_x = next_box_x;
420  if (blob_index + 1 < num_blobs) {
421  next_box_x = word->blobs[blob_index + 1]->bounding_box().right();
422  }
423  int16_t truth_x = norm_truth_word_.BlobBox(truth_idx).right();
424  debug_str += "Box x coord vs. truth: " + std::to_string(curr_box_x);
425  debug_str += " " + std::to_string(truth_x);
426  debug_str += "\n";
427  if (curr_box_x > (truth_x + norm_box_tolerance_)) {
428  break; // failed to find a matching box
429  } else if (curr_box_x >= truth_x - norm_box_tolerance_ && // matched
430  (blob_index + 1 >= num_blobs || // next box can't be included
431  next_box_x > truth_x + norm_box_tolerance_)) {
432  correct_segmentation_cols_.push_back(curr_box_col);
433  correct_segmentation_rows_.push_back(next_box_col - 1);
434  ++truth_idx;
435  debug_str += "col=" + std::to_string(curr_box_col);
436  debug_str += " row=" + std::to_string(next_box_col - 1);
437  debug_str += "\n";
438  curr_box_col = next_box_col;
439  }
440  }
441  if (blob_index < num_blobs || // trailing blobs
442  correct_segmentation_cols_.size() != norm_truth_word_.length()) {
443  debug_str +=
444  "Blamer failed to find correct segmentation"
445  " (tolerance=" +
446  std::to_string(norm_box_tolerance_);
447  if (blob_index >= num_blobs) {
448  debug_str += " blob == nullptr";
449  }
450  debug_str += ")\n";
451  debug_str += " path length " + std::to_string(correct_segmentation_cols_.size());
452  debug_str += " vs. truth " + std::to_string(norm_truth_word_.length());
453  debug_str += "\n";
454  SetBlame(IRR_UNKNOWN, debug_str, nullptr, debug);
455  correct_segmentation_cols_.clear();
456  correct_segmentation_rows_.clear();
457  }
458 }

◆ SetupNormTruthWord()

void tesseract::BlamerBundle::SetupNormTruthWord ( const DENORM &  denorm)

Definition at line 151 of file blamer.cpp.

151  {
152  // TODO(rays) Is this the last use of denorm in WERD_RES and can it go?
153  norm_box_tolerance_ = kBlamerBoxTolerance * denorm.x_scale();
154  TPOINT topleft;
155  TPOINT botright;
156  TPOINT norm_topleft;
157  TPOINT norm_botright;
158  for (unsigned b = 0; b < truth_word_.length(); ++b) {
159  const TBOX &box = truth_word_.BlobBox(b);
160  topleft.x = box.left();
161  topleft.y = box.top();
162  botright.x = box.right();
163  botright.y = box.bottom();
164  denorm.NormTransform(nullptr, topleft, &norm_topleft);
165  denorm.NormTransform(nullptr, botright, &norm_botright);
166  TBOX norm_box(norm_topleft.x, norm_botright.y, norm_botright.x, norm_topleft.y);
167  norm_truth_word_.InsertBox(b, norm_box);
168  }
169 }
@ TPOINT
TDimension left() const
Definition: rect.h:82

◆ SetWordTruth()

void tesseract::BlamerBundle::SetWordTruth ( const UNICHARSET &  unicharset,
const char *  truth_str,
const TBOX &  word_box 
)

Definition at line 66 of file blamer.cpp.

67  {
68  truth_word_.InsertBox(0, word_box);
69  truth_has_char_boxes_ = false;
70  // Encode the string as UNICHAR_IDs.
71  std::vector<UNICHAR_ID> encoding;
72  std::vector<char> lengths;
73  unicharset.encode_string(truth_str, false, &encoding, &lengths, nullptr);
74  int total_length = 0;
75  for (size_t i = 0; i < encoding.size(); total_length += lengths[i++]) {
76  std::string uch(truth_str + total_length);
77  uch.resize(lengths[i] - total_length);
78  UNICHAR_ID id = encoding[i];
79  if (id != INVALID_UNICHAR_ID) {
80  uch = unicharset.get_normed_unichar(id);
81  }
82  truth_text_.push_back(uch);
83  }
84 }

◆ SplitBundle()

void tesseract::BlamerBundle::SplitBundle ( int  word1_right,
int  word2_left,
bool  debug,
BlamerBundle *  bundle1,
BlamerBundle *  bundle2 
) const

Definition at line 174 of file blamer.cpp.

175  {
176  std::string debug_str;
177  // Find truth boxes that correspond to the split in the blobs.
178  unsigned begin2_truth_index = 0;
179  if (incorrect_result_reason_ != IRR_NO_TRUTH && truth_has_char_boxes_) {
180  debug_str = "Looking for truth split at";
181  debug_str += " end1_x " + std::to_string(word1_right);
182  debug_str += " begin2_x " + std::to_string(word2_left);
183  debug_str += "\nnorm_truth_word boxes:\n";
184  if (norm_truth_word_.length() > 1) {
185  norm_truth_word_.BlobBox(0).print_to_str(debug_str);
186  for (unsigned b = 1; b < norm_truth_word_.length(); ++b) {
187  norm_truth_word_.BlobBox(b).print_to_str(debug_str);
188  if ((abs(word1_right - norm_truth_word_.BlobBox(b - 1).right()) < norm_box_tolerance_) &&
189  (abs(word2_left - norm_truth_word_.BlobBox(b).left()) < norm_box_tolerance_)) {
190  begin2_truth_index = b;
191  debug_str += "Split found";
192  break;
193  }
194  }
195  debug_str += '\n';
196  }
197  }
198  // Populate truth information in word and word2 with the first and second
199  // part of the original truth.
200  if (begin2_truth_index > 0) {
201  bundle1->truth_has_char_boxes_ = true;
202  bundle1->norm_box_tolerance_ = norm_box_tolerance_;
203  bundle2->truth_has_char_boxes_ = true;
204  bundle2->norm_box_tolerance_ = norm_box_tolerance_;
205  BlamerBundle *curr_bb = bundle1;
206  for (unsigned b = 0; b < norm_truth_word_.length(); ++b) {
207  if (b == begin2_truth_index) {
208  curr_bb = bundle2;
209  }
210  curr_bb->norm_truth_word_.InsertBox(b, norm_truth_word_.BlobBox(b));
211  curr_bb->truth_word_.InsertBox(b, truth_word_.BlobBox(b));
212  curr_bb->truth_text_.push_back(truth_text_[b]);
213  }
214  } else if (incorrect_result_reason_ == IRR_NO_TRUTH) {
215  bundle1->incorrect_result_reason_ = IRR_NO_TRUTH;
216  bundle2->incorrect_result_reason_ = IRR_NO_TRUTH;
217  } else {
218  debug_str += "Truth split not found";
219  debug_str += truth_has_char_boxes_ ? "\n" : " (no truth char boxes)\n";
220  bundle1->SetBlame(IRR_NO_TRUTH_SPLIT, debug_str, nullptr, debug);
221  bundle2->SetBlame(IRR_NO_TRUTH_SPLIT, debug_str, nullptr, debug);
222  }
223 }

◆ TruthString()

std::string tesseract::BlamerBundle::TruthString ( ) const
inline

Definition at line 124 of file blamer.h.

124  {
125  std::string truth_str;
126  for (auto &text : truth_text_) {
127  truth_str += text;
128  }
129  return truth_str;
130  }

◆ UpdateBestRating()

void tesseract::BlamerBundle::UpdateBestRating ( float  rating)
inline

Definition at line 146 of file blamer.h.

146  {
147  if (rating < best_correctly_segmented_rating_) {
148  best_correctly_segmented_rating_ = rating;
149  }
150  }

The documentation for this struct was generated from the following files: