tesseract  5.0.0
tesseract::RecodeBeamSearch Class Reference

#include <recodebeam.h>

Public Member Functions

 RecodeBeamSearch (const UnicharCompress &recoder, int null_char, bool simple_text, Dict *dict)
 
 ~RecodeBeamSearch ()
 
void Decode (const NetworkIO &output, double dict_ratio, double cert_offset, double worst_dict_cert, const UNICHARSET *charset, int lstm_choice_mode=0)
 
void Decode (const GENERIC_2D_ARRAY< float > &output, double dict_ratio, double cert_offset, double worst_dict_cert, const UNICHARSET *charset)
 
void DecodeSecondaryBeams (const NetworkIO &output, double dict_ratio, double cert_offset, double worst_dict_cert, const UNICHARSET *charset, int lstm_choice_mode=0)
 
void ExtractBestPathAsLabels (std::vector< int > *labels, std::vector< int > *xcoords) const
 
void ExtractBestPathAsUnicharIds (bool debug, const UNICHARSET *unicharset, std::vector< int > *unichar_ids, std::vector< float > *certs, std::vector< float > *ratings, std::vector< int > *xcoords) const
 
void ExtractBestPathAsWords (const TBOX &line_box, float scale_factor, bool debug, const UNICHARSET *unicharset, PointerVector< WERD_RES > *words, int lstm_choice_mode=0)
 
void DebugBeams (const UNICHARSET &unicharset) const
 
void extractSymbolChoices (const UNICHARSET *unicharset)
 
void PrintBeam2 (bool uids, int num_outputs, const UNICHARSET *charset, bool secondary) const
 
void segmentTimestepsByCharacters ()
 
std::vector< std::vector< std::pair< const char *, float > > > combineSegmentedTimesteps (std::vector< std::vector< std::vector< std::pair< const char *, float >>>> *segmentedTimesteps)
 

Static Public Member Functions

static int LengthFromBeamsIndex (int index)
 
static NodeContinuation ContinuationFromBeamsIndex (int index)
 
static bool IsDawgFromBeamsIndex (int index)
 
static int BeamIndex (bool is_dawg, NodeContinuation cont, int length)
 

Public Attributes

std::vector< std::vector< std::pair< const char *, float > > > timesteps
 
std::vector< std::vector< std::vector< std::pair< const char *, float > > > > segmentedTimesteps
 
std::vector< std::vector< std::pair< const char *, float > > > ctc_choices
 
std::vector< std::unordered_set< int > > excludedUnichars
 
std::vector< int > character_boundaries_
 

Static Public Attributes

static constexpr float kMinCertainty = -20.0f
 
static const int kNumLengths = RecodedCharID::kMaxCodeLen + 1
 
static const int kNumBeams = 2 * NC_COUNT * kNumLengths
 

Detailed Description

Definition at line 184 of file recodebeam.h.

Constructor & Destructor Documentation

◆ RecodeBeamSearch()

tesseract::RecodeBeamSearch::RecodeBeamSearch ( const UnicharCompress recoder,
int  null_char,
bool  simple_text,
Dict dict 
)

Definition at line 64 of file recodebeam.cpp.

66  : recoder_(recoder),
67  beam_size_(0),
68  top_code_(-1),
69  second_code_(-1),
70  dict_(dict),
71  space_delimited_(true),
72  is_simple_text_(simple_text),
73  null_char_(null_char) {
74  if (dict_ != nullptr && !dict_->IsSpaceDelimitedLang()) {
75  space_delimited_ = false;
76  }
77 }
bool IsSpaceDelimitedLang() const
Returns true if the language is space-delimited (not CJ, or T).
Definition: dict.cpp:912

◆ ~RecodeBeamSearch()

tesseract::RecodeBeamSearch::~RecodeBeamSearch ( )

Definition at line 79 of file recodebeam.cpp.

79  {
80  for (auto data : beam_) {
81  delete data;
82  }
83  for (auto data : secondary_beam_) {
84  delete data;
85  }
86 }

Member Function Documentation

◆ BeamIndex()

static int tesseract::RecodeBeamSearch::BeamIndex ( bool  is_dawg,
NodeContinuation  cont,
int  length 
)
inlinestatic

Definition at line 263 of file recodebeam.h.

263  {
264  return (is_dawg * NC_COUNT + cont) * kNumLengths + length;
265  }
static const int kNumLengths
Definition: recodebeam.h:248

◆ combineSegmentedTimesteps()

std::vector< std::vector< std::pair< const char *, float > > > tesseract::RecodeBeamSearch::combineSegmentedTimesteps ( std::vector< std::vector< std::vector< std::pair< const char *, float >>>> *  segmentedTimesteps)

Definition at line 181 of file recodebeam.cpp.

183  {
184  std::vector<std::vector<std::pair<const char *, float>>> combined_timesteps;
185  for (auto &segmentedTimestep : *segmentedTimesteps) {
186  for (auto &j : segmentedTimestep) {
187  combined_timesteps.push_back(j);
188  }
189  }
190  return combined_timesteps;
191 }
std::vector< std::vector< std::vector< std::pair< const char *, float > > > > segmentedTimesteps
Definition: recodebeam.h:235

◆ ContinuationFromBeamsIndex()

static NodeContinuation tesseract::RecodeBeamSearch::ContinuationFromBeamsIndex ( int  index)
inlinestatic

Definition at line 256 of file recodebeam.h.

256  {
257  return static_cast<NodeContinuation>((index / kNumLengths) % NC_COUNT);
258  }
NodeContinuation
Definition: recodebeam.h:75

◆ DebugBeams()

void tesseract::RecodeBeamSearch::DebugBeams ( const UNICHARSET unicharset) const

Definition at line 522 of file recodebeam.cpp.

522  {
523  for (int p = 0; p < beam_size_; ++p) {
524  for (int d = 0; d < 2; ++d) {
525  for (int c = 0; c < NC_COUNT; ++c) {
526  auto cont = static_cast<NodeContinuation>(c);
527  int index = BeamIndex(d, cont, 0);
528  if (beam_[p]->beams_[index].empty()) {
529  continue;
530  }
531  // Print all the best scoring nodes for each unichar found.
532  tprintf("Position %d: %s+%s beam\n", p, d ? "Dict" : "Non-Dict",
533  kNodeContNames[c]);
534  DebugBeamPos(unicharset, beam_[p]->beams_[index]);
535  }
536  }
537  }
538 }
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
static int BeamIndex(bool is_dawg, NodeContinuation cont, int length)
Definition: recodebeam.h:263

◆ Decode() [1/2]

void tesseract::RecodeBeamSearch::Decode ( const GENERIC_2D_ARRAY< float > &  output,
double  dict_ratio,
double  cert_offset,
double  worst_dict_cert,
const UNICHARSET charset 
)

Definition at line 106 of file recodebeam.cpp.

109  {
110  beam_size_ = 0;
111  int width = output.dim1();
112  for (int t = 0; t < width; ++t) {
113  ComputeTopN(output[t], output.dim2(), kBeamWidths[0]);
114  DecodeStep(output[t], t, dict_ratio, cert_offset, worst_dict_cert, charset);
115  }
116 }

◆ Decode() [2/2]

void tesseract::RecodeBeamSearch::Decode ( const NetworkIO output,
double  dict_ratio,
double  cert_offset,
double  worst_dict_cert,
const UNICHARSET charset,
int  lstm_choice_mode = 0 
)

Definition at line 89 of file recodebeam.cpp.

91  {
92  beam_size_ = 0;
93  int width = output.Width();
94  if (lstm_choice_mode) {
95  timesteps.clear();
96  }
97  for (int t = 0; t < width; ++t) {
98  ComputeTopN(output.f(t), output.NumFeatures(), kBeamWidths[0]);
99  DecodeStep(output.f(t), t, dict_ratio, cert_offset, worst_dict_cert,
100  charset);
101  if (lstm_choice_mode) {
102  SaveMostCertainChoices(output.f(t), output.NumFeatures(), charset, t);
103  }
104  }
105 }
std::vector< std::vector< std::pair< const char *, float > > > timesteps
Definition: recodebeam.h:234

◆ DecodeSecondaryBeams()

void tesseract::RecodeBeamSearch::DecodeSecondaryBeams ( const NetworkIO output,
double  dict_ratio,
double  cert_offset,
double  worst_dict_cert,
const UNICHARSET charset,
int  lstm_choice_mode = 0 
)

Definition at line 118 of file recodebeam.cpp.

120  {
121  for (auto data : secondary_beam_) {
122  delete data;
123  }
124  secondary_beam_.clear();
125  if (character_boundaries_.size() < 2) {
126  return;
127  }
128  int width = output.Width();
129  unsigned bucketNumber = 0;
130  for (int t = 0; t < width; ++t) {
131  while ((bucketNumber + 1) < character_boundaries_.size() &&
132  t >= character_boundaries_[bucketNumber + 1]) {
133  ++bucketNumber;
134  }
135  ComputeSecTopN(&(excludedUnichars)[bucketNumber], output.f(t),
136  output.NumFeatures(), kBeamWidths[0]);
137  DecodeSecondaryStep(output.f(t), t, dict_ratio, cert_offset,
138  worst_dict_cert, charset);
139  }
140 }
std::vector< int > character_boundaries_
Definition: recodebeam.h:241
std::vector< std::unordered_set< int > > excludedUnichars
Definition: recodebeam.h:239

◆ ExtractBestPathAsLabels()

void tesseract::RecodeBeamSearch::ExtractBestPathAsLabels ( std::vector< int > *  labels,
std::vector< int > *  xcoords 
) const

Definition at line 207 of file recodebeam.cpp.

208  {
209  labels->clear();
210  xcoords->clear();
211  std::vector<const RecodeNode *> best_nodes;
212  ExtractBestPaths(&best_nodes, nullptr);
213  // Now just run CTC on the best nodes.
214  int t = 0;
215  int width = best_nodes.size();
216  while (t < width) {
217  int label = best_nodes[t]->code;
218  if (label != null_char_) {
219  labels->push_back(label);
220  xcoords->push_back(t);
221  }
222  while (++t < width && !is_simple_text_ && best_nodes[t]->code == label) {
223  }
224  }
225  xcoords->push_back(width);
226 }

◆ ExtractBestPathAsUnicharIds()

void tesseract::RecodeBeamSearch::ExtractBestPathAsUnicharIds ( bool  debug,
const UNICHARSET unicharset,
std::vector< int > *  unichar_ids,
std::vector< float > *  certs,
std::vector< float > *  ratings,
std::vector< int > *  xcoords 
) const

Definition at line 230 of file recodebeam.cpp.

233  {
234  std::vector<const RecodeNode *> best_nodes;
235  ExtractBestPaths(&best_nodes, nullptr);
236  ExtractPathAsUnicharIds(best_nodes, unichar_ids, certs, ratings, xcoords);
237  if (debug) {
238  DebugPath(unicharset, best_nodes);
239  DebugUnicharPath(unicharset, best_nodes, *unichar_ids, *certs, *ratings,
240  *xcoords);
241  }
242 }

◆ ExtractBestPathAsWords()

void tesseract::RecodeBeamSearch::ExtractBestPathAsWords ( const TBOX line_box,
float  scale_factor,
bool  debug,
const UNICHARSET unicharset,
PointerVector< WERD_RES > *  words,
int  lstm_choice_mode = 0 
)

Definition at line 245 of file recodebeam.cpp.

249  {
250  words->truncate(0);
251  std::vector<int> unichar_ids;
252  std::vector<float> certs;
253  std::vector<float> ratings;
254  std::vector<int> xcoords;
255  std::vector<const RecodeNode *> best_nodes;
256  std::vector<const RecodeNode *> second_nodes;
257  character_boundaries_.clear();
258  ExtractBestPaths(&best_nodes, &second_nodes);
259  if (debug) {
260  DebugPath(unicharset, best_nodes);
261  ExtractPathAsUnicharIds(second_nodes, &unichar_ids, &certs, &ratings,
262  &xcoords);
263  tprintf("\nSecond choice path:\n");
264  DebugUnicharPath(unicharset, second_nodes, unichar_ids, certs, ratings,
265  xcoords);
266  }
267  // If lstm choice mode is required in granularity level 2, it stores the x
268  // Coordinates of every chosen character, to match the alternative choices to
269  // it.
270  ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings, &xcoords,
272  int num_ids = unichar_ids.size();
273  if (debug) {
274  DebugUnicharPath(unicharset, best_nodes, unichar_ids, certs, ratings,
275  xcoords);
276  }
277  // Convert labels to unichar-ids.
278  int word_end = 0;
279  float prev_space_cert = 0.0f;
280  for (int word_start = 0; word_start < num_ids; word_start = word_end) {
281  for (word_end = word_start + 1; word_end < num_ids; ++word_end) {
282  // A word is terminated when a space character or start_of_word flag is
283  // hit. We also want to force a separate word for every non
284  // space-delimited character when not in a dictionary context.
285  if (unichar_ids[word_end] == UNICHAR_SPACE) {
286  break;
287  }
288  int index = xcoords[word_end];
289  if (best_nodes[index]->start_of_word) {
290  break;
291  }
292  if (best_nodes[index]->permuter == TOP_CHOICE_PERM &&
293  (!unicharset->IsSpaceDelimited(unichar_ids[word_end]) ||
294  !unicharset->IsSpaceDelimited(unichar_ids[word_end - 1]))) {
295  break;
296  }
297  }
298  float space_cert = 0.0f;
299  if (word_end < num_ids && unichar_ids[word_end] == UNICHAR_SPACE) {
300  space_cert = certs[word_end];
301  }
302  bool leading_space =
303  word_start > 0 && unichar_ids[word_start - 1] == UNICHAR_SPACE;
304  // Create a WERD_RES for the output word.
305  WERD_RES *word_res =
306  InitializeWord(leading_space, line_box, word_start, word_end,
307  std::min(space_cert, prev_space_cert), unicharset,
308  xcoords, scale_factor);
309  for (int i = word_start; i < word_end; ++i) {
310  auto *choices = new BLOB_CHOICE_LIST;
311  BLOB_CHOICE_IT bc_it(choices);
312  auto *choice = new BLOB_CHOICE(unichar_ids[i], ratings[i], certs[i], -1,
313  1.0f, static_cast<float>(INT16_MAX), 0.0f,
315  int col = i - word_start;
316  choice->set_matrix_cell(col, col);
317  bc_it.add_after_then_move(choice);
318  word_res->ratings->put(col, col, choices);
319  }
320  int index = xcoords[word_end - 1];
321  word_res->FakeWordFromRatings(best_nodes[index]->permuter);
322  words->push_back(word_res);
323  prev_space_cert = space_cert;
324  if (word_end < num_ids && unichar_ids[word_end] == UNICHAR_SPACE) {
325  ++word_end;
326  }
327  }
328 }
@ UNICHAR_SPACE
Definition: unicharset.h:36
@ BCC_STATIC_CLASSIFIER
Definition: ratngs.h:49
@ TOP_CHOICE_PERM
Definition: ratngs.h:234

◆ extractSymbolChoices()

void tesseract::RecodeBeamSearch::extractSymbolChoices ( const UNICHARSET unicharset)

Definition at line 415 of file recodebeam.cpp.

415  {
416  if (character_boundaries_.size() < 2) {
417  return;
418  }
419  // For the first iteration the original beam is analyzed. After that a
420  // new beam is calculated based on the results from the original beam.
421  std::vector<RecodeBeam *> &currentBeam =
422  secondary_beam_.empty() ? beam_ : secondary_beam_;
423  character_boundaries_[0] = 0;
424  for (unsigned j = 1; j < character_boundaries_.size(); ++j) {
425  std::vector<int> unichar_ids;
426  std::vector<float> certs;
427  std::vector<float> ratings;
428  std::vector<int> xcoords;
429  int backpath = character_boundaries_[j] - character_boundaries_[j - 1];
430  std::vector<tesseract::RecodePair> &heaps =
431  currentBeam.at(character_boundaries_[j] - 1)->beams_->heap();
432  std::vector<const RecodeNode *> best_nodes;
433  std::vector<const RecodeNode *> best;
434  // Scan the segmented node chain for valid unichar ids.
435  for (auto entry : heaps) {
436  bool validChar = false;
437  int backcounter = 0;
438  const RecodeNode *node = &entry.data();
439  while (node != nullptr && backcounter < backpath) {
440  if (node->code != null_char_ &&
441  node->unichar_id != INVALID_UNICHAR_ID) {
442  validChar = true;
443  break;
444  }
445  node = node->prev;
446  ++backcounter;
447  }
448  if (validChar) {
449  best.push_back(&entry.data());
450  }
451  }
452  // find the best rated segmented node chain and extract the unichar id.
453  if (!best.empty()) {
454  std::sort(best.begin(), best.end(), greater_than());
455  ExtractPath(best[0], &best_nodes, backpath);
456  ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings,
457  &xcoords);
458  }
459  if (!unichar_ids.empty()) {
460  int bestPos = 0;
461  for (unsigned i = 1; i < unichar_ids.size(); ++i) {
462  if (ratings[i] < ratings[bestPos]) {
463  bestPos = i;
464  }
465  }
466 #if 0 // TODO: bestCode is currently unused (see commit 2dd5d0d60).
467  int bestCode = -10;
468  for (auto &node : best_nodes) {
469  if (node->unichar_id == unichar_ids[bestPos]) {
470  bestCode = node->code;
471  }
472  }
473 #endif
474  // Exclude the best choice for the followup decoding.
475  std::unordered_set<int> excludeCodeList;
476  for (auto &best_node : best_nodes) {
477  if (best_node->code != null_char_) {
478  excludeCodeList.insert(best_node->code);
479  }
480  }
481  if (j - 1 < excludedUnichars.size()) {
482  for (auto elem : excludeCodeList) {
483  excludedUnichars[j - 1].insert(elem);
484  }
485  } else {
486  excludedUnichars.push_back(excludeCodeList);
487  }
488  // Save the best choice for the choice iterator.
489  if (j - 1 < ctc_choices.size()) {
490  int id = unichar_ids[bestPos];
491  const char *result = unicharset->id_to_unichar_ext(id);
492  float rating = ratings[bestPos];
493  ctc_choices[j - 1].push_back(
494  std::pair<const char *, float>(result, rating));
495  } else {
496  std::vector<std::pair<const char *, float>> choice;
497  int id = unichar_ids[bestPos];
498  const char *result = unicharset->id_to_unichar_ext(id);
499  float rating = ratings[bestPos];
500  choice.emplace_back(result, rating);
501  ctc_choices.push_back(choice);
502  }
503  // fill the blank spot with an empty array
504  } else {
505  if (j - 1 >= excludedUnichars.size()) {
506  std::unordered_set<int> excludeCodeList;
507  excludedUnichars.push_back(excludeCodeList);
508  }
509  if (j - 1 >= ctc_choices.size()) {
510  std::vector<std::pair<const char *, float>> choice;
511  ctc_choices.push_back(choice);
512  }
513  }
514  }
515  for (auto data : secondary_beam_) {
516  delete data;
517  }
518  secondary_beam_.clear();
519 }
std::vector< std::vector< std::pair< const char *, float > > > ctc_choices
Definition: recodebeam.h:237

◆ IsDawgFromBeamsIndex()

static bool tesseract::RecodeBeamSearch::IsDawgFromBeamsIndex ( int  index)
inlinestatic

Definition at line 259 of file recodebeam.h.

259  {
260  return index / (kNumLengths * NC_COUNT) > 0;
261  }

◆ LengthFromBeamsIndex()

static int tesseract::RecodeBeamSearch::LengthFromBeamsIndex ( int  index)
inlinestatic

Definition at line 253 of file recodebeam.h.

253  {
254  return index % kNumLengths;
255  }

◆ PrintBeam2()

void tesseract::RecodeBeamSearch::PrintBeam2 ( bool  uids,
int  num_outputs,
const UNICHARSET charset,
bool  secondary 
) const

Definition at line 336 of file recodebeam.cpp.

338  {
339  std::vector<std::vector<const RecodeNode *>> topology;
340  std::unordered_set<const RecodeNode *> visited;
341  const std::vector<RecodeBeam *> &beam = !secondary ? beam_ : secondary_beam_;
342  // create the topology
343  for (int step = beam.size() - 1; step >= 0; --step) {
344  std::vector<const RecodeNode *> layer;
345  topology.push_back(layer);
346  }
347  // fill the topology with depths first
348  for (int step = beam.size() - 1; step >= 0; --step) {
349  std::vector<tesseract::RecodePair> &heaps = beam.at(step)->beams_->heap();
350  for (auto node : heaps) {
351  int backtracker = 0;
352  const RecodeNode *curr = &node.data();
353  while (curr != nullptr && !visited.count(curr)) {
354  visited.insert(curr);
355  topology[step - backtracker].push_back(curr);
356  curr = curr->prev;
357  ++backtracker;
358  }
359  }
360  }
361  int ct = 0;
362  unsigned cb = 1;
363  for (const std::vector<const RecodeNode *> &layer : topology) {
364  if (cb >= character_boundaries_.size()) {
365  break;
366  }
367  if (ct == character_boundaries_[cb]) {
368  tprintf("***\n");
369  ++cb;
370  }
371  for (const RecodeNode *node : layer) {
372  const char *code;
373  int intCode;
374  if (node->unichar_id != INVALID_UNICHAR_ID) {
375  code = charset->id_to_unichar(node->unichar_id);
376  intCode = node->unichar_id;
377  } else if (node->code == null_char_) {
378  intCode = 0;
379  code = " ";
380  } else {
381  intCode = 666;
382  code = "*";
383  }
384  int intPrevCode = 0;
385  const char *prevCode;
386  float prevScore = 0;
387  if (node->prev != nullptr) {
388  prevScore = node->prev->score;
389  if (node->prev->unichar_id != INVALID_UNICHAR_ID) {
390  prevCode = charset->id_to_unichar(node->prev->unichar_id);
391  intPrevCode = node->prev->unichar_id;
392  } else if (node->code == null_char_) {
393  intPrevCode = 0;
394  prevCode = " ";
395  } else {
396  prevCode = "*";
397  intPrevCode = 666;
398  }
399  } else {
400  prevCode = " ";
401  }
402  if (uids) {
403  tprintf("%x(|)%f(>)%x(|)%f\n", intPrevCode, prevScore, intCode,
404  node->score);
405  } else {
406  tprintf("%s(|)%f(>)%s(|)%f\n", prevCode, prevScore, code, node->score);
407  }
408  }
409  tprintf("-\n");
410  ++ct;
411  }
412  tprintf("***\n");
413 }

◆ segmentTimestepsByCharacters()

void tesseract::RecodeBeamSearch::segmentTimestepsByCharacters ( )

Definition at line 170 of file recodebeam.cpp.

170  {
171  for (unsigned i = 1; i < character_boundaries_.size(); ++i) {
172  std::vector<std::vector<std::pair<const char *, float>>> segment;
173  for (int j = character_boundaries_[i - 1]; j < character_boundaries_[i];
174  ++j) {
175  segment.push_back(timesteps[j]);
176  }
177  segmentedTimesteps.push_back(segment);
178  }
179 }

Member Data Documentation

◆ character_boundaries_

std::vector<int> tesseract::RecodeBeamSearch::character_boundaries_

Definition at line 241 of file recodebeam.h.

◆ ctc_choices

std::vector<std::vector<std::pair<const char *, float> > > tesseract::RecodeBeamSearch::ctc_choices

Definition at line 237 of file recodebeam.h.

◆ excludedUnichars

std::vector<std::unordered_set<int> > tesseract::RecodeBeamSearch::excludedUnichars

Definition at line 239 of file recodebeam.h.

◆ kMinCertainty

constexpr float tesseract::RecodeBeamSearch::kMinCertainty = -20.0f
staticconstexpr

Definition at line 246 of file recodebeam.h.

◆ kNumBeams

const int tesseract::RecodeBeamSearch::kNumBeams = 2 * NC_COUNT * kNumLengths
static

Definition at line 251 of file recodebeam.h.

◆ kNumLengths

const int tesseract::RecodeBeamSearch::kNumLengths = RecodedCharID::kMaxCodeLen + 1
static

Definition at line 248 of file recodebeam.h.

◆ segmentedTimesteps

std::vector<std::vector<std::vector<std::pair<const char *, float> > > > tesseract::RecodeBeamSearch::segmentedTimesteps

Definition at line 235 of file recodebeam.h.

◆ timesteps

std::vector<std::vector<std::pair<const char *, float> > > tesseract::RecodeBeamSearch::timesteps

Definition at line 234 of file recodebeam.h.


The documentation for this class was generated from the following files: