tesseract  5.0.0
tesseract::WERD Class Reference

#include <werd.h>

Inheritance diagram for tesseract::WERD:
tesseract::ELIST2_LINK

Public Member Functions

 WERD ()=default
 
 WERD (C_BLOB_LIST *blob_list, uint8_t blanks, const char *text)
 
 WERD (C_BLOB_LIST *blob_list, WERD *clone)
 
WERD * ConstructFromSingleBlob (bool bol, bool eol, C_BLOB *blob)
 
 ~WERD ()=default
 
WERD & operator= (const WERD &source)
 
WERD * ConstructWerdWithNewBlobs (C_BLOB_LIST *all_blobs, C_BLOB_LIST *orphan_blobs)
 
C_BLOB_LIST * rej_cblob_list ()
 
C_BLOB_LIST * cblob_list ()
 
uint8_t space () const
 
void set_blanks (uint8_t new_blanks)
 
int script_id () const
 
void set_script_id (int id)
 
TBOX bounding_box () const
 
TBOX restricted_bounding_box (bool upper_dots, bool lower_dots) const
 
TBOX true_bounding_box () const
 
const char * text () const
 
void set_text (const char *new_text)
 
bool flag (WERD_FLAGS mask) const
 
void set_flag (WERD_FLAGS mask, bool value)
 
bool display_flag (uint8_t flag) const
 
void set_display_flag (uint8_t flag, bool value)
 
WERD * shallow_copy ()
 
void move (const ICOORD vec)
 
void join_on (WERD *other)
 
void copy_on (WERD *other)
 
void print () const
 
void plot (ScrollView *window, ScrollView::Color colour)
 
void plot (ScrollView *window)
 
void plot_rej_blobs (ScrollView *window)
 
void CleanNoise (float size_threshold)
 
void GetNoiseOutlines (std::vector< C_OUTLINE * > *outlines)
 
bool AddSelectedOutlines (const std::vector< bool > &wanted, const std::vector< C_BLOB * > &target_blobs, const std::vector< C_OUTLINE * > &outlines, bool *make_next_word_fuzzy)
 
- Public Member Functions inherited from tesseract::ELIST2_LINK
 ELIST2_LINK ()
 
 ELIST2_LINK (const ELIST2_LINK &)=delete
 
void operator= (const ELIST2_LINK &)
 

Static Public Member Functions

static ScrollView::Color NextColor (ScrollView::Color colour)
 

Detailed Description

Definition at line 58 of file werd.h.

Constructor & Destructor Documentation

◆ WERD() [1/3]

tesseract::WERD::WERD ( )
default

◆ WERD() [2/3]

tesseract::WERD::WERD ( C_BLOB_LIST *  blob_list,
uint8_t  blank_count,
const char *  text 
)

WERD::WERD

Constructor to build a WERD from a list of C_BLOBs. The C_BLOBs are not copied; the elements of blob_list are taken and put in this WERD's own lists.

Parameters
blob_list: The C_BLOBs, in word order
blank_count: Blanks in front of the word
text: Correct text, outlives this WERD (a null pointer is stored as the empty string)

Definition at line 45 of file werd.cpp.

46  : blanks(blank_count), flags(0), script_id_(0), correct(text ? text : "") {
47  C_BLOB_IT start_it = &cblobs;
48  C_BLOB_IT rej_cblob_it = &rej_cblobs;
49  C_OUTLINE_IT c_outline_it;
50  int16_t inverted_vote = 0;
51  int16_t non_inverted_vote = 0;
52 
53  // Move blob_list's elements into cblobs.
54  start_it.add_list_after(blob_list);
55 
56  /*
57  Set white on black flag for the WERD, moving any duff blobs onto the
58  rej_cblobs list.
59  First, walk the cblobs checking the inverse flag for each outline of each
60  cblob. If a cblob has inconsistent flag settings for its different
61  outlines, move the blob to the reject list. Otherwise, increment the
62  appropriate w-on-b or b-on-w vote for the word.
63 
64  Now set the inversion flag for the WERD by maximum vote.
65 
66  Walk the blobs again, moving any blob whose inversion flag does not agree
67  with the consensus onto the reject list.
68 */
69  start_it.set_to_list(&cblobs);
70  if (start_it.empty()) {
71  return;
72  }
73  for (start_it.mark_cycle_pt(); !start_it.cycled_list(); start_it.forward()) {
74  bool reject_blob = false;
75  bool blob_inverted;
76 
77  c_outline_it.set_to_list(start_it.data()->out_list());
78  blob_inverted = c_outline_it.data()->flag(COUT_INVERSE);
79  for (c_outline_it.mark_cycle_pt(); !c_outline_it.cycled_list() && !reject_blob;
80  c_outline_it.forward()) {
81  reject_blob = c_outline_it.data()->flag(COUT_INVERSE) != blob_inverted;
82  }
83  if (reject_blob) {
84  rej_cblob_it.add_after_then_move(start_it.extract());
85  } else {
86  if (blob_inverted) {
87  inverted_vote++;
88  } else {
89  non_inverted_vote++;
90  }
91  }
92  }
93 
94  flags.set(W_INVERSE, (inverted_vote > non_inverted_vote));
95 
96  start_it.set_to_list(&cblobs);
97  if (start_it.empty()) {
98  return;
99  }
100  for (start_it.mark_cycle_pt(); !start_it.cycled_list(); start_it.forward()) {
101  c_outline_it.set_to_list(start_it.data()->out_list());
102  if (c_outline_it.data()->flag(COUT_INVERSE) != flags[W_INVERSE]) {
103  rej_cblob_it.add_after_then_move(start_it.extract());
104  }
105  }
106 }
@ W_INVERSE
white on black
Definition: werd.h:43
@ COUT_INVERSE
Definition: coutln.h:46
const char * text() const
Definition: werd.h:121

◆ WERD() [3/3]

tesseract::WERD::WERD ( C_BLOB_LIST *  blob_list,
WERD *  clone 
)

WERD::WERD

Constructor to build a WERD from a list of C_BLOBs. The C_BLOBs are not copied so the source list is emptied.

Parameters
blob_list: In word order
clone: Source of flags

Definition at line 115 of file werd.cpp.

117  : flags(clone->flags), script_id_(clone->script_id_), correct(clone->correct) {
118  C_BLOB_IT start_it = blob_list; // iterator
119  C_BLOB_IT end_it = blob_list; // another
120 
121  while (!end_it.at_last()) {
122  end_it.forward(); // move to last
123  }
124  cblobs.assign_to_sublist(&start_it, &end_it);
125  // move to our list
126  blanks = clone->blanks;
127  // fprintf(stderr,"Wrong constructor!!!!\n");
128 }

◆ ~WERD()

tesseract::WERD::~WERD ( )
default

Member Function Documentation

◆ AddSelectedOutlines()

bool tesseract::WERD::AddSelectedOutlines ( const std::vector< bool > &  wanted,
const std::vector< C_BLOB * > &  target_blobs,
const std::vector< C_OUTLINE * > &  outlines,
bool *  make_next_word_fuzzy 
)

Definition at line 526 of file werd.cpp.

529  {
530  bool outline_added_to_start = false;
531  if (make_next_word_fuzzy != nullptr) {
532  *make_next_word_fuzzy = false;
533  }
534  C_BLOB_IT rej_it(&rej_cblobs);
535  for (unsigned i = 0; i < outlines.size(); ++i) {
536  C_OUTLINE *outline = outlines[i];
537  if (outline == nullptr) {
538  continue; // Already used it.
539  }
540  if (wanted[i]) {
541  C_BLOB *target_blob = target_blobs[i];
542  TBOX noise_box = outline->bounding_box();
543  if (target_blob == nullptr) {
544  target_blob = new C_BLOB(outline);
545  // Need to find the insertion point.
546  C_BLOB_IT blob_it(&cblobs);
547  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
548  C_BLOB *blob = blob_it.data();
549  TBOX blob_box = blob->bounding_box();
550  if (blob_box.left() > noise_box.left()) {
551  if (blob_it.at_first() && !flag(W_FUZZY_SP) && !flag(W_FUZZY_NON)) {
552  // We might want to join this word to its predecessor.
553  outline_added_to_start = true;
554  }
555  blob_it.add_before_stay_put(target_blob);
556  break;
557  }
558  }
559  if (blob_it.cycled_list()) {
560  blob_it.add_to_end(target_blob);
561  if (make_next_word_fuzzy != nullptr) {
562  *make_next_word_fuzzy = true;
563  }
564  }
565  // Add all consecutive wanted, but null-blob outlines to same blob.
566  C_OUTLINE_IT ol_it(target_blob->out_list());
567  while (i + 1 < outlines.size() && wanted[i + 1] && target_blobs[i + 1] == nullptr) {
568  ++i;
569  ol_it.add_to_end(outlines[i]);
570  }
571  } else {
572  // Insert outline into this blob.
573  C_OUTLINE_IT ol_it(target_blob->out_list());
574  ol_it.add_to_end(outline);
575  }
576  } else {
577  // Put back on noise list.
578  rej_it.add_to_end(new C_BLOB(outline));
579  }
580  }
581  return outline_added_to_start;
582 }
@ TBOX
@ W_FUZZY_SP
fuzzy space
Definition: werd.h:41
@ W_FUZZY_NON
fuzzy nonspace
Definition: werd.h:42
bool flag(WERD_FLAGS mask) const
Definition: werd.h:128

◆ bounding_box()

TBOX tesseract::WERD::bounding_box ( ) const

WERD::bounding_box

Return the bounding box of the WERD. This is quite a mess to compute! Originally, reject cblobs were excluded; however, this led to bugs when the words on the row were re-sorted. The original words were built with reject blobs included, and the FUZZY SPACE flags were set accordingly. If ALL the blobs in a word are rejected, the bounding box for the word is null (empty), causing the sort to go wrong and leading to the erroneous possibility of the first word in a row being marked as FUZZY space. The box returned here therefore includes the reject cblobs.

Definition at line 155 of file werd.cpp.

155  {
156  return restricted_bounding_box(true, true);
157 }
TBOX restricted_bounding_box(bool upper_dots, bool lower_dots) const
Definition: werd.cpp:161

◆ cblob_list()

C_BLOB_LIST* tesseract::WERD::cblob_list ( )
inline

Definition at line 96 of file werd.h.

96  { // get compact blobs
97  return &cblobs;
98  }

◆ CleanNoise()

void tesseract::WERD::CleanNoise ( float  size_threshold)

Definition at line 483 of file werd.cpp.

483  {
484  C_BLOB_IT blob_it(&cblobs);
485  C_BLOB_IT rej_it(&rej_cblobs);
486  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
487  C_BLOB *blob = blob_it.data();
488  C_OUTLINE_IT ol_it(blob->out_list());
489  for (ol_it.mark_cycle_pt(); !ol_it.cycled_list(); ol_it.forward()) {
490  C_OUTLINE *outline = ol_it.data();
491  TBOX ol_box = outline->bounding_box();
492  int ol_size = ol_box.width() > ol_box.height() ? ol_box.width() : ol_box.height();
493  if (ol_size < size_threshold) {
494  // This outline is too small. Move it to a separate blob in the
495  // reject blobs list.
496  auto *rej_blob = new C_BLOB(ol_it.extract());
497  rej_it.add_after_then_move(rej_blob);
498  }
499  }
500  if (blob->out_list()->empty()) {
501  delete blob_it.extract();
502  }
503  }
504 }

◆ ConstructFromSingleBlob()

WERD * tesseract::WERD::ConstructFromSingleBlob ( bool  bol,
bool  eol,
C_BLOB *  blob 
)

Definition at line 132 of file werd.cpp.

132  {
133  C_BLOB_LIST temp_blobs;
134  C_BLOB_IT temp_it(&temp_blobs);
135  temp_it.add_after_then_move(blob);
136  WERD *blob_word = new WERD(&temp_blobs, this);
137  blob_word->set_flag(W_BOL, bol);
138  blob_word->set_flag(W_EOL, eol);
139  return blob_word;
140 }
@ W_BOL
start of line
Definition: werd.h:34
@ W_EOL
end of line
Definition: werd.h:35
WERD()=default

◆ ConstructWerdWithNewBlobs()

WERD * tesseract::WERD::ConstructWerdWithNewBlobs ( C_BLOB_LIST *  all_blobs,
C_BLOB_LIST *  orphan_blobs 
)

WERD::ConstructWerdWithNewBlobs()

This method returns a new werd constructed using the blobs in the input all_blobs list, which correspond to the blobs in this werd object. The blobs used to construct the new word are consumed and removed from the input all_blobs list. Returns nullptr if the word couldn't be constructed. Original blobs for which no matches were found are appended to the output list orphan_blobs, if it is non-null.

Definition at line 395 of file werd.cpp.

395  {
396  C_BLOB_LIST current_blob_list;
397  C_BLOB_IT werd_blobs_it(&current_blob_list);
398  // Add the word's c_blobs.
399  werd_blobs_it.add_list_after(cblob_list());
400 
401  // New blob list. These contain the blobs which will form the new word.
402  C_BLOB_LIST new_werd_blobs;
403  C_BLOB_IT new_blobs_it(&new_werd_blobs);
404 
405  // not_found_blobs contains the list of current word's blobs for which a
406  // corresponding blob wasn't found in the input all_blobs list.
407  C_BLOB_LIST not_found_blobs;
408  C_BLOB_IT not_found_it(&not_found_blobs);
409  not_found_it.move_to_last();
410 
411  werd_blobs_it.move_to_first();
412  for (werd_blobs_it.mark_cycle_pt(); !werd_blobs_it.cycled_list(); werd_blobs_it.forward()) {
413  C_BLOB *werd_blob = werd_blobs_it.extract();
414  TBOX werd_blob_box = werd_blob->bounding_box();
415  bool found = false;
416  // Now find the corresponding blob for this blob in the all_blobs
417  // list. For now, follow the inefficient method of pairwise
418  // comparisons. Ideally, one can pre-bucket the blobs by row.
419  C_BLOB_IT all_blobs_it(all_blobs);
420  for (all_blobs_it.mark_cycle_pt(); !all_blobs_it.cycled_list(); all_blobs_it.forward()) {
421  C_BLOB *a_blob = all_blobs_it.data();
422  // Compute the overlap of the two blobs. If major, a_blob should
423  // be added to the new blobs list.
424  TBOX a_blob_box = a_blob->bounding_box();
425  if (a_blob_box.null_box()) {
426  tprintf("Bounding box couldn't be ascertained\n");
427  }
428  if (werd_blob_box.contains(a_blob_box) || werd_blob_box.major_overlap(a_blob_box)) {
429  // Old blobs are from minimal splits, therefore are expected to be
430  // bigger. The new small blobs should cover a significant portion.
431  // This is it.
432  all_blobs_it.extract();
433  new_blobs_it.add_after_then_move(a_blob);
434  found = true;
435  }
436  }
437  if (!found) {
438  not_found_it.add_after_then_move(werd_blob);
439  } else {
440  delete werd_blob;
441  }
442  }
443  // Iterate over all not found blobs. Some of them may be due to
444  // under-segmentation (which is OK, since the corresponding blob is already
445  // in the list in that case).
446  not_found_it.move_to_first();
447  for (not_found_it.mark_cycle_pt(); !not_found_it.cycled_list(); not_found_it.forward()) {
448  C_BLOB *not_found = not_found_it.data();
449  TBOX not_found_box = not_found->bounding_box();
450  C_BLOB_IT existing_blobs_it(new_blobs_it);
451  for (existing_blobs_it.mark_cycle_pt(); !existing_blobs_it.cycled_list();
452  existing_blobs_it.forward()) {
453  C_BLOB *a_blob = existing_blobs_it.data();
454  TBOX a_blob_box = a_blob->bounding_box();
455  if ((not_found_box.major_overlap(a_blob_box) || a_blob_box.major_overlap(not_found_box)) &&
456  not_found_box.y_overlap_fraction(a_blob_box) > 0.8) {
457  // Already taken care of.
458  delete not_found_it.extract();
459  break;
460  }
461  }
462  }
463  if (orphan_blobs) {
464  C_BLOB_IT orphan_blobs_it(orphan_blobs);
465  orphan_blobs_it.move_to_last();
466  orphan_blobs_it.add_list_after(&not_found_blobs);
467  }
468 
469  // New blobs are ready. Create a new werd object with these.
470  WERD *new_werd = nullptr;
471  if (!new_werd_blobs.empty()) {
472  new_werd = new WERD(&new_werd_blobs, this);
473  } else {
474  // Add the blobs back to this word so that it can be reused.
475  C_BLOB_IT this_list_it(cblob_list());
476  this_list_it.add_list_after(&not_found_blobs);
477  }
478  return new_werd;
479 }
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
C_BLOB_LIST * cblob_list()
Definition: werd.h:96

◆ copy_on()

void tesseract::WERD::copy_on ( WERD *  other)

WERD::copy_on

Copy blobs from other word onto this one.

Definition at line 230 of file werd.cpp.

230  {
231  bool reversed = other->bounding_box().left() < bounding_box().left();
232  C_BLOB_IT c_blob_it(&cblobs);
233  C_BLOB_LIST c_blobs;
234 
235  c_blobs.deep_copy(&other->cblobs, &C_BLOB::deep_copy);
236  if (reversed) {
237  c_blob_it.add_list_before(&c_blobs);
238  } else {
239  c_blob_it.move_to_last();
240  c_blob_it.add_list_after(&c_blobs);
241  }
242  if (!other->rej_cblobs.empty()) {
243  C_BLOB_IT rej_c_blob_it(&rej_cblobs);
244  C_BLOB_LIST new_rej_c_blobs;
245 
246  new_rej_c_blobs.deep_copy(&other->rej_cblobs, &C_BLOB::deep_copy);
247  if (reversed) {
248  rej_c_blob_it.add_list_before(&new_rej_c_blobs);
249  } else {
250  rej_c_blob_it.move_to_last();
251  rej_c_blob_it.add_list_after(&new_rej_c_blobs);
252  }
253  }
254 }
TDimension left() const
Definition: rect.h:82
static C_BLOB * deep_copy(const C_BLOB *src)
Definition: stepblob.h:118
TBOX bounding_box() const
Definition: werd.cpp:155

◆ display_flag()

bool tesseract::WERD::display_flag ( uint8_t  flag) const
inline

Definition at line 135 of file werd.h.

135  {
136  return disp_flags[flag];
137  }

◆ flag()

bool tesseract::WERD::flag ( WERD_FLAGS  mask) const
inline

Definition at line 128 of file werd.h.

128  {
129  return flags[mask];
130  }

◆ GetNoiseOutlines()

void tesseract::WERD::GetNoiseOutlines ( std::vector< C_OUTLINE * > *  outlines)

Definition at line 508 of file werd.cpp.

508  {
509  C_BLOB_IT rej_it(&rej_cblobs);
510  for (rej_it.mark_cycle_pt(); !rej_it.empty(); rej_it.forward()) {
511  C_BLOB *blob = rej_it.extract();
512  C_OUTLINE_IT ol_it(blob->out_list());
513  outlines->push_back(ol_it.extract());
514  delete blob;
515  }
516 }

◆ join_on()

void tesseract::WERD::join_on ( WERD *  other)

WERD::join_on

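Join other word onto this one: the blobs and rejected blobs of other are moved to the end of this word's lists. The original description also says "Delete the old word", but the listing below only empties other's blob lists; it does not delete other itself.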

Definition at line 208 of file werd.cpp.

208  {
209  C_BLOB_IT blob_it(&cblobs);
210  C_BLOB_IT src_it(&other->cblobs);
211  C_BLOB_IT rej_cblob_it(&rej_cblobs);
212  C_BLOB_IT src_rej_it(&other->rej_cblobs);
213 
214  while (!src_it.empty()) {
215  blob_it.add_to_end(src_it.extract());
216  src_it.forward();
217  }
218  while (!src_rej_it.empty()) {
219  rej_cblob_it.add_to_end(src_rej_it.extract());
220  src_rej_it.forward();
221  }
222 }

◆ move()

void tesseract::WERD::move ( const ICOORD  vec)

WERD::move

Reposition WERD by vector. NOTE: reject cblobs are NOT moved.

Definition at line 194 of file werd.cpp.

194  {
195  C_BLOB_IT cblob_it(&cblobs); // cblob iterator
196 
197  for (cblob_it.mark_cycle_pt(); !cblob_it.cycled_list(); cblob_it.forward()) {
198  cblob_it.data()->move(vec);
199  }
200 }

◆ NextColor()

ScrollView::Color tesseract::WERD::NextColor ( ScrollView::Color  colour)
static

Definition at line 298 of file werd.cpp.

298  {
299  auto next = static_cast<ScrollView::Color>(colour + 1);
300  if (next >= LAST_COLOUR || next < FIRST_COLOUR) {
301  next = FIRST_COLOUR;
302  }
303  return next;
304 }
#define FIRST_COLOUR
first rainbow colour
Definition: werd.cpp:32
#define LAST_COLOUR
last rainbow colour
Definition: werd.cpp:33

◆ operator=()

WERD & tesseract::WERD::operator= ( const WERD &  source)

WERD::operator=

Assign a word, DEEP copying the blob list

Definition at line 357 of file werd.cpp.

357  {
358  this->ELIST2_LINK::operator=(source);
359  blanks = source.blanks;
360  flags = source.flags;
361  script_id_ = source.script_id_;
362  correct = source.correct;
363  cblobs.clear();
364  cblobs.deep_copy(&source.cblobs, &C_BLOB::deep_copy);
365  rej_cblobs.clear();
366  rej_cblobs.deep_copy(&source.rej_cblobs, &C_BLOB::deep_copy);
367  return *this;
368 }
void operator=(const ELIST2_LINK &)
Definition: elst2.h:75

◆ plot() [1/2]

void tesseract::WERD::plot ( ScrollView *  window)

WERD::plot

Draw the WERD in rainbow colours in window.

Definition at line 312 of file werd.cpp.

312  {
313  ScrollView::Color colour = FIRST_COLOUR;
314  C_BLOB_IT it = &cblobs;
315  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
316  it.data()->plot(window, colour, CHILD_COLOUR);
317  colour = NextColor(colour);
318  }
319  plot_rej_blobs(window);
320 }
#define CHILD_COLOUR
colour of children
Definition: werd.cpp:34
void plot_rej_blobs(ScrollView *window)
Definition: werd.cpp:328
static ScrollView::Color NextColor(ScrollView::Color colour)
Definition: werd.cpp:298

◆ plot() [2/2]

void tesseract::WERD::plot ( ScrollView *  window,
ScrollView::Color  colour 
)

WERD::plot

Draw the WERD in the given colour.

Definition at line 289 of file werd.cpp.

289  {
290  C_BLOB_IT it = &cblobs;
291  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
292  it.data()->plot(window, colour, colour);
293  }
294  plot_rej_blobs(window);
295 }

◆ plot_rej_blobs()

void tesseract::WERD::plot_rej_blobs ( ScrollView *  window)

WERD::plot_rej_blobs

Draw the WERD rejected blobs in window - ALWAYS GREY

Definition at line 328 of file werd.cpp.

328  {
329  C_BLOB_IT it = &rej_cblobs;
330  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
331  it.data()->plot(window, ScrollView::GREY, ScrollView::GREY);
332  }
333 }

◆ print()

void tesseract::WERD::print ( ) const

WERD::print

Display members

Definition at line 262 of file werd.cpp.

262  {
263  tprintf("Blanks= %d\n", blanks);
264  bounding_box().print();
265  tprintf("Flags = %lu = 0%lo\n", flags.to_ulong(), flags.to_ulong());
266  tprintf(" W_SEGMENTED = %s\n", flags[W_SEGMENTED] ? "TRUE" : "FALSE");
267  tprintf(" W_ITALIC = %s\n", flags[W_ITALIC] ? "TRUE" : "FALSE");
268  tprintf(" W_BOL = %s\n", flags[W_BOL] ? "TRUE" : "FALSE");
269  tprintf(" W_EOL = %s\n", flags[W_EOL] ? "TRUE" : "FALSE");
270  tprintf(" W_NORMALIZED = %s\n", flags[W_NORMALIZED] ? "TRUE" : "FALSE");
271  tprintf(" W_SCRIPT_HAS_XHEIGHT = %s\n", flags[W_SCRIPT_HAS_XHEIGHT] ? "TRUE" : "FALSE");
272  tprintf(" W_SCRIPT_IS_LATIN = %s\n", flags[W_SCRIPT_IS_LATIN] ? "TRUE" : "FALSE");
273  tprintf(" W_DONT_CHOP = %s\n", flags[W_DONT_CHOP] ? "TRUE" : "FALSE");
274  tprintf(" W_REP_CHAR = %s\n", flags[W_REP_CHAR] ? "TRUE" : "FALSE");
275  tprintf(" W_FUZZY_SP = %s\n", flags[W_FUZZY_SP] ? "TRUE" : "FALSE");
276  tprintf(" W_FUZZY_NON = %s\n", flags[W_FUZZY_NON] ? "TRUE" : "FALSE");
277  tprintf("Correct= %s\n", correct.c_str());
278  tprintf("Rejected cblob count = %d\n", rej_cblobs.length());
279  tprintf("Script = %d\n", script_id_);
280 }
@ W_NORMALIZED
flags
Definition: werd.h:36
@ W_ITALIC
italic text
Definition: werd.h:32
@ W_SEGMENTED
correctly segmented
Definition: werd.h:31
@ W_SCRIPT_HAS_XHEIGHT
x-height concept makes sense.
Definition: werd.h:37
@ W_SCRIPT_IS_LATIN
Special case latin for y. splitting.
Definition: werd.h:38
@ W_DONT_CHOP
fixed pitch chopped
Definition: werd.h:39
@ W_REP_CHAR
repeated character
Definition: werd.h:40
void print() const
Definition: rect.h:289

◆ rej_cblob_list()

C_BLOB_LIST* tesseract::WERD::rej_cblob_list ( )
inline

Definition at line 91 of file werd.h.

91  { // compact format
92  return &rej_cblobs;
93  }

◆ restricted_bounding_box()

TBOX tesseract::WERD::restricted_bounding_box ( bool  upper_dots,
bool  lower_dots 
) const

Definition at line 161 of file werd.cpp.

161  {
162  TBOX box = true_bounding_box();
163  int bottom = box.bottom();
164  int top = box.top();
165  // This is a read-only iteration of the rejected blobs.
166  C_BLOB_IT it(const_cast<C_BLOB_LIST *>(&rej_cblobs));
167  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
168  TBOX dot_box = it.data()->bounding_box();
169  if ((upper_dots || dot_box.bottom() <= top) && (lower_dots || dot_box.top() >= bottom)) {
170  box += dot_box;
171  }
172  }
173  return box;
174 }
TBOX true_bounding_box() const
Definition: werd.cpp:177

◆ script_id()

int tesseract::WERD::script_id ( ) const
inline

Definition at line 106 of file werd.h.

106  {
107  return script_id_;
108  }

◆ set_blanks()

void tesseract::WERD::set_blanks ( uint8_t  new_blanks)
inline

Definition at line 103 of file werd.h.

103  {
104  blanks = new_blanks;
105  }

◆ set_display_flag()

void tesseract::WERD::set_display_flag ( uint8_t  flag,
bool  value 
)
inline

Definition at line 138 of file werd.h.

138  {
139  disp_flags.set(flag, value);
140  }

◆ set_flag()

void tesseract::WERD::set_flag ( WERD_FLAGS  mask,
bool  value 
)
inline

Definition at line 131 of file werd.h.

131  {
132  flags.set(mask, value);
133  }

◆ set_script_id()

void tesseract::WERD::set_script_id ( int  id)
inline

Definition at line 109 of file werd.h.

109  {
110  script_id_ = id;
111  }

◆ set_text()

void tesseract::WERD::set_text ( const char *  new_text)
inline

Definition at line 124 of file werd.h.

124  {
125  correct = new_text;
126  }

◆ shallow_copy()

WERD * tesseract::WERD::shallow_copy ( )

WERD::shallow_copy()

Make a shallow copy of a word

Definition at line 342 of file werd.cpp.

342  {
343  WERD *new_word = new WERD;
344 
345  new_word->blanks = blanks;
346  new_word->flags = flags;
347  new_word->correct = correct;
348  return new_word;
349 }

◆ space()

uint8_t tesseract::WERD::space ( ) const
inline

Definition at line 100 of file werd.h.

100  { // access function
101  return blanks;
102  }

◆ text()

const char* tesseract::WERD::text ( ) const
inline

Definition at line 121 of file werd.h.

121  {
122  return correct.c_str();
123  }

◆ true_bounding_box()

TBOX tesseract::WERD::true_bounding_box ( ) const

Definition at line 177 of file werd.cpp.

177  {
178  TBOX box; // box being built
179  // This is a read-only iteration of the good blobs.
180  C_BLOB_IT it(const_cast<C_BLOB_LIST *>(&cblobs));
181  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
182  box += it.data()->bounding_box();
183  }
184  return box;
185 }

The documentation for this class was generated from the following files: