tesseract  5.0.0
tesseract::UnicodeSpanSkipper Class Reference

Public Member Functions

 UnicodeSpanSkipper (const UNICHARSET *unicharset, const WERD_CHOICE *word)
 
unsigned SkipPunc (unsigned pos)
 
unsigned SkipDigits (unsigned pos)
 
unsigned SkipRomans (unsigned pos)
 
unsigned SkipAlpha (unsigned pos)
 

Detailed Description

Definition at line 298 of file paragraphs.cpp.

Constructor & Destructor Documentation

◆ UnicodeSpanSkipper()

tesseract::UnicodeSpanSkipper::UnicodeSpanSkipper ( const UNICHARSET * unicharset,
const WERD_CHOICE * word 
)
inline

Definition at line 300 of file paragraphs.cpp.

301  : u_(unicharset), word_(word), wordlen_(word->length()) {
302  }

Member Function Documentation

◆ SkipAlpha()

unsigned tesseract::UnicodeSpanSkipper::SkipAlpha ( unsigned  pos)

Definition at line 346 of file paragraphs.cpp.

346  {
347  while (pos < wordlen_ && u_->get_isalpha(word_->unichar_id(pos))) {
348  pos++;
349  }
350  return pos;
351 }
UNICHAR_ID unichar_id(unsigned index) const
Definition: ratngs.h:295

◆ SkipDigits()

unsigned tesseract::UnicodeSpanSkipper::SkipDigits ( unsigned  pos)

Definition at line 326 of file paragraphs.cpp.

326  {
327  while (pos < wordlen_ &&
328  (u_->get_isdigit(word_->unichar_id(pos)) || IsDigitLike(UnicodeFor(u_, word_, pos)))) {
329  pos++;
330  }
331  return pos;
332 }
bool get_isdigit(UNICHAR_ID unichar_id) const
Definition: unicharset.h:524

◆ SkipPunc()

unsigned tesseract::UnicodeSpanSkipper::SkipPunc ( unsigned  pos)

Definition at line 319 of file paragraphs.cpp.

319  {
320  while (pos < wordlen_ && u_->get_ispunctuation(word_->unichar_id(pos))) {
321  pos++;
322  }
323  return pos;
324 }

◆ SkipRomans()

unsigned tesseract::UnicodeSpanSkipper::SkipRomans ( unsigned  pos)

Definition at line 334 of file paragraphs.cpp.

334  {
335  const char *kRomans = "ivxlmdIVXLMD";
336  while (pos < wordlen_) {
337  int ch = UnicodeFor(u_, word_, pos);
338  if (ch >= 0xF0 || strchr(kRomans, ch) == nullptr) {
339  break;
340  }
341  pos++;
342  }
343  return pos;
344 }

The documentation for this class was generated from the following file: