tesseract  5.0.0
UnicodeText::const_iterator Class Reference

#include <unicodetext.h>

Public Types

typedef std::bidirectional_iterator_tag iterator_category
 
typedef char32 value_type
 
typedef ptrdiff_t difference_type
 
typedef void pointer
 
typedef const char32 reference
 

Public Member Functions

 const_iterator ()
 
 const_iterator (const const_iterator &other)
 
const_iterator & operator= (const const_iterator &other)
 
char32 operator* () const
 
const_iterator & operator++ ()
 
const_iterator operator++ (int)
 
const_iterator & operator-- ()
 
const_iterator operator-- (int)
 
int get_utf8 (char *buf) const
 
string get_utf8_string () const
 
int utf8_length () const
 
const char * utf8_data () const
 
string DebugString () const
 

Friends

class UnicodeText
 
class UnicodeTextUtils
 
class UTF8StateTableProperty
 
bool operator== (const CI &lhs, const CI &rhs)
 
bool operator!= (const CI &lhs, const CI &rhs)
 
bool operator< (const CI &lhs, const CI &rhs)
 
bool operator> (const CI &lhs, const CI &rhs)
 
bool operator<= (const CI &lhs, const CI &rhs)
 
bool operator>= (const CI &lhs, const CI &rhs)
 
difference_type distance (const CI &first, const CI &last)
 

Detailed Description

Definition at line 181 of file unicodetext.h.

Member Typedef Documentation

◆ difference_type

Definition at line 187 of file unicodetext.h.

◆ iterator_category

typedef std::bidirectional_iterator_tag UnicodeText::const_iterator::iterator_category

Definition at line 185 of file unicodetext.h.

◆ pointer

Definition at line 188 of file unicodetext.h.

◆ reference

Definition at line 189 of file unicodetext.h.

◆ value_type

Definition at line 186 of file unicodetext.h.

Constructor & Destructor Documentation

◆ const_iterator() [1/2]

UnicodeText::const_iterator::const_iterator ( )

Definition at line 398 of file unicodetext.cc.

398 : it_(nullptr) {}

◆ const_iterator() [2/2]

UnicodeText::const_iterator::const_iterator ( const const_iterator & other)

Definition at line 400 of file unicodetext.cc.

400 : it_(other.it_) {}

Member Function Documentation

◆ DebugString()

string UnicodeText::const_iterator::DebugString ( ) const

◆ get_utf8()

int UnicodeText::const_iterator::get_utf8 ( char *  buf) const

Definition at line 454 of file unicodetext.cc.

454  {
455  utf8_output[0] = it_[0];
456  if ((it_[0] & 0xff) < 0x80)
457  return 1;
458  utf8_output[1] = it_[1];
459  if ((it_[0] & 0xff) < 0xE0)
460  return 2;
461  utf8_output[2] = it_[2];
462  if ((it_[0] & 0xff) < 0xF0)
463  return 3;
464  utf8_output[3] = it_[3];
465  return 4;
466 }

◆ get_utf8_string()

string UnicodeText::const_iterator::get_utf8_string ( ) const

Definition at line 468 of file unicodetext.cc.

468  {
469  return string(utf8_data(), utf8_length());
470 }
const char * utf8_data() const
Definition: unicodetext.h:244

◆ operator*()

char32 UnicodeText::const_iterator::operator* ( ) const

Definition at line 420 of file unicodetext.cc.

420  {
421  // (We could call chartorune here, but that does some
422  // error-checking, and we're guaranteed that our data is valid
423  // UTF-8. Also, we expect this routine to be called very often. So
424  // for speed, we do the calculation ourselves.)
425 
426  // Convert from UTF-8
427  unsigned char byte1 = it_[0];
428  if (byte1 < 0x80)
429  return byte1;
430 
431  unsigned char byte2 = it_[1];
432  if (byte1 < 0xE0)
433  return ((byte1 & 0x1F) << 6) | (byte2 & 0x3F);
434 
435  unsigned char byte3 = it_[2];
436  if (byte1 < 0xF0)
437  return ((byte1 & 0x0F) << 12) | ((byte2 & 0x3F) << 6) | (byte3 & 0x3F);
438 
439  unsigned char byte4 = it_[3];
440  return ((byte1 & 0x07) << 18) | ((byte2 & 0x3F) << 12) | ((byte3 & 0x3F) << 6) | (byte4 & 0x3F);
441 }

◆ operator++() [1/2]

UnicodeText::const_iterator & UnicodeText::const_iterator::operator++ ( )

Definition at line 443 of file unicodetext.cc.

443  {
444  it_ += UniLib::OneCharLen(it_);
445  return *this;
446 }
int OneCharLen(const char *src)

◆ operator++() [2/2]

const_iterator UnicodeText::const_iterator::operator++ ( int  )
inline

Definition at line 201 of file unicodetext.h.

201  { // (iter++)
202  const_iterator result(*this);
203  ++*this;
204  return result;
205  }

◆ operator--() [1/2]

UnicodeText::const_iterator & UnicodeText::const_iterator::operator-- ( )

Definition at line 448 of file unicodetext.cc.

448  {
449  while (UniLib::IsTrailByte(*--it_))
450  ;
451  return *this;
452 }
bool IsTrailByte(char x)

◆ operator--() [2/2]

const_iterator UnicodeText::const_iterator::operator-- ( int  )
inline

Definition at line 208 of file unicodetext.h.

208  { // (iter--)
209  const_iterator result(*this);
210  --*this;
211  return result;
212  }

◆ operator=()

UnicodeText::const_iterator & UnicodeText::const_iterator::operator= ( const const_iterator & other)

Definition at line 402 of file unicodetext.cc.

402  {
403  if (&other != this)
404  it_ = other.it_;
405  return *this;
406 }

◆ utf8_data()

const char* UnicodeText::const_iterator::utf8_data ( ) const
inline

Definition at line 244 of file unicodetext.h.

244  {
245  return it_;
246  }

◆ utf8_length()

int UnicodeText::const_iterator::utf8_length ( ) const

Definition at line 472 of file unicodetext.cc.

472  {
473  if ((it_[0] & 0xff) < 0x80) {
474  return 1;
475  } else if ((it_[0] & 0xff) < 0xE0) {
476  return 2;
477  } else if ((it_[0] & 0xff) < 0xF0) {
478  return 3;
479  } else {
480  return 4;
481  }
482 }

Friends And Related Function Documentation

◆ distance

difference_type distance ( const CI & first,
const CI & last 
)
friend

Definition at line 44 of file unicodetext.cc.

45  {
46  return CodepointDistance(first.it_, last.it_);
47 }
LIST last(LIST var_list)
Definition: oldlist.cpp:153

◆ operator!=

bool operator!= ( const CI & lhs,
const CI & rhs 
)
friend

Definition at line 218 of file unicodetext.h.

218  {
219  return !(lhs == rhs);
220  }

◆ operator<

bool operator< ( const CI & lhs,
const CI & rhs 
)
friend

Definition at line 416 of file unicodetext.cc.

416  {
417  return lhs.it_ < rhs.it_;
418 }

◆ operator<=

bool operator<= ( const CI & lhs,
const CI & rhs 
)
friend

Definition at line 225 of file unicodetext.h.

225  {
226  return !(rhs < lhs);
227  }

◆ operator==

bool operator== ( const CI & lhs,
const CI & rhs 
)
friend

Definition at line 215 of file unicodetext.h.

215  {
216  return lhs.it_ == rhs.it_;
217  }

◆ operator>

bool operator> ( const CI & lhs,
const CI & rhs 
)
friend

Definition at line 222 of file unicodetext.h.

222  {
223  return rhs < lhs;
224  }

◆ operator>=

bool operator>= ( const CI & lhs,
const CI & rhs 
)
friend

Definition at line 228 of file unicodetext.h.

228  {
229  return !(lhs < rhs);
230  }

◆ UnicodeText

friend class UnicodeText
friend

Definition at line 251 of file unicodetext.h.

◆ UnicodeTextUtils

friend class UnicodeTextUtils
friend

Definition at line 252 of file unicodetext.h.

◆ UTF8StateTableProperty

friend class UTF8StateTableProperty
friend

Definition at line 253 of file unicodetext.h.


The documentation for this class was generated from the following files: