19 #ifndef TESSERACT_CCUTIL_UNICHAR_H_
20 #define TESSERACT_CCUTIL_UNICHAR_H_
// Preprocessor constant with the value 30.
// NOTE(review): by its name this is presumably a size limit (in bytes) for
// the UTF-8 data held by a UNICHAR -- confirm against the class definition,
// which is not visible here.
33 #define UNICHAR_LEN 30
// Integer constant fixed at -1.
// NOTE(review): by its name, presumably the sentinel id used for an invalid
// or unset unichar -- confirm against the code that consumes it.
39 static const int INVALID_UNICHAR_ID = -1;
// Constant character array holding the literal "__INVALID_UNICHAR__".
// NOTE(review): presumably the textual counterpart of INVALID_UNICHAR_ID --
// confirm against callers.
41 static const char INVALID_UNICHAR[] =
"__INVALID_UNICHAR__";
// Constructor declaration taking a character pointer and an int.
//   utf8_str: source string; by its name presumably UTF-8 encoded -- confirm.
//   len:      presumably the number of bytes of utf8_str to use -- confirm.
// NOTE(review): the definition is not visible here, so how over-long or
// malformed input is handled cannot be stated from this declaration.
66 UNICHAR(
const char *utf8_str,
int len);
// Const member function returning an int; takes no arguments.
// NOTE(review): by its name, presumably yields the first Unicode code point
// of the stored character data -- confirm against the definition.
74 int first_uni()
const;
// Const member function returning a pointer to non-const char.
// NOTE(review): presumably the stored data as a UTF-8 C string. Ownership of
// the returned buffer is not visible from this declaration -- check the
// definition to see whether the caller is responsible for freeing it.
88 char *utf8_str()
const;
// Static function taking a const character pointer and returning an int.
//   utf8_str: input string; by its name presumably UTF-8 encoded.
// NOTE(review): by its name, presumably returns how many bytes to advance to
// reach the next UTF-8 character -- confirm, including the value returned
// for an invalid lead byte.
91 static int utf8_step(
const char *utf8_str);
// Const member function taking a non-const char buffer and returning an int.
//   buf: caller-supplied buffer; being non-const it is presumably written to.
// NOTE(review): no buffer size is passed, so the required capacity of buf
// (possibly UNICHAR_LEN bytes) and whether the output is NUL-terminated must
// be confirmed from the definition. The int result is presumably a length.
127 int get_utf8(
char *buf)
const;
// Const member function returning an int; takes no arguments.
// NOTE(review): by its name, presumably the length in bytes of the stored
// UTF-8 data -- confirm against the definition.
130 int utf8_len()
const;
// Const member function returning a bool; takes no arguments.
// NOTE(review): by its name, presumably reports whether the current content
// is valid -- the exact validity criteria are not visible here.
132 bool is_legal()
const;
141 return lhs.it_ == rhs.it_;
144 return !(lhs == rhs);
// Static factory returning a const_iterator for the given string.
//   utf8_str:    input string; by its name presumably UTF-8 encoded.
//   byte_length: presumably the size of utf8_str in bytes -- confirm.
// NOTE(review): presumably positioned at the start of utf8_str; the iterator
// likely refers to the caller's buffer rather than copying it, so the buffer
// would need to outlive the iterator -- confirm against const_iterator.
157 static const_iterator begin(
const char *utf8_str,
int byte_length);
// Static factory returning a const_iterator for the given string; takes the
// same parameters as begin().
//   utf8_str:    input string; by its name presumably UTF-8 encoded.
//   byte_length: presumably the size of utf8_str in bytes -- confirm.
// NOTE(review): presumably the past-the-end position matching
// begin(utf8_str, byte_length) -- confirm against the definition.
158 static const_iterator end(
const char *utf8_str,
int byte_length);
// Static conversion function: takes a const character pointer and returns a
// std::vector<char32> by value.
//   utf8_str: input string; no length is passed, so it is presumably
//             NUL-terminated -- confirm.
// NOTE(review): the result for invalid UTF-8 input is not visible from this
// declaration -- check the definition.
162 static std::vector<char32> UTF8ToUTF32(
const char *utf8_str);
// Static conversion function: takes a vector of char32 by const reference and
// returns a std::string by value.
//   str32: input sequence; by the function name presumably UTF-32 code
//          points.
// NOTE(review): the result for code points that cannot be encoded is not
// visible from this declaration -- check the definition.
165 static std::string UTF32ToUTF8(
const std::vector<char32> &str32);
int32_t operator*(const ICOORD &op1, const ICOORD &op2)
const char * utf8() const
friend bool operator!=(const CI &lhs, const CI &rhs)
const char * utf8_data() const
friend bool operator==(const CI &lhs, const CI &rhs)