tesseract  5.0.0
tesseract::PangoFontInfo Class Reference

#include <pango_font_info.h>

Public Types

enum  FontTypeEnum { UNKNOWN , SERIF , SANS_SERIF , DECORATIVE }
 

Public Member Functions

 PangoFontInfo ()
 
 ~PangoFontInfo ()
 
 PangoFontInfo (const std::string &name)
 
bool ParseFontDescriptionName (const std::string &name)
 
bool CoversUTF8Text (const char *utf8_text, int byte_length) const
 
int DropUncoveredChars (std::string *utf8_text) const
 
bool CanRenderString (const char *utf8_word, int len, std::vector< std::string > *graphemes) const
 
bool CanRenderString (const char *utf8_word, int len) const
 
bool GetSpacingProperties (const std::string &utf8_char, int *x_bearing, int *x_advance) const
 
std::string DescriptionName () const
 
const std::string & family_name () const
 
int font_size () const
 
FontTypeEnum font_type () const
 
int resolution () const
 
void set_resolution (const int resolution)
 

Static Public Member Functions

static void SoftInitFontConfig ()
 
static void HardInitFontConfig (const char *fonts_dir, const char *cache_dir)
 

Friends

class FontUtils
 

Detailed Description

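Describes a single font by means of a Pango font description. The class parses a font description name (ParseFontDescriptionName), reports whether the font covers or can render a piece of UTF-8 text (CoversUTF8Text, DropUncoveredChars, CanRenderString), measures the x-bearing and x-advance of a UTF-8 string (GetSpacingProperties), and offers static helpers that initialise or re-initialise fontconfig (SoftInitFontConfig, HardInitFontConfig).

Definition at line 42 of file pango_font_info.h.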

Member Enumeration Documentation

◆ FontTypeEnum

Enumerator
UNKNOWN 
SERIF 
SANS_SERIF 
DECORATIVE 

Definition at line 44 of file pango_font_info.h.

Constructor & Destructor Documentation

◆ PangoFontInfo() [1/2]

tesseract::PangoFontInfo::PangoFontInfo ( )

Definition at line 76 of file pango_font_info.cpp.

76  : desc_(nullptr), resolution_(kDefaultResolution) {
77  Clear();
78 }
const int kDefaultResolution

◆ ~PangoFontInfo()

tesseract::PangoFontInfo::~PangoFontInfo ( )

Definition at line 98 of file pango_font_info.cpp.

98  {
99  pango_font_description_free(desc_);
100 }

◆ PangoFontInfo() [2/2]

tesseract::PangoFontInfo::PangoFontInfo ( const std::string &  name)
explicit

Definition at line 80 of file pango_font_info.cpp.

81  : desc_(nullptr), resolution_(kDefaultResolution) {
82  if (!ParseFontDescriptionName(desc)) {
83  tprintf("ERROR: Could not parse %s\n", desc.c_str());
84  Clear();
85  }
86 }
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
bool ParseFontDescriptionName(const std::string &name)

Member Function Documentation

◆ CanRenderString() [1/2]

bool tesseract::PangoFontInfo::CanRenderString ( const char *  utf8_word,
int  len 
) const

Definition at line 352 of file pango_font_info.cpp.

352  {
353  std::vector<std::string> graphemes;
354  return CanRenderString(utf8_word, len, &graphemes);
355 }
bool CanRenderString(const char *utf8_word, int len, std::vector< std::string > *graphemes) const

◆ CanRenderString() [2/2]

bool tesseract::PangoFontInfo::CanRenderString ( const char *  utf8_word,
int  len,
std::vector< std::string > *  graphemes 
) const

Definition at line 357 of file pango_font_info.cpp.

358  {
359  if (graphemes) {
360  graphemes->clear();
361  }
362  // We check for font coverage of the text first, as otherwise Pango could
363  // (undesirably) fall back to another font that does have the required
364  // coverage.
365  if (!CoversUTF8Text(utf8_word, len)) {
366  return false;
367  }
368  // U+25CC dotted circle character that often (but not always) gets rendered
369  // when there is an illegal grapheme sequence.
370  const char32 kDottedCircleGlyph = 9676;
371  bool bad_glyph = false;
372  PangoFontMap *font_map = pango_cairo_font_map_get_default();
373  PangoContext *context = pango_context_new();
374  pango_context_set_font_map(context, font_map);
375  PangoLayout *layout;
376  {
377  // Pango is not releasing the cached layout.
379  layout = pango_layout_new(context);
380  }
381  if (desc_) {
382  pango_layout_set_font_description(layout, desc_);
383  } else {
384  PangoFontDescription *desc = pango_font_description_from_string(DescriptionName().c_str());
385  pango_layout_set_font_description(layout, desc);
386  pango_font_description_free(desc);
387  }
388  pango_layout_set_text(layout, utf8_word, len);
389  PangoLayoutIter *run_iter = nullptr;
390  { // Fontconfig caches some information here that is not freed before exit.
392  run_iter = pango_layout_get_iter(layout);
393  }
394  do {
395  PangoLayoutRun *run = pango_layout_iter_get_run_readonly(run_iter);
396  if (!run) {
397  tlog(2, "Found end of line nullptr run marker\n");
398  continue;
399  }
400  PangoGlyph dotted_circle_glyph;
401  PangoFont *font = run->item->analysis.font;
402 
403  dotted_circle_glyph = get_glyph(font, kDottedCircleGlyph);
404 
405  if (TLOG_IS_ON(2)) {
406  PangoFontDescription *desc = pango_font_describe(font);
407  char *desc_str = pango_font_description_to_string(desc);
408  tlog(2, "Desc of font in run: %s\n", desc_str);
409  g_free(desc_str);
410  pango_font_description_free(desc);
411  }
412 
413  PangoGlyphItemIter cluster_iter;
414  gboolean have_cluster;
415  for (have_cluster = pango_glyph_item_iter_init_start(&cluster_iter, run, utf8_word);
416  have_cluster && !bad_glyph;
417  have_cluster = pango_glyph_item_iter_next_cluster(&cluster_iter)) {
418  const int start_byte_index = cluster_iter.start_index;
419  const int end_byte_index = cluster_iter.end_index;
420  int start_glyph_index = cluster_iter.start_glyph;
421  int end_glyph_index = cluster_iter.end_glyph;
422  std::string cluster_text =
423  std::string(utf8_word + start_byte_index, end_byte_index - start_byte_index);
424  if (graphemes) {
425  graphemes->push_back(cluster_text);
426  }
427  if (IsUTF8Whitespace(cluster_text.c_str())) {
428  tlog(2, "Skipping whitespace\n");
429  continue;
430  }
431  if (TLOG_IS_ON(2)) {
432  printf("start_byte=%d end_byte=%d start_glyph=%d end_glyph=%d ", start_byte_index,
433  end_byte_index, start_glyph_index, end_glyph_index);
434  }
435  for (int i = start_glyph_index, step = (end_glyph_index > start_glyph_index) ? 1 : -1;
436  !bad_glyph && i != end_glyph_index; i += step) {
437  const bool unknown_glyph =
438  (cluster_iter.glyph_item->glyphs->glyphs[i].glyph & PANGO_GLYPH_UNKNOWN_FLAG);
439  const bool illegal_glyph =
440  (cluster_iter.glyph_item->glyphs->glyphs[i].glyph == dotted_circle_glyph);
441  bad_glyph = unknown_glyph || illegal_glyph;
442  if (TLOG_IS_ON(2)) {
443  printf("(%d=%d)", cluster_iter.glyph_item->glyphs->glyphs[i].glyph, bad_glyph ? 1 : 0);
444  }
445  }
446  if (TLOG_IS_ON(2)) {
447  printf(" '%s'\n", cluster_text.c_str());
448  }
449  if (bad_glyph)
450  tlog(1, "Found illegal glyph!\n");
451  }
452  } while (!bad_glyph && pango_layout_iter_next_run(run_iter));
453 
454  pango_layout_iter_free(run_iter);
455  g_object_unref(context);
456  g_object_unref(layout);
457  if (bad_glyph && graphemes) {
458  graphemes->clear();
459  }
460  return !bad_glyph;
461 }
#define DISABLE_HEAP_LEAK_CHECK
#define TLOG_IS_ON(level)
Definition: tlog.h:43
#define tlog(level,...)
Definition: tlog.h:36
signed int char32
Definition: unichar.h:51
bool IsUTF8Whitespace(const char *text)
Definition: normstrngs.cpp:233
bool CoversUTF8Text(const char *utf8_text, int byte_length) const
std::string DescriptionName() const

◆ CoversUTF8Text()

bool tesseract::PangoFontInfo::CoversUTF8Text ( const char *  utf8_text,
int  byte_length 
) const

Definition at line 216 of file pango_font_info.cpp.

216  {
217  PangoFont *font = ToPangoFont();
218  if (font == nullptr) {
219  // Font not found.
220  return false;
221  }
222  PangoCoverage *coverage = pango_font_get_coverage(font, nullptr);
223  for (UNICHAR::const_iterator it = UNICHAR::begin(utf8_text, byte_length);
224  it != UNICHAR::end(utf8_text, byte_length); ++it) {
225  if (IsWhitespace(*it) || pango_is_zero_width(*it)) {
226  continue;
227  }
228  if (pango_coverage_get(coverage, *it) != PANGO_COVERAGE_EXACT) {
229  char tmp[5];
230  int len = it.get_utf8(tmp);
231  tmp[len] = '\0';
232  tlog(2, "'%s' (U+%x) not covered by font\n", tmp, *it);
233  pango_coverage_unref(coverage);
234  g_object_unref(font);
235  return false;
236  }
237  }
238  pango_coverage_unref(coverage);
239  g_object_unref(font);
240  return true;
241 }
bool IsWhitespace(const char32 ch)
Definition: normstrngs.cpp:228
static const_iterator begin(const char *utf8_str, int byte_length)
Definition: unichar.cpp:209
static const_iterator end(const char *utf8_str, int byte_length)
Definition: unichar.cpp:213

◆ DescriptionName()

std::string tesseract::PangoFontInfo::DescriptionName ( ) const

Definition at line 102 of file pango_font_info.cpp.

102  {
103  if (!desc_) {
104  return "";
105  }
106  char *desc_str = pango_font_description_to_string(desc_);
107  std::string desc_name(desc_str);
108  g_free(desc_str);
109  return desc_name;
110 }

◆ DropUncoveredChars()

int tesseract::PangoFontInfo::DropUncoveredChars ( std::string *  utf8_text) const

Definition at line 265 of file pango_font_info.cpp.

265  {
266  int num_dropped_chars = 0;
267  PangoFont *font = ToPangoFont();
268  if (font == nullptr) {
269  // Font not found, drop all characters.
270  num_dropped_chars = utf8_text->length();
271  utf8_text->clear();
272  return num_dropped_chars;
273  }
274  PangoCoverage *coverage = pango_font_get_coverage(font, nullptr);
275  // Maintain two iterators that point into the string. For space efficiency, we
276  // will repeatedly copy one covered UTF8 character from one to the other, and
277  // at the end resize the string to the right length.
278  char *out = const_cast<char *>(utf8_text->c_str());
279  const UNICHAR::const_iterator it_begin = UNICHAR::begin(utf8_text->c_str(), utf8_text->length());
280  const UNICHAR::const_iterator it_end = UNICHAR::end(utf8_text->c_str(), utf8_text->length());
281  for (UNICHAR::const_iterator it = it_begin; it != it_end;) {
282  // Skip bad utf-8.
283  if (!it.is_legal()) {
284  ++it; // One suitable error message will still be issued.
285  continue;
286  }
287  int unicode = *it;
288  int utf8_len = it.utf8_len();
289  const char *utf8_char = it.utf8_data();
290  // Move it forward before the data gets modified.
291  ++it;
292  if (!IsWhitespace(unicode) && !pango_is_zero_width(unicode) &&
293  pango_coverage_get(coverage, unicode) != PANGO_COVERAGE_EXACT) {
294  if (TLOG_IS_ON(2)) {
295  UNICHAR unichar(unicode);
296  char *str = unichar.utf8_str();
297  tlog(2, "'%s' (U+%x) not covered by font\n", str, unicode);
298  delete[] str;
299  }
300  ++num_dropped_chars;
301  continue;
302  }
303  my_strnmove(out, utf8_char, utf8_len);
304  out += utf8_len;
305  }
306  pango_coverage_unref(coverage);
307  g_object_unref(font);
308  utf8_text->resize(out - utf8_text->c_str());
309  return num_dropped_chars;
310 }

◆ family_name()

const std::string& tesseract::PangoFontInfo::family_name ( ) const
inline

Definition at line 100 of file pango_font_info.h.

100  {
101  return family_name_;
102  }

◆ font_size()

int tesseract::PangoFontInfo::font_size ( ) const
inline

Definition at line 104 of file pango_font_info.h.

104  {
105  return font_size_;
106  }

◆ font_type()

FontTypeEnum tesseract::PangoFontInfo::font_type ( ) const
inline

Definition at line 107 of file pango_font_info.h.

107  {
108  return font_type_;
109  }

◆ GetSpacingProperties()

bool tesseract::PangoFontInfo::GetSpacingProperties ( const std::string &  utf8_char,
int *  x_bearing,
int *  x_advance 
) const

Definition at line 312 of file pango_font_info.cpp.

313  {
314  // Convert to equivalent PangoFont structure
315  PangoFont *font = ToPangoFont();
316  if (!font) {
317  return false;
318  }
319  // Find the glyph index in the font for the supplied utf8 character.
320  int total_advance = 0;
321  int min_bearing = 0;
322  // Handle multi-unicode strings by reporting the left-most position of the
323  // x-bearing, and right-most position of the x-advance if the string were to
324  // be rendered.
325  const UNICHAR::const_iterator it_begin = UNICHAR::begin(utf8_char.c_str(), utf8_char.length());
326  const UNICHAR::const_iterator it_end = UNICHAR::end(utf8_char.c_str(), utf8_char.length());
327  for (UNICHAR::const_iterator it = it_begin; it != it_end; ++it) {
328  PangoGlyph glyph_index = get_glyph(font, *it);
329  if (!glyph_index) {
330  // Glyph for given unicode character doesn't exist in font.
331  g_object_unref(font);
332  return false;
333  }
334  // Find the ink glyph extents for the glyph
335  PangoRectangle ink_rect, logical_rect;
336  pango_font_get_glyph_extents(font, glyph_index, &ink_rect, &logical_rect);
337  pango_extents_to_pixels(&ink_rect, nullptr);
338  pango_extents_to_pixels(&logical_rect, nullptr);
339 
340  int bearing = total_advance + PANGO_LBEARING(ink_rect);
341  if (it == it_begin || bearing < min_bearing) {
342  min_bearing = bearing;
343  }
344  total_advance += PANGO_RBEARING(logical_rect);
345  }
346  *x_bearing = min_bearing;
347  *x_advance = total_advance;
348  g_object_unref(font);
349  return true;
350 }

◆ HardInitFontConfig()

void tesseract::PangoFontInfo::HardInitFontConfig ( const char *  fonts_dir,
const char *  cache_dir 
)
static

Definition at line 125 of file pango_font_info.cpp.

125  {
126  if (!cache_dir_.empty()) {
127  File::DeleteMatchingFiles(File::JoinPath(cache_dir_.c_str(), "*cache-?").c_str());
128  }
129  const int MAX_FONTCONF_FILESIZE = 1024;
130  char fonts_conf_template[MAX_FONTCONF_FILESIZE];
131  cache_dir_ = cache_dir;
132  fonts_dir_ = fonts_dir;
133  snprintf(fonts_conf_template, MAX_FONTCONF_FILESIZE,
134  "<?xml version=\"1.0\"?>\n"
135  "<!DOCTYPE fontconfig SYSTEM \"fonts.dtd\">\n"
136  "<fontconfig>\n"
137  "<dir>%s</dir>\n"
138  "<cachedir>%s</cachedir>\n"
139  "<config></config>\n"
140  "</fontconfig>\n",
141  fonts_dir, cache_dir);
142  std::string fonts_conf_file = File::JoinPath(cache_dir, "fonts.conf");
143  File::WriteStringToFileOrDie(fonts_conf_template, fonts_conf_file);
144 #ifdef _WIN32
145  std::string env("FONTCONFIG_PATH=");
146  env.append(cache_dir);
147  _putenv(env.c_str());
148  _putenv("LANG=en_US.utf8");
149 #else
150  setenv("FONTCONFIG_PATH", cache_dir, true);
151  // Fix the locale so that the reported font names are consistent.
152  setenv("LANG", "en_US.utf8", true);
153 #endif // _WIN32
154 
155  if (FcInitReinitialize() != FcTrue) {
156  tprintf("FcInitiReinitialize failed!!\n");
157  }
159  // Clear Pango's font cache too.
160  pango_cairo_font_map_set_default(nullptr);
161 }
static bool DeleteMatchingFiles(const char *pattern)
Definition: fileio.cpp:117
static std::string JoinPath(const std::string &prefix, const std::string &suffix)
Definition: fileio.cpp:84
static void WriteStringToFileOrDie(const std::string &str, const std::string &filename)
Definition: fileio.cpp:54

◆ ParseFontDescriptionName()

bool tesseract::PangoFontInfo::ParseFontDescriptionName ( const std::string &  name)

Definition at line 191 of file pango_font_info.cpp.

191  {
192  PangoFontDescription *desc = pango_font_description_from_string(name.c_str());
193  bool success = ParseFontDescription(desc);
194  pango_font_description_free(desc);
195  return success;
196 }

◆ resolution()

int tesseract::PangoFontInfo::resolution ( ) const
inline

Definition at line 111 of file pango_font_info.h.

111  {
112  return resolution_;
113  }

◆ set_resolution()

void tesseract::PangoFontInfo::set_resolution ( const int  resolution)
inline

Definition at line 114 of file pango_font_info.h.

114  {
115  resolution_ = resolution;
116  }

◆ SoftInitFontConfig()

void tesseract::PangoFontInfo::SoftInitFontConfig ( )
static

Definition at line 116 of file pango_font_info.cpp.

116  {
117  if (fonts_dir_.empty()) {
118  HardInitFontConfig(FLAGS_fonts_dir.c_str(), FLAGS_fontconfig_tmpdir.c_str());
119  }
120 }
static void HardInitFontConfig(const char *fonts_dir, const char *cache_dir)

Friends And Related Function Documentation

◆ FontUtils

friend class FontUtils
friend

Definition at line 119 of file pango_font_info.h.


The documentation for this class was generated from the following files: