tesseract  5.0.0
ligature_table_test.cc
Go to the documentation of this file.
1 // (C) Copyright 2017, Google Inc.
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 // http://www.apache.org/licenses/LICENSE-2.0
6 // Unless required by applicable law or agreed to in writing, software
7 // distributed under the License is distributed on an "AS IS" BASIS,
8 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 // See the License for the specific language governing permissions and
10 // limitations under the License.
11 
12 #include "ligature_table.h"
13 #include "commandlineflags.h"
14 #include "fileio.h"
15 #include "include_gunit.h"
16 #include "pango_font_info.h"
17 
18 namespace tesseract {
19 
#if 0 // not with NFC normalization
// Sample strings for the (currently disabled) English ligature tests.
// Ligature code points are written as \u escapes so that editors or text
// pipelines that normalize Unicode cannot silently fold them back into plain
// letters, which would make all three constants identical.

// Plain text without any precomposed ligature; the third word starts with
// U+017F (long s).
const char kEngNonLigatureText[] = "fidelity effigy ſteep";
// Same as above text, but with "fi" in the first word (U+FB01), "ffi" in the
// second word (U+FB03) and "ſt" in the third word (U+FB05) replaced with
// their respective ligatures.
const char kEngLigatureText[] = "\uFB01delity e\uFB03gy \uFB05teep";
// Same as kEngLigatureText but with only "fi" (U+FB01) replaced, in both
// words. The test Verdana font does not support the "ffi" or "ſt" ligature,
// so those letters stay unligated here.
const char kRenderableEngLigatureText[] = "\uFB01delity ef\uFB01gy ſteep";
#endif
29 
30 static PangoFontMap *font_map;
31 
32 class LigatureTableTest : public ::testing::Test {
33 protected:
34  void SetUp() override {
36  if (!font_map) {
37  font_map = pango_cairo_font_map_new_for_font_type(CAIRO_FONT_TYPE_FT);
38  }
39  pango_cairo_font_map_set_default(PANGO_CAIRO_FONT_MAP(font_map));
40  }
41 
42  static void SetUpTestCase() {
43  static std::locale system_locale("");
44  std::locale::global(system_locale);
45 
46  FLAGS_fonts_dir = TESTING_DIR;
47  FLAGS_fontconfig_tmpdir = FLAGS_test_tmpdir;
49  PangoFontInfo::SoftInitFontConfig(); // init early
50  }
52 };
53 
54 TEST_F(LigatureTableTest, DoesFillLigatureTables) {
55  EXPECT_GT(lig_table_->norm_to_lig_table().size(), 0);
56  EXPECT_GT(lig_table_->lig_to_norm_table().size(), 0);
57 }
58 
59 #if 0 // not with NFC normalization
60 TEST_F(LigatureTableTest, DoesAddLigatures) {
61  EXPECT_STREQ(kEngLigatureText, lig_table_->AddLigatures(kEngNonLigatureText, nullptr).c_str());
62 }
63 
64 TEST_F(LigatureTableTest, DoesAddLigaturesWithSupportedFont) {
65  PangoFontInfo font;
66  EXPECT_TRUE(font.ParseFontDescriptionName("Verdana"));
67  printf("1:%s\n", kRenderableEngLigatureText);
68  printf("2:%s\n", lig_table_->AddLigatures(kEngNonLigatureText, &font).c_str());
69  EXPECT_STREQ(kRenderableEngLigatureText,
70  lig_table_->AddLigatures(kEngNonLigatureText, &font).c_str());
71 }
72 
73 TEST_F(LigatureTableTest, DoesNotAddLigaturesWithUnsupportedFont) {
74  PangoFontInfo font;
75  EXPECT_TRUE(font.ParseFontDescriptionName("Lohit Hindi"));
76  EXPECT_STREQ(kEngNonLigatureText, lig_table_->AddLigatures(kEngNonLigatureText, &font).c_str());
77 }
78 
79 TEST_F(LigatureTableTest, DoesRemoveLigatures) {
80  EXPECT_STREQ(kEngNonLigatureText, lig_table_->RemoveLigatures(kEngLigatureText).c_str());
81 }
82 #endif
83 
84 TEST_F(LigatureTableTest, TestCustomLigatures) {
85  const char *kTestCases[] = {
86  "act", "a\uE003", "publiſh", "publi\uE006", "ſince",
87  "\uE007nce", "aſleep", "a\uE008eep", "neceſſary", "nece\uE009ary",
88  };
89  for (size_t i = 0; i < countof(kTestCases); i += 2) {
90  EXPECT_STREQ(kTestCases[i + 1], lig_table_->AddLigatures(kTestCases[i], nullptr).c_str());
91  EXPECT_STREQ(kTestCases[i], lig_table_->RemoveLigatures(kTestCases[i + 1]).c_str());
92  EXPECT_STREQ(kTestCases[i], lig_table_->RemoveCustomLigatures(kTestCases[i + 1]).c_str());
93  }
94 }
95 
96 #if 0 // not with NFC normalization
97 TEST_F(LigatureTableTest, TestRemovesCustomLigatures) {
98  const char *kTestCases[] = {
99  "fiction",
100  "fi\uE003ion",
101  "fiction",
102  };
103  for (size_t i = 0; i < countof(kTestCases); i += 3) {
104  EXPECT_STREQ(kTestCases[i + 1], lig_table_->AddLigatures(kTestCases[i], nullptr).c_str());
105  EXPECT_STREQ(kTestCases[i + 2], lig_table_->RemoveCustomLigatures(kTestCases[i + 1]).c_str());
106  }
107 }
108 #endif
109 
110 } // namespace tesseract
const char kEngNonLigatureText[]
constexpr size_t countof(T const (&)[N]) noexcept
Definition: serialis.h:42
const char kEngLigatureText[]
TEST_F(EuroText, FastLatinOCR)
static LigatureTable * Get()
static void MakeTmpdir()
Definition: include_gunit.h:38