tesseract  5.0.0
unicharset_training_utils.h
Go to the documentation of this file.
1 // File: unicharset_training_utils.h
3 // Description: Training utilities for UNICHARSET.
4 // Author: Ray Smith
5 // Created: Fri Oct 17 17:14:01 PDT 2014
6 //
7 // (C) Copyright 2014, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #ifndef TESSERACT_TRAINING_UNICHARSET_TRAINING_UTILS_H_
21 #define TESSERACT_TRAINING_UNICHARSET_TRAINING_UTILS_H_
22 
23 #include "export.h"
24 
25 #include <tesseract/export.h>
26 
27 #include <string>
28 
29 namespace tesseract {
30 
31 class STATS;
32 class UNICHARSET;
33 
34 // Helper that sets the character attribute properties and sets up the script table (presumably on *unicharset, the only non-const argument - confirm against the implementation).
35 // Does not set tops and bottoms. The two-argument inline overload declared below forwards here with decompose = false.
36 TESS_UNICHARSET_TRAINING_API
37 void SetupBasicProperties(bool report_errors, bool decompose, UNICHARSET *unicharset);
38 // Convenience overload: forwards to the three-argument SetupBasicProperties with decompose = false.
39 // Composing is the default behavior until it is proven that decomposing benefits at least one language.
40 inline void SetupBasicProperties(bool report_errors, UNICHARSET *unicharset) {
41  SetupBasicProperties(report_errors, false, unicharset);
42 }
43 // Helper that sets the properties from the universal script unicharsets, if any are found (presumably looked up under script_dir and applied to *unicharset - confirm against the implementation).
44 TESS_UNICHARSET_TRAINING_API
45 void SetScriptProperties(const std::string &script_dir, UNICHARSET *unicharset);
46 // Helper that returns the combined x-heights string. NOTE(review): unlike the neighboring declarations this one carries no TESS_UNICHARSET_TRAINING_API marker - confirm that not exporting it is intentional.
47 std::string GetXheightString(const std::string &script_dir, const UNICHARSET &unicharset);
48 
49 // Helper that sets the properties for the unicharset in input_unicharset_file and writes the
50 // result to output_unicharset_file. If an appropriate script unicharset can be found in the
51 // script_dir directory, then the tops and bottoms are expanded using that
52 // script unicharset.
53 // If non-empty (presumably meaning the output_xheights_file name - confirm against the implementation), x-height data for the fonts is written to output_xheights_file.
54 TESS_UNICHARSET_TRAINING_API
55 void SetPropertiesForInputFile(const std::string &script_dir,
56  const std::string &input_unicharset_file,
57  const std::string &output_unicharset_file,
58  const std::string &output_xheights_file);
59 
60 } // namespace tesseract.
61 
62 #endif // TESSERACT_TRAINING_UNICHARSET_TRAINING_UTILS_H_
std::string GetXheightString(const std::string &script_dir, const UNICHARSET &unicharset)
void SetupBasicProperties(bool report_errors, bool decompose, UNICHARSET *unicharset)
void SetPropertiesForInputFile(const std::string &script_dir, const std::string &input_unicharset_file, const std::string &output_unicharset_file, const std::string &output_xheights_file)
void SetScriptProperties(const std::string &script_dir, UNICHARSET *unicharset)