tesseract  5.0.0
dawg.h
Go to the documentation of this file.
1 /******************************************************************************
2  *
3  * File: dawg.h
4  * Description: Definition of a class that represents Directed Acyclic Word
5  * Graph (DAWG), functions to build and manipulate the DAWG.
6  * Author: Mark Seaman, SW Productivity
7  *
8  * (c) Copyright 1987, Hewlett-Packard Company.
9  ** Licensed under the Apache License, Version 2.0 (the "License");
10  ** you may not use this file except in compliance with the License.
11  ** You may obtain a copy of the License at
12  ** http://www.apache.org/licenses/LICENSE-2.0
13  ** Unless required by applicable law or agreed to in writing, software
14  ** distributed under the License is distributed on an "AS IS" BASIS,
15  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  ** See the License for the specific language governing permissions and
17  ** limitations under the License.
18  *
19  *****************************************************************************/
20 
21 #ifndef DICT_DAWG_H_
22 #define DICT_DAWG_H_
23 
24 /*----------------------------------------------------------------------
25  I n c l u d e s
26 ----------------------------------------------------------------------*/
27 
28 #include <cinttypes> // for PRId64
29 #include <functional> // for std::function
30 #include <memory>
31 #include "elst.h"
32 #include "params.h"
33 #include "ratngs.h"
34 
35 #ifndef __GNUC__
36 # ifdef _WIN32
37 # define NO_EDGE (int64_t)0xffffffffffffffffi64
38 # endif /*_WIN32*/
39 #else
40 # define NO_EDGE (int64_t)0xffffffffffffffffll
41 #endif /*__GNUC__*/
42 
43 namespace tesseract {
44 
45 class UNICHARSET;
46 
47 using EDGE_RECORD = uint64_t;
49 using EDGE_REF = int64_t;
50 using NODE_REF = int64_t;
51 using NODE_MAP = EDGE_REF *;
52 
53 struct NodeChild {
57  NodeChild() : unichar_id(INVALID_UNICHAR_ID), edge_ref(NO_EDGE) {}
58 };
59 
60 using NodeChildVector = std::vector<NodeChild>;
61 using SuccessorList = std::vector<int>;
62 using SuccessorListsVector = std::vector<SuccessorList *>;
63 
64 enum DawgType {
69 
70  DAWG_TYPE_COUNT // number of enum entries
71 };
72 
73 /*----------------------------------------------------------------------
74  C o n s t a n t s
75 ----------------------------------------------------------------------*/
76 
77 #define FORWARD_EDGE (int32_t)0
78 #define BACKWARD_EDGE (int32_t)1
79 #define MAX_NODE_EDGES_DISPLAY (int64_t)100
80 #define MARKER_FLAG (int64_t)1
81 #define DIRECTION_FLAG (int64_t)2
82 #define WERD_END_FLAG (int64_t)4
83 #define LETTER_START_BIT 0
84 #define NUM_FLAG_BITS 3
85 #define REFFORMAT "%" PRId64
86 
87 static const bool kDawgSuccessors[DAWG_TYPE_COUNT][DAWG_TYPE_COUNT] = {
88  {false, true, true, false}, // for DAWG_TYPE_PUNCTUATION
89  {true, false, false, false}, // for DAWG_TYPE_WORD
90  {true, false, false, false}, // for DAWG_TYPE_NUMBER
91  {false, false, false, false}, // for DAWG_TYPE_PATTERN
92 };
93 
94 static const char kWildcard[] = "*";
95 
96 /*----------------------------------------------------------------------
97  C l a s s e s a n d S t r u c t s
98 ----------------------------------------------------------------------*/
99 //
109 //
110 class TESS_API Dawg {
111 public:
113  static const int16_t kDawgMagicNumber = 42;
117  static const UNICHAR_ID kPatternUnicharID = 0;
118 
119  inline DawgType type() const {
120  return type_;
121  }
122  inline const std::string &lang() const {
123  return lang_;
124  }
125  inline PermuterType permuter() const {
126  return perm_;
127  }
128 
129  virtual ~Dawg();
130 
132  bool word_in_dawg(const WERD_CHOICE &word) const;
133 
134  // Returns true if the given word prefix is not contraindicated by the dawg.
135  // If requires_complete is true, then the exact complete word must be present.
136  bool prefix_in_dawg(const WERD_CHOICE &prefix, bool requires_complete) const;
137 
140  int check_for_words(const char *filename, const UNICHARSET &unicharset,
141  bool enable_wildcard) const;
142 
143  // For each word in the Dawg, call the given (permanent) callback with the
144  // text (UTF-8) version of the word.
145  void iterate_words(const UNICHARSET &unicharset,
146  std::function<void(const WERD_CHOICE *)> cb) const;
147 
148  // For each word in the Dawg, call the given (permanent) callback with the
149  // text (UTF-8) version of the word.
150  void iterate_words(const UNICHARSET &unicharset,
151  const std::function<void(const char *)> &cb) const;
152 
153  // Pure virtual function that should be implemented by the derived classes.
154 
156  virtual EDGE_REF edge_char_of(NODE_REF node, UNICHAR_ID unichar_id,
157  bool word_end) const = 0;
158 
161  virtual void unichar_ids_of(NODE_REF node, NodeChildVector *vec,
162  bool word_end) const = 0;
163 
166  virtual NODE_REF next_node(EDGE_REF edge_ref) const = 0;
167 
170  virtual bool end_of_word(EDGE_REF edge_ref) const = 0;
171 
173  virtual UNICHAR_ID edge_letter(EDGE_REF edge_ref) const = 0;
174 
177  virtual void print_node(NODE_REF node, int max_num_edges) const = 0;
178 
181  virtual void unichar_id_to_patterns(UNICHAR_ID unichar_id,
182  const UNICHARSET &unicharset,
183  std::vector<UNICHAR_ID> *vec) const {
184  (void)unichar_id;
185  (void)unicharset;
186  (void)vec;
187  }
188 
192  virtual EDGE_REF pattern_loop_edge(EDGE_REF edge_ref, UNICHAR_ID unichar_id,
193  bool word_end) const {
194  (void)edge_ref;
195  (void)unichar_id;
196  (void)word_end;
197  return false;
198  }
199 
200 protected:
201  Dawg(DawgType type, const std::string &lang, PermuterType perm,
202  int debug_level)
203  : lang_(lang),
204  type_(type),
205  perm_(perm),
206  unicharset_size_(0),
207  debug_level_(debug_level) {}
208 
210  inline NODE_REF next_node_from_edge_rec(const EDGE_RECORD &edge_rec) const {
211  return ((edge_rec & next_node_mask_) >> next_node_start_bit_);
212  }
214  inline bool marker_flag_from_edge_rec(const EDGE_RECORD &edge_rec) const {
215  return (edge_rec & (MARKER_FLAG << flag_start_bit_)) != 0;
216  }
218  inline int direction_from_edge_rec(const EDGE_RECORD &edge_rec) const {
219  return ((edge_rec & (DIRECTION_FLAG << flag_start_bit_))) ? BACKWARD_EDGE
220  : FORWARD_EDGE;
221  }
223  inline bool end_of_word_from_edge_rec(const EDGE_RECORD &edge_rec) const {
224  return (edge_rec & (WERD_END_FLAG << flag_start_bit_)) != 0;
225  }
228  const EDGE_RECORD &edge_rec) const {
229  return ((edge_rec & letter_mask_) >> LETTER_START_BIT);
230  }
232  inline void set_next_node_in_edge_rec(EDGE_RECORD *edge_rec, EDGE_REF value) {
233  *edge_rec &= (~next_node_mask_);
234  *edge_rec |= ((value << next_node_start_bit_) & next_node_mask_);
235  }
237  inline void set_marker_flag_in_edge_rec(EDGE_RECORD *edge_rec) {
238  *edge_rec |= (MARKER_FLAG << flag_start_bit_);
239  }
247  inline int given_greater_than_edge_rec(NODE_REF next_node, bool word_end,
248  UNICHAR_ID unichar_id,
249  const EDGE_RECORD &edge_rec) const {
250  UNICHAR_ID curr_unichar_id = unichar_id_from_edge_rec(edge_rec);
251  NODE_REF curr_next_node = next_node_from_edge_rec(edge_rec);
252  bool curr_word_end = end_of_word_from_edge_rec(edge_rec);
253  if (edge_rec_match(next_node, word_end, unichar_id, curr_next_node,
254  curr_word_end, curr_unichar_id)) {
255  return 0;
256  }
257  if (unichar_id > curr_unichar_id) {
258  return 1;
259  }
260  if (unichar_id == curr_unichar_id) {
261  if (next_node > curr_next_node) {
262  return 1;
263  }
264  if (next_node == curr_next_node) {
265  if (word_end > curr_word_end) {
266  return 1;
267  }
268  }
269  }
270  return -1;
271  }
275  inline bool edge_rec_match(NODE_REF next_node, bool word_end,
276  UNICHAR_ID unichar_id, NODE_REF other_next_node,
277  bool other_word_end,
278  UNICHAR_ID other_unichar_id) const {
279  return ((unichar_id == other_unichar_id) &&
280  (next_node == NO_EDGE || next_node == other_next_node) &&
281  (!word_end || (word_end == other_word_end)));
282  }
283 
286  void init(int unicharset_size);
287 
293  bool match_words(WERD_CHOICE *word, uint32_t index, NODE_REF node,
294  UNICHAR_ID wildcard) const;
295 
296  // Recursively iterate over all words in a dawg (see public iterate_words).
297  void iterate_words_rec(
298  const WERD_CHOICE &word_so_far, NODE_REF to_explore,
299  const std::function<void(const WERD_CHOICE *)> &cb) const;
300 
301  // Member Variables.
302  std::string lang_;
306  // Variables to construct various edge masks. Formerly:
307  // #define NEXT_EDGE_MASK (int64_t) 0xfffffff800000000i64
308  // #define FLAGS_MASK (int64_t) 0x0000000700000000i64
309  // #define LETTER_MASK (int64_t) 0x00000000ffffffffi64
310  uint64_t next_node_mask_ = 0;
311  uint64_t flags_mask_ = 0;
312  uint64_t letter_mask_ = 0;
314  int flag_start_bit_ = 0;
315  int next_node_start_bit_ = 0;
316  // Level of debug statements to print to stdout.
318 };
319 
320 //
321 // DawgPosition keeps track of where we are in the primary dawg we're searching
322 // as well as where we may be in the "punctuation dawg" which may provide
323 // surrounding context.
324 //
325 // Example:
326 // punctuation dawg -- space is the "pattern character"
327 // " " // no punctuation
328 // "' '" // leading and trailing apostrophes
329 // " '" // trailing apostrophe
330 // word dawg:
331 // "cat"
332 // "cab"
333 // "cat's"
334 //
335 // DawgPosition(dawg_index, dawg_ref, punc_index, punc_ref, rtp)
336 //
337 // DawgPosition(-1, NO_EDGE, p, pe, false)
338 // We're in the punctuation dawg, no other dawg has been started.
339 // (1) If there's a pattern edge as a punc dawg child of us,
340 // for each punc-following dawg starting with ch, produce:
341 // Result: DawgPosition(k, w, p', false)
342 // (2) If there's a valid continuation in the punc dawg, produce:
343 // Result: DawgPosition(-k, NO_EDGE, p', false)
344 //
345 // DawgPosition(k, w, -1, NO_EDGE, false)
346 // We're in dawg k. Going back to punctuation dawg is not an option.
347 // Follow ch in dawg k.
348 //
349 // DawgPosition(k, w, p, pe, false)
350 // We're in dawg k. Continue in dawg k and/or go back to the punc dawg.
351 // If ending, check that the punctuation dawg is also ok to end here.
352 //
353 // DawgPosition(k, w, p, pe true)
354 // We're back in the punctuation dawg. Continuing there is the only option.
355 struct DawgPosition {
356  DawgPosition() = default;
357  DawgPosition(int dawg_idx, EDGE_REF dawgref, int punc_idx, EDGE_REF puncref,
358  bool backtopunc)
359  : dawg_ref(dawgref),
360  punc_ref(puncref),
361  dawg_index(dawg_idx),
362  punc_index(punc_idx),
363  back_to_punc(backtopunc) {}
364  bool operator==(const DawgPosition &other) {
365  return dawg_index == other.dawg_index && dawg_ref == other.dawg_ref &&
366  punc_index == other.punc_index && punc_ref == other.punc_ref &&
367  back_to_punc == other.back_to_punc;
368  }
369 
370  EDGE_REF dawg_ref = NO_EDGE;
371  EDGE_REF punc_ref = NO_EDGE;
372  int8_t dawg_index = -1;
373  int8_t punc_index = -1;
374  // Have we returned to the punc dawg at the end of the word?
375  bool back_to_punc = false;
376 };
377 
378 class DawgPositionVector : public std::vector<DawgPosition> {
379 public:
383  inline bool add_unique(const DawgPosition &new_pos, bool debug,
384  const char *debug_msg) {
385  for (auto position : *this) {
386  if (position == new_pos) {
387  return false;
388  }
389  }
390  push_back(new_pos);
391  if (debug) {
392  tprintf("%s[%d, " REFFORMAT "] [punc: " REFFORMAT "%s]\n", debug_msg,
393  new_pos.dawg_index, new_pos.dawg_ref, new_pos.punc_ref,
394  new_pos.back_to_punc ? " returned" : "");
395  }
396  return true;
397  }
398 };
399 
400 //
407 //
408 class TESS_API SquishedDawg : public Dawg {
409 public:
410  SquishedDawg(DawgType type, const std::string &lang, PermuterType perm,
411  int debug_level)
412  : Dawg(type, lang, perm, debug_level) {}
413  SquishedDawg(const char *filename, DawgType type, const std::string &lang,
414  PermuterType perm, int debug_level)
415  : Dawg(type, lang, perm, debug_level) {
416  TFile file;
417  ASSERT_HOST(file.Open(filename, nullptr));
418  ASSERT_HOST(read_squished_dawg(&file));
419  num_forward_edges_in_node0 = num_forward_edges(0);
420  }
421  SquishedDawg(EDGE_ARRAY edges, int num_edges, DawgType type,
422  const std::string &lang, PermuterType perm, int unicharset_size,
423  int debug_level)
424  : Dawg(type, lang, perm, debug_level),
425  edges_(edges),
426  num_edges_(num_edges) {
427  init(unicharset_size);
428  num_forward_edges_in_node0 = num_forward_edges(0);
429  if (debug_level > 3) {
430  print_all("SquishedDawg:");
431  }
432  }
433  ~SquishedDawg() override;
434 
435  // Loads using the given TFile. Returns false on failure.
436  bool Load(TFile *fp) {
437  if (!read_squished_dawg(fp)) {
438  return false;
439  }
440  num_forward_edges_in_node0 = num_forward_edges(0);
441  return true;
442  }
443 
444  int NumEdges() {
445  return num_edges_;
446  }
447 
449  EDGE_REF edge_char_of(NODE_REF node, UNICHAR_ID unichar_id,
450  bool word_end) const override;
451 
455  bool word_end) const override {
456  EDGE_REF edge = node;
457  if (!edge_occupied(edge) || edge == NO_EDGE) {
458  return;
459  }
460  assert(forward_edge(edge)); // we don't expect any backward edges to
461  do { // be present when this function is called
462  if (!word_end || end_of_word_from_edge_rec(edges_[edge])) {
463  vec->push_back(NodeChild(unichar_id_from_edge_rec(edges_[edge]), edge));
464  }
465  } while (!last_edge(edge++));
466  }
467 
470  NODE_REF next_node(EDGE_REF edge) const override {
471  return next_node_from_edge_rec((edges_[edge]));
472  }
473 
476  bool end_of_word(EDGE_REF edge_ref) const override {
477  return end_of_word_from_edge_rec((edges_[edge_ref]));
478  }
479 
481  UNICHAR_ID edge_letter(EDGE_REF edge_ref) const override {
482  return unichar_id_from_edge_rec((edges_[edge_ref]));
483  }
484 
487  void print_node(NODE_REF node, int max_num_edges) const override;
488 
490  bool write_squished_dawg(TFile *file);
491 
494  bool write_squished_dawg(const char *filename) {
495  TFile file;
496  file.OpenWrite(nullptr);
497  if (!this->write_squished_dawg(&file)) {
498  tprintf("Error serializing %s\n", filename);
499  return false;
500  }
501  if (!file.CloseWrite(filename, nullptr)) {
502  tprintf("Error writing file %s\n", filename);
503  return false;
504  }
505  return true;
506  }
507 
508 private:
510  inline void set_next_node(EDGE_REF edge_ref, EDGE_REF value) {
511  set_next_node_in_edge_rec(&(edges_[edge_ref]), value);
512  }
514  inline void set_empty_edge(EDGE_REF edge_ref) {
515  (edges_[edge_ref] = next_node_mask_);
516  }
518  inline void clear_all_edges() {
519  for (int edge = 0; edge < num_edges_; edge++) {
520  set_empty_edge(edge);
521  }
522  }
524  inline void clear_marker_flag(EDGE_REF edge_ref) {
525  (edges_[edge_ref] &= ~(MARKER_FLAG << flag_start_bit_));
526  }
528  inline bool forward_edge(EDGE_REF edge_ref) const {
529  return (edge_occupied(edge_ref) &&
530  (FORWARD_EDGE == direction_from_edge_rec(edges_[edge_ref])));
531  }
533  inline bool backward_edge(EDGE_REF edge_ref) const {
534  return (edge_occupied(edge_ref) &&
535  (BACKWARD_EDGE == direction_from_edge_rec(edges_[edge_ref])));
536  }
538  inline bool edge_occupied(EDGE_REF edge_ref) const {
539  return (edges_[edge_ref] != next_node_mask_);
540  }
542  inline bool last_edge(EDGE_REF edge_ref) const {
543  return (edges_[edge_ref] & (MARKER_FLAG << flag_start_bit_)) != 0;
544  }
545 
547  int32_t num_forward_edges(NODE_REF node) const;
548 
550  bool read_squished_dawg(TFile *file);
551 
553  void print_edge(EDGE_REF edge) const;
554 
556  void print_all(const char *msg) {
557  tprintf("\n__________________________\n%s\n", msg);
558  for (int i = 0; i < num_edges_; ++i) {
559  print_edge(i);
560  }
561  tprintf("__________________________\n");
562  }
564  std::unique_ptr<EDGE_REF[]> build_node_map(int32_t *num_nodes) const;
565 
566  // Member variables.
567  EDGE_ARRAY edges_ = nullptr;
568  int32_t num_edges_ = 0;
569  int num_forward_edges_in_node0 = 0;
570 };
571 
572 } // namespace tesseract
573 
574 #endif // DICT_DAWG_H_
#define ASSERT_HOST(x)
Definition: errcode.h:59
#define WERD_END_FLAG
Definition: dawg.h:82
#define BACKWARD_EDGE
Definition: dawg.h:78
#define FORWARD_EDGE
Definition: dawg.h:77
#define MARKER_FLAG
Definition: dawg.h:80
#define REFFORMAT
Definition: dawg.h:85
#define LETTER_START_BIT
Definition: dawg.h:83
#define DIRECTION_FLAG
Definition: dawg.h:81
DawgType
Definition: dawg.h:64
@ DAWG_TYPE_NUMBER
Definition: dawg.h:67
@ DAWG_TYPE_PATTERN
Definition: dawg.h:68
@ DAWG_TYPE_WORD
Definition: dawg.h:66
@ DAWG_TYPE_PUNCTUATION
Definition: dawg.h:65
@ DAWG_TYPE_COUNT
Definition: dawg.h:70
int64_t EDGE_REF
Definition: dawg.h:49
std::vector< int > SuccessorList
Definition: dawg.h:61
uint64_t EDGE_RECORD
Definition: dawg.h:47
std::vector< SuccessorList * > SuccessorListsVector
Definition: dawg.h:62
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
int64_t NODE_REF
Definition: dawg.h:50
int UNICHAR_ID
Definition: unichar.h:36
std::vector< NodeChild > NodeChildVector
Definition: dawg.h:60
EDGE_REF * NODE_MAP
Definition: dawg.h:51
PermuterType
Definition: ratngs.h:231
EDGE_RECORD * EDGE_ARRAY
Definition: dawg.h:48
UNICHAR_ID unichar_id
Definition: dawg.h:54
EDGE_REF edge_ref
Definition: dawg.h:55
NodeChild(UNICHAR_ID id, EDGE_REF ref)
Definition: dawg.h:56
bool edge_rec_match(NODE_REF next_node, bool word_end, UNICHAR_ID unichar_id, NODE_REF other_next_node, bool other_word_end, UNICHAR_ID other_unichar_id) const
Definition: dawg.h:275
NODE_REF next_node_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns the next node visited by following this edge.
Definition: dawg.h:210
int given_greater_than_edge_rec(NODE_REF next_node, bool word_end, UNICHAR_ID unichar_id, const EDGE_RECORD &edge_rec) const
Definition: dawg.h:247
const std::string & lang() const
Definition: dawg.h:122
virtual void unichar_ids_of(NODE_REF node, NodeChildVector *vec, bool word_end) const =0
void set_next_node_in_edge_rec(EDGE_RECORD *edge_rec, EDGE_REF value)
Sets the next node link for this edge in the Dawg.
Definition: dawg.h:232
virtual bool end_of_word(EDGE_REF edge_ref) const =0
std::string lang_
Definition: dawg.h:302
Dawg(DawgType type, const std::string &lang, PermuterType perm, int debug_level)
Definition: dawg.h:201
DawgType type_
Definition: dawg.h:303
virtual void unichar_id_to_patterns(UNICHAR_ID unichar_id, const UNICHARSET &unicharset, std::vector< UNICHAR_ID > *vec) const
Definition: dawg.h:181
int unicharset_size_
Definition: dawg.h:313
bool end_of_word_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns true if this edge marks the end of a word.
Definition: dawg.h:223
bool marker_flag_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns the marker flag of this edge.
Definition: dawg.h:214
virtual void print_node(NODE_REF node, int max_num_edges) const =0
DawgType type() const
Definition: dawg.h:119
void set_marker_flag_in_edge_rec(EDGE_RECORD *edge_rec)
Sets this edge record to be the last one in a sequence of edges.
Definition: dawg.h:237
UNICHAR_ID unichar_id_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns UNICHAR_ID recorded in this edge.
Definition: dawg.h:227
virtual EDGE_REF pattern_loop_edge(EDGE_REF edge_ref, UNICHAR_ID unichar_id, bool word_end) const
Definition: dawg.h:192
PermuterType permuter() const
Definition: dawg.h:125
PermuterType perm_
Permuter code that should be used if the word is found in this Dawg.
Definition: dawg.h:305
int direction_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns the direction flag of this edge.
Definition: dawg.h:218
virtual EDGE_REF edge_char_of(NODE_REF node, UNICHAR_ID unichar_id, bool word_end) const =0
Returns the edge that corresponds to the letter out of this node.
int debug_level_
Definition: dawg.h:317
virtual ~Dawg()
virtual NODE_REF next_node(EDGE_REF edge_ref) const =0
virtual UNICHAR_ID edge_letter(EDGE_REF edge_ref) const =0
Returns UNICHAR_ID stored in the edge indicated by the given EDGE_REF.
EDGE_REF punc_ref
Definition: dawg.h:371
DawgPosition(int dawg_idx, EDGE_REF dawgref, int punc_idx, EDGE_REF puncref, bool backtopunc)
Definition: dawg.h:357
bool operator==(const DawgPosition &other)
Definition: dawg.h:364
EDGE_REF dawg_ref
Definition: dawg.h:370
bool add_unique(const DawgPosition &new_pos, bool debug, const char *debug_msg)
Definition: dawg.h:383
bool end_of_word(EDGE_REF edge_ref) const override
Definition: dawg.h:476
UNICHAR_ID edge_letter(EDGE_REF edge_ref) const override
Returns UNICHAR_ID stored in the edge indicated by the given EDGE_REF.
Definition: dawg.h:481
void unichar_ids_of(NODE_REF node, NodeChildVector *vec, bool word_end) const override
Definition: dawg.h:454
SquishedDawg(DawgType type, const std::string &lang, PermuterType perm, int debug_level)
Definition: dawg.h:410
NODE_REF next_node(EDGE_REF edge) const override
Definition: dawg.h:470
SquishedDawg(const char *filename, DawgType type, const std::string &lang, PermuterType perm, int debug_level)
Definition: dawg.h:413
SquishedDawg(EDGE_ARRAY edges, int num_edges, DawgType type, const std::string &lang, PermuterType perm, int unicharset_size, int debug_level)
Definition: dawg.h:421
bool Load(TFile *fp)
Definition: dawg.h:436
bool write_squished_dawg(const char *filename)
Definition: dawg.h:494
static FILE * Open(const std::string &filename, const std::string &mode)
Definition: fileio.cpp:41
#define TESS_API
Definition: export.h:34