tesseract  5.0.0
dawg_cache.cpp
Go to the documentation of this file.
1 // File: dawg_cache.cpp
3 // Description: A class that knows about loading and caching dawgs.
4 // Author: David Eger
5 //
6 // (C) Copyright 2012, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
18 
19 #include "dawg_cache.h"
20 
21 #include "dawg.h"
22 #include "object_cache.h"
23 #include "tessdatamanager.h"
24 
25 namespace tesseract {
26 
27 struct DawgLoader {
28  DawgLoader(const std::string &lang, TessdataType tessdata_dawg_type, int dawg_debug_level,
29  TessdataManager *data_file)
30  : lang_(lang)
31  , data_file_(data_file)
32  , tessdata_dawg_type_(tessdata_dawg_type)
33  , dawg_debug_level_(dawg_debug_level) {}
34 
35  Dawg *Load();
36 
37  std::string lang_;
41 };
42 
43 Dawg *DawgCache::GetSquishedDawg(const std::string &lang, TessdataType tessdata_dawg_type,
44  int debug_level, TessdataManager *data_file) {
45  std::string data_id = data_file->GetDataFileName();
46  data_id += kTessdataFileSuffixes[tessdata_dawg_type];
47  DawgLoader loader(lang, tessdata_dawg_type, debug_level, data_file);
48  return dawgs_.Get(data_id, std::bind(&DawgLoader::Load, &loader));
49 }
50 
52  TFile fp;
54  return nullptr;
55  }
56  DawgType dawg_type;
57  PermuterType perm_type;
58  switch (tessdata_dawg_type_) {
59  case TESSDATA_PUNC_DAWG:
61  dawg_type = DAWG_TYPE_PUNCTUATION;
62  perm_type = PUNC_PERM;
63  break;
66  dawg_type = DAWG_TYPE_WORD;
67  perm_type = SYSTEM_DAWG_PERM;
68  break;
71  dawg_type = DAWG_TYPE_NUMBER;
72  perm_type = NUMBER_PERM;
73  break;
75  dawg_type = DAWG_TYPE_WORD; // doesn't actually matter
76  perm_type = COMPOUND_PERM; // doesn't actually matter
77  break;
79  dawg_type = DAWG_TYPE_WORD;
80  perm_type = SYSTEM_DAWG_PERM;
81  break;
82  case TESSDATA_FREQ_DAWG:
83  dawg_type = DAWG_TYPE_WORD;
84  perm_type = FREQ_DAWG_PERM;
85  break;
86  default:
87  return nullptr;
88  }
89  auto *retval = new SquishedDawg(dawg_type, lang_, perm_type, dawg_debug_level_);
90  if (retval->Load(&fp)) {
91  return retval;
92  }
93  delete retval;
94  return nullptr;
95 }
96 
97 } // namespace tesseract
DawgType
Definition: dawg.h:64
@ DAWG_TYPE_NUMBER
Definition: dawg.h:67
@ DAWG_TYPE_WORD
Definition: dawg.h:66
@ DAWG_TYPE_PUNCTUATION
Definition: dawg.h:65
@ TESSDATA_UNAMBIG_DAWG
@ TESSDATA_LSTM_SYSTEM_DAWG
@ TESSDATA_NUMBER_DAWG
@ TESSDATA_LSTM_PUNC_DAWG
@ TESSDATA_BIGRAM_DAWG
@ TESSDATA_LSTM_NUMBER_DAWG
@ TESSDATA_SYSTEM_DAWG
PermuterType
Definition: ratngs.h:231
@ SYSTEM_DAWG_PERM
Definition: ratngs.h:240
@ NUMBER_PERM
Definition: ratngs.h:238
@ COMPOUND_PERM
Definition: ratngs.h:244
@ PUNC_PERM
Definition: ratngs.h:233
@ FREQ_DAWG_PERM
Definition: ratngs.h:243
bool GetComponent(TessdataType type, TFile *fp)
const std::string & GetDataFileName() const
DawgLoader(const std::string &lang, TessdataType tessdata_dawg_type, int dawg_debug_level, TessdataManager *data_file)
Definition: dawg_cache.cpp:28
TessdataType tessdata_dawg_type_
Definition: dawg_cache.cpp:39
TessdataManager * data_file_
Definition: dawg_cache.cpp:38
Dawg * GetSquishedDawg(const std::string &lang, TessdataType tessdata_dawg_type, int debug_level, TessdataManager *data_file)
Definition: dawg_cache.cpp:43