tesseract  5.0.0
adaptive.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  ** Filename: adaptive.cpp
3  ** Purpose: Adaptive matcher.
4  ** Author: Dan Johnson
5  **
6  ** (c) Copyright Hewlett-Packard Company, 1988.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  ******************************************************************************/
17 
18 #include "adaptive.h"
19 
20 #include "classify.h"
21 
22 #include <cassert>
23 #include <cstdio>
24 
25 namespace tesseract {
26 
27 /*----------------------------------------------------------------------------
28  Public Code
29 ----------------------------------------------------------------------------*/
30 /*---------------------------------------------------------------------------*/
42  assert(Templates != nullptr);
43  assert(Class != nullptr);
44  assert(LegalClassId(ClassId));
45  assert(UnusedClassIdIn(Templates->Templates, ClassId));
46  assert(Class->NumPermConfigs == 0);
47 
48  auto IntClass = new INT_CLASS_STRUCT(1, 1);
49  AddIntClass(Templates->Templates, ClassId, IntClass);
50 
51  assert(Templates->Class[ClassId] == nullptr);
52  Templates->Class[ClassId] = Class;
53 
54 } /* AddAdaptedClass */
55 
56 /*---------------------------------------------------------------------------*/
57 
59  delete[] Ambigs;
60 }
61 
63  NumPermConfigs = 0;
64  MaxNumTimesSeen = 0;
66 
67  PermProtos = NewBitVector(MAX_NUM_PROTOS);
68  PermConfigs = NewBitVector(MAX_NUM_CONFIGS);
69  zero_all_bits(PermProtos, WordsInVectorOfSize(MAX_NUM_PROTOS));
70  zero_all_bits(PermConfigs, WordsInVectorOfSize(MAX_NUM_CONFIGS));
71 
72  for (int i = 0; i < MAX_NUM_CONFIGS; i++) {
73  TempConfigFor(this, i) = nullptr;
74  }
75 }
76 
78  for (int i = 0; i < MAX_NUM_CONFIGS; i++) {
79  if (ConfigIsPermanent(this, i) && PermConfigFor(this, i) != nullptr) {
80  delete PermConfigFor(this, i);
81  } else if (!ConfigIsPermanent(this, i) && TempConfigFor(this, i) != nullptr) {
82  delete TempConfigFor(this, i);
83  }
84  }
85  FreeBitVector(PermProtos);
86  FreeBitVector(PermConfigs);
87  auto list = TempProtos;
88  while (list != nullptr) {
89  delete reinterpret_cast<TEMP_PROTO_STRUCT *>(list->node);
90  list = pop(list);
91  }
92 }
93 
98  NumPermClasses = 0;
100 
101  /* Insert an empty class for each unichar id in unicharset */
102  for (unsigned i = 0; i < MAX_NUM_CLASSES; i++) {
103  Class[i] = nullptr;
104  if (i < unicharset.size()) {
105  AddAdaptedClass(this, new ADAPT_CLASS_STRUCT, i);
106  }
107  }
108 }
109 
111  for (unsigned i = 0; i < (Templates)->NumClasses; i++) {
112  delete Class[i];
113  }
114  delete Templates;
115 }
116 
117 // Returns FontinfoId of the given config of the given adapted class.
118 int Classify::GetFontinfoId(ADAPT_CLASS_STRUCT *Class, uint8_t ConfigId) {
119  return (ConfigIsPermanent(Class, ConfigId) ? PermConfigFor(Class, ConfigId)->FontinfoId
120  : TempConfigFor(Class, ConfigId)->FontinfoId);
121 }
122 
127 TEMP_CONFIG_STRUCT::TEMP_CONFIG_STRUCT(int maxProtoId, int fontinfoId) {
128  int NumProtos = maxProtoId + 1;
129 
130  Protos = NewBitVector(NumProtos);
131 
132  NumTimesSeen = 1;
133  MaxProtoId = maxProtoId;
134  ProtoVectorSize = WordsInVectorOfSize(NumProtos);
135  zero_all_bits(Protos, ProtoVectorSize);
136  FontinfoId = fontinfoId;
137 }
138 
140  FreeBitVector(Protos);
141 }
142 
143 /*---------------------------------------------------------------------------*/
154  INT_CLASS_STRUCT *IClass;
155  ADAPT_CLASS_STRUCT *AClass;
156 
157  fprintf(File, "\n\nSUMMARY OF ADAPTED TEMPLATES:\n\n");
158  fprintf(File, "Num classes = %d; Num permanent classes = %d\n\n", Templates->NumNonEmptyClasses,
159  Templates->NumPermClasses);
160  fprintf(File, " Id NC NPC NP NPP\n");
161  fprintf(File, "------------------------\n");
162 
163  for (unsigned i = 0; i < (Templates->Templates)->NumClasses; i++) {
164  IClass = Templates->Templates->Class[i];
165  AClass = Templates->Class[i];
166  if (!IsEmptyAdaptedClass(AClass)) {
167  fprintf(File, "%5u %s %3d %3d %3d %3zd\n", i, unicharset.id_to_unichar(i), IClass->NumConfigs,
168  AClass->NumPermConfigs, IClass->NumProtos,
169  IClass->NumProtos - AClass->TempProtos->size());
170  }
171  }
172  fprintf(File, "\n");
173 
174 } /* PrintAdaptedTemplates */
175 
176 /*---------------------------------------------------------------------------*/
187  int NumTempProtos;
188  int NumConfigs;
189  int i;
190  ADAPT_CLASS_STRUCT *Class;
191 
192  /* first read high level adapted class structure */
193  Class = new ADAPT_CLASS_STRUCT;
194  fp->FRead(Class, sizeof(ADAPT_CLASS_STRUCT), 1);
195 
196  /* then read in the definitions of the permanent protos and configs */
197  Class->PermProtos = NewBitVector(MAX_NUM_PROTOS);
198  Class->PermConfigs = NewBitVector(MAX_NUM_CONFIGS);
199  fp->FRead(Class->PermProtos, sizeof(uint32_t), WordsInVectorOfSize(MAX_NUM_PROTOS));
200  fp->FRead(Class->PermConfigs, sizeof(uint32_t), WordsInVectorOfSize(MAX_NUM_CONFIGS));
201 
202  /* then read in the list of temporary protos */
203  fp->FRead(&NumTempProtos, sizeof(int), 1);
204  Class->TempProtos = NIL_LIST;
205  for (i = 0; i < NumTempProtos; i++) {
206  auto TempProto = new TEMP_PROTO_STRUCT;
207  fp->FRead(TempProto, sizeof(TEMP_PROTO_STRUCT), 1);
208  Class->TempProtos = push_last(Class->TempProtos, TempProto);
209  }
210 
211  /* then read in the adapted configs */
212  fp->FRead(&NumConfigs, sizeof(int), 1);
213  for (i = 0; i < NumConfigs; i++) {
214  if (test_bit(Class->PermConfigs, i)) {
215  Class->Config[i].Perm = ReadPermConfig(fp);
216  } else {
217  Class->Config[i].Temp = ReadTempConfig(fp);
218  }
219  }
220 
221  return (Class);
222 
223 } /* ReadAdaptedClass */
224 
225 /*---------------------------------------------------------------------------*/
236  auto Templates = new ADAPT_TEMPLATES_STRUCT;
237 
238  /* first read the high level adaptive template struct */
239  fp->FRead(Templates, sizeof(ADAPT_TEMPLATES_STRUCT), 1);
240 
241  /* then read in the basic integer templates */
242  Templates->Templates = ReadIntTemplates(fp);
243 
244  /* then read in the adaptive info for each class */
245  for (unsigned i = 0; i < (Templates->Templates)->NumClasses; i++) {
246  Templates->Class[i] = ReadAdaptedClass(fp);
247  }
248  return (Templates);
249 
250 } /* ReadAdaptedTemplates */
251 
252 /*---------------------------------------------------------------------------*/
263  auto Config = new PERM_CONFIG_STRUCT;
264  uint8_t NumAmbigs;
265  fp->FRead(&NumAmbigs, sizeof(NumAmbigs), 1);
266  Config->Ambigs = new UNICHAR_ID[NumAmbigs + 1];
267  fp->FRead(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs);
268  Config->Ambigs[NumAmbigs] = -1;
269  fp->FRead(&(Config->FontinfoId), sizeof(int), 1);
270 
271  return (Config);
272 
273 } /* ReadPermConfig */
274 
275 /*---------------------------------------------------------------------------*/
286  auto Config = new TEMP_CONFIG_STRUCT;
287  fp->FRead(Config, sizeof(TEMP_CONFIG_STRUCT), 1);
288 
289  Config->Protos = NewBitVector(Config->ProtoVectorSize * BITSINLONG);
290  fp->FRead(Config->Protos, sizeof(uint32_t), Config->ProtoVectorSize);
291 
292  return (Config);
293 
294 } /* ReadTempConfig */
295 
296 /*---------------------------------------------------------------------------*/
307 void WriteAdaptedClass(FILE *File, ADAPT_CLASS_STRUCT *Class, int NumConfigs) {
308  /* first write high level adapted class structure */
309  fwrite(Class, sizeof(ADAPT_CLASS_STRUCT), 1, File);
310 
311  /* then write out the definitions of the permanent protos and configs */
312  fwrite(Class->PermProtos, sizeof(uint32_t), WordsInVectorOfSize(MAX_NUM_PROTOS), File);
313  fwrite(Class->PermConfigs, sizeof(uint32_t), WordsInVectorOfSize(MAX_NUM_CONFIGS), File);
314 
315  /* then write out the list of temporary protos */
316  uint32_t NumTempProtos = Class->TempProtos->size();
317  fwrite(&NumTempProtos, sizeof(NumTempProtos), 1, File);
318  auto TempProtos = Class->TempProtos;
319  iterate(TempProtos) {
320  void *proto = TempProtos->node;
321  fwrite(proto, sizeof(TEMP_PROTO_STRUCT), 1, File);
322  }
323 
324  /* then write out the adapted configs */
325  fwrite(&NumConfigs, sizeof(int), 1, File);
326  for (int i = 0; i < NumConfigs; i++) {
327  if (test_bit(Class->PermConfigs, i)) {
328  WritePermConfig(File, Class->Config[i].Perm);
329  } else {
330  WriteTempConfig(File, Class->Config[i].Temp);
331  }
332  }
333 
334 } /* WriteAdaptedClass */
335 
336 /*---------------------------------------------------------------------------*/
346  /* first write the high level adaptive template struct */
347  fwrite(Templates, sizeof(ADAPT_TEMPLATES_STRUCT), 1, File);
348 
349  /* then write out the basic integer templates */
351 
352  /* then write out the adaptive info for each class */
353  for (unsigned i = 0; i < (Templates->Templates)->NumClasses; i++) {
354  WriteAdaptedClass(File, Templates->Class[i], Templates->Templates->Class[i]->NumConfigs);
355  }
356 } /* WriteAdaptedTemplates */
357 
358 /*---------------------------------------------------------------------------*/
369  uint8_t NumAmbigs = 0;
370 
371  assert(Config != nullptr);
372  while (Config->Ambigs[NumAmbigs] > 0) {
373  ++NumAmbigs;
374  }
375 
376  fwrite(&NumAmbigs, sizeof(uint8_t), 1, File);
377  fwrite(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs, File);
378  fwrite(&(Config->FontinfoId), sizeof(int), 1, File);
379 } /* WritePermConfig */
380 
381 /*---------------------------------------------------------------------------*/
392  assert(Config != nullptr);
393 
394  fwrite(Config, sizeof(TEMP_CONFIG_STRUCT), 1, File);
395  fwrite(Config->Protos, sizeof(uint32_t), Config->ProtoVectorSize, File);
396 
397 } /* WriteTempConfig */
398 
399 } // namespace tesseract
#define IsEmptyAdaptedClass(Class)
Definition: adaptive.h:83
#define ConfigIsPermanent(Class, ConfigId)
Definition: adaptive.h:85
#define PermConfigFor(Class, ConfigId)
Definition: adaptive.h:93
#define TempConfigFor(Class, ConfigId)
Definition: adaptive.h:91
#define UnusedClassIdIn(T, c)
Definition: intproto.h:155
#define MAX_NUM_PROTOS
Definition: intproto.h:48
#define MAX_NUM_CONFIGS
Definition: intproto.h:47
#define LegalClassId(c)
Definition: intproto.h:154
#define test_bit(array, bit)
Definition: bitvec.h:59
const size_t BITSINLONG
Definition: bitvec.h:31
#define iterate(l)
Definition: oldlist.h:91
#define NIL_LIST
Definition: oldlist.h:75
#define MAX_NUM_CLASSES
Definition: matchdefs.h:31
void AddIntClass(INT_TEMPLATES_STRUCT *Templates, CLASS_ID ClassId, INT_CLASS_STRUCT *Class)
Definition: intproto.cpp:220
void WriteAdaptedClass(FILE *File, ADAPT_CLASS_STRUCT *Class, int NumConfigs)
Definition: adaptive.cpp:307
ADAPT_CLASS_STRUCT * ReadAdaptedClass(TFile *fp)
Definition: adaptive.cpp:186
void WritePermConfig(FILE *File, PERM_CONFIG_STRUCT *Config)
Definition: adaptive.cpp:368
PERM_CONFIG_STRUCT * ReadPermConfig(TFile *fp)
Definition: adaptive.cpp:262
CLUSTERCONFIG Config
int UNICHAR_ID
Definition: unichar.h:36
void AddAdaptedClass(ADAPT_TEMPLATES_STRUCT *Templates, ADAPT_CLASS_STRUCT *Class, CLASS_ID ClassId)
Definition: adaptive.cpp:41
TEMP_CONFIG_STRUCT * ReadTempConfig(TFile *fp)
Definition: adaptive.cpp:285
LIST push_last(LIST list, void *item)
Definition: oldlist.cpp:192
LIST pop(LIST list)
Definition: oldlist.cpp:166
void WriteTempConfig(FILE *File, TEMP_CONFIG_STRUCT *Config)
Definition: adaptive.cpp:391
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:34
UNICHARSET unicharset
Definition: ccutil.h:61
size_t FRead(void *buffer, size_t size, size_t count)
Definition: serialis.cpp:221
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:279
size_t size() const
Definition: unicharset.h:355
PERM_CONFIG_STRUCT * Perm
Definition: adaptive.h:52
TEMP_CONFIG_STRUCT * Temp
Definition: adaptive.h:51
ADAPTED_CONFIG Config[MAX_NUM_CONFIGS]
Definition: adaptive.h:64
ADAPT_CLASS_STRUCT * Class[MAX_NUM_CLASSES]
Definition: adaptive.h:75
INT_TEMPLATES_STRUCT * Templates
Definition: adaptive.h:72
void WriteIntTemplates(FILE *File, INT_TEMPLATES_STRUCT *Templates, const UNICHARSET &target_unicharset)
Definition: intproto.cpp:917
void WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES_STRUCT *Templates)
Definition: adaptive.cpp:345
INT_TEMPLATES_STRUCT * ReadIntTemplates(TFile *fp)
Definition: intproto.cpp:627
ADAPT_TEMPLATES_STRUCT * ReadAdaptedTemplates(TFile *File)
Definition: adaptive.cpp:235
int GetFontinfoId(ADAPT_CLASS_STRUCT *Class, uint8_t ConfigId)
Definition: adaptive.cpp:118
void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES_STRUCT *Templates)
Definition: adaptive.cpp:153
INT_CLASS_STRUCT * Class[MAX_NUM_CLASSES]
Definition: intproto.h:111