trainingsample.cpp
// Copyright 2010 Google Inc. All Rights Reserved.
// Author: rays@google.com (Ray Smith)
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

#define _USE_MATH_DEFINES // for M_PI
// Include automatically generated configuration file if running autoconf.
#ifdef HAVE_CONFIG_H
#  include "config_auto.h"
#endif

#include "trainingsample.h"

#include "helpers.h"
#include "intfeaturespace.h"
#include "normfeat.h"
#include "shapetable.h"

#include <allheaders.h>

#include <cmath> // for M_PI

namespace tesseract {

// Center of randomizing operations.
const int kRandomizingCenter = 128;

// Randomizing factors.
const int TrainingSample::kYShiftValues[kSampleYShiftSize] = {6, 3, -3, -6, 0};
const double TrainingSample::kScaleValues[kSampleScaleSize] = {1.0625, 0.9375, 1.0};

TrainingSample::~TrainingSample() {
  delete[] features_;
  delete[] micro_features_;
}

// WARNING! Serialize/DeSerialize do not save/restore the "cache" data
// members, which is mostly the mapped features, and the weight.
// It is assumed these can all be reconstructed from what is saved.
// Writes to the given file. Returns false in case of error.
bool TrainingSample::Serialize(FILE *fp) const {
  if (fwrite(&class_id_, sizeof(class_id_), 1, fp) != 1) {
    return false;
  }
  if (fwrite(&font_id_, sizeof(font_id_), 1, fp) != 1) {
    return false;
  }
  if (fwrite(&page_num_, sizeof(page_num_), 1, fp) != 1) {
    return false;
  }
  if (!bounding_box_.Serialize(fp)) {
    return false;
  }
  if (fwrite(&num_features_, sizeof(num_features_), 1, fp) != 1) {
    return false;
  }
  if (fwrite(&num_micro_features_, sizeof(num_micro_features_), 1, fp) != 1) {
    return false;
  }
  if (fwrite(&outline_length_, sizeof(outline_length_), 1, fp) != 1) {
    return false;
  }
  if (fwrite(features_, sizeof(*features_), num_features_, fp) != num_features_) {
    return false;
  }
  if (fwrite(micro_features_, sizeof(*micro_features_), num_micro_features_, fp) !=
      num_micro_features_) {
    return false;
  }
  if (fwrite(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) != kNumCNParams) {
    return false;
  }
  if (fwrite(geo_feature_, sizeof(*geo_feature_), GeoCount, fp) != GeoCount) {
    return false;
  }
  return true;
}

// Creates from the given file. Returns nullptr in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
TrainingSample *TrainingSample::DeSerializeCreate(bool swap, FILE *fp) {
  auto *sample = new TrainingSample;
  if (sample->DeSerialize(swap, fp)) {
    return sample;
  }
  delete sample;
  return nullptr;
}

// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool TrainingSample::DeSerialize(bool swap, FILE *fp) {
  if (fread(&class_id_, sizeof(class_id_), 1, fp) != 1) {
    return false;
  }
  if (fread(&font_id_, sizeof(font_id_), 1, fp) != 1) {
    return false;
  }
  if (fread(&page_num_, sizeof(page_num_), 1, fp) != 1) {
    return false;
  }
  if (!bounding_box_.DeSerialize(swap, fp)) {
    return false;
  }
  if (fread(&num_features_, sizeof(num_features_), 1, fp) != 1) {
    return false;
  }
  if (fread(&num_micro_features_, sizeof(num_micro_features_), 1, fp) != 1) {
    return false;
  }
  if (fread(&outline_length_, sizeof(outline_length_), 1, fp) != 1) {
    return false;
  }
  if (swap) {
    ReverseN(&class_id_, sizeof(class_id_));
    ReverseN(&num_features_, sizeof(num_features_));
    ReverseN(&num_micro_features_, sizeof(num_micro_features_));
    ReverseN(&outline_length_, sizeof(outline_length_));
  }
  // Arbitrarily limit the number of elements to protect against bad data.
  if (num_features_ > UINT16_MAX) {
    return false;
  }
  if (num_micro_features_ > UINT16_MAX) {
    return false;
  }
  delete[] features_;
  features_ = new INT_FEATURE_STRUCT[num_features_];
  if (fread(features_, sizeof(*features_), num_features_, fp) != num_features_) {
    return false;
  }
  delete[] micro_features_;
  micro_features_ = new MicroFeature[num_micro_features_];
  if (fread(micro_features_, sizeof(*micro_features_), num_micro_features_, fp) !=
      num_micro_features_) {
    return false;
  }
  if (fread(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) != kNumCNParams) {
    return false;
  }
  if (fread(geo_feature_, sizeof(*geo_feature_), GeoCount, fp) != GeoCount) {
    return false;
  }
  return true;
}
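
// Usage sketch: round-trip a sample through Serialize/DeSerializeCreate via a
// temporary file. "src" is assumed to be a valid, fully populated sample; no
// byte swap is needed because the data is read back on the machine that wrote it.
#if 0
static TrainingSample *RoundTripSample(const TrainingSample *src) {
  FILE *fp = tmpfile();
  if (fp == nullptr || !src->Serialize(fp)) {
    if (fp != nullptr) {
      fclose(fp);
    }
    return nullptr;
  }
  rewind(fp);
  TrainingSample *copy = TrainingSample::DeSerializeCreate(false, fp);
  fclose(fp);
  return copy; // Caller owns the copy; the cache members are not restored.
}
#endif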

// Saves the given features into a TrainingSample.
TrainingSample *TrainingSample::CopyFromFeatures(const INT_FX_RESULT_STRUCT &fx_info,
                                                 const TBOX &bounding_box,
                                                 const INT_FEATURE_STRUCT *features,
                                                 int num_features) {
  auto *sample = new TrainingSample;
  sample->num_features_ = num_features;
  sample->features_ = new INT_FEATURE_STRUCT[num_features];
  sample->outline_length_ = fx_info.Length;
  memcpy(sample->features_, features, num_features * sizeof(features[0]));
  sample->geo_feature_[GeoBottom] = bounding_box.bottom();
  sample->geo_feature_[GeoTop] = bounding_box.top();
  sample->geo_feature_[GeoWidth] = bounding_box.width();

  // Generate the cn_feature_ from the fx_info.
  sample->cn_feature_[CharNormY] = MF_SCALE_FACTOR * (fx_info.Ymean - kBlnBaselineOffset);
  sample->cn_feature_[CharNormLength] = MF_SCALE_FACTOR * fx_info.Length / LENGTH_COMPRESSION;
  sample->cn_feature_[CharNormRx] = MF_SCALE_FACTOR * fx_info.Rx;
  sample->cn_feature_[CharNormRy] = MF_SCALE_FACTOR * fx_info.Ry;

  sample->features_are_indexed_ = false;
  sample->features_are_mapped_ = false;
  return sample;
}
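
// Usage sketch: build a sample directly from extracted int features. The
// fx_info values, feature coordinates and box below are made-up placeholders,
// and the sketch assumes INT_FX_RESULT_STRUCT's definition is visible here.
#if 0
static TrainingSample *MakeSampleFromFeatures() {
  INT_FX_RESULT_STRUCT fx_info;
  fx_info.Length = 120; // Total outline length; other fields are unused here.
  fx_info.Ymean = 140;
  fx_info.Rx = 20;
  fx_info.Ry = 30;
  INT_FEATURE_STRUCT features[2];
  features[0].X = 100; features[0].Y = 120; features[0].Theta = 64;
  features[1].X = 140; features[1].Y = 130; features[1].Theta = 192;
  TBOX box(10, 0, 60, 128); // left, bottom, right, top.
  return TrainingSample::CopyFromFeatures(fx_info, box, features, 2);
}
#endif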

// Returns the cn_feature as a FEATURE_STRUCT* needed by cntraining.
FEATURE_STRUCT *TrainingSample::GetCNFeature() const {
  auto feature = new FEATURE_STRUCT(&CharNormDesc);
  for (int i = 0; i < kNumCNParams; ++i) {
    feature->Params[i] = cn_feature_[i];
  }
  return feature;
}
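
// Usage sketch: hand the character-normalization feature to cntraining-style
// code, then release it (plain delete is assumed to pair with the new above).
#if 0
static void UseCNFeature(const TrainingSample &sample) {
  FEATURE_STRUCT *cn = sample.GetCNFeature();
  // ... consume cn->Params ...
  delete cn;
}
#endif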

// Constructs and returns a copy randomized by the method given by
// the randomizer index. If index is out of [0, kSampleRandomSize) then
// an exact copy is returned.
TrainingSample *TrainingSample::RandomizedCopy(int index) const {
  TrainingSample *sample = Copy();
  if (index >= 0 && index < kSampleRandomSize) {
    ++index; // Remove the first combination.
    const int yshift = kYShiftValues[index / kSampleScaleSize];
    double scaling = kScaleValues[index % kSampleScaleSize];
    for (uint32_t i = 0; i < num_features_; ++i) {
      double result = (features_[i].X - kRandomizingCenter) * scaling;
      result += kRandomizingCenter;
      sample->features_[i].X = ClipToRange<int>(result + 0.5, 0, UINT8_MAX);
      result = (features_[i].Y - kRandomizingCenter) * scaling;
      result += kRandomizingCenter + yshift;
      sample->features_[i].Y = ClipToRange<int>(result + 0.5, 0, UINT8_MAX);
    }
  }
  return sample;
}
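
// Usage sketch: each index in [0, kSampleRandomSize) selects one
// (y-shift, scale) pair from the tables at the top of this file, so looping
// over the range yields every perturbed variant of a sample.
#if 0
static void MakeRandomizedVariants(const TrainingSample &sample,
                                   std::vector<TrainingSample *> *variants) {
  for (int index = 0; index < kSampleRandomSize; ++index) {
    variants->push_back(sample.RandomizedCopy(index)); // Caller owns the copies.
  }
}
#endif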

// Constructs and returns an exact copy.
TrainingSample *TrainingSample::Copy() const {
  auto *sample = new TrainingSample;
  sample->class_id_ = class_id_;
  sample->font_id_ = font_id_;
  sample->weight_ = weight_;
  sample->sample_index_ = sample_index_;
  sample->num_features_ = num_features_;
  if (num_features_ > 0) {
    sample->features_ = new INT_FEATURE_STRUCT[num_features_];
    memcpy(sample->features_, features_, num_features_ * sizeof(features_[0]));
  }
  sample->num_micro_features_ = num_micro_features_;
  if (num_micro_features_ > 0) {
    sample->micro_features_ = new MicroFeature[num_micro_features_];
    memcpy(sample->micro_features_, micro_features_,
           num_micro_features_ * sizeof(micro_features_[0]));
  }
  memcpy(sample->cn_feature_, cn_feature_, sizeof(*cn_feature_) * kNumCNParams);
  memcpy(sample->geo_feature_, geo_feature_, sizeof(*geo_feature_) * GeoCount);
  return sample;
}

// Extracts the needed information from the CHAR_DESC_STRUCT.
void TrainingSample::ExtractCharDesc(int int_feature_type, int micro_type, int cn_type,
                                     int geo_type, CHAR_DESC_STRUCT *char_desc) {
  // Extract the INT features.
  delete[] features_;
  FEATURE_SET_STRUCT *char_features = char_desc->FeatureSets[int_feature_type];
  if (char_features == nullptr) {
    tprintf("Error: no features to train on of type %s\n", kIntFeatureType);
    num_features_ = 0;
    features_ = nullptr;
  } else {
    num_features_ = char_features->NumFeatures;
    features_ = new INT_FEATURE_STRUCT[num_features_];
    for (uint32_t f = 0; f < num_features_; ++f) {
      features_[f].X = static_cast<uint8_t>(char_features->Features[f]->Params[IntX]);
      features_[f].Y = static_cast<uint8_t>(char_features->Features[f]->Params[IntY]);
      features_[f].Theta = static_cast<uint8_t>(char_features->Features[f]->Params[IntDir]);
      features_[f].CP_misses = 0;
    }
  }
  // Extract the Micro features.
  delete[] micro_features_;
  char_features = char_desc->FeatureSets[micro_type];
  if (char_features == nullptr) {
    tprintf("Error: no features to train on of type %s\n", kMicroFeatureType);
    num_micro_features_ = 0;
    micro_features_ = nullptr;
  } else {
    num_micro_features_ = char_features->NumFeatures;
    micro_features_ = new MicroFeature[num_micro_features_];
    for (uint32_t f = 0; f < num_micro_features_; ++f) {
      for (int d = 0; d < (int)MicroFeatureParameter::MFCount; ++d) {
        micro_features_[f][d] = char_features->Features[f]->Params[d];
      }
    }
  }
  // Extract the CN feature.
  char_features = char_desc->FeatureSets[cn_type];
  if (char_features == nullptr) {
    tprintf("Error: no CN feature to train on.\n");
  } else {
    ASSERT_HOST(char_features->NumFeatures == 1);
    cn_feature_[CharNormY] = char_features->Features[0]->Params[CharNormY];
    cn_feature_[CharNormLength] = char_features->Features[0]->Params[CharNormLength];
    cn_feature_[CharNormRx] = char_features->Features[0]->Params[CharNormRx];
    cn_feature_[CharNormRy] = char_features->Features[0]->Params[CharNormRy];
  }
  // Extract the Geo feature.
  char_features = char_desc->FeatureSets[geo_type];
  if (char_features == nullptr) {
    tprintf("Error: no Geo feature to train on.\n");
  } else {
    ASSERT_HOST(char_features->NumFeatures == 1);
    geo_feature_[GeoBottom] = char_features->Features[0]->Params[GeoBottom];
    geo_feature_[GeoTop] = char_features->Features[0]->Params[GeoTop];
    geo_feature_[GeoWidth] = char_features->Features[0]->Params[GeoWidth];
  }
  features_are_indexed_ = false;
  features_are_mapped_ = false;
}

// Sets the mapped_features_ from the features_ using the provided
// feature_space to the indexed versions of the features.
void TrainingSample::IndexFeatures(const IntFeatureSpace &feature_space) {
  std::vector<int> indexed_features;
  feature_space.IndexAndSortFeatures(features_, num_features_, &mapped_features_);
  features_are_indexed_ = true;
  features_are_mapped_ = false;
}
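
// Usage sketch: quantize and index a sample's features. The bucket counts
// passed to Init are placeholders, and Init's (x, y, theta) bucket signature
// is an assumption about IntFeatureSpace rather than something this file defines.
#if 0
static void IndexSample(TrainingSample *sample) {
  IntFeatureSpace space;
  space.Init(16, 16, 16);
  sample->IndexFeatures(space);
}
#endif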

// Returns a pix representing the sample. (Int features only.)
Image TrainingSample::RenderToPix(const UNICHARSET *unicharset) const {
  Image pix = pixCreate(kIntFeatureExtent, kIntFeatureExtent, 1);
  for (uint32_t f = 0; f < num_features_; ++f) {
    int start_x = features_[f].X;
    int start_y = kIntFeatureExtent - features_[f].Y;
    double dx = cos((features_[f].Theta / 256.0) * 2.0 * M_PI - M_PI);
    double dy = -sin((features_[f].Theta / 256.0) * 2.0 * M_PI - M_PI);
    for (int i = 0; i <= 5; ++i) {
      int x = static_cast<int>(start_x + dx * i);
      int y = static_cast<int>(start_y + dy * i);
      if (x >= 0 && x < 256 && y >= 0 && y < 256) {
        pixSetPixel(pix, x, y, 1);
      }
    }
  }
  if (unicharset != nullptr) {
    pixSetText(pix, unicharset->id_to_unichar(class_id_));
  }
  return pix;
}
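
// Usage sketch: render the int features and save them as a PNG. Image converts
// to Pix* for Leptonica calls (as above); pix.destroy() is assumed to release
// the underlying Pix as elsewhere in the code base.
#if 0
static void SaveSampleRendering(const TrainingSample &sample, const UNICHARSET &unicharset,
                                const char *filename) {
  Image pix = sample.RenderToPix(&unicharset);
  pixWrite(filename, pix, IFF_PNG);
  pix.destroy();
}
#endif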

#ifndef GRAPHICS_DISABLED

// Displays the features in the given window with the given color.
void TrainingSample::DisplayFeatures(ScrollView::Color color, ScrollView *window) const {
  for (uint32_t f = 0; f < num_features_; ++f) {
    RenderIntFeature(window, &features_[f], color);
  }
}

#endif // !GRAPHICS_DISABLED

// Returns a pix of the original sample image. The pix is padded all round
// by padding wherever possible.
// The returned Pix must be pixDestroyed after use.
// If the input page_pix is nullptr, nullptr is returned.
Image TrainingSample::GetSamplePix(int padding, Image page_pix) const {
  if (page_pix == nullptr) {
    return nullptr;
  }
  int page_width = pixGetWidth(page_pix);
  int page_height = pixGetHeight(page_pix);
  TBOX padded_box = bounding_box();
  padded_box.pad(padding, padding);
  // Clip the padded_box to the limits of the page.
  TBOX page_box(0, 0, page_width, page_height);
  padded_box &= page_box;
  // Use the clipped padded_box (not the full page_box) as the clip rectangle.
  Box *box = boxCreate(padded_box.left(), page_height - padded_box.top(), padded_box.width(),
                       padded_box.height());
  Image sample_pix = pixClipRectangle(page_pix, box, nullptr);
  boxDestroy(&box);
  return sample_pix;
}
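
// Usage sketch: clip the padded sample region out of a full page image read
// from disk. The filename is a placeholder.
#if 0
static Image ClipSampleFromPage(const TrainingSample &sample, const char *page_filename) {
  Image page_pix = pixRead(page_filename);
  if (page_pix == nullptr) {
    return nullptr;
  }
  Image clipped = sample.GetSamplePix(10, page_pix); // 10 px of padding.
  page_pix.destroy();
  return clipped; // Caller must destroy the returned pix after use.
}
#endif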

} // namespace tesseract