tesseract  5.0.0
networkio.cpp
Go to the documentation of this file.
1 // File: networkio.cpp
3 // Description: Network input/output data, allowing float/int implementations.
4 // Author: Ray Smith
5 //
6 // (C) Copyright 2014, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
17 
#include "networkio.h"

#include <cfloat>  // for FLT_MAX
#include <cmath>
#include <cstdlib> // for std::abs
#include <cstring> // for std::memcpy, std::memset

#include <allheaders.h>
#include "functions.h"
#include "statistc.h"
#include "tprintf.h"
26 
27 namespace tesseract {
28 
// Minimum value to output for certainty (certainties are log-probabilities,
// see NetworkIO::ProbToCertainty).
const float kMinCertainty = -20.0f;
// Probability corresponding to kMinCertainty, i.e. exp(kMinCertainty).
const float kMinProb = std::exp(kMinCertainty);
33 
34 // Resizes to a specific size as a 2-d temp buffer. No batches, no y-dim.
35 void NetworkIO::Resize2d(bool int_mode, int width, int num_features) {
36  stride_map_ = StrideMap();
37  int_mode_ = int_mode;
38  if (int_mode_) {
39  i_.ResizeNoInit(width, num_features, GetPadding(num_features));
40  } else {
41  f_.ResizeNoInit(width, num_features);
42  }
43 }
44 
45 // Resizes to a specific stride_map.
46 void NetworkIO::ResizeToMap(bool int_mode, const StrideMap &stride_map, int num_features) {
47  // If this method crashes with this == nullptr,
48  // it most likely got here through an uninitialized scratch element,
49  // ie call NetworkScratch::IO::Resizexxx() not NetworkIO::Resizexxx()!!
50  stride_map_ = stride_map;
51  int_mode_ = int_mode;
52  if (int_mode_) {
53  i_.ResizeNoInit(stride_map.Width(), num_features, GetPadding(num_features));
54  } else {
55  f_.ResizeNoInit(stride_map.Width(), num_features);
56  }
58 }
59 
60 // Shrinks image size by x_scale,y_scale, and use given number of features.
61 void NetworkIO::ResizeScaled(const NetworkIO &src, int x_scale, int y_scale, int num_features) {
62  StrideMap stride_map = src.stride_map_;
63  stride_map.ScaleXY(x_scale, y_scale);
64  ResizeToMap(src.int_mode_, stride_map, num_features);
65 }
66 
67 // Resizes to just 1 x-coord, whatever the input.
68 void NetworkIO::ResizeXTo1(const NetworkIO &src, int num_features) {
69  StrideMap stride_map = src.stride_map_;
71  ResizeToMap(src.int_mode_, stride_map, num_features);
72 }
73 
74 // Initialize all the array to zero.
76  int width = Width();
77  // Zero out the everything. Column-by-column in case it is aligned.
78  for (int t = 0; t < width; ++t) {
79  ZeroTimeStep(t);
80  }
81 }
82 
83 // Initializes to zero all elements of the array that do not correspond to
84 // valid image positions. (If a batch of different-sized images are packed
85 // together, then there will be padding pixels.)
87  int num_features = NumFeatures();
88  int full_width = stride_map_.Size(FD_WIDTH);
89  int full_height = stride_map_.Size(FD_HEIGHT);
90  StrideMap::Index b_index(stride_map_);
91  do {
92  int end_x = b_index.MaxIndexOfDim(FD_WIDTH) + 1;
93  if (end_x < full_width) {
94  // The width is small, so fill for every valid y.
95  StrideMap::Index y_index(b_index);
96  int fill_size = num_features * (full_width - end_x);
97  do {
98  StrideMap::Index z_index(y_index);
99  z_index.AddOffset(end_x, FD_WIDTH);
100  if (int_mode_) {
101  ZeroVector(fill_size, i_[z_index.t()]);
102  } else {
103  ZeroVector(fill_size, f_[z_index.t()]);
104  }
105  } while (y_index.AddOffset(1, FD_HEIGHT));
106  }
107  int end_y = b_index.MaxIndexOfDim(FD_HEIGHT) + 1;
108  if (end_y < full_height) {
109  // The height is small, so fill in the space in one go.
110  StrideMap::Index y_index(b_index);
111  y_index.AddOffset(end_y, FD_HEIGHT);
112  int fill_size = num_features * full_width * (full_height - end_y);
113  if (int_mode_) {
114  ZeroVector(fill_size, i_[y_index.t()]);
115  } else {
116  ZeroVector(fill_size, f_[y_index.t()]);
117  }
118  }
119  } while (b_index.AddOffset(1, FD_BATCH));
120 }
121 
122 // Helper computes a black point and white point to contrast-enhance an image.
123 // The computation is based on the assumption that the image is of a single line
124 // of text, so a horizontal line through the middle of the image passes through
125 // at least some of it, so local minima and maxima are a good proxy for black
126 // and white pixel samples.
127 static void ComputeBlackWhite(Image pix, float *black, float *white) {
128  int width = pixGetWidth(pix);
129  int height = pixGetHeight(pix);
130  STATS mins(0, 256), maxes(0, 256);
131  if (width >= 3) {
132  int y = height / 2;
133  l_uint32 *line = pixGetData(pix) + pixGetWpl(pix) * y;
134  int prev = GET_DATA_BYTE(line, 0);
135  int curr = GET_DATA_BYTE(line, 1);
136  for (int x = 1; x + 1 < width; ++x) {
137  int next = GET_DATA_BYTE(line, x + 1);
138  if ((curr < prev && curr <= next) || (curr <= prev && curr < next)) {
139  // Local minimum.
140  mins.add(curr, 1);
141  }
142  if ((curr > prev && curr >= next) || (curr >= prev && curr > next)) {
143  // Local maximum.
144  maxes.add(curr, 1);
145  }
146  prev = curr;
147  curr = next;
148  }
149  }
150  if (mins.get_total() == 0) {
151  mins.add(0, 1);
152  }
153  if (maxes.get_total() == 0) {
154  maxes.add(255, 1);
155  }
156  *black = mins.ile(0.25);
157  *white = maxes.ile(0.75);
158 }
159 
160 // Sets up the array from the given image, using the currently set int_mode_.
161 // If the image width doesn't match the shape, the image is truncated or padded
162 // with noise to match.
163 void NetworkIO::FromPix(const StaticShape &shape, const Image pix, TRand *randomizer) {
164  std::vector<Image> pixes(1, pix);
165  FromPixes(shape, pixes, randomizer);
166 }
167 
168 // Sets up the array from the given set of images, using the currently set
169 // int_mode_. If the image width doesn't match the shape, the images are
170 // truncated or padded with noise to match.
171 void NetworkIO::FromPixes(const StaticShape &shape, const std::vector<Image> &pixes,
172  TRand *randomizer) {
173  int target_height = shape.height();
174  int target_width = shape.width();
175  std::vector<std::pair<int, int>> h_w_pairs;
176  for (auto pix : pixes) {
177  Image var_pix = pix;
178  int width = pixGetWidth(var_pix);
179  if (target_width != 0) {
180  width = target_width;
181  }
182  int height = pixGetHeight(var_pix);
183  if (target_height != 0) {
184  height = target_height;
185  }
186  h_w_pairs.emplace_back(height, width);
187  }
188  stride_map_.SetStride(h_w_pairs);
189  ResizeToMap(int_mode(), stride_map_, shape.depth());
190  // Iterate over the images again to copy the data.
191  for (size_t b = 0; b < pixes.size(); ++b) {
192  Image pix = pixes[b];
193  float black = 0.0f, white = 255.0f;
194  if (shape.depth() != 3) {
195  ComputeBlackWhite(pix, &black, &white);
196  }
197  float contrast = (white - black) / 2.0f;
198  if (contrast <= 0.0f) {
199  contrast = 1.0f;
200  }
201  if (shape.height() == 1) {
202  Copy1DGreyImage(b, pix, black, contrast, randomizer);
203  } else {
204  Copy2DImage(b, pix, black, contrast, randomizer);
205  }
206  }
207 }
208 
209 // Copies the given pix to *this at the given batch index, stretching and
210 // clipping the pixel values so that [black, black + 2*contrast] maps to the
211 // dynamic range of *this, ie [-1,1] for a float and (-127,127) for int.
212 // This is a 2-d operation in the sense that the output depth is the number
213 // of input channels, the height is the height of the image, and the width
214 // is the width of the image, or truncated/padded with noise if the width
215 // is a fixed size.
216 void NetworkIO::Copy2DImage(int batch, Image pix, float black, float contrast, TRand *randomizer) {
217  int width = pixGetWidth(pix);
218  int height = pixGetHeight(pix);
219  int wpl = pixGetWpl(pix);
220  StrideMap::Index index(stride_map_);
221  index.AddOffset(batch, FD_BATCH);
222  int t = index.t();
223  int target_height = stride_map_.Size(FD_HEIGHT);
224  int target_width = stride_map_.Size(FD_WIDTH);
225  int num_features = NumFeatures();
226  bool color = num_features == 3;
227  if (width > target_width) {
228  width = target_width;
229  }
230  uint32_t *line = pixGetData(pix);
231  for (int y = 0; y < target_height; ++y, line += wpl) {
232  int x = 0;
233  if (y < height) {
234  for (x = 0; x < width; ++x, ++t) {
235  if (color) {
236  int f = 0;
237  for (int c = COLOR_RED; c <= COLOR_BLUE; ++c) {
238  int pixel = GET_DATA_BYTE(line + x, c);
239  SetPixel(t, f++, pixel, black, contrast);
240  }
241  } else {
242  int pixel = GET_DATA_BYTE(line, x);
243  SetPixel(t, 0, pixel, black, contrast);
244  }
245  }
246  }
247  for (; x < target_width; ++x) {
248  Randomize(t++, 0, num_features, randomizer);
249  }
250  }
251 }
252 
253 // Copies the given pix to *this at the given batch index, as Copy2DImage
254 // above, except that the output depth is the height of the input image, the
255 // output height is 1, and the output width as for Copy2DImage.
256 // The image is thus treated as a 1-d set of vertical pixel strips.
257 void NetworkIO::Copy1DGreyImage(int batch, Image pix, float black, float contrast,
258  TRand *randomizer) {
259  int width = pixGetWidth(pix);
260  int height = pixGetHeight(pix);
261  ASSERT_HOST(height == NumFeatures());
262  int wpl = pixGetWpl(pix);
263  StrideMap::Index index(stride_map_);
264  index.AddOffset(batch, FD_BATCH);
265  int t = index.t();
266  int target_width = stride_map_.Size(FD_WIDTH);
267  if (width > target_width) {
268  width = target_width;
269  }
270  int x;
271  for (x = 0; x < width; ++x, ++t) {
272  for (int y = 0; y < height; ++y) {
273  uint32_t *line = pixGetData(pix) + wpl * y;
274  int pixel = GET_DATA_BYTE(line, x);
275  SetPixel(t, y, pixel, black, contrast);
276  }
277  }
278  for (; x < target_width; ++x) {
279  Randomize(t++, 0, height, randomizer);
280  }
281 }
282 
283 // Helper stores the pixel value in i_ or f_ according to int_mode_.
284 // t: is the index from the StrideMap corresponding to the current
285 // [batch,y,x] position
286 // f: is the index into the depth/channel
287 // pixel: the value of the pixel from the image (in one channel)
288 // black: the pixel value to map to the lowest of the range of *this
289 // contrast: the range of pixel values to stretch to half the range of *this.
290 void NetworkIO::SetPixel(int t, int f, int pixel, float black, float contrast) {
291  float float_pixel = (pixel - black) / contrast - 1.0f;
292  if (int_mode_) {
293  i_[t][f] = ClipToRange<int>(IntCastRounded((INT8_MAX + 1) * float_pixel), -INT8_MAX, INT8_MAX);
294  } else {
295  f_[t][f] = float_pixel;
296  }
297 }
298 
299 // Converts the array to a Pix. Must be pixDestroyed after use.
301  // Count the width of the image, and find the max multiplication factor.
302  int im_width = stride_map_.Size(FD_WIDTH);
303  int im_height = stride_map_.Size(FD_HEIGHT);
304  int num_features = NumFeatures();
305  int feature_factor = 1;
306  if (num_features == 3) {
307  // Special hack for color.
308  num_features = 1;
309  feature_factor = 3;
310  }
311  Image pix = pixCreate(im_width, im_height * num_features, 32);
312  StrideMap::Index index(stride_map_);
313  do {
314  int im_x = index.index(FD_WIDTH);
315  int top_im_y = index.index(FD_HEIGHT);
316  int im_y = top_im_y;
317  int t = index.t();
318  if (int_mode_) {
319  const int8_t *features = i_[t];
320  for (int y = 0; y < num_features; ++y, im_y += im_height) {
321  int pixel = features[y * feature_factor];
322  // 1 or 2 features use greyscale.
323  int red = ClipToRange<int>(pixel + 128, 0, 255);
324  int green = red, blue = red;
325  if (feature_factor == 3) {
326  // With 3 features assume RGB color.
327  green = ClipToRange<int>(features[y * feature_factor + 1] + 128, 0, 255);
328  blue = ClipToRange<int>(features[y * feature_factor + 2] + 128, 0, 255);
329  } else if (num_features > 3) {
330  // More than 3 features use false yellow/blue color, assuming a signed
331  // input in the range [-1,1].
332  red = abs(pixel) * 2;
333  if (pixel >= 0) {
334  green = red;
335  blue = 0;
336  } else {
337  blue = red;
338  green = red = 0;
339  }
340  }
341  pixSetPixel(pix, im_x, im_y,
342  (red << L_RED_SHIFT) | (green << L_GREEN_SHIFT) | (blue << L_BLUE_SHIFT));
343  }
344  } else {
345  const float *features = f_[t];
346  for (int y = 0; y < num_features; ++y, im_y += im_height) {
347  float pixel = features[y * feature_factor];
348  // 1 or 2 features use greyscale.
349  int red = ClipToRange<int>(IntCastRounded((pixel + 1.0f) * 127.5f), 0, 255);
350  int green = red, blue = red;
351  if (feature_factor == 3) {
352  // With 3 features assume RGB color.
353  pixel = features[y * feature_factor + 1];
354  green = ClipToRange<int>(IntCastRounded((pixel + 1.0f) * 127.5f), 0, 255);
355  pixel = features[y * feature_factor + 2];
356  blue = ClipToRange<int>(IntCastRounded((pixel + 1.0f) * 127.5f), 0, 255);
357  } else if (num_features > 3) {
358  // More than 3 features use false yellow/blue color, assuming a signed
359  // input in the range [-1,1].
360  red = ClipToRange<int>(IntCastRounded(std::fabs(pixel) * 255), 0, 255);
361  if (pixel >= 0) {
362  green = red;
363  blue = 0;
364  } else {
365  blue = red;
366  green = red = 0;
367  }
368  }
369  pixSetPixel(pix, im_x, im_y,
370  (red << L_RED_SHIFT) | (green << L_GREEN_SHIFT) | (blue << L_BLUE_SHIFT));
371  }
372  }
373  } while (index.Increment());
374  return pix;
375 }
376 
377 // Prints the first and last num timesteps of the array for each feature.
378 void NetworkIO::Print(int num) const {
379  int num_features = NumFeatures();
380  for (int y = 0; y < num_features; ++y) {
381  for (int t = 0; t < Width(); ++t) {
382  if (num == 0 || t < num || t + num >= Width()) {
383  if (int_mode_) {
384  tprintf(" %g", static_cast<float>(i_[t][y]) / INT8_MAX);
385  } else {
386  tprintf(" %g", f_[t][y]);
387  }
388  }
389  }
390  tprintf("\n");
391  }
392 }
393 
394 // Copies a single time step from src.
395 void NetworkIO::CopyTimeStepFrom(int dest_t, const NetworkIO &src, int src_t) {
396  ASSERT_HOST(int_mode_ == src.int_mode_);
397  if (int_mode_) {
398  memcpy(i_[dest_t], src.i_[src_t], i_.dim2() * sizeof(i_[0][0]));
399  } else {
400  memcpy(f_[dest_t], src.f_[src_t], f_.dim2() * sizeof(f_[0][0]));
401  }
402 }
403 
404 // Copies a part of single time step from src.
405 void NetworkIO::CopyTimeStepGeneral(int dest_t, int dest_offset, int num_features,
406  const NetworkIO &src, int src_t, int src_offset) {
407  ASSERT_HOST(int_mode_ == src.int_mode_);
408  if (int_mode_) {
409  memcpy(i_[dest_t] + dest_offset, src.i_[src_t] + src_offset, num_features * sizeof(i_[0][0]));
410  } else {
411  memcpy(f_[dest_t] + dest_offset, src.f_[src_t] + src_offset, num_features * sizeof(f_[0][0]));
412  }
413 }
414 
415 // Zeroes a single time step.
416 void NetworkIO::ZeroTimeStepGeneral(int t, int offset, int num_features) {
417  if (int_mode_) {
418  ZeroVector(num_features, i_[t] + offset);
419  } else {
420  ZeroVector(num_features, f_[t] + offset);
421  }
422 }
423 
424 // Sets the given range to random values.
425 void NetworkIO::Randomize(int t, int offset, int num_features, TRand *randomizer) {
426  if (int_mode_) {
427  int8_t *line = i_[t] + offset;
428  for (int i = 0; i < num_features; ++i) {
429  line[i] = IntCastRounded(randomizer->SignedRand(INT8_MAX));
430  }
431  } else {
432  // float mode.
433  float *line = f_[t] + offset;
434  for (int i = 0; i < num_features; ++i) {
435  line[i] = randomizer->SignedRand(1.0);
436  }
437  }
438 }
439 
440 // Helper returns the label and score of the best choice over a range.
441 int NetworkIO::BestChoiceOverRange(int t_start, int t_end, int not_this, int null_ch, float *rating,
442  float *certainty) const {
443  if (t_end <= t_start) {
444  return -1;
445  }
446  int max_char = -1;
447  float min_score = 0.0f;
448  for (int c = 0; c < NumFeatures(); ++c) {
449  if (c == not_this || c == null_ch) {
450  continue;
451  }
452  ScoresOverRange(t_start, t_end, c, null_ch, rating, certainty);
453  if (max_char < 0 || *rating < min_score) {
454  min_score = *rating;
455  max_char = c;
456  }
457  }
458  ScoresOverRange(t_start, t_end, max_char, null_ch, rating, certainty);
459  return max_char;
460 }
461 
462 // Helper returns the rating and certainty of the choice over a range in output.
463 void NetworkIO::ScoresOverRange(int t_start, int t_end, int choice, int null_ch, float *rating,
464  float *certainty) const {
465  ASSERT_HOST(!int_mode_);
466  *rating = 0.0f;
467  *certainty = 0.0f;
468  if (t_end <= t_start || t_end <= 0) {
469  return;
470  }
471  float ratings[3] = {0.0f, 0.0f, 0.0f};
472  float certs[3] = {0.0f, 0.0f, 0.0f};
473  for (int t = t_start; t < t_end; ++t) {
474  const float *line = f_[t];
475  float score = ProbToCertainty(line[choice]);
476  float zero = ProbToCertainty(line[null_ch]);
477  if (t == t_start) {
478  ratings[2] = FLT_MAX;
479  ratings[1] = -score;
480  certs[1] = score;
481  } else {
482  for (int i = 2; i >= 1; --i) {
483  if (ratings[i] > ratings[i - 1]) {
484  ratings[i] = ratings[i - 1];
485  certs[i] = certs[i - 1];
486  }
487  }
488  ratings[2] -= zero;
489  if (zero < certs[2]) {
490  certs[2] = zero;
491  }
492  ratings[1] -= score;
493  if (score < certs[1]) {
494  certs[1] = score;
495  }
496  }
497  ratings[0] -= zero;
498  if (zero < certs[0]) {
499  certs[0] = zero;
500  }
501  }
502  int best_i = ratings[2] < ratings[1] ? 2 : 1;
503  *rating = ratings[best_i] + t_end - t_start;
504  *certainty = certs[best_i];
505 }
506 
507 // Returns the index (label) of the best value at the given timestep,
508 // excluding not_this and not_that, and if not null, sets the score to the
509 // log of the corresponding value.
510 int NetworkIO::BestLabel(int t, int not_this, int not_that, float *score) const {
511  ASSERT_HOST(!int_mode_);
512  int best_index = -1;
513  float best_score = -FLT_MAX;
514  const float *line = f_[t];
515  for (int i = 0; i < f_.dim2(); ++i) {
516  if (line[i] > best_score && i != not_this && i != not_that) {
517  best_score = line[i];
518  best_index = i;
519  }
520  }
521  if (score != nullptr) {
522  *score = ProbToCertainty(best_score);
523  }
524  return best_index;
525 }
526 
527 // Returns the best start position out of [start, end) (into which all labels
528 // must fit) to obtain the highest cumulative score for the given labels.
529 int NetworkIO::PositionOfBestMatch(const std::vector<int> &labels, int start, int end) const {
530  int length = labels.size();
531  int last_start = end - length;
532  int best_start = -1;
533  TFloat best_score = 0;
534  for (int s = start; s <= last_start; ++s) {
535  TFloat score = ScoreOfLabels(labels, s);
536  if (score > best_score || best_start < 0) {
537  best_score = score;
538  best_start = s;
539  }
540  }
541  return best_start;
542 }
543 
544 // Returns the cumulative score of the given labels starting at start, and
545 // using one label per time-step.
546 TFloat NetworkIO::ScoreOfLabels(const std::vector<int> &labels, int start) const {
547  int length = labels.size();
548  TFloat score = 0;
549  for (int i = 0; i < length; ++i) {
550  score += f_(start + i, labels[i]);
551  }
552  return score;
553 }
554 
555 // Helper function sets all the outputs for a single timestep, such that
556 // label has value ok_score, and the other labels share 1 - ok_score.
557 void NetworkIO::SetActivations(int t, int label, float ok_score) {
558  ASSERT_HOST(!int_mode_);
559  int num_classes = NumFeatures();
560  float bad_score = (1.0f - ok_score) / (num_classes - 1);
561  float *targets = f_[t];
562  for (int i = 0; i < num_classes; ++i) {
563  targets[i] = bad_score;
564  }
565  targets[label] = ok_score;
566 }
567 
568 // Modifies the values, only if needed, so that the given label is
569 // the winner at the given time step t.
570 void NetworkIO::EnsureBestLabel(int t, int label) {
571  ASSERT_HOST(!int_mode_);
572  if (BestLabel(t, nullptr) != label) {
573  // Output value needs enhancing. Third all the other elements and add the
574  // remainder to best_label.
575  int num_classes = NumFeatures();
576  float *targets = f_[t];
577  for (int c = 0; c < num_classes; ++c) {
578  if (c == label) {
579  targets[c] += (1.0 - targets[c]) * (2 / 3.0);
580  } else {
581  targets[c] /= 3.0;
582  }
583  }
584  }
585 }
586 
587 // Helper function converts prob to certainty taking the minimum into account.
588 /* static */
589 float NetworkIO::ProbToCertainty(float prob) {
590  return prob > kMinProb ? std::log(prob) : kMinCertainty;
591 }
592 
593 // Returns true if there is any bad value that is suspiciously like a GT
594 // error. Assuming that *this is the difference(gradient) between target
595 // and forward output, returns true if there is a large negative value
596 // (correcting a very confident output) for which there is no corresponding
597 // positive value in an adjacent timestep for the same feature index. This
598 // allows the box-truthed samples to make fine adjustments to position while
599 // stopping other disagreements of confident output with ground truth.
600 bool NetworkIO::AnySuspiciousTruth(float confidence_thr) const {
601  int num_features = NumFeatures();
602  for (int t = 0; t < Width(); ++t) {
603  const float *features = f_[t];
604  for (int y = 0; y < num_features; ++y) {
605  float grad = features[y];
606  if (grad < -confidence_thr) {
607  // Correcting strong output. Check for movement.
608  if ((t == 0 || f_[t - 1][y] < confidence_thr / 2) &&
609  (t + 1 == Width() || f_[t + 1][y] < confidence_thr / 2)) {
610  return true; // No strong positive on either side.
611  }
612  }
613  }
614  }
615  return false;
616 }
617 
618 // Reads a single timestep to floats in the range [-1, 1].
619 void NetworkIO::ReadTimeStep(int t, TFloat *output) const {
620  if (int_mode_) {
621  const int8_t *line = i_[t];
622  for (int i = 0; i < i_.dim2(); ++i) {
623  output[i] = static_cast<TFloat>(line[i]) / INT8_MAX;
624  }
625  } else {
626  const float *line = f_[t];
627  for (int i = 0; i < f_.dim2(); ++i) {
628  output[i] = static_cast<TFloat>(line[i]);
629  }
630  }
631 }
632 
633 // Adds a single timestep to floats.
634 void NetworkIO::AddTimeStep(int t, TFloat *inout) const {
635  int num_features = NumFeatures();
636  if (int_mode_) {
637  const int8_t *line = i_[t];
638  for (int i = 0; i < num_features; ++i) {
639  inout[i] += static_cast<TFloat>(line[i]) / INT8_MAX;
640  }
641  } else {
642  const float *line = f_[t];
643  for (int i = 0; i < num_features; ++i) {
644  inout[i] += line[i];
645  }
646  }
647 }
648 
649 // Adds part of a single timestep to floats.
650 void NetworkIO::AddTimeStepPart(int t, int offset, int num_features, float *inout) const {
651  if (int_mode_) {
652  const int8_t *line = i_[t] + offset;
653  for (int i = 0; i < num_features; ++i) {
654  inout[i] += static_cast<float>(line[i]) / INT8_MAX;
655  }
656  } else {
657  const float *line = f_[t] + offset;
658  for (int i = 0; i < num_features; ++i) {
659  inout[i] += line[i];
660  }
661  }
662 }
663 
664 // Writes a single timestep from floats in the range [-1, 1].
665 void NetworkIO::WriteTimeStep(int t, const TFloat *input) {
666  WriteTimeStepPart(t, 0, NumFeatures(), input);
667 }
668 
669 // Writes a single timestep from floats in the range [-1, 1] writing only
670 // num_features elements of input to (*this)[t], starting at offset.
671 void NetworkIO::WriteTimeStepPart(int t, int offset, int num_features, const TFloat *input) {
672  if (int_mode_) {
673  int8_t *line = i_[t] + offset;
674  for (int i = 0; i < num_features; ++i) {
675  line[i] = ClipToRange<int>(IntCastRounded(input[i] * INT8_MAX), -INT8_MAX, INT8_MAX);
676  }
677  } else {
678  float *line = f_[t] + offset;
679  for (int i = 0; i < num_features; ++i) {
680  line[i] = static_cast<float>(input[i]);
681  }
682  }
683 }
684 
685 // Maxpools a single time step from src.
686 void NetworkIO::MaxpoolTimeStep(int dest_t, const NetworkIO &src, int src_t, int *max_line) {
687  ASSERT_HOST(int_mode_ == src.int_mode_);
688  if (int_mode_) {
689  int dim = i_.dim2();
690  int8_t *dest_line = i_[dest_t];
691  const int8_t *src_line = src.i_[src_t];
692  for (int i = 0; i < dim; ++i) {
693  if (dest_line[i] < src_line[i]) {
694  dest_line[i] = src_line[i];
695  max_line[i] = src_t;
696  }
697  }
698  } else {
699  int dim = f_.dim2();
700  float *dest_line = f_[dest_t];
701  const float *src_line = src.f_[src_t];
702  for (int i = 0; i < dim; ++i) {
703  if (dest_line[i] < src_line[i]) {
704  dest_line[i] = src_line[i];
705  max_line[i] = src_t;
706  }
707  }
708  }
709 }
710 
711 // Runs maxpool backward, using maxes to index timesteps in *this.
713  ASSERT_HOST(!int_mode_);
714  Zero();
715  StrideMap::Index index(fwd.stride_map_);
716  do {
717  int t = index.t();
718  const int *max_line = maxes[t];
719  const float *fwd_line = fwd.f_[t];
720  int num_features = fwd.f_.dim2();
721  for (int i = 0; i < num_features; ++i) {
722  f_[max_line[i]][i] = fwd_line[i];
723  }
724  } while (index.Increment());
725 }
726 
727 // Returns the min over time of the maxes over features of the outputs.
728 float NetworkIO::MinOfMaxes() const {
729  float min_max = 0.0f;
730  int width = Width();
731  int num_features = NumFeatures();
732  for (int t = 0; t < width; ++t) {
733  float max_value = -FLT_MAX;
734  if (int_mode_) {
735  const int8_t *column = i_[t];
736  for (int i = 0; i < num_features; ++i) {
737  if (column[i] > max_value) {
738  max_value = column[i];
739  }
740  }
741  } else {
742  const float *column = f_[t];
743  for (int i = 0; i < num_features; ++i) {
744  if (column[i] > max_value) {
745  max_value = column[i];
746  }
747  }
748  }
749  if (t == 0 || max_value < min_max) {
750  min_max = max_value;
751  }
752  }
753  return min_max;
754 }
755 
756 // Computes combined results for a combiner that chooses between an existing
757 // input and itself, with an additional output to indicate the choice.
758 void NetworkIO::CombineOutputs(const NetworkIO &base_output, const NetworkIO &combiner_output) {
759  int no = base_output.NumFeatures();
760  ASSERT_HOST(combiner_output.NumFeatures() == no + 1);
761  Resize(base_output, no);
762  int width = Width();
763  if (int_mode_) {
764  // Number of outputs from base and final result.
765  for (int t = 0; t < width; ++t) {
766  int8_t *out_line = i_[t];
767  const int8_t *base_line = base_output.i_[t];
768  const int8_t *comb_line = combiner_output.i_[t];
769  float base_weight = static_cast<float>(comb_line[no]) / INT8_MAX;
770  float boost_weight = 1.0f - base_weight;
771  for (int i = 0; i < no; ++i) {
772  out_line[i] = IntCastRounded(base_line[i] * base_weight + comb_line[i] * boost_weight);
773  }
774  }
775  } else {
776  for (int t = 0; t < width; ++t) {
777  float *out_line = f_[t];
778  const float *base_line = base_output.f_[t];
779  const float *comb_line = combiner_output.f_[t];
780  float base_weight = comb_line[no];
781  float boost_weight = 1.0f - base_weight;
782  for (int i = 0; i < no; ++i) {
783  out_line[i] = base_line[i] * base_weight + comb_line[i] * boost_weight;
784  }
785  }
786  }
787 }
788 
789 // Computes deltas for a combiner that chooses between 2 sets of inputs.
790 void NetworkIO::ComputeCombinerDeltas(const NetworkIO &fwd_deltas, const NetworkIO &base_output) {
791  ASSERT_HOST(!int_mode_);
792  // Compute the deltas for the combiner.
793  int width = Width();
794  int no = NumFeatures() - 1;
795  ASSERT_HOST(fwd_deltas.NumFeatures() == no);
796  ASSERT_HOST(base_output.NumFeatures() == no);
797  // Number of outputs from base and final result.
798  for (int t = 0; t < width; ++t) {
799  const float *delta_line = fwd_deltas.f_[t];
800  const float *base_line = base_output.f_[t];
801  float *comb_line = f_[t];
802  float base_weight = comb_line[no];
803  float boost_weight = 1.0f - base_weight;
804  float max_base_delta = 0.0;
805  for (int i = 0; i < no; ++i) {
806  // What did the combiner actually produce?
807  float output = base_line[i] * base_weight + comb_line[i] * boost_weight;
808  // Reconstruct the target from the delta.
809  float comb_target = delta_line[i] + output;
810  comb_line[i] = comb_target - comb_line[i];
811  float base_delta = std::fabs(comb_target - base_line[i]);
812  if (base_delta > max_base_delta) {
813  max_base_delta = base_delta;
814  }
815  }
816  if (max_base_delta >= 0.5) {
817  // The base network got it wrong. The combiner should output the right
818  // answer and 0 for the base network.
819  comb_line[no] = 0.0 - base_weight;
820  } else {
821  // The base network was right. The combiner should flag that.
822  for (int i = 0; i < no; ++i) {
823  // All other targets are 0.
824  if (comb_line[i] > 0.0) {
825  comb_line[i] -= 1.0;
826  }
827  }
828  comb_line[no] = 1.0 - base_weight;
829  }
830  }
831 }
832 
833 // Copies the array checking that the types match.
834 void NetworkIO::CopyAll(const NetworkIO &src) {
835  ASSERT_HOST(src.int_mode_ == int_mode_);
836  f_ = src.f_;
837 }
838 
839 // Checks that both are floats and adds the src array to *this.
841  ASSERT_HOST(!int_mode_);
842  ASSERT_HOST(!src.int_mode_);
843  f_ += src.f_;
844 }
845 
846 // Subtracts the array from a float array. src must also be float.
848  ASSERT_HOST(!int_mode_);
849  ASSERT_HOST(!src.int_mode_);
850  f_ -= src.f_;
851 }
852 
853 // Copies src to *this, with maxabs normalization to match scale.
854 void NetworkIO::CopyWithNormalization(const NetworkIO &src, const NetworkIO &scale) {
855  ASSERT_HOST(!int_mode_);
856  ASSERT_HOST(!src.int_mode_);
857  ASSERT_HOST(!scale.int_mode_);
858  float src_max = src.f_.MaxAbs();
859  ASSERT_HOST(std::isfinite(src_max));
860  float scale_max = scale.f_.MaxAbs();
861  ASSERT_HOST(std::isfinite(scale_max));
862  if (src_max > 0.0f) {
863  float factor = scale_max / src_max;
864  for (int t = 0; t < src.Width(); ++t) {
865  const float *src_ptr = src.f_[t];
866  float *dest_ptr = f_[t];
867  for (int i = 0; i < src.f_.dim2(); ++i) {
868  dest_ptr[i] = src_ptr[i] * factor;
869  }
870  }
871  } else {
872  f_.Clear();
873  }
874 }
875 
876 // Copies src to *this with independent reversal of the y dimension.
878  int num_features = src.NumFeatures();
879  Resize(src, num_features);
880  StrideMap::Index b_index(src.stride_map_);
881  do {
882  int width = b_index.MaxIndexOfDim(FD_WIDTH) + 1;
883  StrideMap::Index fwd_index(b_index);
884  StrideMap::Index rev_index(b_index);
885  rev_index.AddOffset(rev_index.MaxIndexOfDim(FD_HEIGHT), FD_HEIGHT);
886  do {
887  int fwd_t = fwd_index.t();
888  int rev_t = rev_index.t();
889  for (int x = 0; x < width; ++x) {
890  CopyTimeStepFrom(rev_t++, src, fwd_t++);
891  }
892  } while (fwd_index.AddOffset(1, FD_HEIGHT) && rev_index.AddOffset(-1, FD_HEIGHT));
893  } while (b_index.AddOffset(1, FD_BATCH));
894 }
895 
896 // Copies src to *this with independent reversal of the x dimension.
898  int num_features = src.NumFeatures();
899  Resize(src, num_features);
900  StrideMap::Index b_index(src.stride_map_);
901  do {
902  StrideMap::Index y_index(b_index);
903  do {
904  StrideMap::Index fwd_index(y_index);
905  StrideMap::Index rev_index(y_index);
906  rev_index.AddOffset(rev_index.MaxIndexOfDim(FD_WIDTH), FD_WIDTH);
907  do {
908  CopyTimeStepFrom(rev_index.t(), src, fwd_index.t());
909  } while (fwd_index.AddOffset(1, FD_WIDTH) && rev_index.AddOffset(-1, FD_WIDTH));
910  } while (y_index.AddOffset(1, FD_HEIGHT));
911  } while (b_index.AddOffset(1, FD_BATCH));
912 }
913 
914 // Copies src to *this with independent transpose of the x and y dimensions.
916  int num_features = src.NumFeatures();
917  stride_map_ = src.stride_map_;
918  stride_map_.TransposeXY();
919  ResizeToMap(src.int_mode(), stride_map_, num_features);
920  StrideMap::Index src_b_index(src.stride_map_);
921  StrideMap::Index dest_b_index(stride_map_);
922  do {
923  StrideMap::Index src_y_index(src_b_index);
924  StrideMap::Index dest_x_index(dest_b_index);
925  do {
926  StrideMap::Index src_x_index(src_y_index);
927  StrideMap::Index dest_y_index(dest_x_index);
928  do {
929  CopyTimeStepFrom(dest_y_index.t(), src, src_x_index.t());
930  } while (src_x_index.AddOffset(1, FD_WIDTH) && dest_y_index.AddOffset(1, FD_HEIGHT));
931  } while (src_y_index.AddOffset(1, FD_HEIGHT) && dest_x_index.AddOffset(1, FD_WIDTH));
932  } while (src_b_index.AddOffset(1, FD_BATCH) && dest_b_index.AddOffset(1, FD_BATCH));
933 }
934 
935 // Copies src to *this, at the given feature_offset, returning the total
936 // feature offset after the copy. Multiple calls will stack outputs from
937 // multiple sources in feature space.
938 int NetworkIO::CopyPacking(const NetworkIO &src, int feature_offset) {
939  ASSERT_HOST(int_mode_ == src.int_mode_);
940  int width = src.Width();
941  ASSERT_HOST(width <= Width());
942  int num_features = src.NumFeatures();
943  ASSERT_HOST(num_features + feature_offset <= NumFeatures());
944  if (int_mode_) {
945  for (int t = 0; t < width; ++t) {
946  memcpy(i_[t] + feature_offset, src.i_[t], num_features * sizeof(i_[t][0]));
947  }
948  for (int t = width; t < i_.dim1(); ++t) {
949  memset(i_[t], 0, num_features * sizeof(i_[t][0]));
950  }
951  } else {
952  for (int t = 0; t < width; ++t) {
953  memcpy(f_[t] + feature_offset, src.f_[t], num_features * sizeof(f_[t][0]));
954  }
955  for (int t = width; t < f_.dim1(); ++t) {
956  memset(f_[t], 0, num_features * sizeof(f_[t][0]));
957  }
958  }
959  return num_features + feature_offset;
960 }
961 
962 // Opposite of CopyPacking, fills *this with a part of src, starting at
963 // feature_offset, and picking num_features.
964 void NetworkIO::CopyUnpacking(const NetworkIO &src, int feature_offset, int num_features) {
965  Resize(src, num_features);
966  int width = src.Width();
967  ASSERT_HOST(num_features + feature_offset <= src.NumFeatures());
968  if (int_mode_) {
969  for (int t = 0; t < width; ++t) {
970  memcpy(i_[t], src.i_[t] + feature_offset, num_features * sizeof(i_[t][0]));
971  }
972  } else {
973  for (int t = 0; t < width; ++t) {
974  memcpy(f_[t], src.f_[t] + feature_offset, num_features * sizeof(f_[t][0]));
975  }
976  }
977 }
978 
979 // Transposes the float part of *this into dest.
981  int width = Width();
982  dest->ResizeNoInit(NumFeatures(), width);
983  for (int t = 0; t < width; ++t) {
984  dest->WriteStrided(t, f_[t]);
985  }
986 }
987 
988 // Clips the content of a single time-step to +/-range.
989 void NetworkIO::ClipVector(int t, float range) {
990  ASSERT_HOST(!int_mode_);
991  float *v = f_[t];
992  int dim = f_.dim2();
993  for (int i = 0; i < dim; ++i) {
994  v[i] = ClipToRange<float>(v[i], -range, range);
995  }
996 }
997 
998 // Returns the padding required for the given number of features in order
999 // for the SIMD operations to be safe.
1000 /* static */
1001 int NetworkIO::GetPadding(int num_features) {
1002  int padding = 0;
1004  padding = IntSimdMatrix::intSimdMatrix->RoundInputs(num_features) - num_features;
1005  }
1006  return padding;
1007 }
1008 
1009 } // namespace tesseract.
#define ASSERT_HOST(x)
Definition: errcode.h:59
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
int IntCastRounded(double x)
Definition: helpers.h:175
const float kMinCertainty
Definition: networkio.cpp:30
double TFloat
Definition: tesstypes.h:39
@ FD_WIDTH
Definition: stridemap.h:35
@ FD_BATCH
Definition: stridemap.h:33
@ FD_HEIGHT
Definition: stridemap.h:34
void ZeroVector(int n, T *vec)
Definition: functions.h:245
const float kMinProb
Definition: networkio.cpp:32
void ResizeNoInit(int size1, int size2, int pad=0)
Definition: matrix.h:94
int RoundInputs(int size) const
Definition: intsimdmatrix.h:70
static const IntSimdMatrix * intSimdMatrix
double SignedRand(double range)
Definition: helpers.h:76
void FromPix(const StaticShape &shape, const Image pix, TRand *randomizer)
Definition: networkio.cpp:163
void Resize(const NetworkIO &src, int num_features)
Definition: networkio.h:45
void WriteTimeStepPart(int t, int offset, int num_features, const TFloat *input)
Definition: networkio.cpp:671
void ComputeCombinerDeltas(const NetworkIO &fwd_deltas, const NetworkIO &base_output)
Definition: networkio.cpp:790
void ResizeXTo1(const NetworkIO &src, int num_features)
Definition: networkio.cpp:68
void ZeroInvalidElements()
Definition: networkio.cpp:86
const StrideMap & stride_map() const
Definition: networkio.h:129
float MinOfMaxes() const
Definition: networkio.cpp:728
void MaxpoolTimeStep(int dest_t, const NetworkIO &src, int src_t, int *max_line)
Definition: networkio.cpp:686
bool int_mode() const
Definition: networkio.h:123
void FromPixes(const StaticShape &shape, const std::vector< Image > &pixes, TRand *randomizer)
Definition: networkio.cpp:171
Image ToPix() const
Definition: networkio.cpp:300
const int8_t * i(int t) const
Definition: networkio.h:119
void CopyTimeStepGeneral(int dest_t, int dest_offset, int num_features, const NetworkIO &src, int src_t, int src_offset)
Definition: networkio.cpp:405
void ClipVector(int t, float range)
Definition: networkio.cpp:989
void AddTimeStepPart(int t, int offset, int num_features, float *inout) const
Definition: networkio.cpp:650
void WriteTimeStep(int t, const TFloat *input)
Definition: networkio.cpp:665
void CopyWithXReversal(const NetworkIO &src)
Definition: networkio.cpp:897
void Print(int num) const
Definition: networkio.cpp:378
float * f(int t)
Definition: networkio.h:111
void ScoresOverRange(int t_start, int t_end, int choice, int null_ch, float *rating, float *certainty) const
Definition: networkio.cpp:463
static float ProbToCertainty(float prob)
Definition: networkio.cpp:589
int CopyPacking(const NetworkIO &src, int feature_offset)
Definition: networkio.cpp:938
void CopyWithXYTranspose(const NetworkIO &src)
Definition: networkio.cpp:915
void ReadTimeStep(int t, TFloat *output) const
Definition: networkio.cpp:619
int Width() const
Definition: networkio.h:103
void Copy1DGreyImage(int batch, Image pix, float black, float contrast, TRand *randomizer)
Definition: networkio.cpp:257
void Copy2DImage(int batch, Image pix, float black, float contrast, TRand *randomizer)
Definition: networkio.cpp:216
void Resize2d(bool int_mode, int width, int num_features)
Definition: networkio.cpp:35
void MaxpoolBackward(const NetworkIO &fwd, const GENERIC_2D_ARRAY< int > &maxes)
Definition: networkio.cpp:712
void AddTimeStep(int t, TFloat *inout) const
Definition: networkio.cpp:634
void ZeroTimeStep(int t)
Definition: networkio.h:148
void SetActivations(int t, int label, float ok_score)
Definition: networkio.cpp:557
void CombineOutputs(const NetworkIO &base_output, const NetworkIO &combiner_output)
Definition: networkio.cpp:758
void Transpose(TransposedArray *dest) const
Definition: networkio.cpp:980
void ResizeScaled(const NetworkIO &src, int x_scale, int y_scale, int num_features)
Definition: networkio.cpp:61
bool AnySuspiciousTruth(float confidence_thr) const
Definition: networkio.cpp:600
void CopyWithYReversal(const NetworkIO &src)
Definition: networkio.cpp:877
void ZeroTimeStepGeneral(int t, int offset, int num_features)
Definition: networkio.cpp:416
void Randomize(int t, int offset, int num_features, TRand *randomizer)
Definition: networkio.cpp:425
void CopyUnpacking(const NetworkIO &src, int feature_offset, int num_features)
Definition: networkio.cpp:964
void EnsureBestLabel(int t, int label)
Definition: networkio.cpp:570
void AddAllToFloat(const NetworkIO &src)
Definition: networkio.cpp:840
void SubtractAllFromFloat(const NetworkIO &src)
Definition: networkio.cpp:847
TFloat ScoreOfLabels(const std::vector< int > &labels, int start) const
Definition: networkio.cpp:546
void ResizeToMap(bool int_mode, const StrideMap &stride_map, int num_features)
Definition: networkio.cpp:46
void CopyAll(const NetworkIO &src)
Definition: networkio.cpp:834
void CopyTimeStepFrom(int dest_t, const NetworkIO &src, int src_t)
Definition: networkio.cpp:395
int BestChoiceOverRange(int t_start, int t_end, int not_this, int null_ch, float *rating, float *certainty) const
Definition: networkio.cpp:441
void CopyWithNormalization(const NetworkIO &src, const NetworkIO &scale)
Definition: networkio.cpp:854
int NumFeatures() const
Definition: networkio.h:107
void SetPixel(int t, int f, int pixel, float black, float contrast)
Definition: networkio.cpp:290
int PositionOfBestMatch(const std::vector< int > &labels, int start, int end) const
Definition: networkio.cpp:529
int BestLabel(int t, float *score) const
Definition: networkio.h:163
void SetStride(const std::vector< std::pair< int, int >> &h_w_pairs)
Definition: stridemap.cpp:131
int Width() const
Definition: stridemap.h:123
int Size(FlexDimensions dimension) const
Definition: stridemap.h:119
void ScaleXY(int x_factor, int y_factor)
Definition: stridemap.cpp:153
int index(FlexDimensions dimension) const
Definition: stridemap.h:59
bool AddOffset(int offset, FlexDimensions dimension)
Definition: stridemap.cpp:67
int MaxIndexOfDim(FlexDimensions dim) const
Definition: stridemap.cpp:46
void WriteStrided(int t, const float *data)
Definition: weightmatrix.h:40