tesseract  5.0.0
degradeimage.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: degradeimage.cpp
3  * Description: Function to degrade an image (usually of text) as if it
4  * has been printed and then scanned.
5  * Authors: Ray Smith
6  *
7  * (C) Copyright 2013, Google Inc.
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  * http://www.apache.org/licenses/LICENSE-2.0
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  **********************************************************************/
19 
20 #include "degradeimage.h"
21 
22 #include <allheaders.h> // from leptonica
23 #include <cstdlib>
24 #include "helpers.h" // For TRand.
25 #include "rect.h"
26 
27 namespace tesseract {
28 
29 // A randomized perspective distortion can be applied to synthetic input.
30 // The perspective distortion comes from leptonica, which uses 2 sets of 4
31 // corners to determine the distortion. There are random values for each of
32 // the x numbers x0..x3 and y0..y3, except for x2 and x3 which are instead
33 // defined in terms of a single shear value. This reduces the degrees of
34 // freedom enough to make the distortion more realistic than it would otherwise
35 // be if all 8 coordinates could move independently.
36 // One additional factor is used for the color of the pixels that don't exist
37 // in the source image.
38 // Name for each of the randomizing factors.
48  // x2 = x1 - shear
49  // x3 = x0 + shear
51 };
52 
// Tuning constants for DegradeImage. They are constexpr so that every one of
// them (including the float) can be used in constant expressions.

// Rotation is +/- kRotationRange radians.
constexpr float kRotationRange = 0.02f;
// Number of grey levels to shift by for each exposure step.
constexpr int kExposureFactor = 16;
// Salt and pepper noise is +/- kSaltnPepper.
constexpr int kSaltnPepper = 5;
// Min sum of width + height on which to operate the ramp.
constexpr int kMinRampSize = 1000;
61 
62 // Degrade the pix as if by a print/copy/scan cycle with exposure > 0
63 // corresponding to darkening on the copier and <0 lighter and 0 not copied.
64 // Exposures in [-2,2] are most useful, with -3 and 3 being extreme.
65 // If rotation is nullptr, rotation is skipped. If *rotation is non-zero, the
66 // pix is rotated by *rotation else it is randomly rotated and *rotation is
67 // modified.
68 //
69 // HOW IT WORKS:
70 // Most of the process is really dictated by the fact that the minimum
71 // available convolution is 3X3, which is too big really to simulate a
72 // good quality print/scan process. (2X2 would be better.)
73 // 1 pixel wide inputs are heavily smeared by the 3X3 convolution, making the
74 // images generally biased to being too light, so most of the work is to make
75 // them darker. 3 levels of thickening/darkening are achieved with 2 dilations,
76 // (using a greyscale erosion) one heavy (by being before convolution) and one
77 // light (after convolution).
78 // With no dilation, after covolution, the images are so light that a heavy
79 // constant offset is required to make the 0 image look reasonable. A simple
80 // constant offset multiple of exposure to undo this value is enough to achieve
81 // all the required lighting. This gives the advantage that exposure level 1
82 // with a single dilation gives a good impression of the broken-yet-too-dark
83 // problem that is often seen in scans.
84 // A small random rotation gives some varying greyscale values on the edges,
85 // and some random salt and pepper noise on top helps to realistically jaggy-up
86 // the edges.
87 // Finally a greyscale ramp provides a continuum of effects between exposure
88 // levels.
89 Image DegradeImage(Image input, int exposure, TRand *randomizer, float *rotation) {
90  Image pix = pixConvertTo8(input, false);
91  input.destroy();
92  input = pix;
93  int width = pixGetWidth(input);
94  int height = pixGetHeight(input);
95 
96  if (exposure >= 2) {
97  // An erosion simulates the spreading darkening of a dark copy.
98  // This is backwards to binary morphology,
99  // see http://www.leptonica.com/grayscale-morphology.html
100  pix = input;
101  input = pixErodeGray(pix, 3, 3);
102  pix.destroy();
103  }
104  // A convolution is essential to any mode as no scanner produces an
105  // image as sharp as the electronic image.
106  pix = pixBlockconv(input, 1, 1);
107  input.destroy();
108  // A small random rotation helps to make the edges jaggy in a realistic way.
109  if (rotation != nullptr) {
110  float radians_clockwise = 0.0f;
111  if (*rotation) {
112  radians_clockwise = *rotation;
113  } else if (randomizer != nullptr) {
114  radians_clockwise = randomizer->SignedRand(kRotationRange);
115  }
116 
117  input = pixRotate(pix, radians_clockwise, L_ROTATE_AREA_MAP, L_BRING_IN_WHITE, 0, 0);
118  // Rotate the boxes to match.
119  *rotation = radians_clockwise;
120  pix.destroy();
121  } else {
122  input = pix;
123  }
124 
125  if (exposure >= 3 || exposure == 1) {
126  // Erosion after the convolution is not as heavy as before, so it is
127  // good for level 1 and in addition as a level 3.
128  // This is backwards to binary morphology,
129  // see http://www.leptonica.com/grayscale-morphology.html
130  pix = input;
131  input = pixErodeGray(pix, 3, 3);
132  pix.destroy();
133  }
134  // The convolution really needed to be 2x2 to be realistic enough, but
135  // we only have 3x3, so we have to bias the image darker or lose thin
136  // strokes.
137  int erosion_offset = 0;
138  // For light and 0 exposure, there is no dilation, so compensate for the
139  // convolution with a big darkening bias which is undone for lighter
140  // exposures.
141  if (exposure <= 0) {
142  erosion_offset = -3 * kExposureFactor;
143  }
144  // Add in a general offset of the greyscales for the exposure level so
145  // a threshold of 128 gives a reasonable binary result.
146  erosion_offset -= exposure * kExposureFactor;
147  // Add a gradual fade over the page and a small amount of salt and pepper
148  // noise to simulate noise in the sensor/paper fibres and varying
149  // illumination.
150  l_uint32 *data = pixGetData(input);
151  for (int y = 0; y < height; ++y) {
152  for (int x = 0; x < width; ++x) {
153  int pixel = GET_DATA_BYTE(data, x);
154  if (randomizer != nullptr) {
155  pixel += randomizer->IntRand() % (kSaltnPepper * 2 + 1) - kSaltnPepper;
156  }
157  if (height + width > kMinRampSize) {
158  pixel -= (2 * x + y) * 32 / (height + width);
159  }
160  pixel += erosion_offset;
161  if (pixel < 0) {
162  pixel = 0;
163  }
164  if (pixel > 255) {
165  pixel = 255;
166  }
167  SET_DATA_BYTE(data, x, pixel);
168  }
169  data += input->wpl;
170  }
171  return input;
172 }
173 
174 // Creates and returns a Pix distorted by various means according to the bool
175 // flags. If boxes is not nullptr, the boxes are resized/positioned according to
176 // any spatial distortion and also by the integer reduction factor box_scale
177 // so they will match what the network will output.
178 // Returns nullptr on error. The returned Pix must be pixDestroyed.
179 Image PrepareDistortedPix(const Image pix, bool perspective, bool invert, bool white_noise,
180  bool smooth_noise, bool blur, int box_reduction, TRand *randomizer,
181  std::vector<TBOX> *boxes) {
182  Image distorted = pix.copy();
183  // Things to do to synthetic training data.
184  if ((white_noise || smooth_noise) && randomizer->SignedRand(1.0) > 0.0) {
185  // TODO(rays) Cook noise in a more thread-safe manner than rand().
186  // Attempt to make the sequences reproducible.
187  srand(randomizer->IntRand());
188  Image pixn = pixAddGaussianNoise(distorted, 8.0);
189  distorted.destroy();
190  if (smooth_noise) {
191  distorted = pixBlockconv(pixn, 1, 1);
192  pixn.destroy();
193  } else {
194  distorted = pixn;
195  }
196  }
197  if (blur && randomizer->SignedRand(1.0) > 0.0) {
198  Image blurred = pixBlockconv(distorted, 1, 1);
199  distorted.destroy();
200  distorted = blurred;
201  }
202  if (perspective) {
203  GeneratePerspectiveDistortion(0, 0, randomizer, &distorted, boxes);
204  }
205  if (boxes != nullptr) {
206  for (auto &b : *boxes) {
207  b.scale(1.0f / box_reduction);
208  if (b.width() <= 0) {
209  b.set_right(b.left() + 1);
210  }
211  }
212  }
213  if (invert && randomizer->SignedRand(1.0) < -0) {
214  pixInvert(distorted, distorted);
215  }
216  return distorted;
217 }
218 
219 // Distorts anything that has a non-null pointer with the same pseudo-random
220 // perspective distortion. Width and height only need to be set if there
221 // is no pix. If there is a pix, then they will be taken from there.
222 void GeneratePerspectiveDistortion(int width, int height, TRand *randomizer, Image *pix,
223  std::vector<TBOX> *boxes) {
224  if (pix != nullptr && *pix != nullptr) {
225  width = pixGetWidth(*pix);
226  height = pixGetHeight(*pix);
227  }
228  float *im_coeffs = nullptr;
229  float *box_coeffs = nullptr;
230  l_int32 incolor = ProjectiveCoeffs(width, height, randomizer, &im_coeffs, &box_coeffs);
231  if (pix != nullptr && *pix != nullptr) {
232  // Transform the image.
233  Image transformed = pixProjective(*pix, im_coeffs, incolor);
234  if (transformed == nullptr) {
235  tprintf("Projective transformation failed!!\n");
236  return;
237  }
238  pix->destroy();
239  *pix = transformed;
240  }
241  if (boxes != nullptr) {
242  // Transform the boxes.
243  for (auto &b : *boxes) {
244  int x1, y1, x2, y2;
245  const TBOX &box = b;
246  projectiveXformSampledPt(box_coeffs, box.left(), height - box.top(), &x1, &y1);
247  projectiveXformSampledPt(box_coeffs, box.right(), height - box.bottom(), &x2, &y2);
248  TBOX new_box1(x1, height - y2, x2, height - y1);
249  projectiveXformSampledPt(box_coeffs, box.left(), height - box.bottom(), &x1, &y1);
250  projectiveXformSampledPt(box_coeffs, box.right(), height - box.top(), &x2, &y2);
251  TBOX new_box2(x1, height - y1, x2, height - y2);
252  b = new_box1.bounding_union(new_box2);
253  }
254  }
255  lept_free(im_coeffs);
256  lept_free(box_coeffs);
257 }
258 
259 // Computes the coefficients of a randomized projective transformation.
260 // The image transform requires backward transformation coefficient, and the
261 // box transform the forward coefficients.
262 // Returns the incolor arg to pixProjective.
263 int ProjectiveCoeffs(int width, int height, TRand *randomizer, float **im_coeffs,
264  float **box_coeffs) {
265  // Setup "from" points.
266  Pta *src_pts = ptaCreate(4);
267  ptaAddPt(src_pts, 0.0f, 0.0f);
268  ptaAddPt(src_pts, width, 0.0f);
269  ptaAddPt(src_pts, width, height);
270  ptaAddPt(src_pts, 0.0f, height);
271  // Extract factors from pseudo-random sequence.
272  float factors[FN_NUM_FACTORS];
273  float shear = 0.0f; // Shear is signed.
274  for (int i = 0; i < FN_NUM_FACTORS; ++i) {
275  // Everything is squared to make wild values rarer.
276  if (i == FN_SHEAR) {
277  // Shear is signed.
278  shear = randomizer->SignedRand(0.5 / 3.0);
279  shear = shear >= 0.0 ? shear * shear : -shear * shear;
280  // Keep the sheared points within the original rectangle.
281  if (shear < -factors[FN_X0]) {
282  shear = -factors[FN_X0];
283  }
284  if (shear > factors[FN_X1]) {
285  shear = factors[FN_X1];
286  }
287  factors[i] = shear;
288  } else if (i != FN_INCOLOR) {
289  factors[i] = fabs(randomizer->SignedRand(1.0));
290  if (i <= FN_Y3) {
291  factors[i] *= 5.0 / 8.0;
292  } else {
293  factors[i] *= 0.5;
294  }
295  factors[i] *= factors[i];
296  }
297  }
298  // Setup "to" points.
299  Pta *dest_pts = ptaCreate(4);
300  ptaAddPt(dest_pts, factors[FN_X0] * width, factors[FN_Y0] * height);
301  ptaAddPt(dest_pts, (1.0f - factors[FN_X1]) * width, factors[FN_Y1] * height);
302  ptaAddPt(dest_pts, (1.0f - factors[FN_X1] + shear) * width, (1 - factors[FN_Y2]) * height);
303  ptaAddPt(dest_pts, (factors[FN_X0] + shear) * width, (1 - factors[FN_Y3]) * height);
304  getProjectiveXformCoeffs(dest_pts, src_pts, im_coeffs);
305  getProjectiveXformCoeffs(src_pts, dest_pts, box_coeffs);
306  ptaDestroy(&src_pts);
307  ptaDestroy(&dest_pts);
308  return factors[FN_INCOLOR] > 0.5f ? L_BRING_IN_WHITE : L_BRING_IN_BLACK;
309 }
310 
311 } // namespace tesseract
const float kRotationRange
const int kExposureFactor
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
const int kSaltnPepper
Image PrepareDistortedPix(const Image pix, bool perspective, bool invert, bool white_noise, bool smooth_noise, bool blur, int box_reduction, TRand *randomizer, std::vector< TBOX > *boxes)
const int kMinRampSize
int ProjectiveCoeffs(int width, int height, TRand *randomizer, float **im_coeffs, float **box_coeffs)
void GeneratePerspectiveDistortion(int width, int height, TRand *randomizer, Image *pix, std::vector< TBOX > *boxes)
Image DegradeImage(Image input, int exposure, TRand *randomizer, float *rotation)
Image copy() const
Definition: image.cpp:28
void destroy()
Definition: image.cpp:32
TDimension left() const
Definition: rect.h:82
TDimension top() const
Definition: rect.h:68
TBOX bounding_union(const TBOX &box) const
Definition: rect.cpp:128
TDimension right() const
Definition: rect.h:89
TDimension bottom() const
Definition: rect.h:75
double SignedRand(double range)
Definition: helpers.h:76
int32_t IntRand()
Definition: helpers.h:72