tesseract  5.0.0
polyaprx.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: polyaprx.cpp
3  * Description: Code for polygonal approximation from old edgeprog.
4  * Author: Ray Smith
5  *
6  * (C) Copyright 1993, Hewlett-Packard Ltd.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  *
17  **********************************************************************/
18 
19 #include "polyaprx.h"
20 
21 #include "blobs.h" // for EDGEPT, TPOINT, VECTOR, TESSLINE
22 #include "coutln.h" // for C_OUTLINE
23 #include "errcode.h" // for ASSERT_HOST
24 #include "mod128.h" // for DIR128
25 #include "params.h" // for BoolParam, BOOL_VAR
26 #include "points.h" // for ICOORD
27 #include "rect.h" // for TBOX
28 #include "tprintf.h" // for tprintf
29 
30 #include <cstdint> // for INT16_MAX, int8_t
31 
32 namespace tesseract {
33 
34 #define FASTEDGELENGTH 256
35 
36 static BOOL_VAR(poly_debug, false, "Debug old poly");
37 static BOOL_VAR(poly_wide_objects_better, true,
38  "More accurate approx on wide things");
39 
40 #define fixed_dist 20 // really an int_variable
41 #define approx_dist 15 // really an int_variable
42 
43 const int par1 = 4500 / (approx_dist * approx_dist);
44 const int par2 = 6750 / (approx_dist * approx_dist);
45 
46 /**********************************************************************
47  *cutline(first,last,area) straightens out a line by partitioning
48  *and joining the ends by a straight line*
49  **********************************************************************/
50 
51 static void cutline( // recursive refine
52  EDGEPT *first, // ends of line
53  EDGEPT *last, int area // area of object
54 ) {
55  EDGEPT *edge; // current edge
56  TPOINT vecsum; // vector sum
57  int vlen; // approx length of vecsum
58  TPOINT vec; // accumulated vector
59  EDGEPT *maxpoint; // worst point
60  int maxperp; // max deviation
61  int perp; // perp distance
62  int ptcount; // no of points
63  int squaresum; // sum of perps
64 
65  edge = first; // start of line
66  if (edge->next == last) {
67  return; // simple line
68  }
69 
70  // vector sum
71  vecsum.x = last->pos.x - edge->pos.x;
72  vecsum.y = last->pos.y - edge->pos.y;
73  if (vecsum.x == 0 && vecsum.y == 0) {
74  // special case
75  vecsum.x = -edge->prev->vec.x;
76  vecsum.y = -edge->prev->vec.y;
77  }
78  // absolute value
79  vlen = vecsum.x > 0 ? vecsum.x : -vecsum.x;
80  if (vecsum.y > vlen) {
81  vlen = vecsum.y; // maximum
82  } else if (-vecsum.y > vlen) {
83  vlen = -vecsum.y; // absolute value
84  }
85 
86  vec.x = edge->vec.x; // accumulated vector
87  vec.y = edge->vec.y;
88  maxperp = 0; // none yet
89  squaresum = ptcount = 0;
90  edge = edge->next; // move to actual point
91  maxpoint = edge; // in case there isn't one
92  do {
93  perp = vec.cross(vecsum); // get perp distance
94  if (perp != 0) {
95  perp *= perp; // squared deviation
96  }
97  squaresum += perp; // sum squares
98  ptcount++; // count points
99  if (poly_debug) {
100  tprintf("Cutline:Final perp=%d\n", perp);
101  }
102  if (perp > maxperp) {
103  maxperp = perp;
104  maxpoint = edge; // find greatest deviation
105  }
106  vec.x += edge->vec.x; // accumulate vectors
107  vec.y += edge->vec.y;
108  edge = edge->next;
109  } while (edge != last); // test all line
110 
111  perp = vecsum.length();
112  ASSERT_HOST(perp != 0);
113 
114  if (maxperp < 256 * INT16_MAX) {
115  maxperp <<= 8;
116  maxperp /= perp; // true max perp
117  } else {
118  maxperp /= perp;
119  maxperp <<= 8; // avoid overflow
120  }
121  if (squaresum < 256 * INT16_MAX) {
122  // mean squared perp
123  perp = (squaresum << 8) / (perp * ptcount);
124  } else {
125  // avoid overflow
126  perp = (squaresum / perp << 8) / ptcount;
127  }
128 
129  if (poly_debug) {
130  tprintf("Cutline:A=%d, max=%.2f(%.2f%%), msd=%.2f(%.2f%%)\n", area,
131  maxperp / 256.0, maxperp * 200.0 / area, perp / 256.0,
132  perp * 300.0 / area);
133  }
134  if (maxperp * par1 >= 10 * area || perp * par2 >= 10 * area || vlen >= 126) {
135  maxpoint->fixed = true;
136  // partitions
137  cutline(first, maxpoint, area);
138  cutline(maxpoint, last, area);
139  }
140 }
141 
142 /**********************************************************************
143  * edgesteps_to_edgepts
144  *
145  * Convert a C_OUTLINE to EDGEPTs.
146  **********************************************************************/
147 
148 static EDGEPT *edgesteps_to_edgepts( // convert outline
149  C_OUTLINE *c_outline, // input
150  EDGEPT edgepts[] // output is array
151 ) {
152  int32_t length; // steps in path
153  ICOORD pos; // current coords
154  int32_t stepindex; // current step
155  int32_t stepinc; // increment
156  int32_t epindex; // current EDGEPT
157  ICOORD vec; // for this 8 step
158  ICOORD prev_vec;
159  int8_t epdir; // of this step
160  DIR128 prevdir; // previous dir
161  DIR128 dir; // of this step
162 
163  pos = c_outline->start_pos(); // start of loop
164  length = c_outline->pathlength();
165  stepindex = 0;
166  epindex = 0;
167  prevdir = -1;
168  // repeated steps
169  uint32_t count = 0;
170  int prev_stepindex = 0;
171  do {
172  dir = c_outline->step_dir(stepindex);
173  vec = c_outline->step(stepindex);
174  if (stepindex < length - 1 &&
175  c_outline->step_dir(stepindex + 1) - dir == -32) {
176  dir += 128 - 16;
177  vec += c_outline->step(stepindex + 1);
178  stepinc = 2;
179  } else {
180  stepinc = 1;
181  }
182  if (count == 0) {
183  prevdir = dir;
184  prev_vec = vec;
185  }
186  if (prevdir.get_dir() != dir.get_dir()) {
187  edgepts[epindex].pos.x = pos.x();
188  edgepts[epindex].pos.y = pos.y();
189  prev_vec *= count;
190  edgepts[epindex].vec.x = prev_vec.x();
191  edgepts[epindex].vec.y = prev_vec.y();
192  pos += prev_vec;
193  edgepts[epindex].runlength = count;
194  edgepts[epindex].prev = &edgepts[epindex - 1];
195  // TODO: reset is_hidden, too?
196  edgepts[epindex].fixed = false;
197  edgepts[epindex].next = &edgepts[epindex + 1];
198  prevdir += 64;
199  epdir = DIR128(0) - prevdir;
200  epdir >>= 4;
201  epdir &= 7;
202  edgepts[epindex].dir = epdir;
203  edgepts[epindex].src_outline = c_outline;
204  edgepts[epindex].start_step = prev_stepindex;
205  edgepts[epindex].step_count = stepindex - prev_stepindex;
206  epindex++;
207  prevdir = dir;
208  prev_vec = vec;
209  count = 1;
210  prev_stepindex = stepindex;
211  } else {
212  count++;
213  }
214  stepindex += stepinc;
215  } while (stepindex < length);
216  edgepts[epindex].pos.x = pos.x();
217  edgepts[epindex].pos.y = pos.y();
218  prev_vec *= count;
219  edgepts[epindex].vec.x = prev_vec.x();
220  edgepts[epindex].vec.y = prev_vec.y();
221  pos += prev_vec;
222  edgepts[epindex].runlength = count;
223  // TODO: reset is_hidden, too?
224  edgepts[epindex].fixed = false;
225  edgepts[epindex].src_outline = c_outline;
226  edgepts[epindex].start_step = prev_stepindex;
227  edgepts[epindex].step_count = stepindex - prev_stepindex;
228  edgepts[epindex].prev = &edgepts[epindex - 1];
229  edgepts[epindex].next = &edgepts[0];
230  prevdir += 64;
231  epdir = DIR128(0) - prevdir;
232  epdir >>= 4;
233  epdir &= 7;
234  edgepts[epindex].dir = epdir;
235  edgepts[0].prev = &edgepts[epindex];
236  ASSERT_HOST(pos.x() == c_outline->start_pos().x() &&
237  pos.y() == c_outline->start_pos().y());
238  return &edgepts[0];
239 }
240 
241 /**********************************************************************
242  *fix2(start,area) fixes points on the outline according to a trial method*
243  **********************************************************************/
244 
245 static void fix2( // polygonal approx
246  EDGEPT *start, // loop to approximate
247  int area) {
248  EDGEPT *edgept; // current point
249  EDGEPT *edgept1;
250  EDGEPT *loopstart; // modified start of loop
251  EDGEPT *linestart; // start of line segment
252  int fixed_count; // no of fixed points
253  int8_t dir;
254  int d01, d12, d23, gapmin;
255  TPOINT d01vec, d12vec, d23vec;
256  EDGEPT *edgefix, *startfix;
257  EDGEPT *edgefix0, *edgefix1, *edgefix2, *edgefix3;
258 
259  edgept = start; // start of loop
260  while (((edgept->dir - edgept->prev->dir + 1) & 7) < 3 &&
261  (dir = (edgept->prev->dir - edgept->next->dir) & 7) != 2 && dir != 6) {
262  edgept = edgept->next; // find suitable start
263  }
264  loopstart = edgept; // remember start
265 
266  // completed flag
267  bool stopped = false;
268  edgept->fixed = true; // fix it
269  do {
270  linestart = edgept; // possible start of line
271  auto dir1 = edgept->dir; // first direction
272  // length of dir1
273  auto sum1 = edgept->runlength;
274  edgept = edgept->next;
275  auto dir2 = edgept->dir; // 2nd direction
276  // length in dir2
277  auto sum2 = edgept->runlength;
278  if (((dir1 - dir2 + 1) & 7) < 3) {
279  while (edgept->prev->dir == edgept->next->dir) {
280  edgept = edgept->next; // look at next
281  if (edgept->dir == dir1) {
282  // sum lengths
283  sum1 += edgept->runlength;
284  } else {
285  sum2 += edgept->runlength;
286  }
287  }
288 
289  if (edgept == loopstart) {
290  // finished
291  stopped = true;
292  }
293  if (sum2 + sum1 > 2 && linestart->prev->dir == dir2 &&
294  (linestart->prev->runlength > linestart->runlength || sum2 > sum1)) {
295  // start is back one
296  linestart = linestart->prev;
297  linestart->fixed = true;
298  }
299 
300  if (((edgept->next->dir - edgept->dir + 1) & 7) >= 3 ||
301  (edgept->dir == dir1 && sum1 >= sum2) ||
302  ((edgept->prev->runlength < edgept->runlength ||
303  (edgept->dir == dir2 && sum2 >= sum1)) &&
304  linestart->next != edgept)) {
305  edgept = edgept->next;
306  }
307  }
308  // sharp bend
309  edgept->fixed = true;
310  }
311  // do whole loop
312  while (edgept != loopstart && !stopped);
313 
314  edgept = start;
315  do {
316  if (((edgept->runlength >= 8) && (edgept->dir != 2) &&
317  (edgept->dir != 6)) ||
318  ((edgept->runlength >= 8) &&
319  ((edgept->dir == 2) || (edgept->dir == 6)))) {
320  edgept->fixed = true;
321  edgept1 = edgept->next;
322  edgept1->fixed = true;
323  }
324  edgept = edgept->next;
325  } while (edgept != start);
326 
327  edgept = start;
328  do {
329  // single fixed step
330  if (edgept->fixed &&
331  edgept->runlength == 1
332  // and neighbours free
333  && edgept->next->fixed &&
334  !edgept->prev->fixed
335  // same pair of dirs
336  && !edgept->next->next->fixed &&
337  edgept->prev->dir == edgept->next->dir &&
338  edgept->prev->prev->dir == edgept->next->next->dir &&
339  ((edgept->prev->dir - edgept->dir + 1) & 7) < 3) {
340  // unfix it
341  edgept->fixed = false;
342  edgept->next->fixed = false;
343  }
344  edgept = edgept->next; // do all points
345  } while (edgept != start); // until finished
346 
347  stopped = false;
348  if (area < 450) {
349  area = 450;
350  }
351 
352  gapmin = area * fixed_dist * fixed_dist / 44000;
353 
354  edgept = start;
355  fixed_count = 0;
356  do {
357  if (edgept->fixed) {
358  fixed_count++;
359  }
360  edgept = edgept->next;
361  } while (edgept != start);
362  while (!edgept->fixed) {
363  edgept = edgept->next;
364  }
365  edgefix0 = edgept;
366 
367  edgept = edgept->next;
368  while (!edgept->fixed) {
369  edgept = edgept->next;
370  }
371  edgefix1 = edgept;
372 
373  edgept = edgept->next;
374  while (!edgept->fixed) {
375  edgept = edgept->next;
376  }
377  edgefix2 = edgept;
378 
379  edgept = edgept->next;
380  while (!edgept->fixed) {
381  edgept = edgept->next;
382  }
383  edgefix3 = edgept;
384 
385  startfix = edgefix2;
386 
387  do {
388  if (fixed_count <= 3) {
389  break; // already too few
390  }
391  d12vec.diff(edgefix1->pos, edgefix2->pos);
392  d12 = d12vec.length();
393  // TODO(rays) investigate this change:
394  // Only unfix a point if it is part of a low-curvature section
395  // of outline and the total angle change of the outlines is
396  // less than 90 degrees, ie the scalar product is positive.
397  // if (d12 <= gapmin && edgefix0->vec.dot(edgefix2->vec) > 0) {
398  if (d12 <= gapmin) {
399  d01vec.diff(edgefix0->pos, edgefix1->pos);
400  d01 = d01vec.length();
401  d23vec.diff(edgefix2->pos, edgefix3->pos);
402  d23 = d23vec.length();
403  if (d01 > d23) {
404  edgefix2->fixed = false;
405  fixed_count--;
406  } else {
407  edgefix1->fixed = false;
408  fixed_count--;
409  edgefix1 = edgefix2;
410  }
411  } else {
412  edgefix0 = edgefix1;
413  edgefix1 = edgefix2;
414  }
415  edgefix2 = edgefix3;
416  edgept = edgept->next;
417  while (!edgept->fixed) {
418  if (edgept == startfix) {
419  stopped = true;
420  }
421  edgept = edgept->next;
422  }
423  edgefix3 = edgept;
424  edgefix = edgefix2;
425  } while ((edgefix != startfix) && (!stopped));
426 }
427 
428 /**********************************************************************
429  *poly2(startpt,area,path) applies a second approximation to the outline
430  *using the points which have been fixed by the first approximation*
431  **********************************************************************/
432 
433 static EDGEPT *poly2( // second poly
434  EDGEPT *startpt, // start of loop
435  int area // area of blob box
436 ) {
437  EDGEPT *edgept; // current outline point
438  EDGEPT *loopstart; // starting point
439  EDGEPT *linestart; // start of line
440  int edgesum; // correction count
441 
442  if (area < 1200) {
443  area = 1200; // minimum value
444  }
445 
446  loopstart = nullptr; // not found it yet
447  edgept = startpt; // start of loop
448 
449  do {
450  // current point fixed and next not
451  if (edgept->fixed && !edgept->next->fixed) {
452  loopstart = edgept; // start of repoly
453  break;
454  }
455  edgept = edgept->next; // next point
456  } while (edgept != startpt); // until found or finished
457 
458  if (loopstart == nullptr && !startpt->fixed) {
459  // fixed start of loop
460  startpt->fixed = true;
461  loopstart = startpt; // or start of loop
462  }
463  if (loopstart) {
464  do {
465  edgept = loopstart; // first to do
466  do {
467  linestart = edgept;
468  edgesum = 0; // sum of lengths
469  do {
470  // sum lengths
471  edgesum += edgept->runlength;
472  edgept = edgept->next; // move on
473  } while (!edgept->fixed && edgept != loopstart && edgesum < 126);
474  if (poly_debug) {
475  tprintf("Poly2:starting at (%d,%d)+%d=(%d,%d),%d to (%d,%d)\n",
476  linestart->pos.x, linestart->pos.y, linestart->dir,
477  linestart->vec.x, linestart->vec.y, edgesum, edgept->pos.x,
478  edgept->pos.y);
479  }
480  // reapproximate
481  cutline(linestart, edgept, area);
482 
483  while (edgept->next->fixed && edgept != loopstart) {
484  edgept = edgept->next; // look for next non-fixed
485  }
486  }
487  // do all the loop
488  while (edgept != loopstart);
489  edgesum = 0;
490  do {
491  if (edgept->fixed) {
492  edgesum++;
493  }
494  edgept = edgept->next;
495  }
496  // count fixed pts
497  while (edgept != loopstart);
498  if (edgesum < 3) {
499  area /= 2; // must have 3 pts
500  }
501  } while (edgesum < 3);
502  do {
503  linestart = edgept;
504  do {
505  edgept = edgept->next;
506  } while (!edgept->fixed);
507  linestart->next = edgept;
508  edgept->prev = linestart;
509  linestart->vec.x = edgept->pos.x - linestart->pos.x;
510  linestart->vec.y = edgept->pos.y - linestart->pos.y;
511  } while (edgept != loopstart);
512  } else {
513  edgept = startpt; // start of loop
514  }
515 
516  loopstart = edgept; // new start
517  return loopstart; // correct exit
518 }
519 
520 /**********************************************************************
521  * tesspoly_outline
522  *
523  * Approximate an outline from chain codes form using the old tess algorithm.
524  * If allow_detailed_fx is true, the EDGEPTs in the returned TBLOB
525  * contain pointers to the input C_OUTLINEs that enable higher-resolution
526  * feature extraction that does not use the polygonal approximation.
527  **********************************************************************/
528 
529 TESSLINE *ApproximateOutline(bool allow_detailed_fx, C_OUTLINE *c_outline) {
530  EDGEPT stack_edgepts[FASTEDGELENGTH]; // converted path
531  EDGEPT *edgepts = stack_edgepts;
532 
533  // Use heap memory if the stack buffer is not big enough.
534  if (c_outline->pathlength() > FASTEDGELENGTH) {
535  edgepts = new EDGEPT[c_outline->pathlength()];
536  }
537 
538  // bounding box
539  const auto &loop_box = c_outline->bounding_box();
540  int32_t area = loop_box.height();
541  if (!poly_wide_objects_better && loop_box.width() > area) {
542  area = loop_box.width();
543  }
544  area *= area;
545  edgesteps_to_edgepts(c_outline, edgepts);
546  fix2(edgepts, area);
547  EDGEPT *edgept = poly2(edgepts, area); // 2nd approximation.
548  EDGEPT *startpt = edgept;
549  EDGEPT *result = nullptr;
550  EDGEPT *prev_result = nullptr;
551  do {
552  auto *new_pt = new EDGEPT;
553  new_pt->pos = edgept->pos;
554  new_pt->prev = prev_result;
555  if (prev_result == nullptr) {
556  result = new_pt;
557  } else {
558  prev_result->next = new_pt;
559  new_pt->prev = prev_result;
560  }
561  if (allow_detailed_fx) {
562  new_pt->src_outline = edgept->src_outline;
563  new_pt->start_step = edgept->start_step;
564  new_pt->step_count = edgept->step_count;
565  }
566  prev_result = new_pt;
567  edgept = edgept->next;
568  } while (edgept != startpt);
569  prev_result->next = result;
570  result->prev = prev_result;
571  if (edgepts != stack_edgepts) {
572  delete[] edgepts;
573  }
574  return TESSLINE::BuildFromOutlineList(result);
575 }
576 
577 } // namespace tesseract
#define fixed_dist
Definition: polyaprx.cpp:40
#define FASTEDGELENGTH
Definition: polyaprx.cpp:34
#define approx_dist
Definition: polyaprx.cpp:41
#define ASSERT_HOST(x)
Definition: errcode.h:59
#define BOOL_VAR(name, val, comment)
Definition: params.h:359
@ TPOINT
LIST last(LIST var_list)
Definition: oldlist.cpp:153
const int par2
Definition: polyaprx.cpp:44
TESSLINE * ApproximateOutline(bool allow_detailed_fx, C_OUTLINE *c_outline)
Definition: polyaprx.cpp:529
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
const int par1
Definition: polyaprx.cpp:43
TDimension x
Definition: blobs.h:89
int cross(const TPOINT &other) const
Definition: blobs.h:75
int length() const
Definition: blobs.h:85
TDimension y
Definition: blobs.h:90
EDGEPT * next
Definition: blobs.h:200
EDGEPT * prev
Definition: blobs.h:201
TPOINT pos
Definition: blobs.h:194
VECTOR vec
Definition: blobs.h:195
C_OUTLINE * src_outline
Definition: blobs.h:202
static TESSLINE * BuildFromOutlineList(EDGEPT *outline)
Definition: blobs.cpp:91
int32_t pathlength() const
Definition: coutln.h:134
const TBOX & bounding_box() const
Definition: coutln.h:113
TDimension height() const
Definition: rect.h:118