tesseract  5.0.0
seam.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  *
3  * File: seam.cpp (Formerly seam.c)
4  * Author: Mark Seaman, OCR Technology
5  *
6  * (c) Copyright 1987, Hewlett-Packard Company.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  *
17  *****************************************************************************/
18 /*----------------------------------------------------------------------
19  I n c l u d e s
20 ----------------------------------------------------------------------*/
21 #include "seam.h"
22 
23 #include "blobs.h"
24 #include "tprintf.h"
25 
26 namespace tesseract {
27 
28 /*----------------------------------------------------------------------
29  Public Function Code
30 ----------------------------------------------------------------------*/
31 
32 // Returns the bounding box of all the points in the seam.
34  TBOX box(location_.x, location_.y, location_.x, location_.y);
35  for (int s = 0; s < num_splits_; ++s) {
36  box += splits_[s].bounding_box();
37  }
38  return box;
39 }
40 
41 // Returns true if the splits in *this SEAM appear OK in the sense that they
42 // do not cross any outlines and do not chop off any ridiculously small
43 // pieces.
44 bool SEAM::IsHealthy(const TBLOB &blob, int min_points, int min_area) const {
45  // TODO(rays) Try testing all the splits. Duplicating original code for now,
46  // which tested only the first.
47  return num_splits_ == 0 || splits_[0].IsHealthy(blob, min_points, min_area);
48 }
49 
50 // Computes the widthp_/widthn_ range for all existing SEAMs and for *this
51 // seam, which is about to be inserted at insert_index. Returns false if
52 // any of the computations fails, as this indicates an invalid chop.
53 // widthn_/widthp_ are only changed if modify is true.
54 bool SEAM::PrepareToInsertSeam(const std::vector<SEAM *> &seams,
55  const std::vector<TBLOB *> &blobs, int insert_index, bool modify) {
56  for (int s = 0; s < insert_index; ++s) {
57  if (!seams[s]->FindBlobWidth(blobs, s, modify)) {
58  return false;
59  }
60  }
61  if (!FindBlobWidth(blobs, insert_index, modify)) {
62  return false;
63  }
64  for (unsigned s = insert_index; s < seams.size(); ++s) {
65  if (!seams[s]->FindBlobWidth(blobs, s + 1, modify)) {
66  return false;
67  }
68  }
69  return true;
70 }
71 
72 // Computes the widthp_/widthn_ range. Returns false if not all the splits
73 // are accounted for. widthn_/widthp_ are only changed if modify is true.
74 bool SEAM::FindBlobWidth(const std::vector<TBLOB *> &blobs, int index, bool modify) {
75  int num_found = 0;
76  if (modify) {
77  widthp_ = 0;
78  widthn_ = 0;
79  }
80  for (int s = 0; s < num_splits_; ++s) {
81  const SPLIT &split = splits_[s];
82  bool found_split = split.ContainedByBlob(*blobs[index]);
83  // Look right.
84  for (unsigned b = index + 1; !found_split && b < blobs.size(); ++b) {
85  found_split = split.ContainedByBlob(*blobs[b]);
86  if (found_split && b - index > widthp_ && modify) {
87  widthp_ = b - index;
88  }
89  }
90  // Look left.
91  for (int b = index - 1; !found_split && b >= 0; --b) {
92  found_split = split.ContainedByBlob(*blobs[b]);
93  if (found_split && index - b > widthn_ && modify) {
94  widthn_ = index - b;
95  }
96  }
97  if (found_split) {
98  ++num_found;
99  }
100  }
101  return num_found == num_splits_;
102 }
103 
104 // Splits this blob into two blobs by applying the splits included in
105 // *this SEAM
106 void SEAM::ApplySeam(bool italic_blob, TBLOB *blob, TBLOB *other_blob) const {
107  for (int s = 0; s < num_splits_; ++s) {
108  splits_[s].SplitOutlineList(blob->outlines);
109  }
110  blob->ComputeBoundingBoxes();
111 
112  divide_blobs(blob, other_blob, italic_blob, location_);
113 
115  other_blob->EliminateDuplicateOutlines();
116 
117  blob->CorrectBlobOrder(other_blob);
118 }
119 
120 // Undoes ApplySeam by removing the seam between these two blobs.
121 // Produces one blob as a result, and deletes other_blob.
122 void SEAM::UndoSeam(TBLOB *blob, TBLOB *other_blob) const {
123  if (blob->outlines == nullptr) {
124  blob->outlines = other_blob->outlines;
125  other_blob->outlines = nullptr;
126  }
127 
128  TESSLINE *outline = blob->outlines;
129  while (outline->next) {
130  outline = outline->next;
131  }
132  outline->next = other_blob->outlines;
133  other_blob->outlines = nullptr;
134  delete other_blob;
135 
136  for (int s = 0; s < num_splits_; ++s) {
137  splits_[s].UnsplitOutlineList(blob);
138  }
139  blob->ComputeBoundingBoxes();
141 }
142 
143 // Prints everything in *this SEAM.
144 void SEAM::Print(const char *label) const {
145  tprintf("%s", label);
146  tprintf(" %6.2f @ (%d,%d), p=%u, n=%u ", priority_, location_.x, location_.y, widthp_, widthn_);
147  for (int s = 0; s < num_splits_; ++s) {
148  splits_[s].Print();
149  if (s + 1 < num_splits_) {
150  tprintf(", ");
151  }
152  }
153  tprintf("\n");
154 }
155 
156 // Prints a collection of SEAMs.
157 /* static */
158 void SEAM::PrintSeams(const char *label, const std::vector<SEAM *> &seams) {
159  if (!seams.empty()) {
160  tprintf("%s\n", label);
161  for (unsigned x = 0; x < seams.size(); ++x) {
162  tprintf("%2u: ", x);
163  seams[x]->Print("");
164  }
165  tprintf("\n");
166  }
167 }
168 
169 #ifndef GRAPHICS_DISABLED
170 // Draws the seam in the given window.
171 void SEAM::Mark(ScrollView *window) const {
172  for (int s = 0; s < num_splits_; ++s) {
173  splits_[s].Mark(window);
174  }
175 }
176 #endif
177 
178 // Break up the blobs in this chain so that they are all independent.
179 // This operation should undo the affect of join_pieces.
180 /* static */
181 void SEAM::BreakPieces(const std::vector<SEAM *> &seams, const std::vector<TBLOB *> &blobs,
182  int first, int last) {
183  for (int x = first; x < last; ++x) {
184  seams[x]->Reveal();
185  }
186 
187  TESSLINE *outline = blobs[first]->outlines;
188  int next_blob = first + 1;
189 
190  while (outline != nullptr && next_blob <= last) {
191  if (outline->next == blobs[next_blob]->outlines) {
192  outline->next = nullptr;
193  outline = blobs[next_blob]->outlines;
194  ++next_blob;
195  } else {
196  outline = outline->next;
197  }
198  }
199 }
200 
201 // Join a group of base level pieces into a single blob that can then
202 // be classified.
203 /* static */
204 void SEAM::JoinPieces(const std::vector<SEAM *> &seams, const std::vector<TBLOB *> &blobs,
205  int first, int last) {
206  TESSLINE *outline = blobs[first]->outlines;
207  if (!outline) {
208  return;
209  }
210 
211  for (int x = first; x < last; ++x) {
212  SEAM *seam = seams[x];
213  if (x - seam->widthn_ >= first && x + seam->widthp_ < last) {
214  seam->Hide();
215  }
216  while (outline->next) {
217  outline = outline->next;
218  }
219  outline->next = blobs[x + 1]->outlines;
220  }
221 }
222 
223 // Hides the seam so the outlines appear not to be cut by it.
224 void SEAM::Hide() const {
225  for (int s = 0; s < num_splits_; ++s) {
226  splits_[s].Hide();
227  }
228 }
229 
230 // Undoes hide, so the outlines are cut by the seam.
231 void SEAM::Reveal() const {
232  for (int s = 0; s < num_splits_; ++s) {
233  splits_[s].Reveal();
234  }
235 }
236 
237 // Computes and returns, but does not set, the full priority of *this SEAM.
238 float SEAM::FullPriority(int xmin, int xmax, double overlap_knob, int centered_maxwidth,
239  double center_knob, double width_change_knob) const {
240  if (num_splits_ == 0) {
241  return 0.0f;
242  }
243  for (int s = 1; s < num_splits_; ++s) {
244  splits_[s].SplitOutline();
245  }
246  float full_priority =
247  priority_ + splits_[0].FullPriority(xmin, xmax, overlap_knob, centered_maxwidth, center_knob,
248  width_change_knob);
249  for (int s = num_splits_ - 1; s >= 1; --s) {
250  splits_[s].UnsplitOutlines();
251  }
252  return full_priority;
253 }
254 
262 void start_seam_list(TWERD *word, std::vector<SEAM *> *seam_array) {
263  seam_array->clear();
264  TPOINT location;
265 
266  for (unsigned b = 1; b < word->NumBlobs(); ++b) {
267  TBOX bbox = word->blobs[b - 1]->bounding_box();
268  TBOX nbox = word->blobs[b]->bounding_box();
269  location.x = (bbox.right() + nbox.left()) / 2;
270  location.y = (bbox.bottom() + bbox.top() + nbox.bottom() + nbox.top()) / 4;
271  seam_array->push_back(new SEAM(0.0f, location));
272  }
273 }
274 
275 } // namespace tesseract
const std::vector< std::string > split(const std::string &s, char c)
Definition: helpers.h:41
LIST last(LIST var_list)
Definition: oldlist.cpp:153
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
void divide_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob, const TPOINT &location)
Definition: blobs.cpp:970
void start_seam_list(TWERD *word, std::vector< SEAM * > *seam_array)
Definition: seam.cpp:262
TDimension x
Definition: blobs.h:89
TDimension y
Definition: blobs.h:90
TESSLINE * next
Definition: blobs.h:288
void ComputeBoundingBoxes()
Definition: blobs.cpp:445
void EliminateDuplicateOutlines()
Definition: blobs.cpp:480
void CorrectBlobOrder(TBLOB *next)
Definition: blobs.cpp:500
TESSLINE * outlines
Definition: blobs.h:404
std::vector< TBLOB * > blobs
Definition: blobs.h:462
unsigned NumBlobs() const
Definition: blobs.h:449
TDimension left() const
Definition: rect.h:82
TDimension top() const
Definition: rect.h:68
TDimension right() const
Definition: rect.h:89
TDimension bottom() const
Definition: rect.h:75
void ApplySeam(bool italic_blob, TBLOB *blob, TBLOB *other_blob) const
Definition: seam.cpp:106
float FullPriority(int xmin, int xmax, double overlap_knob, int centered_maxwidth, double center_knob, double width_change_knob) const
Definition: seam.cpp:238
TBOX bounding_box() const
Definition: seam.cpp:33
void UndoSeam(TBLOB *blob, TBLOB *other_blob) const
Definition: seam.cpp:122
static void JoinPieces(const std::vector< SEAM * > &seams, const std::vector< TBLOB * > &blobs, int first, int last)
Definition: seam.cpp:204
static void PrintSeams(const char *label, const std::vector< SEAM * > &seams)
Definition: seam.cpp:158
bool FindBlobWidth(const std::vector< TBLOB * > &blobs, int index, bool modify)
Definition: seam.cpp:74
bool PrepareToInsertSeam(const std::vector< SEAM * > &seams, const std::vector< TBLOB * > &blobs, int insert_index, bool modify)
Definition: seam.cpp:54
void Mark(ScrollView *window) const
Definition: seam.cpp:171
bool IsHealthy(const TBLOB &blob, int min_points, int min_area) const
Definition: seam.cpp:44
void Reveal() const
Definition: seam.cpp:231
void Hide() const
Definition: seam.cpp:224
static void BreakPieces(const std::vector< SEAM * > &seams, const std::vector< TBLOB * > &blobs, int first, int last)
Definition: seam.cpp:181
void Print(const char *label) const
Definition: seam.cpp:144
TBOX bounding_box() const
Definition: split.h:38
void SplitOutline() const
Definition: split.cpp:254
void UnsplitOutlines() const
Definition: split.cpp:295
void Mark(ScrollView *window) const
Definition: split.cpp:224
void UnsplitOutlineList(TBLOB *blob) const
Definition: split.cpp:279
void Hide() const
Definition: split.cpp:44
float FullPriority(int xmin, int xmax, double overlap_knob, int centered_maxwidth, double center_knob, double width_change_knob) const
Definition: split.cpp:74
bool IsHealthy(const TBLOB &blob, int min_points, int min_area) const
Definition: split.cpp:114
void SplitOutlineList(TESSLINE *outlines) const
Definition: split.cpp:233
void Print() const
Definition: split.cpp:218
void Reveal() const
Definition: split.cpp:58