tesseract  5.0.0
devanagari_processing.h
Go to the documentation of this file.
1 // Copyright 2008 Google Inc. All Rights Reserved.
2 // Author: shobhitsaxena@google.com (Shobhit Saxena)
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 // http://www.apache.org/licenses/LICENSE-2.0
7 // Unless required by applicable law or agreed to in writing, software
8 // distributed under the License is distributed on an "AS IS" BASIS,
9 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 // See the License for the specific language governing permissions and
11 // limitations under the License.
12 
13 #ifndef TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
14 #define TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
15 
16 #include <allheaders.h>
17 #include "ocrblock.h"
18 #include "params.h"
19 
20 struct Pix;
21 struct Box;
22 struct Boxa;
23 
24 namespace tesseract {
25 
27 
29 
30 class TBOX;
31 class DebugPixa;
32 
34 public:
36  hist_ = nullptr;
37  length_ = 0;
38  }
39 
41  Clear();
42  }
43 
44  void Clear() {
45  delete[] hist_;
46  length_ = 0;
47  }
48 
49  int *hist() const {
50  return hist_;
51  }
52 
53  int length() const {
54  return length_;
55  }
56 
57  // Methods to construct histograms from images. These clear any existing data.
60 
61  // This method returns the global maximum for the histogram. The frequency of
62  // the global maximum is returned in count, if specified.
63  int GetHistogramMaximum(int *count) const;
64 
65 private:
66  int *hist_;
67  int length_;
68 };
69 
71 public:
73  NO_SPLIT = 0, // No splitting is performed for the phase.
74  MINIMAL_SPLIT, // Blobs are split minimally.
75  MAXIMAL_SPLIT // Blobs are split maximally.
76  };
77 
79  virtual ~ShiroRekhaSplitter();
80 
81  // Top-level method to perform splitting based on current settings.
82  // Returns true if a split was actually performed.
83  // If split_for_pageseg is true, the pageseg_split_strategy_ is used for
84  // splitting. If false, the ocr_split_strategy_ is used.
85  bool Split(bool split_for_pageseg, DebugPixa *pixa_debug);
86 
87  // Clears the memory held by this object.
88  void Clear();
89 
90  // Refreshes the words in the segmentation block list by using blobs in the
91  // input blob list.
92  // The segmentation block list must be set.
93  void RefreshSegmentationWithNewBlobs(C_BLOB_LIST *new_blobs);
94 
95  // Returns true if the split strategies for pageseg and ocr are different.
97  return pageseg_split_strategy_ != ocr_split_strategy_;
98  }
99 
100  // This only keeps a copy of the block list pointer. At split call, the list
101  // object should still be alive. This block list is used as a golden
102  // segmentation when performing splitting.
103  void set_segmentation_block_list(BLOCK_LIST *block_list) {
104  segmentation_block_list_ = block_list;
105  }
106 
107  static const int kUnspecifiedXheight = -1;
108 
109  void set_global_xheight(int xheight) {
110  global_xheight_ = xheight;
111  }
112 
113  void set_perform_close(bool perform) {
114  perform_close_ = perform;
115  }
116 
117  // Returns the image obtained from shiro-rekha splitting. The returned object
118  // is owned by this class. Callers may want to clone the returned pix to keep
119  // it alive beyond the life of the ShiroRekhaSplitter object.
121  return splitted_image_;
122  }
123 
124  // On setting the input image, a clone of it is owned by this class.
125  void set_orig_pix(Image pix);
126 
127  // Returns the input image provided to the object. This object is owned by
128  // this class. Callers may want to clone the returned pix to work with it.
130  return orig_pix_;
131  }
132 
134  return ocr_split_strategy_;
135  }
136 
138  ocr_split_strategy_ = strategy;
139  }
140 
142  return pageseg_split_strategy_;
143  }
144 
146  pageseg_split_strategy_ = strategy;
147  }
148 
149  BLOCK_LIST *segmentation_block_list() {
150  return segmentation_block_list_;
151  }
152 
153  // This method returns the computed mode-height of blobs in the pix.
154  // It also prunes very small blobs from calculation. Could be used to provide
155  // a global xheight estimate for images which have the same point-size text.
156  static int GetModeHeight(Image pix);
157 
158 private:
159  // Method to perform a close operation on the input image. The xheight
160  // estimate decides the size of sel used.
161  static void PerformClose(Image pix, int xheight_estimate);
162 
163  // This method resolves the cc bbox to a particular row and returns the row's
164  // xheight. This uses segmentation_block_list_ if available, else just returns
165  // the global_xheight_ estimate currently set in the object.
166  int GetXheightForCC(Box *cc_bbox);
167 
168  // Returns a list of regions (boxes) which should be cleared in the original
169  // image so as to perform shiro-rekha splitting. Pix is assumed to carry at
170  // most one word. Xheight measure could be the global estimate, the row
171  // estimate, or unspecified. If unspecified, over-splitting may occur, since a
172  // conservative estimate of stroke width along with an associated multiplier
173  // is used in its place. It is advisable to have a specified xheight when
174  // splitting for classification/training.
175  void SplitWordShiroRekha(SplitStrategy split_strategy, Image pix, int xheight, int word_left,
176  int word_top, Boxa *regions_to_clear);
177 
178  // Returns a new box object for the corresponding TBOX, based on the original
179  // image's coordinate system.
180  Box *GetBoxForTBOX(const TBOX &tbox) const;
181 
182  // This method returns y-extents of the shiro-rekha computed from the input
183  // word image.
184  static void GetShiroRekhaYExtents(Image word_pix, int *shirorekha_top, int *shirorekha_bottom,
185  int *shirorekha_ylevel);
186 
187  Image orig_pix_; // Just a clone of the input image passed.
188  Image splitted_image_; // Image produced after the last splitting round. The
189  // object is owned by this class.
190  SplitStrategy pageseg_split_strategy_;
191  SplitStrategy ocr_split_strategy_;
192  Image debug_image_;
193  // This block list is used as a golden segmentation when performing splitting.
194  BLOCK_LIST *segmentation_block_list_;
195  int global_xheight_;
196  bool perform_close_; // Whether a morphological close operation should be
197  // performed before CCs are run through splitting.
198 };
199 
200 } // namespace tesseract.
201 
202 #endif // TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
bool devanagari_split_debugimage
BOOL_VAR_H(wordrec_display_splits)
INT_VAR_H(editor_image_xpos)
int GetHistogramMaximum(int *count) const
void ConstructHorizontalCountHist(Image pix)
void set_pageseg_split_strategy(SplitStrategy strategy)
void RefreshSegmentationWithNewBlobs(C_BLOB_LIST *new_blobs)
bool Split(bool split_for_pageseg, DebugPixa *pixa_debug)
void set_segmentation_block_list(BLOCK_LIST *block_list)
SplitStrategy pageseg_split_strategy() const
SplitStrategy ocr_split_strategy() const
void set_ocr_split_strategy(SplitStrategy strategy)