tesseract  5.0.0
tesseract::ImageThresholder Class Reference

#include <thresholder.h>

Public Member Functions

 ImageThresholder ()
 
virtual ~ImageThresholder ()
 
virtual void Clear ()
 Destroy the Pix if there is one, freeing memory. More...
 
bool IsEmpty () const
 Return true if no image has been set. More...
 
void SetImage (const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
 
void SetRectangle (int left, int top, int width, int height)
 
virtual void GetImageSizes (int *left, int *top, int *width, int *height, int *imagewidth, int *imageheight)
 
bool IsColor () const
 Return true if the source image is color. More...
 
bool IsBinary () const
 Returns true if the source image is binary. More...
 
int GetScaleFactor () const
 
void SetSourceYResolution (int ppi)
 
int GetSourceYResolution () const
 
int GetScaledYResolution () const
 
void SetEstimatedResolution (int ppi)
 
int GetScaledEstimatedResolution () const
 
void SetImage (const Image pix)
 
virtual bool ThresholdToPix (Image *pix)
 Returns false on error. More...
 
virtual std::tuple< bool, Image, Image, Image > Threshold (TessBaseAPI *api, ThresholdMethod method)
 
virtual Image GetPixRectThresholds ()
 
Image GetPixRect ()
 
virtual Image GetPixRectGrey ()
 

Protected Member Functions

virtual void Init ()
 Common initialization shared between SetImage methods. More...
 
bool IsFullImage () const
 Return true if we are processing the full image. More...
 
void OtsuThresholdRectToPix (Image src_pix, Image *out_pix) const
 
void ThresholdRectToPix (Image src_pix, int num_channels, const std::vector< int > &thresholds, const std::vector< int > &hi_values, Image *pix) const
 

Protected Attributes

Image pix_
 
int image_width_
 Width of source pix_. More...
 
int image_height_
 Height of source pix_. More...
 
int pix_channels_
 Number of 8-bit channels in pix_. More...
 
int pix_wpl_
 Words per line of pix_. More...
 
int scale_
 Scale factor from original image. More...
 
int yres_
 y pixels/inch in source image. More...
 
int estimated_res_
 Resolution estimate from text size. More...
 
int rect_left_
 
int rect_top_
 
int rect_width_
 
int rect_height_
 

Detailed Description

Base class for all tesseract image thresholding classes. Specific classes can add new thresholding methods by overriding ThresholdToPix. Each instance deals with a single image, but the design is intended to be useful for multiple calls to SetRectangle and ThresholdTo* if desired.

Definition at line 45 of file thresholder.h.

Constructor & Destructor Documentation

◆ ImageThresholder()

tesseract::ImageThresholder::ImageThresholder ( )

Definition at line 36 of file thresholder.cpp.

37  : pix_(nullptr)
38  , image_width_(0)
39  , image_height_(0)
40  , pix_channels_(0)
41  , pix_wpl_(0)
42  , scale_(1)
43  , yres_(300)
44  , estimated_res_(300) {
45  SetRectangle(0, 0, 0, 0);
46 }
int pix_wpl_
Words per line of pix_.
Definition: thresholder.h:188
int estimated_res_
Resolution estimate from text size.
Definition: thresholder.h:192
void SetRectangle(int left, int top, int width, int height)
int scale_
Scale factor from original image.
Definition: thresholder.h:190
int pix_channels_
Number of 8-bit channels in pix_.
Definition: thresholder.h:187
int yres_
y pixels/inch in source image.
Definition: thresholder.h:191
int image_width_
Width of source pix_.
Definition: thresholder.h:185
int image_height_
Height of source pix_.
Definition: thresholder.h:186

◆ ~ImageThresholder()

tesseract::ImageThresholder::~ImageThresholder ( )
virtual

Definition at line 48 of file thresholder.cpp.

48  {
49  Clear();
50 }
virtual void Clear()
Destroy the Pix if there is one, freeing memory.
Definition: thresholder.cpp:53

Member Function Documentation

◆ Clear()

void tesseract::ImageThresholder::Clear ( )
virtual

Destroy the Pix if there is one, freeing memory.

Definition at line 53 of file thresholder.cpp.

53  {
54  pix_.destroy();
55 }
void destroy()
Definition: image.cpp:32

◆ GetImageSizes()

void tesseract::ImageThresholder::GetImageSizes ( int *  left,
int *  top,
int *  width,
int *  height,
int *  imagewidth,
int *  imageheight 
)
virtual

Get enough parameters to be able to rebuild bounding boxes in the original image (not just within the rectangle). Left and top are enough with top-down coordinates, but the height of the rectangle and the image are needed for bottom-up.

Definition at line 142 of file thresholder.cpp.

143  {
144  *left = rect_left_;
145  *top = rect_top_;
146  *width = rect_width_;
147  *height = rect_height_;
148  *imagewidth = image_width_;
149  *imageheight = image_height_;
150 }

◆ GetPixRect()

Image tesseract::ImageThresholder::GetPixRect ( )

Get a clone/copy of the source image rectangle. The returned Pix must be pixDestroyed. This function will be used in the future by the page layout analysis, and the layout analysis that uses it will only be available with Leptonica, so there is no raw equivalent.

Definition at line 339 of file thresholder.cpp.

339  {
340  if (IsFullImage()) {
341  // Just clone the whole thing.
342  return pix_.clone();
343  } else {
344  // Crop to the given rectangle.
345  Box *box = boxCreate(rect_left_, rect_top_, rect_width_, rect_height_);
346  Image cropped = pixClipRectangle(pix_, box, nullptr);
347  boxDestroy(&box);
348  return cropped;
349  }
350 }
bool IsFullImage() const
Return true if we are processing the full image.
Definition: thresholder.h:165
Image clone() const
Definition: image.cpp:24

◆ GetPixRectGrey()

Image tesseract::ImageThresholder::GetPixRectGrey ( )
virtual

Definition at line 356 of file thresholder.cpp.

356  {
357  auto pix = GetPixRect(); // May have to be reduced to grey.
358  int depth = pixGetDepth(pix);
359  if (depth != 8) {
360  if (depth == 24) {
361  auto tmp = pixConvert24To32(pix);
362  pix.destroy();
363  pix = tmp;
364  }
365  auto result = pixConvertTo8(pix, false);
366  pix.destroy();
367  return result;
368  }
369  return pix;
370 }

◆ GetPixRectThresholds()

Image tesseract::ImageThresholder::GetPixRectThresholds ( )
virtual

Definition at line 312 of file thresholder.cpp.

312  {
313  if (IsBinary()) {
314  return nullptr;
315  }
316  Image pix_grey = GetPixRectGrey();
317  int width = pixGetWidth(pix_grey);
318  int height = pixGetHeight(pix_grey);
319  std::vector<int> thresholds;
320  std::vector<int> hi_values;
321  OtsuThreshold(pix_grey, 0, 0, width, height, thresholds, hi_values);
322  pix_grey.destroy();
323  Image pix_thresholds = pixCreate(width, height, 8);
324  int threshold = thresholds[0] > 0 ? thresholds[0] : 128;
325  pixSetAllArbitrary(pix_thresholds, threshold);
326  return pix_thresholds;
327 }
int OtsuThreshold(Image src_pix, int left, int top, int width, int height, std::vector< int > &thresholds, std::vector< int > &hi_values)
Definition: otsuthr.cpp:38
virtual Image GetPixRectGrey()
bool IsBinary() const
Returns true if the source image is binary.
Definition: thresholder.h:84

◆ GetScaledEstimatedResolution()

int tesseract::ImageThresholder::GetScaledEstimatedResolution ( ) const
inline

Definition at line 115 of file thresholder.h.

115  {
116  return scale_ * estimated_res_;
117  }

◆ GetScaledYResolution()

int tesseract::ImageThresholder::GetScaledYResolution ( ) const
inline

Definition at line 102 of file thresholder.h.

102  {
103  return scale_ * yres_;
104  }

◆ GetScaleFactor()

int tesseract::ImageThresholder::GetScaleFactor ( ) const
inline

Definition at line 88 of file thresholder.h.

88  {
89  return scale_;
90  }

◆ GetSourceYResolution()

int tesseract::ImageThresholder::GetSourceYResolution ( ) const
inline

Definition at line 99 of file thresholder.h.

99  {
100  return yres_;
101  }

◆ Init()

void tesseract::ImageThresholder::Init ( )
protected virtual

Common initialization shared between SetImage methods.

Definition at line 330 of file thresholder.cpp.

330  {
331  SetRectangle(0, 0, image_width_, image_height_);
332 }

◆ IsBinary()

bool tesseract::ImageThresholder::IsBinary ( ) const
inline

Returns true if the source image is binary.

Definition at line 84 of file thresholder.h.

84  {
85  return pix_channels_ == 0;
86  }

◆ IsColor()

bool tesseract::ImageThresholder::IsColor ( ) const
inline

Return true if the source image is color.

Definition at line 79 of file thresholder.h.

79  {
80  return pix_channels_ >= 3;
81  }

◆ IsEmpty()

bool tesseract::ImageThresholder::IsEmpty ( ) const

Return true if no image has been set.

Definition at line 58 of file thresholder.cpp.

58  {
59  return pix_ == nullptr;
60 }

◆ IsFullImage()

bool tesseract::ImageThresholder::IsFullImage ( ) const
inline protected

Return true if we are processing the full image.

Definition at line 165 of file thresholder.h.

165  {
166  return rect_left_ == 0 && rect_top_ == 0 && rect_width_ == image_width_ &&
167  rect_height_ == image_height_;
168  }

◆ OtsuThresholdRectToPix()

void tesseract::ImageThresholder::OtsuThresholdRectToPix ( Image  src_pix,
Image *  out_pix 
) const
protected

Definition at line 373 of file thresholder.cpp.

373  {
374  std::vector<int> thresholds;
375  std::vector<int> hi_values;
376 
377  int num_channels = OtsuThreshold(src_pix, rect_left_, rect_top_, rect_width_, rect_height_,
378  thresholds, hi_values);
379  // only use opencl if compiled w/ OpenCL and selected device is opencl
380 #ifdef USE_OPENCL
381  OpenclDevice od;
382  if (num_channels == 4 && od.selectedDeviceIsOpenCL() && rect_top_ == 0 && rect_left_ == 0) {
383  od.ThresholdRectToPixOCL((unsigned char *)pixGetData(src_pix), num_channels,
384  pixGetWpl(src_pix) * 4, &thresholds[0], &hi_values[0], out_pix /*pix_OCL*/,
385  rect_height_, rect_width_, rect_top_, rect_left_);
386  } else {
387 #endif
388  ThresholdRectToPix(src_pix, num_channels, thresholds, hi_values, out_pix);
389 #ifdef USE_OPENCL
390  }
391 #endif
392 }
void ThresholdRectToPix(Image src_pix, int num_channels, const std::vector< int > &thresholds, const std::vector< int > &hi_values, Image *pix) const

◆ SetEstimatedResolution()

void tesseract::ImageThresholder::SetEstimatedResolution ( int  ppi)
inline

Definition at line 110 of file thresholder.h.

110  {
111  estimated_res_ = ppi;
112  }

◆ SetImage() [1/2]

void tesseract::ImageThresholder::SetImage ( const Image  pix)

Pix vs raw, which to use? Pix is the preferred input for efficiency, since raw buffers are copied. SetImage for Pix clones its input, so the source pix may be pixDestroyed immediately after, but may not go away until after the Thresholder has finished with it.

Definition at line 157 of file thresholder.cpp.

157  {
158  if (pix_ != nullptr) {
159  pix_.destroy();
160  }
161  Image src = pix;
162  int depth;
163  pixGetDimensions(src, &image_width_, &image_height_, &depth);
164  // Convert the image as necessary so it is one of binary, plain RGB, or
165  // 8 bit with no colormap. Guarantee that we always end up with our own copy,
166  // not just a clone of the input.
167  if (pixGetColormap(src)) {
168  Image tmp = pixRemoveColormap(src, REMOVE_CMAP_BASED_ON_SRC);
169  depth = pixGetDepth(tmp);
170  if (depth > 1 && depth < 8) {
171  pix_ = pixConvertTo8(tmp, false);
172  tmp.destroy();
173  } else {
174  pix_ = tmp;
175  }
176  } else if (depth > 1 && depth < 8) {
177  pix_ = pixConvertTo8(src, false);
178  } else {
179  pix_ = src.copy();
180  }
181  depth = pixGetDepth(pix_);
182  pix_channels_ = depth / 8;
183  pix_wpl_ = pixGetWpl(pix_);
184  scale_ = 1;
185  estimated_res_ = yres_ = pixGetYRes(pix_);
186  Init();
187 }
virtual void Init()
Common initialization shared between SetImage methods.
Image copy() const
Definition: image.cpp:28

◆ SetImage() [2/2]

void tesseract::ImageThresholder::SetImage ( const unsigned char *  imagedata,
int  width,
int  height,
int  bytes_per_pixel,
int  bytes_per_line 
)

SetImage makes a copy of all the image data, so it may be deleted immediately after this call. Greyscale of 8 and color of 24 or 32 bits per pixel may be given. Palette color images will not work properly and must be converted to 24 bit. Binary images of 1 bit per pixel may also be given but they must be byte packed with the MSB of the first byte being the first pixel, and a one pixel is WHITE. For binary images set bytes_per_pixel=0.

Definition at line 70 of file thresholder.cpp.

71  {
72  int bpp = bytes_per_pixel * 8;
73  if (bpp == 0) {
74  bpp = 1;
75  }
76  Image pix = pixCreate(width, height, bpp == 24 ? 32 : bpp);
77  l_uint32 *data = pixGetData(pix);
78  int wpl = pixGetWpl(pix);
79  switch (bpp) {
80  case 1:
81  for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) {
82  for (int x = 0; x < width; ++x) {
83  if (imagedata[x / 8] & (0x80 >> (x % 8))) {
84  CLEAR_DATA_BIT(data, x);
85  } else {
86  SET_DATA_BIT(data, x);
87  }
88  }
89  }
90  break;
91 
92  case 8:
93  // Greyscale just copies the bytes in the right order.
94  for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) {
95  for (int x = 0; x < width; ++x) {
96  SET_DATA_BYTE(data, x, imagedata[x]);
97  }
98  }
99  break;
100 
101  case 24:
102  // Put the colors in the correct places in the line buffer.
103  for (int y = 0; y < height; ++y, imagedata += bytes_per_line) {
104  for (int x = 0; x < width; ++x, ++data) {
105  SET_DATA_BYTE(data, COLOR_RED, imagedata[3 * x]);
106  SET_DATA_BYTE(data, COLOR_GREEN, imagedata[3 * x + 1]);
107  SET_DATA_BYTE(data, COLOR_BLUE, imagedata[3 * x + 2]);
108  }
109  }
110  break;
111 
112  case 32:
113  // Maintain byte order consistency across different endianness.
114  for (int y = 0; y < height; ++y, imagedata += bytes_per_line, data += wpl) {
115  for (int x = 0; x < width; ++x) {
116  data[x] = (imagedata[x * 4] << 24) | (imagedata[x * 4 + 1] << 16) |
117  (imagedata[x * 4 + 2] << 8) | imagedata[x * 4 + 3];
118  }
119  }
120  break;
121 
122  default:
123  tprintf("Cannot convert RAW image to Pix with bpp = %d\n", bpp);
124  }
125  SetImage(pix);
126  pix.destroy();
127 }
void tprintf(const char *format,...)
Definition: tprintf.cpp:41
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: thresholder.cpp:70

◆ SetRectangle()

void tesseract::ImageThresholder::SetRectangle ( int  left,
int  top,
int  width,
int  height 
)

Store the coordinates of the rectangle to process for later use. Doesn't actually do any thresholding.

Definition at line 131 of file thresholder.cpp.

131  {
132  rect_left_ = left;
133  rect_top_ = top;
134  rect_width_ = width;
135  rect_height_ = height;
136 }

◆ SetSourceYResolution()

void tesseract::ImageThresholder::SetSourceYResolution ( int  ppi)
inline

Definition at line 95 of file thresholder.h.

95  {
96  yres_ = ppi;
97  estimated_res_ = ppi;
98  }

◆ Threshold()

std::tuple< bool, Image, Image, Image > tesseract::ImageThresholder::Threshold ( TessBaseAPI *  api,
ThresholdMethod  method 
)
virtual

Definition at line 189 of file thresholder.cpp.

191  {
192  Image pix_binary = nullptr;
193  Image pix_thresholds = nullptr;
194 
195  if (pix_channels_ == 0) {
196  // We have a binary image, but it still has to be copied, as this API
197  // allows the caller to modify the output.
198  Image original = GetPixRect();
199  pix_binary = original.copy();
200  original.destroy();
201  return std::make_tuple(true, nullptr, pix_binary, nullptr);
202  }
203 
204  auto pix_grey = GetPixRectGrey();
205 
206  int r;
207 
208  l_int32 pix_w, pix_h;
209  pixGetDimensions(pix_grey, &pix_w, &pix_h, nullptr);
210 
211  bool thresholding_debug;
212  api->GetBoolVariable("thresholding_debug", &thresholding_debug);
213  if (thresholding_debug) {
214  tprintf("\nimage width: %d height: %d ppi: %d\n", pix_w, pix_h, yres_);
215  }
216 
217  if (method == ThresholdMethod::Sauvola) {
218  int window_size;
219  double window_size_factor;
220  api->GetDoubleVariable("thresholding_window_size", &window_size_factor);
221  window_size = window_size_factor * yres_;
222  window_size = std::max(7, window_size);
223  window_size = std::min(pix_w < pix_h ? pix_w - 3 : pix_h - 3, window_size);
224  int half_window_size = window_size / 2;
225 
226  // factor for image division into tiles; >= 1
227  l_int32 nx, ny;
228  // tiles size will be approx. 250 x 250 pixels
229  nx = std::max(1, (pix_w + 125) / 250);
230  ny = std::max(1, (pix_h + 125) / 250);
231  auto xrat = pix_w / nx;
232  auto yrat = pix_h / ny;
233  if (xrat < half_window_size + 2) {
234  nx = pix_w / (half_window_size + 2);
235  }
236  if (yrat < half_window_size + 2) {
237  ny = pix_h / (half_window_size + 2);
238  }
239 
240  double kfactor;
241  api->GetDoubleVariable("thresholding_kfactor", &kfactor);
242  kfactor = std::max(0.0, kfactor);
243 
244  if (thresholding_debug) {
245  tprintf("window size: %d kfactor: %.3f nx:%d ny: %d\n", window_size, kfactor, nx, ny);
246  }
247 
248  r = pixSauvolaBinarizeTiled(pix_grey, half_window_size, kfactor, nx, ny,
249  (PIX**)pix_thresholds,
250  (PIX**)pix_binary);
251  } else { // if (method == ThresholdMethod::LeptonicaOtsu)
252  int tile_size;
253  double tile_size_factor;
254  api->GetDoubleVariable("thresholding_tile_size", &tile_size_factor);
255  tile_size = tile_size_factor * yres_;
256  tile_size = std::max(16, tile_size);
257 
258  int smooth_size;
259  double smooth_size_factor;
260  api->GetDoubleVariable("thresholding_smooth_kernel_size",
261  &smooth_size_factor);
262  smooth_size_factor = std::max(0.0, smooth_size_factor);
263  smooth_size = smooth_size_factor * yres_;
264  int half_smooth_size = smooth_size / 2;
265 
266  double score_fraction;
267  api->GetDoubleVariable("thresholding_score_fraction", &score_fraction);
268 
269  if (thresholding_debug) {
270  tprintf("tile size: %d smooth_size: %d score_fraction: %.2f\n", tile_size, smooth_size, score_fraction);
271  }
272 
273  r = pixOtsuAdaptiveThreshold(pix_grey, tile_size, tile_size,
274  half_smooth_size, half_smooth_size,
275  score_fraction,
276  (PIX**)pix_thresholds,
277  (PIX**)pix_binary);
278  }
279 
280  bool ok = (r == 0);
281  return std::make_tuple(ok, pix_grey, pix_binary, pix_thresholds);
282 }

◆ ThresholdRectToPix()

void tesseract::ImageThresholder::ThresholdRectToPix ( Image  src_pix,
int  num_channels,
const std::vector< int > &  thresholds,
const std::vector< int > &  hi_values,
Image *  pix 
) const
protected

Threshold the rectangle, taking everything except the src_pix from the class, using thresholds/hi_values to the output pix. NOTE that num_channels is the size of the thresholds and hi_values arrays and also the bytes per pixel in src_pix.

Definition at line 398 of file thresholder.cpp.

399  {
400  *pix = pixCreate(rect_width_, rect_height_, 1);
401  uint32_t *pixdata = pixGetData(*pix);
402  int wpl = pixGetWpl(*pix);
403  int src_wpl = pixGetWpl(src_pix);
404  uint32_t *srcdata = pixGetData(src_pix);
405  pixSetXRes(*pix, pixGetXRes(src_pix));
406  pixSetYRes(*pix, pixGetYRes(src_pix));
407  for (int y = 0; y < rect_height_; ++y) {
408  const uint32_t *linedata = srcdata + (y + rect_top_) * src_wpl;
409  uint32_t *pixline = pixdata + y * wpl;
410  for (int x = 0; x < rect_width_; ++x) {
411  bool white_result = true;
412  for (int ch = 0; ch < num_channels; ++ch) {
413  int pixel = GET_DATA_BYTE(linedata, (x + rect_left_) * num_channels + ch);
414  if (hi_values[ch] >= 0 && (pixel > thresholds[ch]) == (hi_values[ch] == 0)) {
415  white_result = false;
416  break;
417  }
418  }
419  if (white_result) {
420  CLEAR_DATA_BIT(pixline, x);
421  } else {
422  SET_DATA_BIT(pixline, x);
423  }
424  }
425  }
426 }

◆ ThresholdToPix()

bool tesseract::ImageThresholder::ThresholdToPix ( Image *  pix)
virtual

Returns false on error.

Threshold the source image as efficiently as possible to the output Pix. Creates a Pix and sets pix to point to the resulting pointer. Caller must use pixDestroy to free the created Pix. Returns false on error.

Definition at line 288 of file thresholder.cpp.

288  {
289  if (image_width_ > INT16_MAX || image_height_ > INT16_MAX) {
290  tprintf("Image too large: (%d, %d)\n", image_width_, image_height_);
291  return false;
292  }
293  if (pix_channels_ == 0) {
294  // We have a binary image, but it still has to be copied, as this API
295  // allows the caller to modify the output.
296  Image original = GetPixRect();
297  *pix = original.copy();
298  original.destroy();
299  } else {
300  OtsuThresholdRectToPix(pix_, pix);
301  }
302  return true;
303 }
void OtsuThresholdRectToPix(Image src_pix, Image *out_pix) const

Member Data Documentation

◆ estimated_res_

int tesseract::ImageThresholder::estimated_res_
protected

Resolution estimate from text size.

Definition at line 192 of file thresholder.h.

◆ image_height_

int tesseract::ImageThresholder::image_height_
protected

Height of source pix_.

Definition at line 186 of file thresholder.h.

◆ image_width_

int tesseract::ImageThresholder::image_width_
protected

Width of source pix_.

Definition at line 185 of file thresholder.h.

◆ pix_

Image tesseract::ImageThresholder::pix_
protected

Clone or other copy of the source Pix. The pix will always be PixDestroy()ed on destruction of the class.

Definition at line 183 of file thresholder.h.

◆ pix_channels_

int tesseract::ImageThresholder::pix_channels_
protected

Number of 8-bit channels in pix_.

Definition at line 187 of file thresholder.h.

◆ pix_wpl_

int tesseract::ImageThresholder::pix_wpl_
protected

Words per line of pix_.

Definition at line 188 of file thresholder.h.

◆ rect_height_

int tesseract::ImageThresholder::rect_height_
protected

Definition at line 196 of file thresholder.h.

◆ rect_left_

int tesseract::ImageThresholder::rect_left_
protected

Definition at line 193 of file thresholder.h.

◆ rect_top_

int tesseract::ImageThresholder::rect_top_
protected

Definition at line 194 of file thresholder.h.

◆ rect_width_

int tesseract::ImageThresholder::rect_width_
protected

Definition at line 195 of file thresholder.h.

◆ scale_

int tesseract::ImageThresholder::scale_
protected

Scale factor from original image.

Definition at line 190 of file thresholder.h.

◆ yres_

int tesseract::ImageThresholder::yres_
protected

y pixels/inch in source image.

Definition at line 191 of file thresholder.h.


The documentation for this class was generated from the following files: