tesseract  5.0.0
otsuthr.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: otsuthr.cpp
3  * Description: Simple Otsu thresholding for binarizing images.
4  * Author: Ray Smith
5  *
6  * (C) Copyright 2008, Google Inc.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  *
17  **********************************************************************/
18 
19 #include "otsuthr.h"
20 
21 #include <allheaders.h>
22 #include <cstring>
23 #include "helpers.h"
24 #if defined(USE_OPENCL)
25 # include "openclwrapper.h" // for OpenclDevice
26 #endif
27 
28 namespace tesseract {
29 
30 // Computes the Otsu threshold(s) for the given image rectangle, making one
31 // for each channel. Each channel is always one byte per pixel.
32 // Returns an array of threshold values and an array of hi_values, such
33 // that a pixel value >threshold[channel] is considered foreground if
34 // hi_values[channel] is 0 or background if 1. A hi_value of -1 indicates
35 // that there is no apparent foreground. At least one hi_value will not be -1.
36 // The return value is the number of channels in the input image, being
37 // the size of the output thresholds and hi_values arrays.
38 int OtsuThreshold(Image src_pix, int left, int top, int width, int height, std::vector<int> &thresholds,
39  std::vector<int> &hi_values) {
40  int num_channels = pixGetDepth(src_pix) / 8;
41  // Of all channels with no good hi_value, keep the best so we can always
42  // produce at least one answer.
43  int best_hi_value = 1;
44  int best_hi_index = 0;
45  bool any_good_hivalue = false;
46  double best_hi_dist = 0.0;
47  thresholds.resize(num_channels);
48  hi_values.resize(num_channels);
49 
50  // only use opencl if compiled w/ OpenCL and selected device is opencl
51 #ifdef USE_OPENCL
52  // all of channel 0 then all of channel 1...
53  std::vector<int> histogramAllChannels(kHistogramSize * num_channels);
54 
55  // Calculate Histogram on GPU
56  OpenclDevice od;
57  if (od.selectedDeviceIsOpenCL() && (num_channels == 1 || num_channels == 4) && top == 0 &&
58  left == 0) {
59  od.HistogramRectOCL(pixGetData(src_pix), num_channels, pixGetWpl(src_pix) * 4, left, top, width,
60  height, kHistogramSize, &histogramAllChannels[0]);
61 
62  // Calculate Threshold from Histogram on cpu
63  for (int ch = 0; ch < num_channels; ++ch) {
64  thresholds[ch] = -1;
65  hi_values[ch] = -1;
66  int *histogram = &histogramAllChannels[kHistogramSize * ch];
67  int H;
68  int best_omega_0;
69  int best_t = OtsuStats(histogram, &H, &best_omega_0);
70  if (best_omega_0 == 0 || best_omega_0 == H) {
71  // This channel is empty.
72  continue;
73  }
74  // To be a convincing foreground we must have a small fraction of H
75  // or to be a convincing background we must have a large fraction of H.
76  // In between we assume this channel contains no thresholding information.
77  int hi_value = best_omega_0 < H * 0.5;
78  thresholds[ch] = best_t;
79  if (best_omega_0 > H * 0.75) {
80  any_good_hivalue = true;
81  hi_values[ch] = 0;
82  } else if (best_omega_0 < H * 0.25) {
83  any_good_hivalue = true;
84  hi_values[ch] = 1;
85  } else {
86  // In case all channels are like this, keep the best of the bad lot.
87  double hi_dist = hi_value ? (H - best_omega_0) : best_omega_0;
88  if (hi_dist > best_hi_dist) {
89  best_hi_dist = hi_dist;
90  best_hi_value = hi_value;
91  best_hi_index = ch;
92  }
93  }
94  }
95  } else {
96 #endif
97  for (int ch = 0; ch < num_channels; ++ch) {
98  thresholds[ch] = -1;
99  hi_values[ch] = -1;
100  // Compute the histogram of the image rectangle.
101  int histogram[kHistogramSize];
102  HistogramRect(src_pix, ch, left, top, width, height, histogram);
103  int H;
104  int best_omega_0;
105  int best_t = OtsuStats(histogram, &H, &best_omega_0);
106  if (best_omega_0 == 0 || best_omega_0 == H) {
107  // This channel is empty.
108  continue;
109  }
110  // To be a convincing foreground we must have a small fraction of H
111  // or to be a convincing background we must have a large fraction of H.
112  // In between we assume this channel contains no thresholding information.
113  int hi_value = best_omega_0 < H * 0.5;
114  thresholds[ch] = best_t;
115  if (best_omega_0 > H * 0.75) {
116  any_good_hivalue = true;
117  hi_values[ch] = 0;
118  } else if (best_omega_0 < H * 0.25) {
119  any_good_hivalue = true;
120  hi_values[ch] = 1;
121  } else {
122  // In case all channels are like this, keep the best of the bad lot.
123  double hi_dist = hi_value ? (H - best_omega_0) : best_omega_0;
124  if (hi_dist > best_hi_dist) {
125  best_hi_dist = hi_dist;
126  best_hi_value = hi_value;
127  best_hi_index = ch;
128  }
129  }
130  }
131 #ifdef USE_OPENCL
132  }
133 #endif // USE_OPENCL
134 
135  if (!any_good_hivalue) {
136  // Use the best of the ones that were not good enough.
137  hi_values[best_hi_index] = best_hi_value;
138  }
139  return num_channels;
140 }
141 
142 // Computes the histogram for the given image rectangle, and the given
143 // single channel. Each channel is always one byte per pixel.
144 // Histogram is always a kHistogramSize(256) element array to count
145 // occurrences of each pixel value.
146 void HistogramRect(Image src_pix, int channel, int left, int top, int width, int height,
147  int *histogram) {
148  int num_channels = pixGetDepth(src_pix) / 8;
149  channel = ClipToRange(channel, 0, num_channels - 1);
150  int bottom = top + height;
151  memset(histogram, 0, sizeof(*histogram) * kHistogramSize);
152  int src_wpl = pixGetWpl(src_pix);
153  l_uint32 *srcdata = pixGetData(src_pix);
154  for (int y = top; y < bottom; ++y) {
155  const l_uint32 *linedata = srcdata + y * src_wpl;
156  for (int x = 0; x < width; ++x) {
157  int pixel = GET_DATA_BYTE(linedata, (x + left) * num_channels + channel);
158  ++histogram[pixel];
159  }
160  }
161 }
162 
163 // Computes the Otsu threshold(s) for the given histogram.
164 // Also returns H = total count in histogram, and
165 // omega0 = count of histogram below threshold.
166 int OtsuStats(const int *histogram, int *H_out, int *omega0_out) {
167  int H = 0;
168  double mu_T = 0.0;
169  for (int i = 0; i < kHistogramSize; ++i) {
170  H += histogram[i];
171  mu_T += static_cast<double>(i) * histogram[i];
172  }
173 
174  // Now maximize sig_sq_B over t.
175  // http://www.ctie.monash.edu.au/hargreave/Cornall_Terry_328.pdf
176  int best_t = -1;
177  int omega_0, omega_1;
178  int best_omega_0 = 0;
179  double best_sig_sq_B = 0.0;
180  double mu_0, mu_1, mu_t;
181  omega_0 = 0;
182  mu_t = 0.0;
183  for (int t = 0; t < kHistogramSize - 1; ++t) {
184  omega_0 += histogram[t];
185  mu_t += t * static_cast<double>(histogram[t]);
186  if (omega_0 == 0) {
187  continue;
188  }
189  omega_1 = H - omega_0;
190  if (omega_1 == 0) {
191  break;
192  }
193  mu_0 = mu_t / omega_0;
194  mu_1 = (mu_T - mu_t) / omega_1;
195  double sig_sq_B = mu_1 - mu_0;
196  sig_sq_B *= sig_sq_B * omega_0 * omega_1;
197  if (best_t < 0 || sig_sq_B > best_sig_sq_B) {
198  best_sig_sq_B = sig_sq_B;
199  best_t = t;
200  best_omega_0 = omega_0;
201  }
202  }
203  if (H_out != nullptr) {
204  *H_out = H;
205  }
206  if (omega0_out != nullptr) {
207  *omega0_out = best_omega_0;
208  }
209  return best_t;
210 }
211 
212 } // namespace tesseract.
const int kHistogramSize
Definition: otsuthr.h:30
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:110
void HistogramRect(Image src_pix, int channel, int left, int top, int width, int height, int *histogram)
Definition: otsuthr.cpp:146
int OtsuStats(const int *histogram, int *H_out, int *omega0_out)
Definition: otsuthr.cpp:166
int OtsuThreshold(Image src_pix, int left, int top, int width, int height, std::vector< int > &thresholds, std::vector< int > &hi_values)
Definition: otsuthr.cpp:38