118 beam_search.Decode(output, 3.5, -0.125, -25.0,
nullptr);
121 std::vector<int> labels, xcoords;
122 beam_search.ExtractBestPathAsLabels(&labels, &xcoords);
123 LOG(
INFO) <<
"Labels size = " << labels.size() <<
" coords " << xcoords.size() <<
"\n";
127 for (
unsigned start = 0; start < labels.size(); start = end) {
129 unsigned index = start;
130 int uni_id = INVALID_UNICHAR_ID;
132 code.Set(code.length(), labels[index++]);
136 EXPECT_NE(INVALID_UNICHAR_ID, uni_id) <<
"index=" << index <<
"/" << labels.size();
145 EXPECT_EQ(truth_utf8, decoded);
148 std::vector<int> unichar_ids;
149 std::vector<float> certainties, ratings;
150 beam_search.ExtractBestPathAsUnicharIds(
false, &
ccutil_.
unicharset, &unichar_ids, &certainties,
152 std::string u_decoded;
153 float total_rating = 0.0f;
154 for (
unsigned u = 0; u < unichar_ids.size(); ++u) {
158 if (u_decoded.size() < truth_utf8.size()) {
160 total_rating += ratings[u];
161 LOG(
INFO) << u <<
":u_id=" << unichar_ids[u] <<
"=" << str <<
", c="
162 << certainties[u] <<
", r=" << ratings[u] <<
"r_sum="
163 << total_rating <<
" @" << xcoords[u] <<
"\n";
170 EXPECT_EQ(truth_utf8, u_decoded);
173 TBOX line_box(0, 0, 100, 10);
174 for (
int i = 0; i < 2; ++i) {
175 beam_search.ExtractBestPathAsWords(line_box, 1.0f,
false, &
ccutil_.
unicharset, words);
176 std::string w_decoded;
177 for (
int w = 0; w < words->size(); ++w) {
178 const WERD_RES *word = (*words)[w];
179 if (w_decoded.size() < truth_utf8.size()) {
180 if (!w_decoded.empty() && word->word->space()) {
183 w_decoded += word->best_choice->unichar_string().c_str();
185 LOG(
INFO) <<
"Word:" << w <<
" = " << word->best_choice->unichar_string()
186 <<
", c=" << word->best_choice->certainty() <<
", r=" << word->best_choice->rating()
187 <<
", perm=" << word->best_choice->permuter() <<
"\n";
189 std::string w_trunc(w_decoded.data(), truth_utf8.size());
190 if (truth_utf8 != w_trunc) {
194 w_trunc.assign(w_decoded.data(), truth_utf8.size());
196 EXPECT_EQ(truth_utf8, w_trunc);
bool NormalizeUTF8String(UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNorm grapheme_normalize, const char *str8, std::string *normalized)
bool IsValidFirstCode(int code) const
int DecodeUnichar(const RecodedCharID &code) const
const char * id_to_unichar(UNICHAR_ID id) const