tesseract  5.0.0
tesseract::DocumentData Class Reference

#include <imagedata.h>

Public Member Functions

TESS_API DocumentData (const std::string &name)
 
TESS_API ~DocumentData ()
 
TESS_API bool LoadDocument (const char *filename, int start_page, int64_t max_memory, FileReader reader)
 
void SetDocument (const char *filename, int64_t max_memory, FileReader reader)
 
TESS_API bool SaveDocument (const char *filename, FileWriter writer)
 
TESS_API void AddPageToDocument (ImageData *page)
 
const std::string & document_name () const
 
int NumPages () const
 
size_t PagesSize () const
 
int64_t memory_used () const
 
void LoadPageInBackground (int index)
 
TESS_API const ImageData * GetPage (int index)
 
bool IsPageAvailable (int index, ImageData **page)
 
ImageData * TakePage (int index)
 
bool IsCached () const
 
int64_t UnCache ()
 
void Shuffle ()
 

Detailed Description

Definition at line 169 of file imagedata.h.

Constructor & Destructor Documentation

◆ DocumentData()

tesseract::DocumentData::DocumentData ( const std::string &  name)
explicit

Definition at line 381 of file imagedata.cpp.

382  : document_name_(name),
383  pages_offset_(-1),
384  total_pages_(-1),
385  memory_used_(0),
386  max_memory_(0),
387  reader_(nullptr) {}

◆ ~DocumentData()

tesseract::DocumentData::~DocumentData ( )

Definition at line 389 of file imagedata.cpp.

389  {
390  if (thread.joinable()) {
391  thread.join();
392  }
393  std::lock_guard<std::mutex> lock_p(pages_mutex_);
394  std::lock_guard<std::mutex> lock_g(general_mutex_);
395  for (auto data : pages_) {
396  delete data;
397  }
398 }

Member Function Documentation

◆ AddPageToDocument()

void tesseract::DocumentData::AddPageToDocument ( ImageData *  page)

Definition at line 433 of file imagedata.cpp.

433  {
434  std::lock_guard<std::mutex> lock(pages_mutex_);
435  pages_.push_back(page);
436  set_memory_used(memory_used() + page->MemoryUsed());
437 }
int64_t memory_used() const
Definition: imagedata.h:201

◆ document_name()

const std::string& tesseract::DocumentData::document_name ( ) const
inline

Definition at line 190 of file imagedata.h.

190  {
191  std::lock_guard<std::mutex> lock(general_mutex_);
192  return document_name_;
193  }

◆ GetPage()

const ImageData * tesseract::DocumentData::GetPage ( int  index)

Definition at line 467 of file imagedata.cpp.

467  {
468  ImageData *page = nullptr;
469  while (!IsPageAvailable(index, &page)) {
470  // If there is no background load scheduled, schedule one now.
471  pages_mutex_.lock();
472  bool needs_loading = pages_offset_ != index;
473  pages_mutex_.unlock();
474  if (needs_loading) {
475  LoadPageInBackground(index);
476  }
477  // We can't directly load the page, or the background load will delete it
478  // while the caller is using it, so give it a chance to work.
479  std::this_thread::yield();
480  }
481  return page;
482 }
bool IsPageAvailable(int index, ImageData **page)
Definition: imagedata.cpp:487
void LoadPageInBackground(int index)
Definition: imagedata.cpp:441

◆ IsCached()

bool tesseract::DocumentData::IsCached ( ) const
inline

Definition at line 234 of file imagedata.h.

234  {
235  return NumPages() >= 0;
236  }
int NumPages() const
Definition: imagedata.h:194

◆ IsPageAvailable()

bool tesseract::DocumentData::IsPageAvailable ( int  index,
ImageData **  page 
)

Definition at line 487 of file imagedata.cpp.

487  {
488  std::lock_guard<std::mutex> lock(pages_mutex_);
489  int num_pages = NumPages();
490  if (num_pages == 0 || index < 0) {
491  *page = nullptr; // Empty Document.
492  return true;
493  }
494  if (num_pages > 0) {
495  index = Modulo(index, num_pages);
496  if (pages_offset_ <= index &&
497  static_cast<unsigned>(index) < pages_offset_ + pages_.size()) {
498  *page = pages_[index - pages_offset_]; // Page is available already.
499  return true;
500  }
501  }
502  return false;
503 }
int Modulo(int a, int b)
Definition: helpers.h:158

◆ LoadDocument()

bool tesseract::DocumentData::LoadDocument ( const char *  filename,
int  start_page,
int64_t  max_memory,
FileReader  reader 
)

Definition at line 402 of file imagedata.cpp.

403  {
404  SetDocument(filename, max_memory, reader);
405  pages_offset_ = start_page;
406  return ReCachePages();
407 }
void SetDocument(const char *filename, int64_t max_memory, FileReader reader)
Definition: imagedata.cpp:410

◆ LoadPageInBackground()

void tesseract::DocumentData::LoadPageInBackground ( int  index)

Definition at line 441 of file imagedata.cpp.

441  {
442  ImageData *page = nullptr;
443  if (IsPageAvailable(index, &page)) {
444  return;
445  }
446  {
447  std::lock_guard<std::mutex> lock(pages_mutex_);
448  if (pages_offset_ == index) {
449  return;
450  }
451  pages_offset_ = index;
452  for (auto page : pages_) {
453  delete page;
454  }
455  pages_.clear();
456  }
457  if (thread.joinable()) {
458  thread.join();
459  }
460  // Don't run next statement asynchronously because that would
461  // create too many threads on Linux (see issue #3111).
462  ReCachePages();
463 }

◆ memory_used()

int64_t tesseract::DocumentData::memory_used ( ) const
inline

Definition at line 201 of file imagedata.h.

201  {
202  std::lock_guard<std::mutex> lock(general_mutex_);
203  return memory_used_;
204  }

◆ NumPages()

int tesseract::DocumentData::NumPages ( ) const
inline

Definition at line 194 of file imagedata.h.

194  {
195  std::lock_guard<std::mutex> lock(general_mutex_);
196  return total_pages_;
197  }

◆ PagesSize()

size_t tesseract::DocumentData::PagesSize ( ) const
inline

Definition at line 198 of file imagedata.h.

198  {
199  return pages_.size();
200  }

◆ SaveDocument()

bool tesseract::DocumentData::SaveDocument ( const char *  filename,
FileWriter  writer 
)

Definition at line 421 of file imagedata.cpp.

421  {
422  std::lock_guard<std::mutex> lock(pages_mutex_);
423  TFile fp;
424  fp.OpenWrite(nullptr);
425  if (!fp.Serialize(pages_) || !fp.CloseWrite(filename, writer)) {
426  tprintf("Serialize failed: %s\n", filename);
427  return false;
428  }
429  return true;
430 }
void tprintf(const char *format,...)
Definition: tprintf.cpp:41

◆ SetDocument()

void tesseract::DocumentData::SetDocument ( const char *  filename,
int64_t  max_memory,
FileReader  reader 
)

Definition at line 410 of file imagedata.cpp.

411  {
412  std::lock_guard<std::mutex> lock_p(pages_mutex_);
413  std::lock_guard<std::mutex> lock(general_mutex_);
414  document_name_ = filename;
415  pages_offset_ = -1;
416  max_memory_ = max_memory;
417  reader_ = reader;
418 }

◆ Shuffle()

void tesseract::DocumentData::Shuffle ( )

Definition at line 523 of file imagedata.cpp.

523  {
524  TRand random;
525  // Different documents get shuffled differently, but the same for the same
526  // name.
527  random.set_seed(document_name_.c_str());
528  int num_pages = pages_.size();
529  // Execute one random swap for each page in the document.
530  for (int i = 0; i < num_pages; ++i) {
531  int src = random.IntRand() % num_pages;
532  int dest = random.IntRand() % num_pages;
533  std::swap(pages_[src], pages_[dest]);
534  }
535 }

◆ TakePage()

ImageData* tesseract::DocumentData::TakePage ( int  index)
inline

Definition at line 226 of file imagedata.h.

226  {
227  std::lock_guard<std::mutex> lock(pages_mutex_);
228  ImageData *page = pages_[index];
229  pages_[index] = nullptr;
230  return page;
231  }

◆ UnCache()

int64_t tesseract::DocumentData::UnCache ( )

Definition at line 507 of file imagedata.cpp.

507  {
508  std::lock_guard<std::mutex> lock(pages_mutex_);
509  int64_t memory_saved = memory_used();
510  for (auto page : pages_) {
511  delete page;
512  }
513  pages_.clear();
514  pages_offset_ = -1;
515  set_total_pages(-1);
516  set_memory_used(0);
517  tprintf("Unloaded document %s, saving %" PRId64 " memory\n",
518  document_name_.c_str(), memory_saved);
519  return memory_saved;
520 }

The documentation for this class was generated from the following files: