tesseract  5.0.0
serialis.h
Go to the documentation of this file.
1 /**********************************************************************
2  * File: serialis.h (Formerly serialmac.h)
3  * Description: Inline routines and macros for serialisation functions
4  * Author: Phil Cheatle
5  *
6  * (C) Copyright 1990, Hewlett-Packard Ltd.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  *
17  **********************************************************************/
18 
19 #ifndef SERIALIS_H
20 #define SERIALIS_H
21 
22 #include <tesseract/baseapi.h> // FileReader
23 #include <cstdint> // uint8_t
24 #include <cstdio>
25 #include <cstdlib>
26 #include <cstring>
27 #include <type_traits>
28 #include <vector> // std::vector
29 
30 namespace tesseract {
31 
/***********************************************************************
 QUOTE_IT MACRO DEFINITION
 ===========================
Replace <parm> with "<parm>". <parm> may be an arbitrary number of tokens
(a top-level comma would be taken as an argument separator, though).
The preprocessor # operator quotes the argument exactly as written: any
macros appearing inside <parm> are NOT expanded before being stringized.
***********************************************************************/

#define QUOTE_IT(parm) #parm
39 
// Yields the number of elements in a built-in array.
// The length is deduced from the array type, so the result is a compile-time
// constant and the array itself is never read. Pointers are rejected at
// compile time because only array references bind to the parameter.
template <class Element, size_t Length>
constexpr auto countof(const Element (&)[Length]) noexcept -> size_t {
  return Length;
}
45 
// Function to write a std::vector<char> to a whole file.
//   data:     the bytes to be written.
//   filename: the file to write them to.
// Returns false on failure.
using FileWriter = bool (*)(const std::vector<char> &data, const char *filename);
49 
// Reads from the file named filename into *data. The signature matches
// FileReader.
// NOTE(review): presumably this is the default reader used when a null
// FileReader is supplied, and false means failure - confirm against the
// implementation.
bool LoadDataFromFile(const char *filename, std::vector<char> *data);
// Writes data to the file named filename. The signature matches FileWriter.
// NOTE(review): presumably this is the default writer and false means
// failure - confirm against the implementation.
bool SaveDataToFile(const std::vector<char> &data, const char *filename);
54 
// Reads n objects of type T from fp into the buffer at data, as raw bytes
// via fread (no byte swapping).
// Returns true only if all n objects were read.
template <typename T>
bool DeSerialize(FILE *fp, T *data, size_t n = 1) {
  const size_t items_read = fread(data, sizeof(T), n, fp);
  return items_read == n;
}
60 
// Writes n objects of type T from the buffer at data to fp, as raw bytes
// via fwrite.
// Returns true only if all n objects were written.
template <typename T>
bool Serialize(FILE *fp, const T *data, size_t n = 1) {
  const size_t items_written = fwrite(data, sizeof(T), n, fp);
  return items_written == n;
}
66 
67 // Simple file class.
68 // Allows for portable file input from memory and from foreign file systems.
69 class TESS_API TFile {
70 public:
71  TFile();
72  ~TFile();
73 
74  // All the Open methods load the whole file into memory for reading.
75  // Opens a file with a supplied reader, or nullptr to use the default.
76  // Note that mixed read/write is not supported.
77  bool Open(const char *filename, FileReader reader);
78  // From an existing memory buffer.
79  bool Open(const char *data, size_t size);
80  // From an open file and an end offset.
81  bool Open(FILE *fp, int64_t end_offset);
82  // Sets the value of the swap flag, so that FReadEndian does the right thing.
83  void set_swap(bool value) {
84  swap_ = value;
85  }
86 
87  // Deserialize data.
88  bool DeSerializeSize(int32_t *data);
89  bool DeSerializeSkip(size_t size = 1);
90  bool DeSerialize(std::string &data);
91  bool DeSerialize(std::vector<char> &data);
92  //bool DeSerialize(std::vector<std::string> &data);
93  template <typename T>
94  bool DeSerialize(T *data, size_t count = 1) {
95  return FReadEndian(data, sizeof(T), count) == count;
96  }
97  template <typename T>
98  bool DeSerialize(std::vector<T> &data) {
99  uint32_t size;
100  if (!DeSerialize(&size)) {
101  return false;
102  } else if (size == 0) {
103  data.clear();
104  } else if (size > 50000000) {
105  // Arbitrarily limit the number of elements to protect against bad data.
106  return false;
107  } else if constexpr (std::is_same<T, std::string>::value) {
108  // Deserialize a string.
109  // TODO: optimize.
110  data.resize(size);
111  for (auto &item : data) {
112  if (!DeSerialize(item)) {
113  return false;
114  }
115  }
116  } else if constexpr (std::is_class<T>::value) {
117  // Deserialize a tesseract class.
118  // TODO: optimize.
119  data.resize(size);
120  for (auto &item : data) {
121  if (!item.DeSerialize(this)) {
122  return false;
123  }
124  }
125  } else if constexpr (std::is_pointer<T>::value) {
126  // Deserialize pointers.
127  // TODO: optimize.
128  data.resize(size);
129  for (uint32_t i = 0; i < size; i++) {
130  uint8_t non_null;
131  if (!DeSerialize(&non_null)) {
132  return false;
133  }
134  if (non_null) {
135  typedef typename std::remove_pointer<T>::type ST;
136  auto item = new ST;
137  if (!item->DeSerialize(this)) {
138  delete item;
139  return false;
140  }
141  data[i] = item;
142  }
143  }
144  } else {
145  // Deserialize a non-class.
146  // TODO: optimize.
147  data.resize(size);
148  return DeSerialize(&data[0], size);
149  }
150  return true;
151  }
152 
153  // Serialize data.
154  bool Serialize(const std::string &data);
155  bool Serialize(const std::vector<char> &data);
156  template <typename T>
157  bool Serialize(const T *data, size_t count = 1) {
158  return FWrite(data, sizeof(T), count) == count;
159  }
160  template <typename T>
161  bool Serialize(const std::vector<T> &data) {
162  // Serialize number of elements first.
163  uint32_t size = data.size();
164  if (!Serialize(&size)) {
165  return false;
166  } else if constexpr (std::is_same<T, std::string>::value) {
167  // Serialize strings.
168  for (auto string : data) {
169  if (!Serialize(string)) {
170  return false;
171  }
172  }
173  } else if constexpr (std::is_class<T>::value) {
174  // Serialize a tesseract class.
175  for (auto &item : data) {
176  if (!item.Serialize(this)) {
177  return false;
178  }
179  }
180  } else if constexpr (std::is_pointer<T>::value) {
181  // Serialize pointers.
182  for (auto &item : data) {
183  uint8_t non_null = (item != nullptr);
184  if (!Serialize(&non_null)) {
185  return false;
186  }
187  if (non_null) {
188  if (!item->Serialize(this)) {
189  return false;
190  }
191  }
192  }
193  } else if (size > 0) {
194  // Serialize a non-class.
195  return Serialize(&data[0], size);
196  }
197  return true;
198  }
199 
200  // Skip data.
201  bool Skip(size_t count);
202 
203  // Reads a line like fgets. Returns nullptr on EOF, otherwise buffer.
204  // Reads at most buffer_size bytes, including '\0' terminator, even if
205  // the line is longer. Does nothing if buffer_size <= 0.
206  char *FGets(char *buffer, int buffer_size);
207  // Replicates fread, followed by a swap of the bytes if needed, returning the
208  // number of items read. If swap_ is true then the count items will each have
209  // size bytes reversed.
210  size_t FReadEndian(void *buffer, size_t size, size_t count);
211  // Replicates fread, returning the number of items read.
212  size_t FRead(void *buffer, size_t size, size_t count);
213  // Resets the TFile as if it has been Opened, but nothing read.
214  // Only allowed while reading!
215  void Rewind();
216 
217  // Open for writing. Either supply a non-nullptr data with OpenWrite before
218  // calling FWrite, (no close required), or supply a nullptr data to OpenWrite
219  // and call CloseWrite to write to a file after the FWrites.
220  void OpenWrite(std::vector<char> *data);
221  bool CloseWrite(const char *filename, FileWriter writer);
222 
223  // Replicates fwrite, returning the number of items written.
224  // To use fprintf, use snprintf and FWrite.
225  size_t FWrite(const void *buffer, size_t size, size_t count);
226 
227 private:
228  // The buffered data from the file.
229  std::vector<char> *data_ = nullptr;
230  // The number of bytes used so far.
231  unsigned offset_ = 0;
232  // True if the data_ pointer is owned by *this.
233  bool data_is_owned_ = false;
234  // True if the TFile is open for writing.
235  bool is_writing_ = false;
236  // True if bytes need to be swapped in FReadEndian.
237  bool swap_ = false;
238 };
239 
240 } // namespace tesseract.
241 
242 #endif
bool(*)(const std::vector< char > &data, const char *filename) FileWriter
Definition: serialis.h:48
bool DeSerialize(bool swap, FILE *fp, std::vector< T > &data)
Definition: helpers.h:220
bool Serialize(FILE *fp, const std::vector< T > &data)
Definition: helpers.h:251
constexpr size_t countof(T const (&)[N]) noexcept
Definition: serialis.h:42
bool SaveDataToFile(const GenericVector< char > &data, const char *filename)
bool(*)(const char *filename, std::vector< char > *data) FileReader
Definition: baseapi.h:63
bool LoadDataFromFile(const char *filename, GenericVector< char > *data)
bool Serialize(const T *data, size_t count=1)
Definition: serialis.h:157
bool DeSerialize(T *data, size_t count=1)
Definition: serialis.h:94
bool DeSerialize(std::vector< T > &data)
Definition: serialis.h:98
void set_swap(bool value)
Definition: serialis.h:83
bool Serialize(const std::vector< T > &data)
Definition: serialis.h:161
#define TESS_API
Definition: export.h:34