/[svn]/libgig/trunk/src/Serialization.h
ViewVC logotype

Contents of /libgig/trunk/src/Serialization.h

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3146 - (show annotations) (download) (as text)
Wed May 3 19:54:08 2017 UTC (6 years, 11 months ago) by schoenebeck
File MIME type: text/x-c++hdr
File size: 27098 byte(s)
- Serialization.h/.cpp: trivial corrections.

1 /***************************************************************************
2 * *
3 * Copyright (C) 2017 Christian Schoenebeck *
4 * <cuse@users.sourceforge.net> *
5 * *
6 * This library is part of libgig. *
7 * *
8 * This library is free software; you can redistribute it and/or modify *
9 * it under the terms of the GNU General Public License as published by *
10 * the Free Software Foundation; either version 2 of the License, or *
11 * (at your option) any later version. *
12 * *
13 * This library is distributed in the hope that it will be useful, *
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
16 * GNU General Public License for more details. *
17 * *
18 * You should have received a copy of the GNU General Public License *
19 * along with this library; if not, write to the Free Software *
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, *
21 * MA 02111-1307 USA *
22 ***************************************************************************/
23
24 #ifndef LIBGIG_SERIALIZATION_H
25 #define LIBGIG_SERIALIZATION_H
26
27 #ifdef HAVE_CONFIG_H
28 # include <config.h>
29 #endif
30
31 #include <stdint.h>
32 #include <stdio.h>
33 #include <typeinfo>
34 #include <string>
35 #include <vector>
36 #include <map>
37
38 /** @brief Serialization / deserialization framework.
39 *
40 * See class Archive as starting point for how to implement serialization and
41 * deserialization with your application.
42 *
43 * The classes in this namespace allow to serialize and deserialize native
44 * C++ objects in a portable, easy and flexible way. Serialization is a
45 * technique that allows to transform the current state and data of native
46 * (in this case C++) objects into a data stream (including all other objects
47 * the "serialized" objects relate to); the data stream may then be sent over
48 * "wire" (for example via network connection to another computer, which might
49 * also have a different OS, CPU architecture, native memory word size and
50 * endian type); and finally the data stream would be "deserialized" on that
51 * receiver side, that is transformed again to modify all objects and data
52 * structures on receiver side to resemble the objects' state and data as it
53 * was originally on sender side.
54 *
55 * In contrast to many other already existing serialization frameworks, this
56 * implementation has a strong focus on robustness regarding long-term changes
57 * to the serialized C++ classes of the serialized objects. So even if sender
58 * and receiver are using different versions of their serialized/deserialized
59 * C++ classes, structures and data types (thus having different data structure
60 * layout to a certain extent), this framework aims trying to automatically
61 * adapt its serialization and deserialization process in that case so that
62 * the deserialized objects on receiver side would still reflect the overall
63 * expected states and overall data as intended by the sender. For being able to
64 * do so, this framework stores all kind of additional information about each
65 * serialized object and each data structure member (for example name of each
66 * data structure member, but also the offset of each member within its
67 * containing data structure, precise data types, and more).
68 *
69 * Like most other serialization frameworks, this framework does not require a
70 * tree-structured layout of the serialized data structures. So it automatically
71 * handles also cyclic dependencies between serialized data structures
72 * correctly, without i.e. causing endless recursion or redundancy.
73 *
74 * Additionally this framework also allows partial deserialization. Which means
75 * the receiver side may for example decide that it wants to restrict
76 * deserialization so that it would only modify certain objects or certain
77 * members by the deserialization process, leaving all other ones untouched.
78 * So this partial deserialization technique for example allows to implement
79 * flexible preset features for applications in a powerful and easy way.
80 */
81 namespace Serialization {
82
// Forward declarations, so the types of this namespace can be referenced
// before their full definitions appear further below in this header.
// NOTE(review): the only ObjectPool defined in this header is the nested
// class Archive::ObjectPool; the namespace-scope name declared here is a
// distinct type -- confirm whether this declaration is still needed.
class DataType;
class Object;
class Member;
class Archive;
class ObjectPool;
class Exception;
90
// Text type used by this framework (e.g. for type names, member names and
// exception messages).
typedef std::string String;

// Plain byte buffer; Archive keeps its encoded data stream in this form.
typedef std::vector<uint8_t> RawData;

// Untyped memory address; forms the "id" part of a UID.
typedef void* ID;

// Version number type of a serialized Object (see Object::version() and
// Object::minVersion()).
typedef uint32_t Version;

// Describes what an Archive is currently busy with. Archive::serialize() and
// Archive::deserialize() set the respective value on entry and reset it to
// OPERATION_NONE before they return.
enum operation_t {
    OPERATION_NONE,
    OPERATION_SERIALIZE,
    OPERATION_DESERIALIZE
};
104
/**
 * Tells whether the static C++ type @c T of the passed value is an enum
 * type. The argument only serves template type deduction; its value is
 * never inspected.
 */
template<typename T>
bool IsEnum(const T& /*data*/) {
    const bool isEnumType = __is_enum(T);
    return isEnumType;
}
109
/**
 * Tells whether the static C++ type @c T of the passed value is a union
 * type. The argument only serves template type deduction; its value is
 * never inspected.
 */
template<typename T>
bool IsUnion(const T& /*data*/) {
    const bool isUnionType = __is_union(T);
    return isUnionType;
}
114
/**
 * Tells whether the static C++ type @c T of the passed value is a class or
 * struct type (unions and enums do not count). The argument only serves
 * template type deduction; its value is never inspected.
 */
template<typename T>
bool IsClass(const T& /*data*/) {
    const bool isClassType = __is_class(T);
    return isClassType;
}
119
120 /*template<typename T>
121 bool IsTrivial(T data) {
122 return __is_trivial(T);
123 }*/
124
125 /*template<typename T>
126 bool IsPOD(T data) {
127 return __is_pod(T);
128 }*/
129
130 /** @brief Unique identifier for one specific C++ object, member or fundamental variable.
131 *
132 * Reflects a unique identifier for one specific serialized C++ class
133 * instance, struct instance, member, primitive pointer, or fundamental
134 * variables.
135 */
136 class UID {
137 public:
138 ID id;
139 size_t size;
140
141 bool isValid() const;
142 operator bool() const { return isValid(); }
143 //bool operator()() const { return isValid(); }
144 bool operator==(const UID& other) const { return id == other.id && size == other.size; }
145 bool operator!=(const UID& other) const { return id != other.id || size != other.size; }
146 bool operator<(const UID& other) const { return id < other.id || (id == other.id && size < other.size); }
147 bool operator>(const UID& other) const { return id > other.id || (id == other.id && size > other.size); }
148
149 template<typename T>
150 static UID from(const T& obj) {
151 return Resolver<T>::resolve(obj);
152 }
153
154 protected:
155 // UID resolver for non-pointer types
156 template<typename T>
157 struct Resolver {
158 static UID resolve(const T& obj) {
159 return (UID) { (ID) &obj, sizeof(obj) };
160 }
161 };
162
163 // UID resolver for pointer types (of 1st degree)
164 template<typename T>
165 struct Resolver<T*> {
166 static UID resolve(const T* const & obj) {
167 return (UID) { (ID) obj, sizeof(*obj) };
168 }
169 };
170 };
171
/**
 * Reflects an invalid UID and behaves similar to NULL as invalid value for
 * pointer types.
 */
extern const UID NO_UID;

// Ordered sequence of UIDs. Archive's UID chain resolvers fill it with one
// entry for a non-pointer variable, and with two entries for a pointer: the
// pointer variable itself, followed by the data it points to.
typedef std::vector<UID> UIDChain;

// prototyping of private internal friend functions
// (declared here so that the classes below can name them as friends; no
// definition is visible in this header -- presumably they are defined in the
// accompanying .cpp file)
static DataType _popDataTypeBlob(const char*& p, const char* end);
static Member _popMemberBlob(const char*& p, const char* end);
static Object _popObjectBlob(const char*& p, const char* end);
static void _popPrimitiveValue(const char*& p, const char* end, Object& obj);
185
186 /** @brief Abstract reflection of a native C++ data type.
187 *
188 * Provides detailed information about a C++ data type, whether it is a
189 * fundamental C/C++ data type (like int, float, char, etc.) or custom
190 * defined data type like a C++ class, struct, enum, as well as other
191 * features of the data type like its native memory size and more.
192 */
193 class DataType {
194 public:
195 DataType();
196 size_t size() const { return m_size; }
197 bool isValid() const;
198 bool isPointer() const;
199 bool isClass() const;
200 bool isPrimitive() const;
201 bool isInteger() const;
202 bool isReal() const;
203 bool isBool() const;
204 bool isEnum() const;
205 bool isSigned() const;
206 operator bool() const { return isValid(); }
207 //bool operator()() const { return isValid(); }
208 bool operator==(const DataType& other) const;
209 bool operator!=(const DataType& other) const;
210 bool operator<(const DataType& other) const;
211 bool operator>(const DataType& other) const;
212 String asLongDescr() const;
213 String baseTypeName() const { return m_baseTypeName; }
214 String customTypeName() const { return m_customTypeName; }
215
216 template<typename T>
217 static DataType dataTypeOf(const T& data) {
218 return Resolver<T>::resolve(data);
219 }
220
221 protected:
222 DataType(bool isPointer, int size, String baseType, String customType = "");
223
224 template<typename T, bool T_isPointer>
225 struct ResolverBase {
226 static DataType resolve(const T& data) {
227 const std::type_info& type = typeid(data);
228 const int sz = sizeof(data);
229
230 // for primitive types we are using our own type names instead of
231 // using std:::type_info::name(), because the precise output of the
232 // latter may vary between compilers
233 if (type == typeid(int8_t)) return DataType(T_isPointer, sz, "int8");
234 if (type == typeid(uint8_t)) return DataType(T_isPointer, sz, "uint8");
235 if (type == typeid(int16_t)) return DataType(T_isPointer, sz, "int16");
236 if (type == typeid(uint16_t)) return DataType(T_isPointer, sz, "uint16");
237 if (type == typeid(int32_t)) return DataType(T_isPointer, sz, "int32");
238 if (type == typeid(uint32_t)) return DataType(T_isPointer, sz, "uint32");
239 if (type == typeid(int64_t)) return DataType(T_isPointer, sz, "int64");
240 if (type == typeid(uint64_t)) return DataType(T_isPointer, sz, "uint64");
241 if (type == typeid(bool)) return DataType(T_isPointer, sz, "bool");
242 if (type == typeid(float)) return DataType(T_isPointer, sz, "real32");
243 if (type == typeid(double)) return DataType(T_isPointer, sz, "real64");
244
245 if (IsEnum(data)) return DataType(T_isPointer, sz, "enum", rawCppTypeNameOf(data));
246 if (IsUnion(data)) return DataType(T_isPointer, sz, "union", rawCppTypeNameOf(data));
247 if (IsClass(data)) return DataType(T_isPointer, sz, "class", rawCppTypeNameOf(data));
248
249 return DataType();
250 }
251 };
252
253 // DataType resolver for non-pointer types
254 template<typename T>
255 struct Resolver : ResolverBase<T,false> {
256 static DataType resolve(const T& data) {
257 return ResolverBase<T,false>::resolve(data);
258 }
259 };
260
261 // DataType resolver for pointer types (of 1st degree)
262 template<typename T>
263 struct Resolver<T*> : ResolverBase<T,true> {
264 static DataType resolve(const T*& data) {
265 return ResolverBase<T,true>::resolve(*data);
266 }
267 };
268
269 template<typename T>
270 static String rawCppTypeNameOf(const T& data) {
271 #if defined _MSC_VER // Microsoft compiler ...
272 # warning type_info::raw_name() demangling has not been tested yet with Microsoft compiler! Feedback appreciated!
273 String name = typeid(data).raw_name(); //NOTE: I haven't checked yet what MSC actually outputs here exactly
274 #else // i.e. especially GCC and clang ...
275 String name = typeid(data).name();
276 #endif
277 //while (!name.empty() && name[0] >= 0 && name[0] <= 9)
278 // name = name.substr(1);
279 return name;
280 }
281
282 private:
283 String m_baseTypeName;
284 String m_customTypeName;
285 int m_size;
286 bool m_isPointer;
287
288 friend DataType _popDataTypeBlob(const char*& p, const char* end);
289 };
290
/** @brief Abstract reflection of a native C++ class/struct's member variable.
 *
 * Provides detailed information about a specific C++ member variable of
 * serialized C++ object, like its C++ data type, offset of this member
 * within its containing data structure/class, its C++ member variable name
 * and more.
 *
 * Valid Member objects are created by Archive (a friend of this class)
 * through the protected constructor; the public default constructor is
 * presumably yielding an invalid Member (its definition is not part of
 * this header).
 */
class Member {
public:
    Member();
    // UID of the member variable, as passed to the constructor
    UID uid() const { return m_uid; }
    // name of the member variable, as passed to the constructor
    String name() const { return m_name; }
    // offset of the member within its containing object, as passed to the
    // constructor (Archive::serializeMember() passes the byte distance
    // between the member's and the containing object's address)
    size_t offset() const { return m_offset; }
    // C++ data type of the member variable
    const DataType& type() const { return m_type; }
    bool isValid() const;
    // allows to use a Member in boolean context, same as isValid()
    operator bool() const { return isValid(); }
    //bool operator()() const { return isValid(); }
    bool operator==(const Member& other) const;
    bool operator!=(const Member& other) const;
    bool operator<(const Member& other) const;
    bool operator>(const Member& other) const;

protected:
    Member(String name, UID uid, size_t offset, DataType type);
    friend class Archive;

private:
    UID m_uid;
    size_t m_offset;
    String m_name;
    DataType m_type;

    friend Member _popMemberBlob(const char*& p, const char* end);
};
325
326 /** @brief Abstract reflection of a native C++ class/struct instance.
327 *
328 * Provides detailed information about a specific serialized C++ object,
329 * like its C++ member variables, its C++ class/struct name, its native
330 * memory size and more.
331 */
332 class Object {
333 public:
334 Object();
335 Object(UIDChain uidChain, DataType type);
336
337 UID uid(int index = 0) const {
338 return (index < m_uid.size()) ? m_uid[index] : NO_UID;
339 }
340
341 const UIDChain& uidChain() const { return m_uid; }
342 const DataType& type() const { return m_type; }
343 const RawData& rawData() const { return m_data; }
344
345 Version version() const { return m_version; }
346
347 void setVersion(Version v) {
348 m_version = v;
349 }
350
351 Version minVersion() const { return m_minVersion; }
352
353 void setMinVersion(Version v) {
354 m_minVersion = v;
355 }
356
357 bool isVersionCompatibleTo(const Object& other) const;
358
359 std::vector<Member>& members() { return m_members; }
360 const std::vector<Member>& members() const { return m_members; }
361 Member memberNamed(String name) const;
362 void remove(const Member& member);
363 std::vector<Member> membersOfType(const DataType& type) const;
364 int sequenceIndexOf(const Member& member) const;
365 bool isValid() const;
366 operator bool() const { return isValid(); }
367 //bool operator()() const { return isValid(); }
368 bool operator==(const Object& other) const;
369 bool operator!=(const Object& other) const;
370 bool operator<(const Object& other) const;
371 bool operator>(const Object& other) const;
372
373 private:
374 DataType m_type;
375 UIDChain m_uid;
376 Version m_version;
377 Version m_minVersion;
378 RawData m_data;
379 std::vector<Member> m_members;
380
381 friend Object _popObjectBlob(const char*& p, const char* end);
382 friend void _popPrimitiveValue(const char*& p, const char* end, Object& obj);
383 };
384
385 /** @brief Destination container for serialization, and source container for deserialization.
386 *
387 * This is the main class for implementing serialization and deserialization
388 * with your C++ application. This framework does not require a tree
389 * structured layout of your C++ objects being serialized/deserialized, it
390 * uses a concept of a "root" object though. So to start serialization
391 * construct an empty Archive object and then instruct it to serialize your
392 * C++ objects by pointing it to your "root" object:
393 * @code
394 * Archive a;
395 * a.serialize(&myRootObject);
396 * @endcode
397 * Or if you prefer the look of operator based code:
398 * @code
399 * Archive a;
400 * a << myRootObject;
401 * @endcode
402 * The Archive object will then serialize all members of the passed C++
403 * object, and will recursively serialize all other C++ objects which it
404 * contains or points to. So the root object is the starting point for the
405 * overall serialization. After the serialize() method returned, you can
406 * then access the serialized data stream by calling rawData() and send that
407 * data stream over "wire", or store it on disk or whatever you may intend
408 * to do with it.
409 *
410 * Then on receiver side likewise, you create a new Archive object, pass the
411 * received data stream i.e. via constructor to the Archive object and call
412 * deserialize() by pointing it to the root object on receiver side:
413 * @code
414 * Archive a(rawDataStream);
415 * a.deserialize(&myRootObject);
416 * @endcode
417 * Or with operator instead:
418 * @code
419 * Archive a(rawDataStream);
420 * a >> myRootObject;
421 * @endcode
422 * Now this framework automatically handles serialization and
423 * deserialization of fundamental data types automatically for you (like
424 * i.e. char, int, long int, float, double, etc.). However for your own
425 * custom C++ classes and structs you must implement one method which
426 * defines which members of your class should actually be serialized and
427 * deserialized. That method to be added must have the following signature:
428 * @code
429 * void serialize(Serialization::Archive* archive);
430 * @endcode
431 * So let's say you have the following simple data structures:
432 * @code
433 * struct Foo {
434 * int a;
435 * bool b;
436 * double c;
437 * };
438 *
439 * struct Bar {
440 * char one;
441 * float two;
442 * Foo foo1;
443 * Foo* pFoo2;
444 * Foo* pFoo3DontTouchMe; // shall not be serialized/deserialized
445 * };
446 * @endcode
447 * So in order to be able to serialize and deserialize objects of those two
448 * structures you would first add the mentioned method to each struct
449 * definition (i.e. in your header file):
450 * @code
451 * struct Foo {
452 * int a;
453 * bool b;
454 * double c;
455 *
456 * void serialize(Serialization::Archive* archive);
457 * };
458 *
459 * struct Bar {
460 * char one;
461 * float two;
462 * Foo foo1;
463 * Foo* pFoo2;
464 * Foo* pFoo3DontTouchMe; // shall not be serialized/deserialized
465 *
466 * void serialize(Serialization::Archive* archive);
467 * };
468 * @endcode
469 * And then you would implement those two new methods like this (i.e. in
470 * your .cpp file):
471 * @code
472 * #define SRLZ(member) \
473 * archive->serializeMember(*this, member, #member);
474 *
475 * void Foo::serialize(Serialization::Archive* archive) {
476 * SRLZ(a);
477 * SRLZ(b);
478 * SRLZ(c);
479 * }
480 *
481 * void Bar::serialize(Serialization::Archive* archive) {
482 * SRLZ(one);
483 * SRLZ(two);
484 * SRLZ(foo1);
485 * SRLZ(pFoo2);
486 * // leaving out pFoo3DontTouchMe here
487 * }
488 * @endcode
489 * Now when you serialize such a Bar object, this framework will also
490 * automatically serialize the respective Foo object(s) accordingly, also
491 * for the pFoo2 pointer for instance (as long as it is not a NULL pointer
492 * that is).
493 *
494 * Note that there is only one method that you need to implement. So the
495 * respective serialize() method implementation of your classes/structs are
496 * both called for serialization, as well as for deserialization!
497 */
498 class Archive {
499 public:
500 Archive();
501 Archive(const RawData& data);
502 Archive(const uint8_t* data, size_t size);
503 virtual ~Archive();
504
505 template<typename T>
506 void serialize(const T* obj) {
507 m_operation = OPERATION_SERIALIZE;
508 m_allObjects.clear();
509 m_rawData.clear();
510 m_root = UID::from(obj);
511 const_cast<T*>(obj)->serialize(this);
512 encode();
513 m_operation = OPERATION_NONE;
514 }
515
516 template<typename T>
517 void deserialize(T* obj) {
518 Archive a;
519 m_operation = OPERATION_DESERIALIZE;
520 obj->serialize(&a);
521 a.m_root = UID::from(obj);
522 Syncer s(a, *this);
523 m_operation = OPERATION_NONE;
524 }
525
526 template<typename T>
527 void operator<<(const T& obj) {
528 serialize(&obj);
529 }
530
531 template<typename T>
532 void operator>>(T& obj) {
533 deserialize(&obj);
534 }
535
536 const RawData& rawData() const { return m_rawData; }
537 virtual String rawDataFormat() const;
538
539 template<typename T_classType, typename T_memberType>
540 void serializeMember(const T_classType& nativeObject, const T_memberType& nativeMember, const char* memberName) {
541 const size_t offset =
542 ((const uint8_t*)(const void*)&nativeMember) -
543 ((const uint8_t*)(const void*)&nativeObject);
544 const UIDChain uids = UIDChainResolver<T_memberType>(nativeMember);
545 const DataType type = DataType::dataTypeOf(nativeMember);
546 const Member member(memberName, uids[0], offset, type);
547 const UID parentUID = UID::from(nativeObject);
548 Object& parent = m_allObjects[parentUID];
549 if (!parent) {
550 const UIDChain uids = UIDChainResolver<T_classType>(nativeObject);
551 const DataType type = DataType::dataTypeOf(nativeObject);
552 parent = Object(uids, type);
553 }
554 parent.members().push_back(member);
555 const Object obj(uids, type);
556 const bool bExistsAlready = m_allObjects.count(uids[0]);
557 const bool isValidObject = obj;
558 const bool bExistingObjectIsInvalid = !m_allObjects[uids[0]];
559 if (!bExistsAlready || (bExistingObjectIsInvalid && isValidObject)) {
560 m_allObjects[uids[0]] = obj;
561 // recurse serialization for all members of this member
562 // (only for struct/class types, noop for primitive types)
563 SerializationRecursion<T_memberType>::serializeObject(this, nativeMember);
564 }
565 }
566
567 virtual void decode(const RawData& data);
568 virtual void decode(const uint8_t* data, size_t size);
569 void clear();
570 void remove(const Object& obj);
571 Object& rootObject();
572 Object& objectByUID(const UID& uid);
573
574 protected:
575 // UID resolver for non-pointer types
576 template<typename T>
577 class UIDChainResolver {
578 public:
579 UIDChainResolver(const T& data) {
580 m_uid.push_back(UID::from(data));
581 }
582
583 operator UIDChain() const { return m_uid; }
584 UIDChain operator()() const { return m_uid; }
585 private:
586 UIDChain m_uid;
587 };
588
589 // UID resolver for pointer types (of 1st degree)
590 template<typename T>
591 class UIDChainResolver<T*> {
592 public:
593 UIDChainResolver(const T*& data) {
594 m_uid.push_back((UID) { &data, sizeof(data) });
595 m_uid.push_back((UID) { data, sizeof(*data) });
596 }
597
598 operator UIDChain() const { return m_uid; }
599 UIDChain operator()() const { return m_uid; }
600 private:
601 UIDChain m_uid;
602 };
603
604 // SerializationRecursion for non-pointer class/struct types.
605 template<typename T, bool T_isRecursive>
606 struct SerializationRecursionImpl {
607 static void serializeObject(Archive* archive, const T& obj) {
608 const_cast<T&>(obj).serialize(archive);
609 }
610 };
611
612 // SerializationRecursion for pointers (of 1st degree) to class/structs.
613 template<typename T, bool T_isRecursive>
614 struct SerializationRecursionImpl<T*,T_isRecursive> {
615 static void serializeObject(Archive* archive, const T*& obj) {
616 if (!obj) return;
617 const_cast<T*&>(obj)->serialize(archive);
618 }
619 };
620
621 // NOOP SerializationRecursion for primitive types.
622 template<typename T>
623 struct SerializationRecursionImpl<T,false> {
624 static void serializeObject(Archive* archive, const T& obj) {}
625 };
626
627 // NOOP SerializationRecursion for pointers (of 1st degree) to primitive types.
628 template<typename T>
629 struct SerializationRecursionImpl<T*,false> {
630 static void serializeObject(Archive* archive, const T*& obj) {}
631 };
632
633 // Automatically handles recursion for class/struct types, while ignoring all primitive types.
634 template<typename T>
635 struct SerializationRecursion : SerializationRecursionImpl<T, __is_class(T)> {
636 };
637
638 class ObjectPool : public std::map<UID,Object> {
639 public:
640 // prevent passing obvious invalid UID values from creating a new pair entry
641 Object& operator[](const UID& k) {
642 static Object invalid;
643 if (!k.isValid()) {
644 invalid = Object();
645 return invalid;
646 }
647 return std::map<UID,Object>::operator[](k);
648 }
649 };
650
651 friend String _encode(const ObjectPool& objects);
652
653 private:
654 String _encodeRootBlob();
655 void _popRootBlob(const char*& p, const char* end);
656 void _popObjectsBlob(const char*& p, const char* end);
657
658 protected:
659 class Syncer {
660 public:
661 Syncer(Archive& dst, Archive& src);
662 protected:
663 void syncObject(const Object& dst, const Object& src);
664 void syncPrimitive(const Object& dst, const Object& src);
665 void syncPointer(const Object& dst, const Object& src);
666 void syncMember(const Member& dstMember, const Member& srcMember);
667 static Member dstMemberMatching(const Object& dstObj, const Object& srcObj, const Member& srcMember);
668 private:
669 Archive& m_dst;
670 Archive& m_src;
671 };
672
673 virtual void encode();
674
675 ObjectPool m_allObjects;
676 operation_t m_operation;
677 UID m_root;
678 RawData m_rawData;
679 };
680
681 /**
682 * Will be thrown whenever an error occurs during an serialization or
683 * deserialization process.
684 */
685 class Exception {
686 public:
687 String Message;
688
689 Exception(String Message) { Exception::Message = Message; }
690 void PrintMessage();
691 virtual ~Exception() {}
692 };
693
694 } // namespace Serialization
695
696 #endif // LIBGIG_SERIALIZATION_H

  ViewVC Help
Powered by ViewVC