Skip to content
Snippets Groups Projects
pugixml.cpp 266 KiB
Newer Older
  • Learn to ignore specific revisions
  • Radim Vavřík's avatar
    Radim Vavřík committed

    /**
     * pugixml parser - version 1.2
     * --------------------------------------------------------
     * Copyright (C) 2006-2012, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
     * Report bugs and download new versions at http://pugixml.org/
     *
     * This library is distributed under the MIT License. See notice at the end
     * of this file.
     *
     * This work is based on the pugxml parser, which is:
     * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
     */
    
    #ifndef SOURCE_PUGIXML_CPP
    #define SOURCE_PUGIXML_CPP
    
    #include "pugixml.hpp"
    
    #include <stdlib.h>
    #include <stdio.h>
    #include <string.h>
    #include <assert.h>
    #include <wchar.h>
    
    #ifndef PUGIXML_NO_XPATH
    #	include <math.h>
    #	include <float.h>
    #	ifdef PUGIXML_NO_EXCEPTIONS
    #		include <setjmp.h>
    #	endif
    #endif
    
    #ifndef PUGIXML_NO_STL
    #	include <istream>
    #	include <ostream>
    #	include <string>
    #endif
    
    // For placement new
    #include <new>
    
    #ifdef _MSC_VER
    #	pragma warning(push)
    #	pragma warning(disable: 4127) // conditional expression is constant
    #	pragma warning(disable: 4324) // structure was padded due to __declspec(align())
    #	pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable
    #	pragma warning(disable: 4702) // unreachable code
    #	pragma warning(disable: 4996) // this function or variable may be unsafe
    #	pragma warning(disable: 4793) // function compiled as native: presence of '_setjmp' makes a function unmanaged
    #endif
    
    #ifdef __INTEL_COMPILER
    #	pragma warning(disable: 177) // function was declared but never referenced 
    #	pragma warning(disable: 279) // controlling expression is constant
    #	pragma warning(disable: 1478 1786) // function was declared "deprecated"
    #	pragma warning(disable: 1684) // conversion from pointer to same-sized integral type
    #endif
    
    #if defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY)
    #	pragma warn -8080 // symbol is declared but never used; disabling this inside push/pop bracket does not make the warning go away
    #endif
    
    #ifdef __BORLANDC__
    #	pragma option push
    #	pragma warn -8008 // condition is always false
    #	pragma warn -8066 // unreachable code
    #endif
    
    #ifdef __SNC__
    // Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug
    #	pragma diag_suppress=178 // function was declared but never referenced
    #	pragma diag_suppress=237 // controlling expression is constant
    #endif
    
    // Inlining controls
    #if defined(_MSC_VER) && _MSC_VER >= 1300
    #	define PUGI__NO_INLINE __declspec(noinline)
    #elif defined(__GNUC__)
    #	define PUGI__NO_INLINE __attribute__((noinline))
    #else
    #	define PUGI__NO_INLINE 
    #endif
    
    // Simple static assertion
    #define PUGI__STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; }
    
    // Digital Mars C++ bug workaround for passing char loaded from memory via stack
    #ifdef __DMC__
    #	define PUGI__DMC_VOLATILE volatile
    #else
    #	define PUGI__DMC_VOLATILE
    #endif
    
    // Borland C++ bug workaround for not defining ::memcpy depending on header include order (can't always use std::memcpy because some compilers don't have it at all)
    #if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST)
    using std::memcpy;
    using std::memmove;
    #endif
    
    // In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features
    #if defined(_MSC_VER) && !defined(__S3E__)
    #	define PUGI__MSVC_CRT_VERSION _MSC_VER
    #endif
    
    #ifdef PUGIXML_HEADER_ONLY
    #	define PUGI__NS_BEGIN namespace pugi { namespace impl {
    #	define PUGI__NS_END } }
    #	define PUGI__FN inline
    #	define PUGI__FN_NO_INLINE inline
    #else
    #	if defined(_MSC_VER) && _MSC_VER < 1300 // MSVC6 seems to have an amusing bug with anonymous namespaces inside namespaces
    #		define PUGI__NS_BEGIN namespace pugi { namespace impl {
    #		define PUGI__NS_END } }
    #	else
    #		define PUGI__NS_BEGIN namespace pugi { namespace impl { namespace {
    #		define PUGI__NS_END } } }
    #	endif
    #	define PUGI__FN
    #	define PUGI__FN_NO_INLINE PUGI__NO_INLINE
    #endif
    
    // uintptr_t
    #if !defined(_MSC_VER) || _MSC_VER >= 1600
    #	include <stdint.h>
    #else
    #	ifndef _UINTPTR_T_DEFINED
    // No native uintptr_t in MSVC6 and in some WinCE versions
    typedef size_t uintptr_t;
    #define _UINTPTR_T_DEFINED
    #	endif
    PUGI__NS_BEGIN
    	typedef unsigned __int8 uint8_t;
    	typedef unsigned __int16 uint16_t;
    	typedef unsigned __int32 uint32_t;
    PUGI__NS_END
    #endif
    
    // Memory allocation
    PUGI__NS_BEGIN
    	// Default allocation function: forwards the request to malloc and returns its result unchanged.
    	// Installed as the initial value of xml_memory_management_function_storage<T>::allocate.
    	PUGI__FN void* default_allocate(size_t size)
    	{
    		return malloc(size);
    	}
    
    	// Default deallocation function: forwards the pointer to free.
    	// Installed as the initial value of xml_memory_management_function_storage<T>::deallocate.
    	PUGI__FN void default_deallocate(void* ptr)
    	{
    		free(ptr);
    	}
    
    	// Holds the pair of function pointers used for all raw memory allocation in this file.
    	// NOTE(review): the otherwise unused template parameter presumably exists so that the static
    	// members can be defined in this file even when it is included as a header - confirm.
    	template <typename T>
    	struct xml_memory_management_function_storage
    	{
    		static allocation_function allocate;
    		static deallocation_function deallocate;
    	};

    	// Both pointers start out referring to the malloc/free based defaults defined above
    	template <typename T> allocation_function xml_memory_management_function_storage<T>::allocate = default_allocate;
    	template <typename T> deallocation_function xml_memory_management_function_storage<T>::deallocate = default_deallocate;

    	// The single instantiation used throughout the file (xml_memory::allocate / xml_memory::deallocate)
    	typedef xml_memory_management_function_storage<int> xml_memory;
    PUGI__NS_END
    
    // String utilities
    PUGI__NS_BEGIN
    	// Returns the number of characters in a null-terminated char_t string (terminator excluded).
    	// The string must not be null.
    	PUGI__FN size_t strlength(const char_t* s)
    	{
    		assert(s);

    	#ifdef PUGIXML_WCHAR_MODE
    		const size_t length = wcslen(s);
    	#else
    		const size_t length = strlen(s);
    	#endif

    		return length;
    	}
    
    	// Returns true when two null-terminated char_t strings have identical contents.
    	// Neither argument may be null.
    	PUGI__FN bool strequal(const char_t* src, const char_t* dst)
    	{
    		assert(src && dst);

    	#ifdef PUGIXML_WCHAR_MODE
    		const int order = wcscmp(src, dst);
    	#else
    		const int order = strcmp(src, dst);
    	#endif

    		return order == 0;
    	}
    
    	// Returns true when the null-terminated string lhs is exactly the count characters starting at rhs:
    	// the first count characters must match and lhs must end right after them.
    	PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count)
    	{
    		const char_t* const rhs_end = rhs + count;

    		while (rhs != rhs_end)
    		{
    			if (*lhs != *rhs) return false;

    			++lhs;
    			++rhs;
    		}

    		// lhs now points at position count; it must be the terminator for the lengths to agree
    		return *lhs == 0;
    	}
    	
    #ifdef PUGIXML_WCHAR_MODE
    	// Copies a null-terminated narrow string into dest one character at a time, widening each char
    	// to wchar_t, and null-terminates the result. Only meaningful for ASCII input; dest must have
    	// room for the source length plus the terminator.
    	PUGI__FN void widen_ascii(wchar_t* dest, const char* source)
    	{
    		while (*source)
    		{
    			*dest = *source;

    			++dest;
    			++source;
    		}

    		*dest = 0;
    	}
    #endif
    PUGI__NS_END
    
    #if !defined(PUGIXML_NO_STL) || !defined(PUGIXML_NO_XPATH)
    // auto_ptr-like buffer holder for exception recovery
    PUGI__NS_BEGIN
    	// Scope guard that owns a raw buffer together with the function that frees it.
    	// The destructor calls deleter(data) unless the buffer is null or ownership was given up via release().
    	struct buffer_holder
    	{
    		void* data;
    		void (*deleter)(void*);

    		// Takes ownership of data_; deleter_ is invoked on it when the holder goes out of scope
    		buffer_holder(void* data_, void (*deleter_)(void*)): data(data_), deleter(deleter_)
    		{
    		}

    		~buffer_holder()
    		{
    			if (data) deleter(data);
    		}

    		// Gives up ownership: returns the buffer and leaves the holder empty so the destructor does nothing
    		void* release()
    		{
    			void* result = data;
    			data = 0;
    			return result;
    		}

    	private:
    		// Non-copyable: a copy would own the same buffer and the deleter would run twice on it.
    		// Declared but intentionally not defined (C++03-compatible idiom, matching the rest of the file).
    		buffer_holder(const buffer_holder&);
    		buffer_holder& operator=(const buffer_holder&);
    	};
    PUGI__NS_END
    #endif
    
    PUGI__NS_BEGIN
    	// Size in bytes of the data area of a regular memory page; a build can override the default
    	// by defining PUGIXML_MEMORY_PAGE_SIZE
    	static const size_t xml_memory_page_size =
    	#ifdef PUGIXML_MEMORY_PAGE_SIZE
    		PUGIXML_MEMORY_PAGE_SIZE
    	#else
    		32768
    	#endif
    		;

    	// Page headers are aligned to this boundary (see xml_allocator::allocate_page), so the low bits
    	// of a page address are zero and can carry flags inside node/attribute headers
    	static const uintptr_t xml_memory_page_alignment = 32;
    	// Selects the page address part of a node/attribute header
    	static const uintptr_t xml_memory_page_pointer_mask = ~(xml_memory_page_alignment - 1);
    	// Header flag: the name string is owned by the allocator and is released via deallocate_string on destruction
    	static const uintptr_t xml_memory_page_name_allocated_mask = 16;
    	// Header flag: the value string is owned by the allocator and is released via deallocate_string on destruction
    	static const uintptr_t xml_memory_page_value_allocated_mask = 8;
    	// NOTE(review): presumably selects the (type - 1) bits stored by the xml_node_struct constructor - confirm against users of this mask
    	static const uintptr_t xml_memory_page_type_mask = 7;
    
    	struct xml_allocator;

    	// Header located at the start of every memory page; the page payload begins at 'data'.
    	struct xml_memory_page
    	{
    		// Treats 'memory' as a page header and resets every bookkeeping field to zero.
    		// A null block yields a null page.
    		static xml_memory_page* construct(void* memory)
    		{
    			xml_memory_page* page = static_cast<xml_memory_page*>(memory);

    			if (page)
    			{
    				page->allocator = 0;
    				page->memory = 0;
    				page->prev = page->next = 0;
    				page->busy_size = page->freed_size = 0;
    			}

    			return page;
    		}

    		// Allocator this page belongs to
    		xml_allocator* allocator;

    		// Start of the raw block the page lives in; this is the pointer handed back on deallocation
    		void* memory;

    		// Neighbours in the doubly-linked list of pages
    		xml_memory_page* prev;
    		xml_memory_page* next;

    		// Bytes handed out from this page and bytes returned to it so far
    		size_t busy_size;
    		size_t freed_size;

    		// First byte of the payload; the real extent is decided when the page is allocated
    		char data[1];
    	};
    
    	// Bookkeeping record stored immediately before every string handed out by xml_allocator::allocate_string;
    	// xml_allocator::deallocate_string reads it back to locate the owning page and the allocation size.
    	struct xml_memory_string_header
    	{
    		uint16_t page_offset; // offset from page->data
    		uint16_t full_size; // 0 if string occupies whole page
    	};
    
    	// Page-based allocator: memory is handed out sequentially from the current page (_root),
    	// and a page is released once every byte handed out from it has been returned.
    	struct xml_allocator
    	{
    		// Starts allocating from the given page, continuing after the bytes it already has in use
    		xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size)
    		{
    		}

    		// Allocates a new page able to hold data_size bytes of payload.
    		// Returns 0 if the underlying allocation function fails.
    		xml_memory_page* allocate_page(size_t data_size)
    		{
    			size_t size = offsetof(xml_memory_page, data) + data_size;

    			// allocate block with some alignment, leaving memory for worst-case padding
    			void* memory = xml_memory::allocate(size + xml_memory_page_alignment);
    			if (!memory) return 0;

    			// align upwards to page boundary
    			void* page_memory = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(memory) + (xml_memory_page_alignment - 1)) & ~(xml_memory_page_alignment - 1));

    			// prepare page structure
    			xml_memory_page* page = xml_memory_page::construct(page_memory);

    			// remember the unaligned block start: that is the pointer deallocate_page has to release
    			page->memory = memory;
    			page->allocator = _root->allocator;

    			return page;
    		}

    		// Returns the raw block backing the page to the deallocation function
    		static void deallocate_page(xml_memory_page* page)
    		{
    			xml_memory::deallocate(page->memory);
    		}

    		// Slow path of allocate_memory, taken when the request does not fit into the current page
    		void* allocate_memory_oob(size_t size, xml_memory_page*& out_page);

    		// Allocates size bytes and reports the page they came from through out_page.
    		// Requests that fit are carved from the current page; everything else goes through allocate_memory_oob.
    		void* allocate_memory(size_t size, xml_memory_page*& out_page)
    		{
    			if (_busy_size + size > xml_memory_page_size) return allocate_memory_oob(size, out_page);

    			void* buf = _root->data + _busy_size;

    			_busy_size += size;

    			out_page = _root;

    			return buf;
    		}

    		// Returns size bytes at ptr to the given page; the page itself is released (or reset, if it is
    		// the last page in the list) once everything allocated from it has been returned.
    		void deallocate_memory(void* ptr, size_t size, xml_memory_page* page)
    		{
    			// the current page's busy size is tracked in _busy_size; write it back before using page->busy_size
    			if (page == _root) page->busy_size = _busy_size;

    			assert(ptr >= page->data && ptr < page->data + page->busy_size);
    			// NOTE(review): presumably keeps ptr "used" when the assert above is compiled out - confirm
    			(void)!ptr;

    			page->freed_size += size;
    			assert(page->freed_size <= page->busy_size);

    			if (page->freed_size == page->busy_size)
    			{
    				if (page->next == 0)
    				{
    					assert(_root == page);

    					// top page freed, just reset sizes
    					page->busy_size = page->freed_size = 0;
    					_busy_size = 0;
    				}
    				else
    				{
    					assert(_root != page);
    					assert(page->prev);

    					// remove from the list
    					page->prev->next = page->next;
    					page->next->prev = page->prev;

    					// deallocate
    					deallocate_page(page);
    				}
    			}
    		}

    		// Allocates storage for length char_t elements preceded by an xml_memory_string_header.
    		// Returns a pointer to the character storage, or 0 if the allocation fails.
    		char_t* allocate_string(size_t length)
    		{
    			// allocate memory for string and header block
    			size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t);
    			
    			// round size up to pointer alignment boundary
    			size_t full_size = (size + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1);

    			xml_memory_page* page;
    			xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page));

    			if (!header) return 0;

    			// setup header
    			ptrdiff_t page_offset = reinterpret_cast<char*>(header) - page->data;

    			assert(page_offset >= 0 && page_offset < (1 << 16));
    			header->page_offset = static_cast<uint16_t>(page_offset);

    			// full_size == 0 for large strings that occupy the whole page
    			assert(full_size < (1 << 16) || (page->busy_size == full_size && page_offset == 0));
    			header->full_size = static_cast<uint16_t>(full_size < (1 << 16) ? full_size : 0);

    			// round-trip through void* to avoid 'cast increases required alignment of target type' warning
    			// header is guaranteed a pointer-sized alignment, which should be enough for char_t
    			return static_cast<char_t*>(static_cast<void*>(header + 1));
    		}

    		// Releases a string previously returned by allocate_string
    		void deallocate_string(char_t* string)
    		{
    			// this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
    			// we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string

    			// get header
    			xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1;

    			// deallocate
    			// step back from the header to the start of its page: page_offset is relative to page->data
    			size_t page_offset = offsetof(xml_memory_page, data) + header->page_offset;
    			xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset));

    			// if full_size == 0 then this string occupies the whole page
    			size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size;

    			deallocate_memory(header, full_size, page);
    		}

    		// Page that new allocations are currently carved from
    		xml_memory_page* _root;
    		// Bytes in use in _root; written back to _root->busy_size when needed (see deallocate_memory and allocate_memory_oob)
    		size_t _busy_size;
    	};
    
    	// Slow path of xml_allocator::allocate_memory: serves a request that does not fit into the current page.
    	// Requests up to a quarter of the page size open a fresh regular page that becomes the current one;
    	// larger requests get a dedicated page of exactly the requested size.
    	// Reports the page through out_page and returns the start of its data, or 0 if no page could be allocated.
    	PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page)
    	{
    		const size_t large_allocation_threshold = xml_memory_page_size / 4;
    		const bool dedicated_page = size > large_allocation_threshold;

    		xml_memory_page* page = allocate_page(dedicated_page ? size : xml_memory_page_size);
    		out_page = page;

    		if (!page) return 0;

    		if (dedicated_page)
    		{
    			// link the page in just before the current one so that it can be released as soon as it
    			// becomes empty; the last page is never released (see deallocate_memory)
    			assert(_root->prev);

    			xml_memory_page* before_root = _root->prev;

    			page->prev = before_root;
    			page->next = _root;

    			before_root->next = page;
    			_root->prev = page;
    		}
    		else
    		{
    			// record how much of the outgoing page is in use before switching away from it
    			_root->busy_size = _busy_size;

    			// append the page to the list and make it the current one
    			page->prev = _root;
    			_root->next = page;
    			_root = page;

    			_busy_size = size;
    		}

    		// the request occupies the start of the new page
    		page->busy_size = size;

    		return page->data;
    	}
    PUGI__NS_END
    
    namespace pugi
    {
    	/// Storage for a single XML attribute (a name/value pair).
    	struct xml_attribute_struct
    	{
    		/// Creates an unlinked attribute with no name and no value.
    		/// \param page - memory page the attribute lives in; its address becomes the header
    		xml_attribute_struct(impl::xml_memory_page* page)
    			: header(reinterpret_cast<uintptr_t>(page))
    			, name(0)
    			, value(0)
    			, prev_attribute_c(0)
    			, next_attribute(0)
    		{
    		}

    		uintptr_t header;	///< Page address combined with flag bits

    		char_t* name;	///< Attribute name
    		char_t* value;	///< Attribute value

    		xml_attribute_struct* prev_attribute_c;	///< Previous attribute; cyclic, so the first attribute points at the last
    		xml_attribute_struct* next_attribute;	///< Next attribute; null for the last one
    	};
    
    	/// Storage for a single node of the XML document tree.
    	struct xml_node_struct
    	{
    		/// Creates a detached node with no name, value, children or attributes.
    		/// \param page - memory page the node lives in; its address forms the upper part of the header
    		/// \param type - node type; (type - 1) is stored in the low bits of the header
    		xml_node_struct(impl::xml_memory_page* page, xml_node_type type)
    			: header(reinterpret_cast<uintptr_t>(page) | (type - 1))
    			, parent(0)
    			, name(0)
    			, value(0)
    			, first_child(0)
    			, prev_sibling_c(0)
    			, next_sibling(0)
    			, first_attribute(0)
    		{
    		}

    		uintptr_t header;	///< Page address combined with type and flag bits

    		xml_node_struct* parent;	///< Parent node

    		char_t* name;	///< Element name
    		char_t* value;	///< Associated string data

    		xml_node_struct* first_child;	///< First child node

    		xml_node_struct* prev_sibling_c;	///< Previous sibling; cyclic, so the first child points at the last
    		xml_node_struct* next_sibling;	///< Next sibling; null for the last child

    		xml_attribute_struct* first_attribute;	///< First attribute
    	};
    }
    
    PUGI__NS_BEGIN
    	// Document root: a node of type node_document that is also the allocator working on the given page
    	struct xml_document_struct: public xml_node_struct, public xml_allocator
    	{
    		xml_document_struct(xml_memory_page* page)
    			: xml_node_struct(page, node_document)
    			, xml_allocator(page)
    			, buffer(0)
    		{
    		}

    		// NOTE(review): presumably the source text buffer associated with the document - confirm against the loading code
    		const char_t* buffer;
    	};
    
    	// Returns the allocator that owns the page the given node lives in.
    	// The page address is recovered from the node header by masking off the flag bits.
    	inline xml_allocator& get_allocator(const xml_node_struct* node)
    	{
    		assert(node);

    		const uintptr_t page_address = node->header & xml_memory_page_pointer_mask;
    		xml_memory_page* owner_page = reinterpret_cast<xml_memory_page*>(page_address);

    		return *owner_page->allocator;
    	}
    PUGI__NS_END
    
    // Low-level DOM operations
    PUGI__NS_BEGIN
    	// Allocates and constructs an empty attribute from the given allocator.
    	// Returns 0 if the allocator is out of memory.
    	inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc)
    	{
    		xml_memory_page* page;
    		void* memory = alloc.allocate_memory(sizeof(xml_attribute_struct), page);

    		// allocate_memory reports failure with a null pointer; constructing into it via placement new
    		// is undefined behaviour, so bail out first
    		if (!memory) return 0;

    		return new (memory) xml_attribute_struct(page);
    	}
    
    	// Allocates and constructs an empty node of the given type from the given allocator.
    	// Returns 0 if the allocator is out of memory.
    	inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type)
    	{
    		xml_memory_page* page;
    		void* memory = alloc.allocate_memory(sizeof(xml_node_struct), page);

    		// allocate_memory reports failure with a null pointer; constructing into it via placement new
    		// is undefined behaviour, so bail out first
    		if (!memory) return 0;

    		return new (memory) xml_node_struct(page, type);
    	}
    
    	// Releases an attribute: frees its name and value strings when the header flags mark them as
    	// allocator-owned, then returns the attribute's own storage to the page recorded in its header.
    	inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc)
    	{
    		const uintptr_t flags = a->header;
    		xml_memory_page* owner_page = reinterpret_cast<xml_memory_page*>(flags & xml_memory_page_pointer_mask);

    		if ((flags & impl::xml_memory_page_name_allocated_mask) != 0)
    		{
    			alloc.deallocate_string(a->name);
    		}

    		if ((flags & impl::xml_memory_page_value_allocated_mask) != 0)
    		{
    			alloc.deallocate_string(a->value);
    		}

    		alloc.deallocate_memory(a, sizeof(xml_attribute_struct), owner_page);
    	}
    
    	// Releases a node together with everything hanging off it. Order of release: the node's own
    	// name and value strings (when allocator-owned), its attributes, its children (recursively),
    	// and finally the node's own storage.
    	inline void destroy_node(xml_node_struct* n, xml_allocator& alloc)
    	{
    		const uintptr_t flags = n->header;
    		xml_memory_page* owner_page = reinterpret_cast<xml_memory_page*>(flags & xml_memory_page_pointer_mask);

    		if ((flags & impl::xml_memory_page_name_allocated_mask) != 0)
    		{
    			alloc.deallocate_string(n->name);
    		}

    		if ((flags & impl::xml_memory_page_value_allocated_mask) != 0)
    		{
    			alloc.deallocate_string(n->value);
    		}

    		// the link to the next element has to be read before the current element is destroyed
    		xml_attribute_struct* attr = n->first_attribute;

    		while (attr)
    		{
    			xml_attribute_struct* following = attr->next_attribute;

    			destroy_attribute(attr, alloc);

    			attr = following;
    		}

    		xml_node_struct* child = n->first_child;

    		while (child)
    		{
    			xml_node_struct* following = child->next_sibling;

    			destroy_node(child, alloc);

    			child = following;
    		}

    		alloc.deallocate_memory(n, sizeof(xml_node_struct), owner_page);
    	}
    
    	// Allocates a node of the given type and links it in as the last child of 'node'.
    	// Returns the new child, or 0 if the allocation failed.
    	PUGI__FN_NO_INLINE xml_node_struct* append_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element)
    	{
    		xml_node_struct* const fresh = allocate_node(alloc, type);
    		if (fresh == 0) return 0;

    		fresh->parent = node;

    		xml_node_struct* const head = node->first_child;

    		if (head == 0)
    		{
    			// only child: it is both first and last, so its cyclic back link points at itself
    			node->first_child = fresh;
    			fresh->prev_sibling_c = fresh;

    			return fresh;
    		}

    		// the first child's cyclic back link gives the current last child
    		xml_node_struct* const tail = head->prev_sibling_c;

    		tail->next_sibling = fresh;
    		fresh->prev_sibling_c = tail;
    		head->prev_sibling_c = fresh;

    		return fresh;
    	}
    
    	// Allocates an attribute and links it in as the last attribute of 'node'.
    	// Returns the new attribute, or 0 if the allocation failed.
    	PUGI__FN_NO_INLINE xml_attribute_struct* append_attribute_ll(xml_node_struct* node, xml_allocator& alloc)
    	{
    		xml_attribute_struct* const fresh = allocate_attribute(alloc);
    		if (fresh == 0) return 0;

    		xml_attribute_struct* const head = node->first_attribute;

    		if (head == 0)
    		{
    			// only attribute: it is both first and last, so its cyclic back link points at itself
    			node->first_attribute = fresh;
    			fresh->prev_attribute_c = fresh;

    			return fresh;
    		}

    		// the first attribute's cyclic back link gives the current last attribute
    		xml_attribute_struct* const tail = head->prev_attribute_c;

    		tail->next_attribute = fresh;
    		fresh->prev_attribute_c = tail;
    		head->prev_attribute_c = fresh;

    		return fresh;
    	}
    PUGI__NS_END
    
    // Helper classes for code generation
    PUGI__NS_BEGIN
    	// Compile-time "false" tag: exposes value == 0 for use as a template argument
    	// (it is the default opt_swap argument of utf_decoder)
    	struct opt_false
    	{
    		enum { value = 0 };
    	};
    
    	// Compile-time "true" tag: exposes value == 1 for use as a template argument
    	struct opt_true
    	{
    		enum { value = 1 };
    	};
    PUGI__NS_END
    
    // Unicode utilities
    PUGI__NS_BEGIN
    	// Returns the 16-bit value with its two bytes exchanged
    	inline uint16_t endian_swap(uint16_t value)
    	{
    		const unsigned int low_byte = value & 0xff;
    		const unsigned int high_byte = value >> 8;

    		return static_cast<uint16_t>((low_byte << 8) | high_byte);
    	}
    
    	// Returns the 32-bit value with its four bytes in reverse order
    	inline uint32_t endian_swap(uint32_t value)
    	{
    		const uint32_t byte0 = value & 0xff;
    		const uint32_t byte1 = (value >> 8) & 0xff;
    		const uint32_t byte2 = (value >> 16) & 0xff;
    		const uint32_t byte3 = value >> 24;

    		return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
    	}
    
    	// Output traits that count how many UTF-8 bytes the decoded code points would occupy
    	struct utf8_counter
    	{
    		typedef size_t value_type;

    		// Code points below U+10000: 1 byte up to U+007F, 2 bytes up to U+07FF, 3 bytes otherwise
    		static value_type low(value_type result, uint32_t ch)
    		{
    			const value_type encoded_length = (ch < 0x80) ? 1 : (ch < 0x800) ? 2 : 3;

    			return result + encoded_length;
    		}

    		// Code points U+10000..U+10FFFF always take 4 bytes
    		static value_type high(value_type result, uint32_t)
    		{
    			const value_type encoded_length = 4;

    			return result + encoded_length;
    		}
    	};
    
    	// Output traits that encode code points as UTF-8 into a byte buffer.
    	// Every function returns the position right after the bytes it wrote; the caller provides the space.
    	struct utf8_writer
    	{
    		typedef uint8_t* value_type;

    		// Encodes a code point below U+10000 using 1, 2 or 3 bytes
    		static value_type low(value_type result, uint32_t ch)
    		{
    			// U+0000..U+007F: single byte
    			if (ch < 0x80)
    			{
    				*result++ = static_cast<uint8_t>(ch);
    				return result;
    			}

    			// U+0080..U+07FF: lead byte 110xxxxx plus one continuation byte
    			if (ch < 0x800)
    			{
    				*result++ = static_cast<uint8_t>(0xC0 | (ch >> 6));
    				*result++ = static_cast<uint8_t>(0x80 | (ch & 0x3F));
    				return result;
    			}

    			// U+0800..U+FFFF: lead byte 1110xxxx plus two continuation bytes
    			*result++ = static_cast<uint8_t>(0xE0 | (ch >> 12));
    			*result++ = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
    			*result++ = static_cast<uint8_t>(0x80 | (ch & 0x3F));
    			return result;
    		}

    		// Encodes a code point in U+10000..U+10FFFF: lead byte 11110xxx plus three continuation bytes
    		static value_type high(value_type result, uint32_t ch)
    		{
    			*result++ = static_cast<uint8_t>(0xF0 | (ch >> 18));
    			*result++ = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
    			*result++ = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
    			*result++ = static_cast<uint8_t>(0x80 | (ch & 0x3F));
    			return result;
    		}

    		// Encodes any code point, picking low or high by its range
    		static value_type any(value_type result, uint32_t ch)
    		{
    			if (ch < 0x10000) return low(result, ch);

    			return high(result, ch);
    		}
    	};
    
    	// Output traits that count how many UTF-16 code units the decoded code points would occupy
    	struct utf16_counter
    	{
    		typedef size_t value_type;

    		// Code points below U+10000 take a single code unit
    		static value_type low(value_type result, uint32_t)
    		{
    			result += 1;

    			return result;
    		}

    		// Code points from U+10000 upwards take a surrogate pair, i.e. two code units
    		static value_type high(value_type result, uint32_t)
    		{
    			result += 2;

    			return result;
    		}
    	};
    
    	// Output traits that encode code points as UTF-16 code units into a buffer.
    	// Every function returns the position right after the units it wrote; the caller provides the space.
    	struct utf16_writer
    	{
    		typedef uint16_t* value_type;

    		// Stores a code point below U+10000 as a single code unit
    		static value_type low(value_type result, uint32_t ch)
    		{
    			result[0] = static_cast<uint16_t>(ch);

    			return result + 1;
    		}

    		// Stores a code point from U+10000 upwards as a surrogate pair
    		static value_type high(value_type result, uint32_t ch)
    		{
    			const uint32_t offset = static_cast<uint32_t>(ch - 0x10000);

    			// upper bits go into the lead surrogate, the low 10 bits into the trail surrogate
    			const uint32_t lead_bits = offset >> 10;
    			const uint32_t trail_bits = offset & 0x3ff;

    			result[0] = static_cast<uint16_t>(0xD800 + lead_bits);
    			result[1] = static_cast<uint16_t>(0xDC00 + trail_bits);

    			return result + 2;
    		}

    		// Stores any code point, picking low or high by its range
    		static value_type any(value_type result, uint32_t ch)
    		{
    			if (ch < 0x10000) return low(result, ch);

    			return high(result, ch);
    		}
    	};
    
    	// Output traits that count how many UTF-32 code units the decoded code points would occupy;
    	// every code point takes exactly one unit regardless of its range
    	struct utf32_counter
    	{
    		typedef size_t value_type;

    		static value_type low(value_type result, uint32_t)
    		{
    			result += 1;

    			return result;
    		}

    		static value_type high(value_type result, uint32_t)
    		{
    			result += 1;

    			return result;
    		}
    	};
    
    	// Output traits that store code points as UTF-32 code units into a buffer.
    	// A code point is stored verbatim whatever its range, so low and high simply forward to any.
    	// Every function returns the position right after the unit it wrote.
    	struct utf32_writer
    	{
    		typedef uint32_t* value_type;

    		static value_type any(value_type result, uint32_t ch)
    		{
    			result[0] = ch;

    			return result + 1;
    		}

    		static value_type low(value_type result, uint32_t ch)
    		{
    			return any(result, ch);
    		}

    		static value_type high(value_type result, uint32_t ch)
    		{
    			return any(result, ch);
    		}
    	};
    
    	// Output traits that store code points as Latin-1 bytes; anything above 255 is replaced by '?'.
    	// Every function returns the position right after the byte it wrote.
    	struct latin1_writer
    	{
    		typedef uint8_t* value_type;

    		// Code points below U+10000: stored as is when they fit into one byte, '?' otherwise
    		static value_type low(value_type result, uint32_t ch)
    		{
    			if (ch > 255)
    				*result = '?';
    			else
    				*result = static_cast<uint8_t>(ch);

    			return result + 1;
    		}

    		// Code points from U+10000 upwards never fit, so the replacement character is always written
    		static value_type high(value_type result, uint32_t ch)
    		{
    			(void)ch;

    			result[0] = '?';

    			return result + 1;
    		}
    	};
    
    	// Maps the size of wchar_t in bytes to the matching fixed-width code unit type and to the
    	// counter/writer traits for that encoding. Only sizes 2 and 4 are specialized; any other size
    	// leaves the primary template undefined and fails to compile where it is used.
    	template <size_t size> struct wchar_selector;

    	// 2-byte wchar_t: handled as UTF-16
    	template <> struct wchar_selector<2>
    	{
    		typedef uint16_t type;
    		typedef utf16_counter counter;
    		typedef utf16_writer writer;
    	};

    	// 4-byte wchar_t: handled as UTF-32
    	template <> struct wchar_selector<4>
    	{
    		typedef uint32_t type;
    		typedef utf32_counter counter;
    		typedef utf32_writer writer;
    	};

    	// Counter and writer traits matching this platform's wchar_t
    	typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter;
    	typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer;
    
    	template <typename Traits, typename opt_swap = opt_false> struct utf_decoder
    	{
    		static inline typename Traits::value_type decode_utf8_block(const uint8_t* data, size_t size, typename Traits::value_type result)
    		{
    			const uint8_t utf8_byte_mask = 0x3f;
    
    			while (size)
    			{
    				uint8_t lead = *data;
    
    				// 0xxxxxxx -> U+0000..U+007F
    				if (lead < 0x80)
    				{
    					result = Traits::low(result, lead);
    					data += 1;
    					size -= 1;
    
    					// process aligned single-byte (ascii) blocks
    					if ((reinterpret_cast<uintptr_t>(data) & 3) == 0)
    					{
    						// round-trip through void* to silence 'cast increases required alignment of target type' warnings
    						while (size >= 4 && (*static_cast<const uint32_t*>(static_cast<const void*>(data)) & 0x80808080) == 0)
    						{
    							result = Traits::low(result, data[0]);
    							result = Traits::low(result, data[1]);
    							result = Traits::low(result, data[2]);
    							result = Traits::low(result, data[3]);
    							data += 4;
    							size -= 4;
    						}
    					}
    				}
    				// 110xxxxx -> U+0080..U+07FF
    				else if (static_cast<unsigned int>(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80)
    				{
    					result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask));
    					data += 2;
    					size -= 2;
    				}
    				// 1110xxxx -> U+0800-U+FFFF
    				else if (static_cast<unsigned int>(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80)
    				{
    					result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask));
    					data += 3;
    					size -= 3;
    				}
    				// 11110xxx -> U+10000..U+10FFFF
    				else if (static_cast<unsigned int>(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80)
    				{
    					result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask));
    					data += 4;
    					size -= 4;
    				}
    				// 10xxxxxx or 11111xxx -> invalid
    				else
    				{
    					data += 1;
    					size -= 1;
    				}
    			}
    
    			return result;
    		}
    
    		static inline typename Traits::value_type decode_utf16_block(const uint16_t* data, size_t size, typename Traits::value_type result)
    		{
    			const uint16_t* end = data + size;
    
    			while (data < end)
    			{
    				uint16_t lead = opt_swap::value ? endian_swap(*data) : *data;
    
    				// U+0000..U+D7FF
    				if (lead < 0xD800)
    				{
    					result = Traits::low(result, lead);
    					data += 1;
    				}
    				// U+E000..U+FFFF
    				else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000)
    				{
    					result = Traits::low(result, lead);
    					data += 1;
    				}
    				// surrogate pair lead
    				else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && data + 1 < end)
    				{
    					uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
    
    					if (static_cast<unsigned int>(next - 0xDC00) < 0x400)
    					{
    						result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
    						data += 2;
    					}
    					else
    					{
    						data += 1;
    					}
    				}
    				else
    				{
    					data += 1;
    				}
    			}
    
    			return result;
    		}
    
    		static inline typename Traits::value_type decode_utf32_block(const uint32_t* data, size_t size, typename Traits::value_type result)
    		{
    			const uint32_t* end = data + size;
    
    			while (data < end)
    			{
    				uint32_t lead = opt_swap::value ? endian_swap(*data) : *data;
    
    				// U+0000..U+FFFF
    				if (lead < 0x10000)
    				{
    					result = Traits::low(result, lead);
    					data += 1;
    				}
    				// U+10000..U+10FFFF
    				else
    				{
    					result = Traits::high(result, lead);
    					data += 1;
    				}
    			}
    
    			return result;
    		}
    
    		static inline typename Traits::value_type decode_latin1_block(const uint8_t* data, size_t size, typename Traits::value_type result)
    		{
    			for (size_t i = 0; i < size; ++i)
    			{
    				result = Traits::low(result, data[i]);
    			}
    
    			return result;
    		}
    
    		static inline typename Traits::value_type decode_wchar_block_impl(const uint16_t* data, size_t size, typename Traits::value_type result)
    		{
    			return decode_utf16_block(data, size, result);
    		}
    
    		static inline typename Traits::value_type decode_wchar_block_impl(const uint32_t* data, size_t size, typename Traits::value_type result)
    		{
    			return decode_utf32_block(data, size, result);
    		}
    
    		static inline typename Traits::value_type decode_wchar_block(const wchar_t* data, size_t size, typename Traits::value_type result)
    		{
    			return decode_wchar_block_impl(reinterpret_cast<const wchar_selector<sizeof(wchar_t)>::type*>(data), size, result);