utf8: Update to match current version
* utf8/LICENSE, utf8/utf8/cpp11.h, utf8/utf8/cpp17.h: New files.
* Makefile.am: Distribute them.
* utf8/README.md, utf8/utf8/checked.h, utf8/utf8/core.h, utf8/utf8/unchecked.h: Update to the current version of utfcpp.
* README: Add a link to the upstream GitHub repository.
This commit is contained in:
parent
efee1c4130
commit
ff89601306
9 changed files with 897 additions and 194 deletions
|
|
@@ -30,6 +30,23 @@ DEALINGS IN THE SOFTWARE.
|
|||
|
||||
#include <iterator>
|
||||
|
||||
// Determine the C++ standard version.
|
||||
// If the user defines UTF_CPP_CPLUSPLUS, use that.
|
||||
// Otherwise, fall back to the predefined macro __cplusplus, which is unreliable on some compilers (e.g. MSVC reports 199711L unless /Zc:__cplusplus is given)
|
||||
|
||||
#if !defined UTF_CPP_CPLUSPLUS
|
||||
#define UTF_CPP_CPLUSPLUS __cplusplus
|
||||
#endif
|
||||
|
||||
#if UTF_CPP_CPLUSPLUS >= 201103L // C++ 11 or later
|
||||
#define UTF_CPP_OVERRIDE override
|
||||
#define UTF_CPP_NOEXCEPT noexcept
|
||||
#else // C++ 98/03
|
||||
#define UTF_CPP_OVERRIDE
|
||||
#define UTF_CPP_NOEXCEPT throw()
|
||||
#endif // C++ 11 or later
|
||||
|
||||
|
||||
namespace utf8
|
||||
{
|
||||
// The typedefs for 8-bit, 16-bit and 32-bit unsigned integers
|
||||
|
|
@@ -49,8 +66,8 @@ namespace internal
|
|||
const uint16_t LEAD_SURROGATE_MAX = 0xdbffu;
|
||||
const uint16_t TRAIL_SURROGATE_MIN = 0xdc00u;
|
||||
const uint16_t TRAIL_SURROGATE_MAX = 0xdfffu;
|
||||
const uint16_t LEAD_OFFSET = LEAD_SURROGATE_MIN - (0x10000 >> 10);
|
||||
const uint32_t SURROGATE_OFFSET = 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN;
|
||||
const uint16_t LEAD_OFFSET = 0xd7c0u; // LEAD_SURROGATE_MIN - (0x10000 >> 10)
|
||||
const uint32_t SURROGATE_OFFSET = 0xfca02400u; // 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN
|
||||
|
||||
// Maximum valid value for a Unicode code point
|
||||
const uint32_t CODE_POINT_MAX = 0x0010ffffu;
|
||||
|
|
@@ -142,7 +159,7 @@ namespace internal
|
|||
|
||||
if (!utf8::internal::is_trail(*it))
|
||||
return INCOMPLETE_SEQUENCE;
|
||||
|
||||
|
||||
return UTF8_OK;
|
||||
}
|
||||
|
||||
|
|
@@ -165,7 +182,7 @@ namespace internal
|
|||
{
|
||||
if (it == end)
|
||||
return NOT_ENOUGH_ROOM;
|
||||
|
||||
|
||||
code_point = utf8::internal::mask8(*it);
|
||||
|
||||
UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
|
||||
|
|
@@ -222,7 +239,7 @@ namespace internal
|
|||
template <typename octet_iterator>
|
||||
utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t& code_point)
|
||||
{
|
||||
if (it == end)
|
||||
if (it == end)
|
||||
return NOT_ENOUGH_ROOM;
|
||||
|
||||
// Save the original value of it so we can go back in case of failure
|
||||
|
|
@@ -237,7 +254,7 @@ namespace internal
|
|||
// Get trail octets and calculate the code point
|
||||
utf_error err = UTF8_OK;
|
||||
switch (length) {
|
||||
case 0:
|
||||
case 0:
|
||||
return INVALID_LEAD;
|
||||
case 1:
|
||||
err = utf8::internal::get_sequence_1(it, end, cp);
|
||||
|
|
@@ -313,18 +330,7 @@ namespace internal
|
|||
((it != end) && (utf8::internal::mask8(*it++)) == bom[1]) &&
|
||||
((it != end) && (utf8::internal::mask8(*it)) == bom[2])
|
||||
);
|
||||
}
|
||||
|
||||
//Deprecated in release 2.3
|
||||
template <typename octet_iterator>
|
||||
inline bool is_bom (octet_iterator it)
|
||||
{
|
||||
return (
|
||||
(utf8::internal::mask8(*it++)) == bom[0] &&
|
||||
(utf8::internal::mask8(*it++)) == bom[1] &&
|
||||
(utf8::internal::mask8(*it)) == bom[2]
|
||||
);
|
||||
}
|
||||
}
|
||||
} // namespace utf8
|
||||
|
||||
#endif // header guard
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue