utf8: Update to match current version

* utf8/LICENSE, utf8/utf8/cpp11.h, utf8/utf8/cpp17.h: New files.
* Makefile.am: Distribute them.
* utf8/README.md, utf8/utf8/checked.h, utf8/utf8/core.h,
utf8/utf8/unchecked.h: Update to the current version of utfcpp.
* README: Add a link to the upstream github.
This commit is contained in:
Alexandre Duret-Lutz 2022-07-05 10:56:57 +02:00
parent efee1c4130
commit ff89601306
9 changed files with 897 additions and 194 deletions

View file

@ -30,6 +30,23 @@ DEALINGS IN THE SOFTWARE.
#include <iterator>
// Determine the C++ standard version.
// If the user defines UTF_CPP_CPLUSPLUS, use that.
// Otherwise, trust the unreliable predefined macro __cplusplus
#if !defined UTF_CPP_CPLUSPLUS
#define UTF_CPP_CPLUSPLUS __cplusplus
#endif
#if UTF_CPP_CPLUSPLUS >= 201103L // C++ 11 or later
#define UTF_CPP_OVERRIDE override
#define UTF_CPP_NOEXCEPT noexcept
#else // C++ 98/03
#define UTF_CPP_OVERRIDE
#define UTF_CPP_NOEXCEPT throw()
#endif // C++ 11 or later
namespace utf8
{
// The typedefs for 8-bit, 16-bit and 32-bit unsigned integers
@ -49,8 +66,8 @@ namespace internal
const uint16_t LEAD_SURROGATE_MAX = 0xdbffu;
const uint16_t TRAIL_SURROGATE_MIN = 0xdc00u;
const uint16_t TRAIL_SURROGATE_MAX = 0xdfffu;
const uint16_t LEAD_OFFSET = LEAD_SURROGATE_MIN - (0x10000 >> 10);
const uint32_t SURROGATE_OFFSET = 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN;
const uint16_t LEAD_OFFSET = 0xd7c0u; // LEAD_SURROGATE_MIN - (0x10000 >> 10)
const uint32_t SURROGATE_OFFSET = 0xfca02400u; // 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN
// Maximum valid value for a Unicode code point
const uint32_t CODE_POINT_MAX = 0x0010ffffu;
@ -142,7 +159,7 @@ namespace internal
if (!utf8::internal::is_trail(*it))
return INCOMPLETE_SEQUENCE;
return UTF8_OK;
}
@ -165,7 +182,7 @@ namespace internal
{
if (it == end)
return NOT_ENOUGH_ROOM;
code_point = utf8::internal::mask8(*it);
UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
@ -222,7 +239,7 @@ namespace internal
template <typename octet_iterator>
utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t& code_point)
{
if (it == end)
if (it == end)
return NOT_ENOUGH_ROOM;
// Save the original value of it so we can go back in case of failure
@ -237,7 +254,7 @@ namespace internal
// Get trail octets and calculate the code point
utf_error err = UTF8_OK;
switch (length) {
case 0:
case 0:
return INVALID_LEAD;
case 1:
err = utf8::internal::get_sequence_1(it, end, cp);
@ -313,18 +330,7 @@ namespace internal
((it != end) && (utf8::internal::mask8(*it++)) == bom[1]) &&
((it != end) && (utf8::internal::mask8(*it)) == bom[2])
);
}
//Deprecated in release 2.3
template <typename octet_iterator>
inline bool is_bom (octet_iterator it)
{
return (
(utf8::internal::mask8(*it++)) == bom[0] &&
(utf8::internal::mask8(*it++)) == bom[1] &&
(utf8::internal::mask8(*it)) == bom[2]
);
}
}
} // namespace utf8
#endif // header guard