diff --git a/dep/include/utf8cpp/doc/ReleaseNotes b/dep/include/utf8cpp/doc/ReleaseNotes
index 8541c7a60..857a72b75 100644
--- a/dep/include/utf8cpp/doc/ReleaseNotes
+++ b/dep/include/utf8cpp/doc/ReleaseNotes
@@ -1,9 +1,9 @@
utf8 cpp library
-Release 2.1
+Release 2.2.4
-This is a minor feature release - added the function peek_next.
+This is a minor bug fix release that improves error detection when converting from UTF-16 to UTF-8.
-Changes from version 2.o
-- Implemented feature request [ 1770746 ] "Provide a const version of next() (some sort of a peek() )
+Changes from version 2.2.3
+- Bug fix [2857454]: dereferencing an invalid iterator when a lead surrogate was the last element of the string.
Files included in the release: utf8.h, core.h, checked.h, unchecked.h, utf8cpp.html, ReleaseNotes
diff --git a/dep/include/utf8cpp/doc/utf8cpp.html b/dep/include/utf8cpp/doc/utf8cpp.html
index 4ad7e1002..069c2be52 100644
--- a/dep/include/utf8cpp/doc/utf8cpp.html
+++ b/dep/include/utf8cpp/doc/utf8cpp.html
@@ -57,6 +57,16 @@
Reference
@@ -91,14 +101,14 @@
Many C++ developers miss an easy and portable way of handling Unicode encoded
- strings. C++ Standard is currently Unicode agnostic, and while some work is being
- done to introduce Unicode to the next incarnation called C++0x, for the moment
- nothing of the sort is available. In the meantime, developers use 3rd party
- libraries like ICU, OS specific capabilities, or simply roll out their own
- solutions.
+ strings. The original C++ Standard (known as C++98 or C++03) is Unicode agnostic,
+ and while some work is being done to introduce Unicode to the next incarnation
+ called C++0x, for the moment nothing of the sort is available. In the meantime,
+ developers use third party libraries like ICU, OS specific capabilities, or simply
+ roll out their own solutions.
- In order to easily handle UTF-8 encoded Unicode strings, I have come up with a small
+ In order to easily handle UTF-8 encoded Unicode strings, I came up with a small
generic library. For anybody used to work with STL algorithms and iterators, it should be
easy and natural to use. The code is freely available for any purpose - check out
the license at the beginning of the utf8.h file. If you run into
@@ -115,11 +125,13 @@
Examples of use
+
+ Introductory Sample
+
- To illustrate the use of this utf8 library, we shall open a file containing UTF-8
- encoded text, check whether it starts with a byte order mark, read each line into a
- std::string, check it for validity, convert the text to UTF-16, and
- back to UTF-8:
+ To illustrate the use of the library, let's start with a small but complete program
+ that opens a file containing UTF-8 encoded text, reads it line by line, checks each line
+ for invalid UTF-8 byte sequences, and converts it to UTF-16 encoding and back to UTF-8:
#include <fstream>
@@ -128,33 +140,26 @@
#include <vector>
#include "utf8.h"
using namespace std;
-int main()
+int main(int argc, char** argv)
{
if (argc != 2) {
cout << "\nUsage: docsample filename\n";
return 0;
}
+
const char* test_file_path = argv[1];
-
+
ifstream fs8(test_file_path);
if (!fs8.is_open()) {
cout << "Could not open " << test_file_path << endl;
return 0;
}
-
+
unsigned line_count = 1;
string line;
- if (!getline(fs8, line))
- return 0;
-
- if (line.size() > 2) {
- if (utf8::is_bom(line.c_str()))
- cout << "There is a byte order mark at the beginning of the file\n";
- }
- do {
+ while (getline(fs8, line)) {
string::iterator end_it = utf8::find_invalid(line.begin(), line.end());
if (end_it != line.end()) {
@@ -165,38 +170,88 @@
"literal">"This part is fine: " << string(line.begin(), end_it) << "\n";
}
+
int length = utf8::distance(line.begin(), end_it);
cout << "Length of line " << line_count << " is " << length << "\n";
+
vector<unsigned short> utf16line;
utf8::utf8to16(line.begin(), end_it, back_inserter(utf16line));
+
string utf8line;
utf8::utf16to8(utf16line.begin(), utf16line.end(), back_inserter(utf8line));
+
if (utf8line != string(line.begin(), end_it))
cout << "Error in UTF-16 conversion at line: " << line_count << "\n";
- getline(fs8, line);
+
line_count++;
- } while (!fs8.eof());
+ }
return 0;
}
- In the previous code sample, we have seen the use of the following functions from
- utf8 namespace: first we used is_bom function to detect
- UTF-8 byte order mark at the beginning of the file; then for each line we performed
+ In the previous code sample, for each line we performed
a detection of invalid UTF-8 sequences with find_invalid; the number
- of characters (more precisely - the number of Unicode code points) in each line was
+ of characters (more precisely - the number of Unicode code points, including the end
+ of line and even BOM if there is one) in each line was
determined with a use of utf8::distance; finally, we have converted
each line to UTF-16 encoding with utf8to16 and back to UTF-8 with
utf16to8.
+ Checking if a file contains valid UTF-8 text
+
+Here is a function that checks whether the content of a file is valid UTF-8 encoded text without
+reading the content into memory:
+
+
+bool valid_utf8_file(const char* file_name)
+{
+ ifstream ifs(file_name);
+ if (!ifs)
+ return false;
+
+ istreambuf_iterator<char> it(ifs.rdbuf());
+ istreambuf_iterator<char> eos;
+
+ return utf8::is_valid(it, eos);
+}
+
+
+Because the function utf8::is_valid() works with input iterators, we were able
+to pass an istreambuf_iterator to it and read the content of the file directly
+without loading it into memory first.
+
+Note that other functions that take input iterator arguments can be used in a similar way. For
+instance, to read the content of a UTF-8 encoded text file and convert the text to UTF-16, just
+do something like:
+
+
+ utf8::utf8to16(it, eos, back_inserter(u16string));
+
+ Ensure that a string contains valid UTF-8 text
+
+If we have some text that "probably" contains UTF-8 encoded text and we want to
+replace any invalid UTF-8 sequence with a replacement character, something like
+the following function may be used:
+
+
+void fix_utf8_string(std::string& str)
+{
+ std::string temp;
+ utf8::replace_invalid(str.begin(), str.end(), back_inserter(temp));
+ str = temp;
+}
+
+The function will replace any invalid UTF-8 sequence with a Unicode replacement character.
+There is an overloaded function that enables the caller to supply their own replacement character.
+
Reference
diff --git a/dep/include/utf8cpp/utf8/checked.h b/dep/include/utf8cpp/utf8/checked.h
index 8d61a34bf..ff75eb789 100644
--- a/dep/include/utf8cpp/utf8/checked.h
+++ b/dep/include/utf8cpp/utf8/checked.h
@@ -64,7 +64,7 @@ namespace utf8
};
/// The library API - functions intended to be called by the users
-
+
template
output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, uint32_t replacement)
{
@@ -72,7 +72,7 @@ namespace utf8
octet_iterator sequence_start = start;
internal::utf_error err_code = internal::validate_next(start, end);
switch (err_code) {
- case internal::OK :
+ case internal::UTF8_OK :
for (octet_iterator it = sequence_start; it != start; ++it)
*out++ = *it;
break;
@@ -92,7 +92,7 @@ namespace utf8
++start;
break;
}
- }
+ }
return out;
}
@@ -106,11 +106,11 @@ namespace utf8
template
octet_iterator append(uint32_t cp, octet_iterator result)
{
- if (!internal::is_code_point_valid(cp))
+ if (!internal::is_code_point_valid(cp))
throw invalid_code_point(cp);
if (cp < 0x80) // one octet
- *(result++) = static_cast(cp);
+ *(result++) = static_cast(cp);
else if (cp < 0x800) { // two octets
*(result++) = static_cast((cp >> 6) | 0xc0);
*(result++) = static_cast((cp & 0x3f) | 0x80);
@@ -120,15 +120,12 @@ namespace utf8
*(result++) = static_cast(((cp >> 6) & 0x3f) | 0x80);
*(result++) = static_cast((cp & 0x3f) | 0x80);
}
- else if (cp <= internal::CODE_POINT_MAX) { // four octets
+ else { // four octets
*(result++) = static_cast((cp >> 18) | 0xf0);
- *(result++) = static_cast(((cp >> 12)& 0x3f) | 0x80);
+ *(result++) = static_cast(((cp >> 12) & 0x3f) | 0x80);
*(result++) = static_cast(((cp >> 6) & 0x3f) | 0x80);
*(result++) = static_cast((cp & 0x3f) | 0x80);
}
- else
- throw invalid_code_point(cp);
-
return result;
}
@@ -138,7 +135,7 @@ namespace utf8
uint32_t cp = 0;
internal::utf_error err_code = internal::validate_next(it, end, &cp);
switch (err_code) {
- case internal::OK :
+ case internal::UTF8_OK :
break;
case internal::NOT_ENOUGH_ROOM :
throw not_enough_room();
@@ -149,7 +146,7 @@ namespace utf8
case internal::INVALID_CODE_POINT :
throw invalid_code_point(cp);
}
- return cp;
+ return cp;
}
template
@@ -162,7 +159,7 @@ namespace utf8
uint32_t prior(octet_iterator& it, octet_iterator start)
{
octet_iterator end = it;
- while (internal::is_trail(*(--it)))
+ while (internal::is_trail(*(--it)))
if (it < start)
throw invalid_utf8(*it); // error - no lead byte in the sequence
octet_iterator temp = it;
@@ -174,7 +171,7 @@ namespace utf8
uint32_t previous(octet_iterator& it, octet_iterator pass_start)
{
octet_iterator end = it;
- while (internal::is_trail(*(--it)))
+ while (internal::is_trail(*(--it)))
if (it == pass_start)
throw invalid_utf8(*it); // error - no lead byte in the sequence
octet_iterator temp = it;
@@ -193,32 +190,36 @@ namespace utf8
distance (octet_iterator first, octet_iterator last)
{
typename std::iterator_traits::difference_type dist;
- for (dist = 0; first < last; ++dist)
+ for (dist = 0; first < last; ++dist)
next(first, last);
return dist;
}
template
octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
- {
+ {
while (start != end) {
uint32_t cp = internal::mask16(*start++);
// Take care of surrogate pairs first
- if (internal::is_surrogate(cp)) {
+ if (internal::is_lead_surrogate(cp)) {
if (start != end) {
uint32_t trail_surrogate = internal::mask16(*start++);
- if (trail_surrogate >= internal::TRAIL_SURROGATE_MIN && trail_surrogate <= internal::TRAIL_SURROGATE_MAX)
- cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
- else
+ if (internal::is_trail_surrogate(trail_surrogate))
+ cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
+ else
throw invalid_utf16(static_cast(trail_surrogate));
}
- else
- throw invalid_utf16(static_cast(*start));
-
+ else
+ throw invalid_utf16(static_cast(cp));
+
}
+ // Lone trail surrogate
+ else if (internal::is_trail_surrogate(cp))
+ throw invalid_utf16(static_cast(cp));
+
result = append(cp, result);
}
- return result;
+ return result;
}
template
@@ -256,13 +257,13 @@ namespace utf8
// The iterator class
template
- class iterator : public std::iterator {
+ class iterator : public std::iterator {
octet_iterator it;
octet_iterator range_start;
octet_iterator range_end;
public:
iterator () {};
- explicit iterator (const octet_iterator& octet_it,
+ explicit iterator (const octet_iterator& octet_it,
const octet_iterator& range_start,
const octet_iterator& range_end) :
it(octet_it), range_start(range_start), range_end(range_end)
@@ -277,8 +278,8 @@ namespace utf8
octet_iterator temp = it;
return next(temp, range_end);
}
- bool operator == (const iterator& rhs) const
- {
+ bool operator == (const iterator& rhs) const
+ {
if (range_start != rhs.range_start || range_end != rhs.range_end)
throw std::logic_error("Comparing utf-8 iterators defined with different ranges");
return (it == rhs.it);
@@ -287,7 +288,7 @@ namespace utf8
{
return !(operator == (rhs));
}
- iterator& operator ++ ()
+ iterator& operator ++ ()
{
next(it, range_end);
return *this;
@@ -297,7 +298,7 @@ namespace utf8
iterator temp = *this;
next(it, range_end);
return temp;
- }
+ }
iterator& operator -- ()
{
prior(it, range_start);
diff --git a/dep/include/utf8cpp/utf8/core.h b/dep/include/utf8cpp/utf8/core.h
index 538d56bd4..5a55f0655 100644
--- a/dep/include/utf8cpp/utf8/core.h
+++ b/dep/include/utf8cpp/utf8/core.h
@@ -30,27 +30,18 @@ DEALINGS IN THE SOFTWARE.
#include
-// use MaNGOS core types
-#include "Platform/Define.h"
-
namespace utf8
{
// The typedefs for 8-bit, 16-bit and 32-bit unsigned integers
- // You may need to change them to match your system.
+ // You may need to change them to match your system.
// These typedefs have the same names as ones from cstdint, or boost/cstdint
-
- /* use MaNGOS alternatives
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
- */
- typedef uint8 uint8_t;
- typedef uint16 uint16_t;
- typedef uint32 uint32_t;
// Helper code - not intended to be directly called by the library users. May be changed at any time
namespace internal
-{
+{
// Unicode constants
// Leading (high) surrogates: 0xd800 - 0xdbff
// Trailing (low) surrogates: 0xdc00 - 0xdfff
@@ -80,6 +71,18 @@ namespace internal
return ((mask8(oc) >> 6) == 0x2);
}
+ template // Tests whether a 16-bit code unit is a leading (high) surrogate.
+ inline bool is_lead_surrogate(u16 cp)
+ {
+ return (cp >= LEAD_SURROGATE_MIN && cp <= LEAD_SURROGATE_MAX); // inclusive range check on both bounds
+ }
+
+ template // Tests whether a 16-bit code unit is a trailing (low) surrogate.
+ inline bool is_trail_surrogate(u16 cp)
+ {
+ return (cp >= TRAIL_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX); // inclusive range check on both bounds
+ }
+
template
inline bool is_surrogate(u16 cp)
{
@@ -90,14 +93,14 @@ namespace internal
inline bool is_code_point_valid(u32 cp)
{
return (cp <= CODE_POINT_MAX && !is_surrogate(cp) && cp != 0xfffe && cp != 0xffff);
- }
+ }
template
inline typename std::iterator_traits::difference_type
sequence_length(octet_iterator lead_it)
{
uint8_t lead = mask8(*lead_it);
- if (lead < 0x80)
+ if (lead < 0x80)
return 1;
else if ((lead >> 5) == 0x6)
return 2;
@@ -105,121 +108,196 @@ namespace internal
return 3;
else if ((lead >> 3) == 0x1e)
return 4;
- else
+ else
return 0;
}
- enum utf_error {OK, NOT_ENOUGH_ROOM, INVALID_LEAD, INCOMPLETE_SEQUENCE, OVERLONG_SEQUENCE, INVALID_CODE_POINT};
+ inline bool is_overlong_sequence(uint32_t cp, int length) // Returns true when `length` is not the octet count expected for cp's range.
+ {
+ if (cp < 0x80) { // range that must be encoded in exactly one octet
+ if (length != 1)
+ return true;
+ }
+ else if (cp < 0x800) { // range that must be encoded in exactly two octets
+ if (length != 2)
+ return true;
+ }
+ else if (cp < 0x10000) { // range that must be encoded in exactly three octets
+ if (length != 3)
+ return true;
+ }
+
+ return false; // also reached for cp >= 0x10000, which is never reported as overlong here
+ }
+
+ enum utf_error {UTF8_OK, NOT_ENOUGH_ROOM, INVALID_LEAD, INCOMPLETE_SEQUENCE, OVERLONG_SEQUENCE, INVALID_CODE_POINT};
+
+ /// get_sequence_x functions decode utf-8 sequences of the length x
+
+ template // Decodes a one-octet sequence at `it`.
+ utf_error get_sequence_1(octet_iterator& it, octet_iterator end, uint32_t* code_point) // `it` is not advanced by this function
+ {
+ if (it != end) {
+ if (code_point) // code_point may be null when the caller does not need the value
+ *code_point = mask8(*it);
+ return UTF8_OK;
+ }
+ return NOT_ENOUGH_ROOM; // empty range: there is no octet to read
+ }
+
+ template // Decodes a two-octet sequence starting at `it`.
+ utf_error get_sequence_2(octet_iterator& it, octet_iterator end, uint32_t* code_point) // on UTF8_OK `it` is left on the trail octet; on error it may already have been advanced
+ {
+ utf_error ret_code = NOT_ENOUGH_ROOM; // result when the range is empty
+
+ if (it != end) {
+ uint32_t cp = mask8(*it);
+ if (++it != end) {
+ if (is_trail(*it)) {
+ cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f); // low 5 bits of the lead octet, then low 6 bits of the trail octet
+
+ if (code_point) // code_point may be null when the caller does not need the value
+ *code_point = cp;
+ ret_code = UTF8_OK;
+ }
+ else
+ ret_code = INCOMPLETE_SEQUENCE; // second octet is not a trail octet
+ }
+ else
+ ret_code = NOT_ENOUGH_ROOM; // range ended right after the lead octet
+ }
+
+ return ret_code;
+ }
+
+ template // Decodes a three-octet sequence starting at `it`.
+ utf_error get_sequence_3(octet_iterator& it, octet_iterator end, uint32_t* code_point) // on UTF8_OK `it` is left on the last trail octet; on error it may already have been advanced
+ {
+ utf_error ret_code = NOT_ENOUGH_ROOM; // result when the range is empty
+
+ if (it != end) {
+ uint32_t cp = mask8(*it);
+ if (++it != end) {
+ if (is_trail(*it)) {
+ cp = ((cp << 12) & 0xffff) + ((mask8(*it) << 6) & 0xfff); // low 4 bits of the lead octet, then low 6 bits of the first trail octet
+ if (++it != end) {
+ if (is_trail(*it)) {
+ cp += (*it) & 0x3f; // low 6 bits of the second trail octet
+
+ if (code_point) // code_point may be null when the caller does not need the value
+ *code_point = cp;
+ ret_code = UTF8_OK;
+ }
+ else
+ ret_code = INCOMPLETE_SEQUENCE; // third octet is not a trail octet
+ }
+ else
+ ret_code = NOT_ENOUGH_ROOM; // range ended after the second octet
+ }
+ else
+ ret_code = INCOMPLETE_SEQUENCE; // second octet is not a trail octet
+ }
+ else
+ ret_code = NOT_ENOUGH_ROOM; // range ended right after the lead octet
+ }
+
+ return ret_code;
+ }
+
+ template // Decodes a four-octet sequence starting at `it`.
+ utf_error get_sequence_4(octet_iterator& it, octet_iterator end, uint32_t* code_point) // on UTF8_OK `it` is left on the last trail octet; on error it may already have been advanced
+ {
+ utf_error ret_code = NOT_ENOUGH_ROOM; // result when the range is empty
+
+ if (it != end) {
+ uint32_t cp = mask8(*it);
+ if (++it != end) {
+ if (is_trail(*it)) {
+ cp = ((cp << 18) & 0x1fffff) + ((mask8(*it) << 12) & 0x3ffff); // low 3 bits of the lead octet, then low 6 bits of the first trail octet
+ if (++it != end) {
+ if (is_trail(*it)) {
+ cp += (mask8(*it) << 6) & 0xfff; // low 6 bits of the second trail octet
+ if (++it != end) {
+ if (is_trail(*it)) {
+ cp += (*it) & 0x3f; // low 6 bits of the third trail octet
+
+ if (code_point) // code_point may be null when the caller does not need the value
+ *code_point = cp;
+ ret_code = UTF8_OK;
+ }
+ else
+ ret_code = INCOMPLETE_SEQUENCE; // fourth octet is not a trail octet
+ }
+ else
+ ret_code = NOT_ENOUGH_ROOM; // range ended after the third octet
+ }
+ else
+ ret_code = INCOMPLETE_SEQUENCE; // third octet is not a trail octet
+ }
+ else
+ ret_code = NOT_ENOUGH_ROOM; // range ended after the second octet
+ }
+ else
+ ret_code = INCOMPLETE_SEQUENCE; // second octet is not a trail octet
+ }
+ else
+ ret_code = NOT_ENOUGH_ROOM; // range ended right after the lead octet
+ }
+
+ return ret_code;
+ }
template
utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t* code_point)
{
- uint32_t cp = mask8(*it);
- // Check the lead octet
+ // Save the original value of it so we can go back in case of failure
+ // Of course, it does not make much sense with e.g. stream iterators
+ octet_iterator original_it = it;
+
+ uint32_t cp = 0;
+ // Determine the sequence length based on the lead octet
typedef typename std::iterator_traits::difference_type octet_difference_type;
octet_difference_type length = sequence_length(it);
+ if (length == 0)
+ return INVALID_LEAD;
- // "Shortcut" for ASCII characters
- if (length == 1) {
- if (end - it > 0) {
- if (code_point)
- *code_point = cp;
- ++it;
- return OK;
- }
- else
- return NOT_ENOUGH_ROOM;
- }
-
- // Do we have enough memory?
- if (std::distance(it, end) < length)
- return NOT_ENOUGH_ROOM;
-
- // Check trail octets and calculate the code point
+ // Now that we have a valid sequence length, get trail octets and calculate the code point
+ utf_error err = UTF8_OK;
switch (length) {
- case 0:
- return INVALID_LEAD;
+ case 1:
+ err = get_sequence_1(it, end, &cp);
break;
case 2:
- if (is_trail(*(++it))) {
- cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f);
- }
- else {
- --it;
- return INCOMPLETE_SEQUENCE;
- }
+ err = get_sequence_2(it, end, &cp);
break;
case 3:
- if (is_trail(*(++it))) {
- cp = ((cp << 12) & 0xffff) + ((mask8(*it) << 6) & 0xfff);
- if (is_trail(*(++it))) {
- cp += (*it) & 0x3f;
- }
- else {
- std::advance(it, -2);
- return INCOMPLETE_SEQUENCE;
- }
- }
- else {
- --it;
- return INCOMPLETE_SEQUENCE;
- }
+ err = get_sequence_3(it, end, &cp);
break;
case 4:
- if (is_trail(*(++it))) {
- cp = ((cp << 18) & 0x1fffff) + ((mask8(*it) << 12) & 0x3ffff);
- if (is_trail(*(++it))) {
- cp += (mask8(*it) << 6) & 0xfff;
- if (is_trail(*(++it))) {
- cp += (*it) & 0x3f;
- }
- else {
- std::advance(it, -3);
- return INCOMPLETE_SEQUENCE;
- }
- }
- else {
- std::advance(it, -2);
- return INCOMPLETE_SEQUENCE;
- }
- }
- else {
- --it;
- return INCOMPLETE_SEQUENCE;
- }
+ err = get_sequence_4(it, end, &cp);
break;
}
- // Is the code point valid?
- if (!is_code_point_valid(cp)) {
- for (octet_difference_type i = 0; i < length - 1; ++i)
- --it;
- return INVALID_CODE_POINT;
- }
-
- if (code_point)
- *code_point = cp;
-
- if (cp < 0x80) {
- if (length != 1) {
- std::advance(it, -(length-1));
- return OVERLONG_SEQUENCE;
+
+ if (err == UTF8_OK) {
+ // Decoding succeeded. Now, security checks...
+ if (is_code_point_valid(cp)) {
+ if (!is_overlong_sequence(cp, length)){
+ // Passed! Return here.
+ if (code_point)
+ *code_point = cp;
+ ++it;
+ return UTF8_OK;
+ }
+ else
+ err = OVERLONG_SEQUENCE;
}
+ else
+ err = INVALID_CODE_POINT;
}
- else if (cp < 0x800) {
- if (length != 2) {
- std::advance(it, -(length-1));
- return OVERLONG_SEQUENCE;
- }
- }
- else if (cp < 0x10000) {
- if (length != 3) {
- std::advance(it, -(length-1));
- return OVERLONG_SEQUENCE;
- }
- }
-
- ++it;
- return OK;
+
+ // Failure branch - restore the original value of the iterator
+ it = original_it;
+ return err;
}
template
@@ -227,12 +305,12 @@ namespace internal
return validate_next(it, end, 0);
}
-} // namespace internal
+} // namespace internal
/// The library API - functions intended to be called by the users
// Byte order mark
- const uint8_t bom[] = {0xef, 0xbb, 0xbf};
+ const uint8_t bom[] = {0xef, 0xbb, 0xbf};
template
octet_iterator find_invalid(octet_iterator start, octet_iterator end)
@@ -240,7 +318,7 @@ namespace internal
octet_iterator result = start;
while (result != end) {
internal::utf_error err_code = internal::validate_next(result, end);
- if (err_code != internal::OK)
+ if (err_code != internal::UTF8_OK)
return result;
}
return result;
diff --git a/dep/include/utf8cpp/utf8/unchecked.h b/dep/include/utf8cpp/utf8/unchecked.h
index 4009cebe6..d3110cb88 100644
--- a/dep/include/utf8cpp/utf8/unchecked.h
+++ b/dep/include/utf8cpp/utf8/unchecked.h
@@ -45,13 +45,13 @@ namespace utf8
}
else if (cp < 0x10000) { // three octets
*(result++) = static_cast((cp >> 12) | 0xe0);
- *(result++) = static_cast((cp >> 6) & 0x3f | 0x80);
+ *(result++) = static_cast(((cp >> 6) & 0x3f) | 0x80);
*(result++) = static_cast((cp & 0x3f) | 0x80);
}
else { // four octets
*(result++) = static_cast((cp >> 18) | 0xf0);
- *(result++) = static_cast((cp >> 12)& 0x3f | 0x80);
- *(result++) = static_cast((cp >> 6) & 0x3f | 0x80);
+ *(result++) = static_cast(((cp >> 12) & 0x3f)| 0x80);
+ *(result++) = static_cast(((cp >> 6) & 0x3f) | 0x80);
*(result++) = static_cast((cp & 0x3f) | 0x80);
}
return result;
@@ -132,7 +132,7 @@ namespace utf8
while (start != end) {
uint32_t cp = internal::mask16(*start++);
// Take care of surrogate pairs first
- if (internal::is_surrogate(cp)) {
+ if (internal::is_lead_surrogate(cp)) {
uint32_t trail_surrogate = internal::mask16(*start++);
cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
}
diff --git a/src/shared/revision_nr.h b/src/shared/revision_nr.h
index e7d66b70b..4c5bc67eb 100644
--- a/src/shared/revision_nr.h
+++ b/src/shared/revision_nr.h
@@ -1,4 +1,4 @@
#ifndef __REVISION_NR_H__
#define __REVISION_NR_H__
- #define REVISION_NR "9230"
+ #define REVISION_NR "9231"
#endif // __REVISION_NR_H__