// Predicate over a single byte; the definition is not visible in this chunk.
bool has_multibyte_mark(unsigned char c);
// Takes the source string and a position `p` by non-const reference and
// yields a 32-bit value. Presumably decodes one code point starting at `p`
// and advances `p` past it -- TODO confirm against the definition.
// The visible callers wrap this call in try/catch for std::exception and
// fall back to a value of 0 when it throws.
uint32_t decode_utf8(const std::string& text, size_t& p);
-std::string encode_utf8(uint32_t code);
/**
* Returns true if this byte matches the bit pattern 10xxxxxx, i.e. it is a continuation byte (the 2nd, 3rd or 4th byte) of a multibyte UTF-8 sequence.
try {
chr = decode_utf8(text, pos);
} catch (std::exception) {
- log_debug << "Malformed utf-8 sequence beginning with " << *((uint32_t*)(text.c_str() + pos)) << " found " << std::endl;
+ log_debug << "Malformed utf-8 sequence beginning with " << *(reinterpret_cast<const uint32_t*>(text.c_str() + pos)) << " found " << std::endl;
chr = 0;
}
}
- bool
+ bool
UTF8Iterator::done() const
{
  // Finished once the stored position has moved beyond the size of the text.
  // NOTE(review): the comparison is strict, so pos == text.size() does not
  // count as done -- presumably because pos already sits past the current
  // character after decoding; confirm against decode_utf8.
  const bool past_end = text.size() < pos;
  return past_end;
}
- UTF8Iterator&
+ UTF8Iterator&
UTF8Iterator::operator++() {
try {
chr = decode_utf8(text, pos);
} catch (std::exception) {
- log_debug << "Malformed utf-8 sequence beginning with " << *((uint32_t*)(text.c_str() + pos)) << " found " << std::endl;
+ log_debug << "Malformed utf-8 sequence beginning with " << *(reinterpret_cast<const uint32_t*>(text.c_str() + pos)) << " found " << std::endl;
chr = 0;
++pos;
}