- if ((c1 & 0200) == 0000) {
- // 0xxx.xxxx: 1 byte sequence
- p+=1;
- return c1;
- }
- else if ((c1 & 0340) == 0300) {
- // 110x.xxxx: 2 byte sequence
- if(p+1 >= text.size()) throw std::range_error("Malformed utf-8 sequence");
- uint32_t c2 = (unsigned char) text[p+1];
- if (!has_multibyte_mark(c2)) throw std::runtime_error("Malformed utf-8 sequence");
- p+=2;
- return (c1 & 0037) << 6 | (c2 & 0077);
- }
- else if ((c1 & 0360) == 0340) {
- // 1110.xxxx: 3 byte sequence
- if(p+2 >= text.size()) throw std::range_error("Malformed utf-8 sequence");
- uint32_t c2 = (unsigned char) text[p+1];
- uint32_t c3 = (unsigned char) text[p+2];
- if (!has_multibyte_mark(c2)) throw std::runtime_error("Malformed utf-8 sequence");
- if (!has_multibyte_mark(c3)) throw std::runtime_error("Malformed utf-8 sequence");
- p+=3;
- return (c1 & 0017) << 12 | (c2 & 0077) << 6 | (c3 & 0077);
- }
- else if ((c1 & 0370) == 0360) {
- // 1111.0xxx: 4 byte sequence
- if(p+3 >= text.size()) throw std::range_error("Malformed utf-8 sequence");
- uint32_t c2 = (unsigned char) text[p+1];
- uint32_t c3 = (unsigned char) text[p+2];
- uint32_t c4 = (unsigned char) text[p+4];
- if (!has_multibyte_mark(c2)) throw std::runtime_error("Malformed utf-8 sequence");
- if (!has_multibyte_mark(c3)) throw std::runtime_error("Malformed utf-8 sequence");
- if (!has_multibyte_mark(c4)) throw std::runtime_error("Malformed utf-8 sequence");
- p+=4;
- return (c1 & 0007) << 18 | (c2 & 0077) << 12 | (c3 & 0077) << 6 | (c4 & 0077);