Moved UTF8Iterator to its own file

author Ingo Ruhnke <grumbel@gmx.de>

Fri, 20 Nov 2009 16:06:13 +0000 (16:06 +0000)

committer Ingo Ruhnke <grumbel@gmx.de>

Fri, 20 Nov 2009 16:06:13 +0000 (16:06 +0000)
author Ingo Ruhnke <grumbel@gmx.de>
Fri, 20 Nov 2009 16:06:13 +0000 (16:06 +0000)
committer Ingo Ruhnke <grumbel@gmx.de>
Fri, 20 Nov 2009 16:06:13 +0000 (16:06 +0000)
diff --git a/src/util/utf8_iterator.cpp b/src/util/utf8_iterator.cpp

new file mode 100644 (file)

index 0000000..33b061f
--- /dev/null
+++ b/src/util/utf8_iterator.cpp
@@ -0,0 +1,127 @@
+//  SuperTux
+//  Copyright (C) 2009 Ingo Ruhnke <grumbel@gmx.de>
+//
+//  This program is free software: you can redistribute it and/or modify
+//  it under the terms of the GNU General Public License as published by
+//  the Free Software Foundation, either version 3 of the License, or
+//  (at your option) any later version.
+//
+//  This program is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//  GNU General Public License for more details.
+//
+//  You should have received a copy of the GNU General Public License
+//  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+#include "util/utf8_iterator.hpp"
+
+#include <stdexcept>
+
+#include "util/log.hpp"
+
+namespace {
+
+bool     has_multibyte_mark(unsigned char c);
+uint32_t decode_utf8(const std::string& text, size_t& p);
+std::string encode_utf8(uint32_t code);
+
+/**
+ * tells whether @a c carries the bit pattern 10xx.xxxx, which marks a continuation byte (the 2nd, 3rd or 4th byte) of a multibyte utf8 sequence
+ */
+bool has_multibyte_mark(unsigned char c) {
+  return (c & 0xC0) == 0x80;
+}
+
+/**
+ * gets unicode character at byte position @a p of UTF-8 encoded @a
+ * text, then advances @a p to the next character.
+ *
+ * @throws std::runtime_error if decoding fails.
+ * See unicode standard section 3.10 table 3-5 and 3-6 for details.
+ */
+uint32_t decode_utf8(const std::string& text, size_t& p)
+{
+  uint32_t c1 = (unsigned char) text[p+0];
+
+  if (has_multibyte_mark(c1)) std::runtime_error("Malformed utf-8 sequence");
+
+  if ((c1 & 0200) == 0000) {
+    // 0xxx.xxxx: 1 byte sequence
+    p+=1;
+    return c1;
+  }
+  else if ((c1 & 0340) == 0300) {
+    // 110x.xxxx: 2 byte sequence
+    if(p+1 >= text.size()) throw std::range_error("Malformed utf-8 sequence");
+    uint32_t c2 = (unsigned char) text[p+1];
+    if (!has_multibyte_mark(c2)) throw std::runtime_error("Malformed utf-8 sequence");
+    p+=2;
+    return (c1 & 0037) << 6 | (c2 & 0077);
+  }
+  else if ((c1 & 0360) == 0340) {
+    // 1110.xxxx: 3 byte sequence
+    if(p+2 >= text.size()) throw std::range_error("Malformed utf-8 sequence");
+    uint32_t c2 = (unsigned char) text[p+1];
+    uint32_t c3 = (unsigned char) text[p+2];
+    if (!has_multibyte_mark(c2)) throw std::runtime_error("Malformed utf-8 sequence");
+    if (!has_multibyte_mark(c3)) throw std::runtime_error("Malformed utf-8 sequence");
+    p+=3;
+    return (c1 & 0017) << 12 | (c2 & 0077) << 6 | (c3 & 0077);
+  }
+  else if ((c1 & 0370) == 0360) {
+    // 1111.0xxx: 4 byte sequence
+    if(p+3 >= text.size()) throw std::range_error("Malformed utf-8 sequence");
+    uint32_t c2 = (unsigned char) text[p+1];
+    uint32_t c3 = (unsigned char) text[p+2];
+    uint32_t c4 = (unsigned char) text[p+4];
+    if (!has_multibyte_mark(c2)) throw std::runtime_error("Malformed utf-8 sequence");
+    if (!has_multibyte_mark(c3)) throw std::runtime_error("Malformed utf-8 sequence");
+    if (!has_multibyte_mark(c4)) throw std::runtime_error("Malformed utf-8 sequence");
+    p+=4;
+    return (c1 & 0007) << 18 | (c2 & 0077) << 12 | (c3 & 0077) << 6 | (c4 & 0077);
+  }
+  throw std::runtime_error("Malformed utf-8 sequence");
+}
+
+} // namespace
+
+
+/** Starts iterating over @a text_ (kept by reference): decodes its first character; a malformed start yields 0 and skips one byte. */
+UTF8Iterator::UTF8Iterator(const std::string& text_) :
+  text(text_), pos(0), chr()
+{
+  try {
+    chr = decode_utf8(text, pos);
+  } catch (const std::exception&) { // only the offending lead byte is logged, so nothing is read past the end of the string
+    log_debug << "Malformed utf-8 sequence beginning with " << static_cast<uint32_t>(static_cast<unsigned char>(text[pos])) << " found " << std::endl;
+    chr = 0;
+    ++pos; // step over the bad byte exactly like operator++() does, so it is not decoded a second time
+  }
+}
+
+  bool
+UTF8Iterator::done() const
+  { // finished once pos has moved beyond the end of text
+    return text.size() < pos;
+  }
+
+  /** Decodes the character at pos into chr and moves pos past it; a malformed sequence yields 0 and skips one byte. */
+  UTF8Iterator&
+UTF8Iterator::operator++() {
+    try {
+      chr = decode_utf8(text, pos);
+    } catch (const std::exception&) { // only the offending lead byte is logged, so nothing is read past the end of the string
+      log_debug << "Malformed utf-8 sequence beginning with " << static_cast<uint32_t>(static_cast<unsigned char>(text[pos])) << " found " << std::endl;
+      chr = 0;
+      ++pos; // decode_utf8 leaves pos alone when it throws; skip the bad byte so iteration makes progress
+    }
+    return *this;
+  }
+
+  uint32_t
+  UTF8Iterator::operator*() const {
+    return this->chr; // character stored by the constructor or the latest operator++(); 0 after a malformed sequence
+  }
+
+/* EOF */
diff --git a/src/util/utf8_iterator.hpp b/src/util/utf8_iterator.hpp

new file mode 100644 (file)

index 0000000..79133c8
--- /dev/null
+++ b/src/util/utf8_iterator.hpp
@@ -0,0 +1,40 @@
+//  SuperTux
+//  Copyright (C) 2006 Matthias Braun <matze@braunis.de>
+//                     Ingo Ruhnke <grumbel@gmx.de>
+//
+//  This program is free software: you can redistribute it and/or modify
+//  it under the terms of the GNU General Public License as published by
+//  the Free Software Foundation, either version 3 of the License, or
+//  (at your option) any later version.
+//
+//  This program is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//  GNU General Public License for more details.
+//
+//  You should have received a copy of the GNU General Public License
+//  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef HEADER_SUPERTUX_UTIL_UTF8_ITERATOR_HPP
+#define HEADER_SUPERTUX_UTIL_UTF8_ITERATOR_HPP
+
+#include <string>
+#include <stdint.h>
+
+class UTF8Iterator // steps through a UTF-8 encoded std::string one decoded character at a time
+{
+public:
+  const std::string&     text; ///< string being iterated; stored as a reference, not a copy
+  std::string::size_type pos; ///< byte offset handed to the decoder for the next character
+  uint32_t chr; ///< most recently decoded character; 0 if its byte sequence was malformed
+
+  UTF8Iterator(const std::string& text_); ///< binds to @a text_ and decodes its first character
+
+  bool done() const; ///< true once pos has moved beyond text.size()
+  UTF8Iterator& operator++(); ///< decodes the next character into chr
+  uint32_t operator*() const; ///< returns chr
+};
+
+#endif
+
+/* EOF */
diff --git a/src/video/font.cpp b/src/video/font.cpp

index 4aac58a..86e524d 100644 (file)
--- a/src/video/font.cpp
+++ b/src/video/font.cpp
@@ -23,63 +23,18 @@
  #include <SDL_image.h>
  #include <physfs.h>
  
-#include "physfs/physfs_sdl.hpp"
-
-#include "util/file_system.hpp"
-
  #include "lisp/list_iterator.hpp"
  #include "lisp/parser.hpp"
+#include "physfs/physfs_sdl.hpp"
  #include "supertux/screen.hpp"
+#include "util/file_system.hpp"
  #include "util/log.hpp"
+#include "util/utf8_iterator.hpp"
  #include "video/drawing_context.hpp"
  #include "video/font.hpp"
  #include "video/renderer.hpp"
  
  namespace {
-bool     has_multibyte_mark(unsigned char c);
-uint32_t decode_utf8(const std::string& text, size_t& p);
-std::string encode_utf8(uint32_t code);
-
-struct UTF8Iterator
-{
-  const std::string&     text;
-  std::string::size_type pos;
-  uint32_t chr;
-
-  UTF8Iterator(const std::string& text_) :
-    text(text_),
-    pos(0),
-    chr()
-  {
-    try {
-      chr = decode_utf8(text, pos);
-    } catch (std::exception) {
-      log_debug << "Malformed utf-8 sequence beginning with " << *((uint32_t*)(text.c_str() + pos)) << " found " << std::endl;
-      chr = 0;
-    }
-  }
-
-  bool done() const
-  {
-    return pos > text.size();
-  }
-
-  UTF8Iterator& operator++() {
-    try {
-      chr = decode_utf8(text, pos);
-    } catch (std::exception) {
-      log_debug << "Malformed utf-8 sequence beginning with " << *((uint32_t*)(text.c_str() + pos)) << " found " << std::endl;
-      chr = 0;
-      ++pos;
-    }
-
-    return *this;
-  }
-
-  uint32_t operator*() const {
-    return chr;
-  }
-};
  
  bool vline_empty(SDL_Surface* surface, int x, int start_y, int end_y, Uint8 threshold)
  {
@@ -95,6 +50,7 @@ bool vline_empty(SDL_Surface* surface, int x, int start_y, int end_y, Uint8 thre
    }
    return true;
  }
+
  } // namespace
  
  Font::Font(GlyphWidth glyph_width_,
@@ -471,66 +427,4 @@ Font::draw_chars(Renderer *renderer, bool notshadow, const std::string& text,
    }
  }
  
-namespace {
-
-/**
- * returns true if this byte matches a bitmask of 10xx.xxxx, i.e. it is the 2nd, 3rd or 4th byte of a multibyte utf8 string
- */
-bool has_multibyte_mark(unsigned char c) {
-  return ((c & 0300) == 0200);
-}
-
-/**
- * gets unicode character at byte position @a p of UTF-8 encoded @a
- * text, then advances @a p to the next character.
- *
- * @throws std::runtime_error if decoding fails.
- * See unicode standard section 3.10 table 3-5 and 3-6 for details.
- */
-uint32_t decode_utf8(const std::string& text, size_t& p)
-{
-  uint32_t c1 = (unsigned char) text[p+0];
-
-  if (has_multibyte_mark(c1)) std::runtime_error("Malformed utf-8 sequence");
-
-  if ((c1 & 0200) == 0000) {
-    // 0xxx.xxxx: 1 byte sequence
-    p+=1;
-    return c1;
-  }
-  else if ((c1 & 0340) == 0300) {
-    // 110x.xxxx: 2 byte sequence
-    if(p+1 >= text.size()) throw std::range_error("Malformed utf-8 sequence");
-    uint32_t c2 = (unsigned char) text[p+1];
-    if (!has_multibyte_mark(c2)) throw std::runtime_error("Malformed utf-8 sequence");
-    p+=2;
-    return (c1 & 0037) << 6 | (c2 & 0077);
-  }
-  else if ((c1 & 0360) == 0340) {
-    // 1110.xxxx: 3 byte sequence
-    if(p+2 >= text.size()) throw std::range_error("Malformed utf-8 sequence");
-    uint32_t c2 = (unsigned char) text[p+1];
-    uint32_t c3 = (unsigned char) text[p+2];
-    if (!has_multibyte_mark(c2)) throw std::runtime_error("Malformed utf-8 sequence");
-    if (!has_multibyte_mark(c3)) throw std::runtime_error("Malformed utf-8 sequence");
-    p+=3;
-    return (c1 & 0017) << 12 | (c2 & 0077) << 6 | (c3 & 0077);
-  }
-  else if ((c1 & 0370) == 0360) {
-    // 1111.0xxx: 4 byte sequence
-    if(p+3 >= text.size()) throw std::range_error("Malformed utf-8 sequence");
-    uint32_t c2 = (unsigned char) text[p+1];
-    uint32_t c3 = (unsigned char) text[p+2];
-    uint32_t c4 = (unsigned char) text[p+4];
-    if (!has_multibyte_mark(c2)) throw std::runtime_error("Malformed utf-8 sequence");
-    if (!has_multibyte_mark(c3)) throw std::runtime_error("Malformed utf-8 sequence");
-    if (!has_multibyte_mark(c4)) throw std::runtime_error("Malformed utf-8 sequence");
-    p+=4;
-    return (c1 & 0007) << 18 | (c2 & 0077) << 12 | (c3 & 0077) << 6 | (c4 & 0077);
-  }
-  throw std::runtime_error("Malformed utf-8 sequence");
-}
-
-} // namespace
-
  /* EOF */
author	Ingo Ruhnke <grumbel@gmx.de>
	Fri, 20 Nov 2009 16:06:13 +0000 (16:06 +0000)
committer	Ingo Ruhnke <grumbel@gmx.de>
	Fri, 20 Nov 2009 16:06:13 +0000 (16:06 +0000)
src/util/utf8_iterator.cpp	[new file with mode: 0644]	patch \| blob
src/util/utf8_iterator.hpp	[new file with mode: 0644]	patch \| blob
src/video/font.cpp		patch \| blob \| history