add static import of tinygettext
[supertux.git] / external / tinygettext / tinygettext / po_parser.cpp
diff --git a/external/tinygettext/tinygettext/po_parser.cpp b/external/tinygettext/tinygettext/po_parser.cpp
new file mode 100644 (file)
index 0000000..5ceb3fd
--- /dev/null
@@ -0,0 +1,496 @@
+//  tinygettext - A gettext replacement that works directly on .po files
+//  Copyright (C) 2009 Ingo Ruhnke <grumbel@gmx.de>
+//
+//  This program is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU General Public License
+//  as published by the Free Software Foundation; either version 2
+//  of the License, or (at your option) any later version.
+//
+//  This program is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//  GNU General Public License for more details.
+//
+//  You should have received a copy of the GNU General Public License
+//  along with this program; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+
+#include "po_parser.hpp"
+
+#include <iostream>
+#include <ctype.h>
+#include <string>
+#include <istream>
+#include <string.h>
+#include <map>
+#include <stdlib.h>
+
+#include "language.hpp"
+#include "log_stream.hpp"
+#include "iconv.hpp"
+#include "dictionary.hpp"
+#include "plural_forms.hpp"
+
+namespace tinygettext {
+
+bool POParser::pedantic = true;
+\f
+void
+POParser::parse(const std::string& filename, std::istream& in, Dictionary& dict)
+{
+  POParser parser(filename, in, dict);
+  parser.parse();
+}
+\f
+class POParserError {};
+
+POParser::POParser(const std::string& filename_, std::istream& in_, Dictionary& dict_, bool use_fuzzy_) :
+  filename(filename_), 
+  in(in_), 
+  dict(dict_), 
+  use_fuzzy(use_fuzzy_),
+  running(false), 
+  eof(false), 
+  big5(false),
+  line_number(0), 
+  current_line(), 
+  conv()
+{
+}
+
+POParser::~POParser()
+{
+}
+
+void
+POParser::warning(const std::string& msg)
+{
+  log_warning << filename << ":" << line_number << ": warning: " << msg << ": " << current_line << std::endl;
+  //log_warning << "Line: " << current_line << std::endl;
+}
+
+void
+POParser::error(const std::string& msg)
+{
+  log_error << filename << ":" << line_number << ": error: " << msg  << ": " << current_line << std::endl;
+
+  // Try to recover from an error by searching for start of another entry
+  do
+    next_line();
+  while(!eof && !is_empty_line());
+
+  throw POParserError();
+}
+
+void
+POParser::next_line()
+{
+  line_number += 1;
+  if (!std::getline(in, current_line))
+    eof = true;
+}
+
+void
+POParser::get_string_line(std::ostringstream& out,unsigned int skip)
+{
+  if (skip+1 >= static_cast<unsigned int>(current_line.size()))
+    error("unexpected end of line");
+
+  if (current_line[skip] != '"')
+    error("expected start of string '\"'");
+  
+  std::string::size_type i;
+  for(i = skip+1; current_line[i] != '\"'; ++i)
+  {
+    if (big5 && static_cast<unsigned char>(current_line[i]) >= 0x81 && static_cast<unsigned char>(current_line[i]) <= 0xfe)
+    {
+      out << current_line[i];
+
+      i += 1;
+          
+      if (i >= current_line.size())
+        error("invalid big5 encoding");
+          
+      out << current_line[i];
+    }
+    else if (i >= current_line.size())
+    {
+      error("unexpected end of string");
+    }
+    else if (current_line[i] == '\\')
+    {
+      i += 1;
+
+      if (i >= current_line.size())
+        error("unexpected end of string in handling '\\'");
+
+      switch (current_line[i])
+      {
+        case 'a':  out << '\a'; break;
+        case 'b':  out << '\b'; break;
+        case 'v':  out << '\v'; break;
+        case 'n':  out << '\n'; break;
+        case 't':  out << '\t'; break;
+        case 'r':  out << '\r'; break;
+        case '"':  out << '"'; break;
+        case '\\': out << '\\'; break;
+        default: 
+          std::ostringstream err;
+          err << "unhandled escape '\\" << current_line[i] << "'";
+          warning(err.str());
+
+          out << current_line[i-1] << current_line[i];
+          break;
+      }
+    }
+    else
+    {
+      out << current_line[i];
+    }
+  }
+
+  // process trailing garbage in line and warn if there is any
+  for(i = i+1; i < current_line.size(); ++i)
+    if (!isspace(current_line[i]))
+    {
+      warning("unexpected garbage after string ignoren");
+      break;
+    }
+}
+
+std::string
+POParser::get_string(unsigned int skip)
+{
+  std::ostringstream out;
+
+  if (skip+1 >= static_cast<unsigned int>(current_line.size()))
+    error("unexpected end of line");
+
+  if (current_line[skip] == ' ' && current_line[skip+1] == '"')
+  {
+    get_string_line(out, skip+1);
+  }
+  else
+  {
+    if (pedantic)
+      warning("keyword and string must be seperated by a single space");
+
+    for(;;)
+    {
+      if (skip >= static_cast<unsigned int>(current_line.size()))
+        error("unexpected end of line");
+      else if (current_line[skip] == '\"')
+      {
+        get_string_line(out, skip);
+        break;
+      }
+      else if (!isspace(current_line[skip]))
+      {
+        error("string must start with '\"'");
+      }
+      else
+      {
+        // skip space
+      }
+
+      skip += 1;
+    }
+  }
+  
+next:
+  next_line();
+  for(std::string::size_type i = 0; i < current_line.size(); ++i)
+  {
+    if (current_line[i] == '"')
+    {
+      if (i == 1)
+        if (pedantic)
+          warning("leading whitespace before string");
+
+      get_string_line(out, i);
+      goto next;
+    }
+    else if (isspace(current_line[i]))
+    {
+      // skip
+    }
+    else
+    {
+      break;
+    }
+  }
+
+  return out.str();
+}
+
+static bool has_prefix(const std::string& lhs, const std::string rhs)
+{
+  if (lhs.length() < rhs.length())
+    return false;
+  else
+    return lhs.compare(0, rhs.length(), rhs) == 0;
+}
+
+void
+POParser::parse_header(const std::string& header)
+{
+  std::string from_charset;
+  std::string::size_type start = 0;
+  for(std::string::size_type i = 0; i < header.length(); ++i)
+  {
+    if (header[i] == '\n')
+    {
+      std::string line = header.substr(start, i - start);
+
+      if (has_prefix(line, "Content-Type:"))
+      {
+        // from_charset = line.substr(len);
+        unsigned int len = strlen("Content-Type: text/plain; charset=");
+        if (line.compare(0, len, "Content-Type: text/plain; charset=") == 0)
+        {
+          from_charset = line.substr(len);
+
+          for(std::string::iterator ch = from_charset.begin(); ch != from_charset.end(); ++ch)
+            *ch = static_cast<char>(toupper(*ch));
+        }
+        else
+        {
+          warning("malformed Content-Type header");
+        }
+      }
+      else if (has_prefix(line, "Plural-Forms:"))
+      {
+        PluralForms plural_forms = PluralForms::from_string(line);
+        if (!plural_forms)
+        {
+          warning("unknown Plural-Forms given");
+        }
+        else
+        {
+          if (!dict.get_plural_forms())
+          {
+            dict.set_plural_forms(plural_forms);
+          }
+          else
+          {
+            if (dict.get_plural_forms() != plural_forms)
+            {
+              warning("Plural-Forms missmatch between .po file and dictionary");
+            }
+          }
+        }
+      }
+      start = i+1;
+    }
+  }
+
+  if (from_charset.empty() || from_charset == "CHARSET")
+  {
+    warning("charset not specified for .po, fallback to utf-8");
+    from_charset = "UTF-8";
+  }
+  else if (from_charset == "BIG5")
+  {
+    big5 = true;
+  }
+
+  conv.set_charsets(from_charset, dict.get_charset());
+}
+
+bool
+POParser::is_empty_line()
+{
+  if (current_line.empty())
+  {
+    return true;
+  }
+  else if (current_line[0] == '#')
+  { // handle comments as empty lines
+    if (current_line.size() == 1 || (current_line.size() >= 2 && isspace(current_line[1])))
+      return true;
+    else
+      return false;
+  }
+  else
+  {
+    for(std::string::iterator i = current_line.begin(); i != current_line.end(); ++i)
+    {
+      if (!isspace(*i))
+        return false;
+    }
+  }
+  return true;
+}
+
+bool
+POParser::prefix(const char* prefix_str)
+{
+  return current_line.compare(0, strlen(prefix_str), prefix_str) == 0;
+}
+
+void
+POParser::parse()
+{
+  next_line();
+
+  // skip UTF-8 intro that some text editors produce
+  // see http://en.wikipedia.org/wiki/Byte-order_mark
+  if (current_line.size() >= 3 &&
+      current_line[0] == static_cast<char>(0xef) &&
+      current_line[1] == static_cast<char>(0xbb) &&
+      current_line[2] == static_cast<char>(0xbf))
+  {
+    current_line = current_line.substr(3);
+  }
+
+  // Parser structure
+  while(!eof)
+  {
+    try 
+    {
+      bool fuzzy =  false;
+      bool has_msgctxt = false;
+      std::string msgctxt;
+      std::string msgid;
+
+      while(prefix("#"))
+      {
+        if (current_line.size() >= 2 && current_line[1] == ',')
+        {
+          // FIXME: Rather simplistic hunt for fuzzy flag
+          if (current_line.find("fuzzy", 2) != std::string::npos)
+            fuzzy = true;
+        }
+
+        next_line();
+      }
+
+      if (!is_empty_line())
+      {
+        if (prefix("msgctxt"))
+        {
+          has_msgctxt = true;
+          msgctxt = get_string(7);
+        }
+
+        if (prefix("msgid"))
+          msgid = get_string(5);
+        else
+          error("expected 'msgid'");
+
+        if (prefix("msgid_plural"))
+        {
+          std::string msgid_plural = get_string(12);
+          std::vector<std::string> msgstr_num;
+         bool saw_nonempty_msgstr = false;
+
+        next:
+          if (is_empty_line())
+          {
+            if (msgstr_num.empty())
+              error("expected 'msgstr[N] (0 <= N <= 9)'");
+          }
+          else if (prefix("msgstr[") &&
+                   current_line.size() > 8 && 
+                   isdigit(current_line[7]) && current_line[8] == ']')
+          {
+            unsigned int number = static_cast<unsigned int>(current_line[7] - '0');
+           std::string msgstr = get_string(9);
+
+           if(!msgstr.empty())
+             saw_nonempty_msgstr = true;
+
+            if (number >= msgstr_num.size())
+              msgstr_num.resize(number+1);
+
+            msgstr_num[number] = conv.convert(msgstr);
+            goto next;
+          }
+          else 
+          {
+            error("expected 'msgstr[N]'");
+          }
+
+          if (!is_empty_line())
+            error("expected 'msgstr[N]' or empty line");
+
+         if (saw_nonempty_msgstr)
+         {
+           if (use_fuzzy || !fuzzy)
+            {
+             if (!dict.get_plural_forms())
+             {
+               warning("msgstr[N] seen, but no Plural-Forms given");
+             }
+             else
+             {
+               if (msgstr_num.size() != dict.get_plural_forms().get_nplural())
+               {
+                 warning("msgstr[N] count doesn't match Plural-Forms.nplural");
+               }
+             }
+
+             if (has_msgctxt)
+               dict.add_translation(msgctxt, msgid, msgid_plural, msgstr_num);
+             else
+               dict.add_translation(msgid, msgid_plural, msgstr_num);
+           }
+
+           if (0)
+           {
+             std::cout << (fuzzy?"fuzzy":"not-fuzzy") << std::endl;
+             std::cout << "msgid \"" << msgid << "\"" << std::endl;
+             std::cout << "msgid_plural \"" << msgid_plural << "\"" << std::endl;
+             for(std::vector<std::string>::size_type i = 0; i < msgstr_num.size(); ++i)
+               std::cout << "msgstr[" << i << "] \"" << conv.convert(msgstr_num[i]) << "\"" << std::endl;
+             std::cout << std::endl;
+           }
+         }
+        }
+        else if (prefix("msgstr"))
+        {
+          std::string msgstr = get_string(6);
+
+          if (msgid.empty())
+          {
+            parse_header(msgstr);
+          }
+          else if(!msgstr.empty())
+          {
+            if (use_fuzzy || !fuzzy)
+            {
+              if (has_msgctxt)
+                dict.add_translation(msgctxt, msgid, conv.convert(msgstr));
+              else
+                dict.add_translation(msgid, conv.convert(msgstr));
+            }
+
+            if (0)
+            {
+              std::cout << (fuzzy?"fuzzy":"not-fuzzy") << std::endl;
+              std::cout << "msgid \"" << msgid << "\"" << std::endl;
+              std::cout << "msgstr \"" << conv.convert(msgstr) << "\"" << std::endl;
+              std::cout << std::endl;
+            }
+          }
+        }
+        else
+        {
+          error("expected 'msgstr' or 'msgid_plural'");
+        }
+      }
+      
+      if (!is_empty_line())
+        error("expected empty line");
+
+      next_line();
+    }
+    catch(POParserError&)
+    {          
+    }
+  }
+}
+
+} // namespace tinygettext
+
+/* EOF */