1 // tinygettext - A gettext replacement that works directly on .po files
2 // Copyright (C) 2009 Ingo Ruhnke <grumbel@gmx.de>
4 // This program is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU General Public License
6 // as published by the Free Software Foundation; either version 2
7 // of the License, or (at your option) any later version.
9 // This program is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU General Public License for more details.
14 // You should have received a copy of the GNU General Public License
15 // along with this program; if not, write to the Free Software
16 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 #include "po_parser.hpp"
28 #include "language.hpp"
29 #include "log_stream.hpp"
31 #include "dictionary.hpp"
32 #include "plural_forms.hpp"
34 namespace tinygettext {
// Out-of-class definition of the static data member POParser::pedantic.
// It starts out as true.
36 bool POParser::pedantic = true;
// Entry point taking a file name, an input stream and the Dictionary to fill.
// It builds a POParser from exactly those three arguments (use_fuzzy_ is not
// passed here). The rest of the body is not visible in this excerpt.
39 POParser::parse(const std::string& filename, std::istream& in, Dictionary& dict)
41 POParser parser(filename, in, dict);
// Empty tag type: thrown by POParser::error() and caught by reference
// further down in this file.
45 class POParserError {};
// Constructor: receives the file name, the input stream, the destination
// dictionary and the use_fuzzy_ flag.
47 POParser::POParser(const std::string& filename_, std::istream& in_, Dictionary& dict_, bool use_fuzzy_) :
// Member initializer for use_fuzzy; the remaining initializers are not
// visible in this excerpt.
51 use_fuzzy(use_fuzzy_),
// Writes a warning to log_warning in the form
//   <filename>:<line_number>: warning: <msg>: <current_line>
// The visible body only logs.
66 POParser::warning(const std::string& msg)
68 log_warning << filename << ":" << line_number << ": warning: " << msg << ": " << current_line << std::endl;
69 //log_warning << "Line: " << current_line << std::endl;
// Reports a parse error and abandons the current entry.
// Writes "<filename>:<line_number>: error: <msg>: <current_line>" to
// log_error, runs the recovery loop below, then throws POParserError, so
// control never returns to the caller normally.
73 POParser::error(const std::string& msg)
75 log_error << filename << ":" << line_number << ": error: " << msg << ": " << current_line << std::endl;
77 // Try to recover from an error by searching for start of another entry
// Loop condition: keep going until end of input or until is_empty_line()
// reports an empty line. The loop body is not visible in this excerpt.
80 while(!eof && !is_empty_line());
82 throw POParserError();
// Reads the next line of the input stream into current_line. The enclosing
// function header and the branch taken when std::getline fails are not
// visible in this excerpt.
89 if (!std::getline(in, current_line))
94 POParser::get_string_line(std::ostringstream& out,unsigned int skip)
96 if (skip+1 >= static_cast<unsigned int>(current_line.size()))
97 error("unexpected end of line");
99 if (current_line[skip] != '"')
100 error("expected start of string '\"'");
102 std::string::size_type i;
103 for(i = skip+1; current_line[i] != '\"'; ++i)
105 if (big5 && static_cast<unsigned char>(current_line[i]) >= 0x81 && static_cast<unsigned char>(current_line[i]) <= 0xfe)
107 out << current_line[i];
111 if (i >= current_line.size())
112 error("invalid big5 encoding");
114 out << current_line[i];
116 else if (i >= current_line.size())
118 error("unexpected end of string");
120 else if (current_line[i] == '\\')
124 if (i >= current_line.size())
125 error("unexpected end of string in handling '\\'");
127 switch (current_line[i])
129 case 'a': out << '\a'; break;
130 case 'b': out << '\b'; break;
131 case 'v': out << '\v'; break;
132 case 'n': out << '\n'; break;
133 case 't': out << '\t'; break;
134 case 'r': out << '\r'; break;
135 case '"': out << '"'; break;
136 case '\\': out << '\\'; break;
138 std::ostringstream err;
139 err << "unhandled escape '\\" << current_line[i] << "'";
142 out << current_line[i-1] << current_line[i];
148 out << current_line[i];
152 // process trailing garbage in line and warn if there is any
153 for(i = i+1; i < current_line.size(); ++i)
154 if (!isspace(current_line[i]))
156 warning("unexpected garbage after string ignoren");
// Collects the string that follows a keyword on current_line.
// `skip` is the offset just past the keyword; callers in this file pass the
// keyword length (e.g. 5 for msgid, 12 for msgid_plural). The pieces are
// accumulated in `out` through get_string_line(). Several structural lines
// (loops, else branches, the return) are not visible in this excerpt.
162 POParser::get_string(unsigned int skip)
164 std::ostringstream out;
// There must be room for at least a separator and an opening quote.
166 if (skip+1 >= static_cast<unsigned int>(current_line.size()))
167 error("unexpected end of line");
// Expected form: exactly one space between the keyword and the opening quote.
169 if (current_line[skip] == ' ' && current_line[skip+1] == '"')
171 get_string_line(out, skip+1);
// Any other separator is tolerated but reported.
176 warning("keyword and string must be seperated by a single space");
// Looking for the opening quote at `skip`: running off the end of the line,
// or meeting a character that is neither '"' nor whitespace, is an error.
180 if (skip >= static_cast<unsigned int>(current_line.size()))
181 error("unexpected end of line");
182 else if (current_line[skip] == '\"')
184 get_string_line(out, skip);
// NOTE(review): isspace() receives a plain char here and below; a cast to
// unsigned char would avoid undefined behavior on non-ASCII bytes - confirm.
187 else if (!isspace(current_line[skip]))
189 error("string must start with '\"'");
// Scan current_line from its start: a '"' hands the line to
// get_string_line(), and whitespace before it is stepped over.
202 for(std::string::size_type i = 0; i < current_line.size(); ++i)
204 if (current_line[i] == '"')
208 warning("leading whitespace before string");
210 get_string_line(out, i);
213 else if (isspace(current_line[i]))
// Returns true when `lhs` begins with `rhs`.
// An empty `rhs` is a prefix of every string, including the empty one.
// `rhs` is taken by const reference so that no std::string copy is made on
// each call; string literals still bind to it through a temporary.
static bool has_prefix(const std::string& lhs, const std::string& rhs)
{
  // A string shorter than the prefix can never start with it.
  if (lhs.length() < rhs.length())
    return false;

  return lhs.compare(0, rhs.length(), rhs) == 0;
}
// Interprets `header`, a block of '\n'-terminated lines.
// The "Content-Type:" line supplies the source charset and the
// "Plural-Forms:" line supplies the plural rule. At the end, `conv` is set up
// to convert from that charset to dict.get_charset(). Several else branches
// and braces are not visible in this excerpt.
235 POParser::parse_header(const std::string& header)
237 std::string from_charset;
238 std::string::size_type start = 0;
239 for(std::string::size_type i = 0; i < header.length(); ++i)
// Only text terminated by '\n' is examined; `line` is header[start, i).
241 if (header[i] == '\n')
243 std::string line = header.substr(start, i - start);
245 if (has_prefix(line, "Content-Type:"))
247 // from_charset = line.substr(len);
// Only the exact form "Content-Type: text/plain; charset=<name>" is accepted;
// the charset name is everything after that fixed text.
248 unsigned int len = strlen("Content-Type: text/plain; charset=");
249 if (line.compare(0, len, "Content-Type: text/plain; charset=") == 0)
251 from_charset = line.substr(len);
// Upper-case the name so the comparisons against "CHARSET" and "BIG5" below
// do not depend on the case used in the file.
// NOTE(review): toupper() receives a plain char; a cast to unsigned char
// would avoid undefined behavior on non-ASCII bytes - confirm.
253 for(std::string::iterator ch = from_charset.begin(); ch != from_charset.end(); ++ch)
254 *ch = static_cast<char>(toupper(*ch));
258 warning("malformed Content-Type header");
261 else if (has_prefix(line, "Plural-Forms:"))
// from_string() is given the whole line, "Plural-Forms:" prefix included.
263 PluralForms plural_forms = PluralForms::from_string(line);
266 warning("unknown Plural-Forms given");
// A dictionary that has no plural forms yet takes the ones from this file.
270 if (!dict.get_plural_forms())
272 dict.set_plural_forms(plural_forms);
// Warn when the dictionary's plural forms differ from the ones in this file.
276 if (dict.get_plural_forms() != plural_forms)
278 warning("Plural-Forms missmatch between .po file and dictionary");
// No charset found, or the literal "CHARSET" (presumably an unfilled
// template placeholder): warn and fall back to UTF-8.
287 if (from_charset.empty() || from_charset == "CHARSET")
289 warning("charset not specified for .po, fallback to utf-8");
290 from_charset = "UTF-8";
// BIG5 is singled out; the statement belonging to this branch is not visible
// in this excerpt.
292 else if (from_charset == "BIG5")
297 conv.set_charsets(from_charset, dict.get_charset());
// Classifies current_line for the parser. The visible checks are a
// completely empty line, a line starting with '#', and a character-by-character
// scan of anything else. The return statements are not visible in this
// excerpt.
301 POParser::is_empty_line()
303 if (current_line.empty())
307 else if (current_line[0] == '#')
308 { // handle comments as empty lines
// Tests for a lone "#" or a "#" followed by whitespace; "#" followed by any
// other character fails this test.
// NOTE(review): isspace() receives a plain char; a cast to unsigned char
// would avoid undefined behavior on non-ASCII bytes - confirm.
309 if (current_line.size() == 1 || (current_line.size() >= 2 && isspace(current_line[1])))
// Walks every character of the line; the loop body is not visible here.
316 for(std::string::iterator i = current_line.begin(); i != current_line.end(); ++i)
// Reports whether current_line starts with the C string prefix_str.
// A current_line shorter than prefix_str compares unequal. This is a pure
// prefix test, so "msgid" also matches a line that starts with "msgid_plural".
326 POParser::prefix(const char* prefix_str)
328 return current_line.compare(0, strlen(prefix_str), prefix_str) == 0;
// Body of the main parsing routine. Its signature, the surrounding loop and
// try scaffolding, and several branch lines are not visible in this excerpt.
336 // skip UTF-8 intro that some text editors produce
337 // see http://en.wikipedia.org/wiki/Byte-order_mark
338 if (current_line.size() >= 3 &&
339 current_line[0] == static_cast<char>(0xef) &&
340 current_line[1] == static_cast<char>(0xbb) &&
341 current_line[2] == static_cast<char>(0xbf))
343 current_line = current_line.substr(3);
// Per-entry state.
352 bool has_msgctxt = false;
// A ',' in second position is looked at for the word "fuzzy" anywhere after
// it. The check of the first character and the effect of a match are not
// visible here.
358 if (current_line.size() >= 2 && current_line[1] == ',')
360 // FIXME: Rather simplistic hunt for fuzzy flag
361 if (current_line.find("fuzzy", 2) != std::string::npos)
368 if (!is_empty_line())
// Optional context: the string after the 7-character keyword "msgctxt".
370 if (prefix("msgctxt"))
373 msgctxt = get_string(7);
// The string after the 5-character keyword "msgid"; its absence is an error.
377 msgid = get_string(5);
379 error("expected 'msgid'");
// Plural entry: msgid_plural followed by msgstr[N] lines.
381 if (prefix("msgid_plural"))
383 std::string msgid_plural = get_string(12);
384 std::vector<std::string> msgstr_num;
385 bool saw_nonempty_msgstr = false;
// Having collected no msgstr[N] at all is an error.
390 if (msgstr_num.empty())
391 error("expected 'msgstr[N] (0 <= N <= 9)'");
// Accepts exactly one digit between the brackets, hence N in 0..9.
// NOTE(review): isdigit() receives a plain char; a cast to unsigned char
// would avoid undefined behavior on non-ASCII bytes - confirm.
393 else if (prefix("msgstr[") &&
394 current_line.size() > 8 &&
395 isdigit(current_line[7]) && current_line[8] == ']')
397 unsigned int number = static_cast<unsigned int>(current_line[7] - '0');
// The string starts after the 9 characters of "msgstr[N]".
398 std::string msgstr = get_string(9);
401 saw_nonempty_msgstr = true;
// Indices may arrive out of order or with gaps; grow the vector as needed.
403 if (number >= msgstr_num.size())
404 msgstr_num.resize(number+1);
// Stored already converted through conv.
406 msgstr_num[number] = conv.convert(msgstr);
411 error("expected 'msgstr[N]'");
414 if (!is_empty_line())
415 error("expected 'msgstr[N]' or empty line");
// Only entries for which saw_nonempty_msgstr was set are considered.
417 if (saw_nonempty_msgstr)
// Fuzzy entries are taken only when use_fuzzy is set.
419 if (use_fuzzy || !fuzzy)
421 if (!dict.get_plural_forms())
423 warning("msgstr[N] seen, but no Plural-Forms given");
427 if (msgstr_num.size() != dict.get_plural_forms().get_nplural())
429 warning("msgstr[N] count doesn't match Plural-Forms.nplural");
// Two overloads: one keyed with msgctxt, one without. The condition that
// selects between them is not visible here.
434 dict.add_translation(msgctxt, msgid, msgid_plural, msgstr_num);
436 dict.add_translation(msgid, msgid_plural, msgstr_num);
// Dump of the entry to stdout; its guard is not visible in this excerpt.
// NOTE(review): msgstr_num[i] was already passed through conv.convert() when
// it was stored, and is converted a second time for this output - confirm
// whether that is intended.
441 std::cout << (fuzzy?"fuzzy":"not-fuzzy") << std::endl;
442 std::cout << "msgid \"" << msgid << "\"" << std::endl;
443 std::cout << "msgid_plural \"" << msgid_plural << "\"" << std::endl;
444 for(std::vector<std::string>::size_type i = 0; i < msgstr_num.size(); ++i)
445 std::cout << "msgstr[" << i << "] \"" << conv.convert(msgstr_num[i]) << "\"" << std::endl;
446 std::cout << std::endl;
// Singular entry: the string after the 6-character keyword "msgstr".
450 else if (prefix("msgstr"))
452 std::string msgstr = get_string(6);
// The msgstr text is handed to parse_header(); the condition selecting this
// branch is not visible here.
456 parse_header(msgstr);
// Empty translations are not added.
458 else if(!msgstr.empty())
460 if (use_fuzzy || !fuzzy)
// Unlike the plural case, conversion happens at the point of insertion.
463 dict.add_translation(msgctxt, msgid, conv.convert(msgstr));
465 dict.add_translation(msgid, conv.convert(msgstr));
// Dump of the entry to stdout; its guard is not visible in this excerpt.
470 std::cout << (fuzzy?"fuzzy":"not-fuzzy") << std::endl;
471 std::cout << "msgid \"" << msgid << "\"" << std::endl;
472 std::cout << "msgstr \"" << conv.convert(msgstr) << "\"" << std::endl;
473 std::cout << std::endl;
479 error("expected 'msgstr' or 'msgid_plural'");
// After an entry, the current line must be empty as judged by is_empty_line().
483 if (!is_empty_line())
484 error("expected empty line");
// POParserError thrown by error() is caught here; the handler body is not
// visible in this excerpt.
488 catch(POParserError&)
494 } // namespace tinygettext