From 4af0549b2848fbbd4432d1549b0546843f45369d Mon Sep 17 00:00:00 2001 From: Celtic Minstrel Date: Fri, 10 Jun 2016 20:34:36 -0400 Subject: [PATCH] Add new translation library (from cbeck88/spirit-po@0fea1743262f8f5095b0e5db4bb5505e5c4656ee ) --- src/spirit_po.hpp | 10 + src/spirit_po/catalog.hpp | 417 +++++++++++++ src/spirit_po/catalog_metadata.hpp | 151 +++++ .../default_plural_forms_compiler.hpp | 117 ++++ .../default_plural_forms_expressions.hpp | 574 ++++++++++++++++++ src/spirit_po/exceptions.hpp | 76 +++ src/spirit_po/po_grammar.hpp | 125 ++++ src/spirit_po/po_message.hpp | 59 ++ src/spirit_po/po_message_adapted.hpp | 23 + src/spirit_po/version.hpp | 10 + 10 files changed, 1562 insertions(+) create mode 100644 src/spirit_po.hpp create mode 100644 src/spirit_po/catalog.hpp create mode 100644 src/spirit_po/catalog_metadata.hpp create mode 100644 src/spirit_po/default_plural_forms_compiler.hpp create mode 100644 src/spirit_po/default_plural_forms_expressions.hpp create mode 100644 src/spirit_po/exceptions.hpp create mode 100644 src/spirit_po/po_grammar.hpp create mode 100644 src/spirit_po/po_message.hpp create mode 100644 src/spirit_po/po_message_adapted.hpp create mode 100644 src/spirit_po/version.hpp diff --git a/src/spirit_po.hpp b/src/spirit_po.hpp new file mode 100644 index 000000000000..165708f88970 --- /dev/null +++ b/src/spirit_po.hpp @@ -0,0 +1,10 @@ +// (C) Copyright 2015 - 2016 Christopher Beck + +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt) + +#pragma once + +#include +#include + diff --git a/src/spirit_po/catalog.hpp b/src/spirit_po/catalog.hpp new file mode 100644 index 000000000000..c5471bcc90e7 --- /dev/null +++ b/src/spirit_po/catalog.hpp @@ -0,0 +1,417 @@ +// (C) Copyright 2015 - 2016 Christopher Beck + +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying
+// file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#pragma once
+
+#ifndef BOOST_SPIRIT_USE_PHOENIX_V3
+#define BOOST_SPIRIT_USE_PHOENIX_V3
+#endif
+
+// NOTE(review): every #include below has lost its header name (it looks like
+// the text between '<' and '>' was stripped when this patch was extracted).
+// Restore the targets from the upstream file before applying this patch.
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace spirit_po {
+
+namespace spirit = boost::spirit;
+namespace qi = spirit::qi;
+typedef unsigned int uint;
+
+// warning_channel_type: callable the catalog invokes with a warning string.
+// default_hashmap_type: container the catalog stores its messages in.
+// NOTE(review): the template arguments of std::function and
+// std::unordered_map appear to be missing here -- confirm against upstream.
+typedef std::function warning_channel_type;
+typedef std::unordered_map default_hashmap_type;
+
+/***
+ * Message catalog loaded from a po file.
+ *
+ * The body uses two type parameters: `hashmap_type` (storage for the
+ * messages) and `pf_compiler` (turns the header's plural-forms string into a
+ * function object of type pf_compiler::result_type).
+ *
+ * NOTE(review): the parameter list after `template` appears to have been
+ * stripped from this patch text -- restore it from upstream.
+ */
+template
+class catalog {
+  catalog_metadata metadata_;
+
+  typename pf_compiler::result_type pf_function_object_;
+  uint singular_index_; // cached result of pf_function_object(1)
+
+#ifdef SPIRIT_PO_NOEXCEPT
+  // NOTE(review): boost::optional is missing its template argument here;
+  // error() below returns the contained value as a std::string.
+  boost::optional error_message_;
+  // if loading failed, error_message_ contains an error
+  // (rather than throwing an exception)
+#endif
+  warning_channel_type warning_channel_;
+
+  hashmap_type hashmap_;
+
+public:
+  // NOTE(review): static_cast is missing its target type here.
+  static constexpr char EOT = static_cast(4);
+  // ASCII 4 is EOT character
+  // Used to separate msg context from msgid in the hashmap, in MO files
+  // We use the same formatting system, just for consistency.
+  // c.f. https://www.gnu.org/software/gettext/manual/html_node/MO-Files.html
+
+  // Hashmap key for a message that has a context: msgctxt, EOT, then msgid.
+  static std::string form_context_index(const std::string & msgctxt, const std::string & id) {
+    return msgctxt + EOT + id;
+  }
+
+  // Hashmap key for msg: the contextual key when msg.context is set,
+  // otherwise the bare msgid.
+  static std::string form_index(const po_message & msg) {
+    return msg.context ? form_context_index(*msg.context, msg.id) : msg.id;
+  }
+
+private:
+  /***
+   * Helper for interacting with hashmap results
+   * get(msg) gets the *singular* string from the message. if it's a plural message, look in singular_index_.
+   * if it's not a plural message, then there is only one string. 
also, the po header is never a plural message
+   */
+  const std::string & get(const po_message & msg) const {
+    if (msg.strings().size() == 1) { return msg.strings()[0]; }
+    return msg.strings()[singular_index_];
+  }
+
+  /***
+   * get(msg, plural) gets the string to use for a count of `plural` items.
+   * The index comes from the cached singular index when plural == 1 and from
+   * the plural-forms function otherwise.
+   */
+  const std::string & get(const po_message & msg, uint plural) const {
+    uint idx = (plural == 1 ? singular_index_ : pf_function_object_(plural));
+    // Only the n == 1 result of the plural-forms function is validated when
+    // the catalog is built, and insert_message() accepts entries that carry a
+    // single string. Fall back to the first string instead of indexing past
+    // the end of the vector.
+    if (idx >= msg.strings().size()) { idx = 0; }
+    return msg.strings()[idx];
+  }
+
+  /***
+   * Emplace a message into the hashmap
+   *
+   * Messages are dropped (not stored) when they have no translation strings,
+   * when the first translation is empty, or when they have more than one
+   * string but not exactly metadata_.num_plural_forms of them (this last case
+   * is reported through the warning channel).
+   * If an entry with the same key already exists it is replaced, and the
+   * replacement is reported through the warning channel.
+   */
+  void insert_message(po_message && msg) {
+    if (!msg.strings().size()) { return; }
+    // don't allow messages with ZERO translations into the catalog, this will cause segfaults later.
+    // should perhaps throw an exception here
+
+    if (!msg.strings()[0].size()) { return; }
+    // if the (first) translated string is "", it is untranslated and message does not enter catalog
+
+    if (msg.strings().size() > 1 && msg.strings().size() != metadata_.num_plural_forms) {
+      if (warning_channel_) {
+        warning_channel_("Ignoring a message with an incorrect number of plural forms: plural = " + std::to_string(msg.strings().size()) + " msgid = '" + msg.id + "'");
+      }
+      return;
+    }
+
+    std::string index = form_index(msg);
+    // adjust the id based on context if necessary
+
+    // Look the key up before inserting. emplace() is allowed to construct the
+    // new element (moving from `msg`) even when the key is already present,
+    // which would leave `msg` emptied for the warning text and for the
+    // overwrite below.
+    auto existing = hashmap_.find(index);
+    if (existing == hashmap_.end()) {
+      hashmap_.emplace(std::move(index), std::move(msg));
+      return;
+    }
+
+    // Issue a warning rather than silently overwrite.
+    if (warning_channel_) {
+      std::string warning = "Overwriting a message: msgid = <<<" + msg.id + ">>>";
+      if (msg.context) { warning += " msgctxt = <<<" + *msg.context + ">>>"; }
+      warning_channel_(warning);
+    }
+    existing->second = std::move(msg);
+  }
+
+public:
+#ifdef SPIRIT_PO_NOEXCEPT
+  /***
+   * Error checking (this is done so we don't have to throw exceptions from the ctor.)
+   */
+  explicit operator bool() const {
+    return !error_message_;
+  }
+
+  // Returns the stored error text; only meaningful when operator bool()
+  // returned false.
+  std::string error() const {
+    return *error_message_; // UB if there is not an error message
+  }
+#endif
+
+  /***
+   * Ctors
+   *
+   * Parses a po file from [it, end). The first entry must be the header
+   * (empty msgid); the metadata and the plural-forms function are set up from
+   * it. Every following entry is parsed, and stored unless it was marked
+   * fuzzy. Failures are reported through SPIRIT_PO_CATALOG_FAIL.
+   *
+   * warn_channel receives warnings from insert_message(); compiler turns the
+   * header's plural-forms string into the plural-forms function object.
+   *
+   * NOTE(review): the parameter list after `template` and the template
+   * arguments of spirit::line_pos_iterator and po_grammar appear to have been
+   * stripped from this patch text -- restore them from upstream.
+   */
+  template
+  catalog(spirit::line_pos_iterator & it, spirit::line_pos_iterator & end, warning_channel_type warn_channel = warning_channel_type(), pf_compiler compiler = pf_compiler())
+    : metadata_()
+    , pf_function_object_()
+    , warning_channel_(warn_channel)
+    , hashmap_()
+  {
+    typedef spirit::line_pos_iterator iterator_type;
+    po_grammar grammar;
+
+    po_message msg;
+    std::size_t line_no = 0;
+
+    // Parse header first
+    {
+      // must be able to parse first message
+      qi::parse(it, end, grammar.skipped_block); // first parse any comments
+      if (!qi::parse(it, end, grammar, msg)) { // now parse the main grammar target
+        const std::size_t err_line = it.position();
+        SPIRIT_PO_CATALOG_FAIL("Failed to parse po header, stopped at line " + std::to_string(err_line) + ": " + iterator_context(it, end));
+      }
+
+      // first message must have empty MSGID (po format says so)
+      if (msg.id.size()) {
+        SPIRIT_PO_CATALOG_FAIL("Failed to parse po header, first msgid must be empty string \"\", found: " + msg.id);
+      }
+
+      // Now parse the header string itself
+      if (msg.strings().size()) {
+        std::string maybe_error = metadata_.parse_header(msg.strings()[0]);
+        if (maybe_error.size()) {
+          SPIRIT_PO_CATALOG_FAIL("Failed to parse po header: " + maybe_error);
+        }
+      }
+
+      if (!metadata_.num_plural_forms) {
+        SPIRIT_PO_CATALOG_FAIL("Invalid metadata in po header, found num_plurals = 0");
+      }
+
+      // Try to compile the plural forms function string
+      pf_function_object_ = compiler(metadata_.plural_forms_function_string);
+      if (!pf_function_object_) {
+        SPIRIT_PO_CATALOG_FAIL(("Failed to read plural forms function. "
+                                "Input: '" + metadata_.plural_forms_function_string + "', "
+                                "error message: " + pf_function_object_.error()));
+      }
+
+      // Cache the 'singular' form index since it is most common
+      singular_index_ = pf_function_object_(1);
+      if (singular_index_ >= metadata_.num_plural_forms) {
+        SPIRIT_PO_CATALOG_FAIL(("Invalid plural forms function. "
+                                "On input n = 1, returned plural = " + std::to_string(singular_index_) + ", "
+                                "while num_plurals = " + std::to_string(metadata_.num_plural_forms)));
+      }
+
+      msg.line_no = line_no;
+      insert_message(std::move(msg)); // for compatibility, need to insert the header message at msgid ""
+    }
+
+    // Now parse non-fuzzy messages
+    while (it != end) {
+      // this parse rule cannot fail, it can be a zero length match
+      qi::parse(it, end, grammar.ignored_comments);
+
+      bool fuzzy = false;
+      // this parse rule cannot fail, it can be a zero length match
+      qi::parse(it, end, grammar.message_preamble, fuzzy);
+
+      // check if we exhausted the file by comments
+      if (it != end) {
+        msg = po_message{};
+        msg.strings().reserve(metadata_.num_plural_forms); // try to prevent frequent vector reallocations
+        line_no = it.position();
+        // actually parse a message
+        if (!qi::parse(it, end, grammar, msg)) {
+          const std::size_t err_line = it.position();
+          SPIRIT_PO_CATALOG_FAIL(("Failed to parse po file, "
+                                  "started at " + std::to_string(line_no) + ": , stopped at " + std::to_string(err_line) + ":\n"
+                                  + iterator_context(it, end)));
+        }
+        // cannot overwrite header
+        if (!msg.id.size()) {
+          const std::size_t err_line = it.position();
+          // The two adjacent literals are concatenated by the compiler, so the
+          // first one has to end with a separator.
+          SPIRIT_PO_CATALOG_FAIL(("Malformed po file: Cannot overwrite the header entry later in the po file. "
+                                  "Started at " + std::to_string(line_no) + ": , stopped at " + std::to_string(err_line) + ":\n"
+                                  + iterator_context(it, end)));
+        }
+        msg.line_no = line_no;
+        // only insert it if it wasn't marked fuzzy
+        if (!fuzzy) { insert_message(std::move(msg)); }
+      }
+    }
+
+#ifdef SPIRIT_PO_DEBUG
+    // validate resulting hashmap
+    for (const auto & p : hashmap_) {
+      if (!p.second.strings().size()) { SPIRIT_PO_CATALOG_FAIL(("Internal catalog error: found a message id with no strings, msgid='" + p.first + "'")); }
+      if (p.second.strings().size() != 1 && p.second.strings().size() != metadata_.num_plural_forms) {
+        SPIRIT_PO_CATALOG_FAIL(("Internal catalog error: found a message id with wrong number of strings, msgid='" + p.first + "' num msgstr = " + std::to_string(p.second.strings().size()) + ", catalog num_plural_forms = " + std::to_string(metadata_.num_plural_forms) + "\nWhole message: " + debug_string(p.second) ));
+      }
+    }
+#endif
+  }
+
+  // Upgrade an iterator pair to spirit::line_pos_iterators
+  // NOTE(review): template parameter list and line_pos_iterator arguments
+  // appear to be stripped here as well.
+  template
+  static catalog from_iterators(Iterator & b, Iterator & e, warning_channel_type w = warning_channel_type()) {
+    spirit::line_pos_iterator it{b};
+    spirit::line_pos_iterator end{e};
+    return catalog(it, end, w);
+  }
+
+  // Overload for callers that already hold spirit::line_pos_iterators.
+  template
+  static catalog from_iterators(spirit::line_pos_iterator & b, spirit::line_pos_iterator & e, warning_channel_type w = warning_channel_type()) {
+    return catalog(b, e, w);
+  }
+
+  // Construct a catalog from a range using one expression
+  template
+  static catalog from_range(const Range & range, warning_channel_type w = warning_channel_type()) {
+    auto it = boost::begin(range);
+    auto end = boost::end(range);
+    return from_iterators(it, end, w);
+  }
+
+  // Construct a catalog by reading a po file from a stream. Clears the
+  // stream's skipws flag as a side effect.
+  static catalog from_istream(std::istream & is, warning_channel_type w = warning_channel_type()) {
+    // no white space skipping in the stream!
+    is.unsetf(std::ios::skipws);
+    spirit::istream_iterator it(is);
+    spirit::istream_iterator end;
+    return from_iterators(it, end, w);
+  }
+
+  ///////////////
+  // ACCESSORS //
+  ///////////////
+
+  /***
+   * Lookup strings from the catalog
+   *
+   * When using string literals as the parameters, these versions are safe and
+   * are maximally efficient.
+   * (The returned pointer is either the input pointer, having static storage
+   * duration, or has lifetime as long as the catalog.)
+   *
+   * Chosen to behave in the same manner as corresponding gettext functions.
+   */
+  const char * gettext(const char * msgid) const {
+    auto it = hashmap_.find(msgid);
+    if (it != hashmap_.end()) {
+      return get(it->second).c_str();
+    } else {
+      return msgid;
+    }
+  }
+
+  const char * ngettext(const char * msgid, const char * msgid_plural, uint plural) const {
+    auto it = hashmap_.find(msgid);
+    if (it != hashmap_.end() && it->second.is_plural()) {
+      return get(it->second, plural).c_str();
+    } else {
+      return (plural == 1 ? msgid : msgid_plural);
+    }
+  }
+
+  const char * pgettext(const char * context, const char * msgid) const {
+    auto it = hashmap_.find(form_context_index(context, msgid));
+    if (it != hashmap_.end()) {
+      return get(it->second).c_str();
+    } else {
+      return msgid;
+    }
+  }
+
+  const char * npgettext(const char * context, const char * msgid, const char * msgid_plural, uint plural) const {
+    auto it = hashmap_.find(form_context_index(context, msgid));
+    if (it != hashmap_.end() && it->second.is_plural()) {
+      return get(it->second, plural).c_str();
+    } else {
+      return (plural == 1 ? msgid : msgid_plural);
+    }
+  }
+
+  /***
+   * Lookup strings from catalog, return std::string.
+   *
+   * When, for whatever reason, it is more comfortable to use idiomatic C++. 
+   */
+  // Returns the stored singular translation, or a copy of msgid when the
+  // catalog has no entry for it.
+  std::string gettext_str(const std::string & msgid) const {
+    auto it = hashmap_.find(msgid);
+    if (it != hashmap_.end()) {
+      return get(it->second);
+    } else {
+      return msgid;
+    }
+  }
+
+  // Returns the translation chosen for `plural` items. Falls back to msgid
+  // (plural == 1) or msgid_plural when there is no entry or the entry is not
+  // a plural message.
+  std::string ngettext_str(const std::string & msgid, const std::string & msgid_plural, uint plural) const {
+    auto it = hashmap_.find(msgid);
+    if (it != hashmap_.end() && it->second.is_plural()) {
+      return get(it->second, plural);
+    } else {
+      return (plural == 1 ? msgid : msgid_plural);
+    }
+  }
+
+  // Same as gettext_str, but looks the message up under `context`.
+  std::string pgettext_str(const std::string & context, const std::string & msgid) const {
+    auto it = hashmap_.find(form_context_index(context, msgid));
+    if (it != hashmap_.end()) {
+      return get(it->second);
+    } else {
+      return msgid;
+    }
+  }
+
+  // Same as ngettext_str, but looks the message up under `context`.
+  std::string npgettext_str(const std::string & context, const std::string & msgid, const std::string & msgid_plural, uint plural) const {
+    auto it = hashmap_.find(form_context_index(context, msgid));
+    if (it != hashmap_.end() && it->second.is_plural()) {
+      return get(it->second, plural);
+    } else {
+      return (plural == 1 ? msgid : msgid_plural);
+    }
+  }
+
+  /***
+   * Get line numbers of messages
+   *
+   * Returns the line_no recorded for the entry, or 0 when the catalog has no
+   * entry for the key.
+   */
+  std::size_t gettext_line_no(const std::string & msgid) const {
+    auto it = hashmap_.find(msgid);
+    if (it != hashmap_.end()) {
+      return it->second.line_no;
+    } else {
+      return 0;
+    }
+  }
+
+  std::size_t pgettext_line_no(const std::string & context, const std::string & msgid) const {
+    auto it = hashmap_.find(form_context_index(context, msgid));
+    if (it != hashmap_.end()) {
+      return it->second.line_no;
+    } else {
+      return 0;
+    }
+  }
+
+  /***
+   * Access metadata
+   */
+  const catalog_metadata & get_metadata() const { return metadata_; }
+
+  /***
+   * Catalog size
+   */
+  uint size() const {
+    // exclude po header from the count, this is how msgfmt reports size also
+    // (the header, when stored, is the entry whose key is the empty string)
+    return hashmap_.size() - hashmap_.count("");
+  }
+
+  /***
+   * Debugging output
+   */
+  const hashmap_type & get_hashmap() const { return hashmap_; }
+
+  /***
+   * Set warning channel (for msgid overwrites)
+   */
+  void set_warning_channel(const warning_channel_type & w) { warning_channel_ = w; }
+
+  /***
+   * Merge a different catalog into this one
+   *
+   * Fails (SPIRIT_PO_CATALOG_FAIL) when the two catalogs' metadata are not
+   * compatible. Otherwise every entry of `other` except its header is moved
+   * into this catalog through insert_message(), so an entry with an existing
+   * key replaces ours.
+   *
+   * NOTE(review): the parameter list after `template` (and possibly the
+   * template arguments of the `catalog` parameter) appear to have been
+   * stripped from this patch text. Also note that the body reads
+   * other.metadata_ and other.hashmap_ directly -- confirm against upstream
+   * how access is granted when `other` is a different instantiation.
+   */
+  template
+  void merge(catalog && other) {
+    std::string maybe_error = metadata_.check_compatibility(other.metadata_);
+    if (maybe_error.size()) {
+      SPIRIT_PO_CATALOG_FAIL(("Cannot merge catalogs: " + maybe_error));
+    }
+    for (auto & p : other.hashmap_) {
+      if (p.first.size()) { // don't copy over the header, keep our original header
+        insert_message(std::move(p.second));
+      }
+    }
+  }
+};
+
+} // end namespace spirit_po
diff --git a/src/spirit_po/catalog_metadata.hpp b/src/spirit_po/catalog_metadata.hpp
new file mode 100644
index 000000000000..33c6dcc3ef08
--- /dev/null
+++ b/src/spirit_po/catalog_metadata.hpp
@@ -0,0 +1,151 @@
+// (C) Copyright 2015 - 2016 Christopher Beck
+
+// Distributed under the Boost Software License, Version 1.0. 
(See accompanying
+// file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#pragma once
+
+#ifndef BOOST_SPIRIT_USE_PHOENIX_V3
+#define BOOST_SPIRIT_USE_PHOENIX_V3
+#endif
+
+// NOTE(review): the #include lines below have lost their header names (the
+// text between '<' and '>' appears to have been stripped from this patch).
+#include
+
+#include
+#include
+#include
+
+namespace spirit_po {
+
+namespace qi = boost::spirit::qi;
+
+typedef unsigned int uint;
+// Attribute of the Plural-Forms grammar: parse_header() reads .first as the
+// number of plural forms and .second as the plural function string.
+// NOTE(review): std::pair is missing its template arguments here.
+typedef std::pair num_plurals_info;
+
+/***
+ * Metadata read from the header entry of a po file.
+ */
+struct catalog_metadata {
+  std::string project_id;
+  std::string language;
+  std::string language_team;
+  std::string last_translator;
+
+  uint num_plural_forms;
+  std::string plural_forms_function_string;
+
+  std::string charset;
+
+  // All strings start out empty and num_plural_forms starts at 0.
+  catalog_metadata()
+    : project_id()
+    , language()
+    , language_team()
+    , last_translator()
+    , num_plural_forms(0)
+    , plural_forms_function_string()
+  {}
+
+private:
+  // Returns the text that follows the first occurrence of `label` in
+  // `header`, with leading spaces skipped, up to (not including) the next
+  // newline. Returns "" when the label does not occur.
+  // NOTE(review): the label is matched anywhere in the header, not only at
+  // the start of a line, so e.g. "Language:" would also match inside a
+  // longer field name that ends the same way -- confirm this is acceptable.
+  std::string find_header_line(const std::string & header, const std::string & label) {
+    size_t idx = header.find(label);
+    if (idx == std::string::npos) {
+      return "";
+    }
+    auto it = header.begin() + idx + label.size();
+    while (it != header.end() && *it == ' ') { ++it; }
+
+    auto e = it;
+    while (e != header.end() && *e != '\n') { ++e; }
+    return std::string(it, e);
+  }
+
+  // Grammar for the value of the Plural-Forms header field:
+  // "nplurals=" <uint> ";" "plural=" followed by the rest of the line.
+  // NOTE(review): the template parameter list and the template arguments of
+  // qi::grammar / qi::rule appear to have been stripped here.
+  template
+  struct num_plurals_grammar : qi::grammar {
+    qi::rule main;
+    num_plurals_grammar() : num_plurals_grammar::base_type(main) {
+      using qi::lit;
+      main = qi::skip(' ') [ lit("nplurals=") >> qi::uint_ >> lit(';') >> lit("plural=") ] >> (*qi::char_);
+    }
+  };
+
+#define DEFAULT_CHARSET "UTF-8"
+
+  // Grammar for the value of the Content-Type header field: skips everything
+  // up to and including the first ';', then yields the text after "charset="
+  // or, if that is absent, DEFAULT_CHARSET.
+  // NOTE(review): template parameter list / arguments stripped here as well.
+  template
+  struct content_type_grammar : qi::grammar {
+    qi::rule main;
+    content_type_grammar() : content_type_grammar::base_type(main) {
+      using qi::lit;
+      using qi::omit;
+      using qi::skip;
+      main = skip(' ')[ omit[ *(qi::char_ - ';') >> lit(';') ] >> ((lit("charset=") >> *(qi::char_)) | qi::attr(DEFAULT_CHARSET)) ];
+    }
+  };
+
+public:
+  // Fills in the fields of this object from the text of a po header.
+  // nonempty return is an error message
+  std::string parse_header(const std::string & header) {
+    constexpr const char * default_charset = DEFAULT_CHARSET;
+#undef DEFAULT_CHARSET
+
+    project_id = find_header_line(header, "Project-Id-Version:");
+    language = find_header_line(header, "Language:");
+    language_team = find_header_line(header, "Language-Team:");
+    last_translator = find_header_line(header, "Last-Translator:");
+
+    std::string content_type_line = find_header_line(header, "Content-Type:");
+    if (content_type_line.size()) {
+      auto it = content_type_line.begin();
+      auto end = content_type_line.end();
+      // NOTE(review): the template argument of content_type_grammar appears
+      // to be missing here.
+      content_type_grammar gram;
+      std::string ct;
+      // NOTE(review): when this parse fails (the grammar requires a ';' in
+      // the field), charset is left unchanged and no error is reported --
+      // confirm whether it should fall back to the default instead.
+      if (qi::parse(it, end, gram, ct)) {
+        charset = ct;
+        if (charset != "ASCII" && charset != "UTF-8") {
+          return "PO file declared charset of '" + charset + "', but spirit_po only supports UTF-8 and ASCII for this.";
+        }
+      }
+    } else {
+      // Assume defaults for charset
+      charset = default_charset;
+    }
+
+    std::string content_transfer_encoding = find_header_line(header, "Content-Transfer-Encoding:");
+    if (content_transfer_encoding.size()) {
+      auto it = content_transfer_encoding.begin();
+      auto end = content_transfer_encoding.end();
+      if (!qi::phrase_parse(it, end, qi::lit("8bit"), qi::ascii::space)) {
+        return "PO header 'Content-Transfer-Encoding' must be '8bit' if specified, but PO file declared '" + content_transfer_encoding + "'";
+      }
+    }
+
+    std::string num_plurals_line = find_header_line(header, "Plural-Forms:");
+
+    if (num_plurals_line.size()) {
+      auto it = num_plurals_line.begin();
+      auto end = num_plurals_line.end();
+
+      // NOTE(review): the template argument of num_plurals_grammar appears
+      // to be missing here.
+      num_plurals_grammar gram;
+      num_plurals_info info;
+      if (qi::parse(it, end, gram, info)) {
+        num_plural_forms = info.first;
+        plural_forms_function_string = info.second;
+      } else {
+        num_plural_forms = 0;
+        plural_forms_function_string = "";
+        return "Failed to parse Plural-Forms entry -- stopped at:\n" + string_iterator_context(num_plurals_line, it);
+      }
+    } else {
+      // No Plural-Forms field: use two forms selected by "n != 1".
+      num_plural_forms = 2;
+      plural_forms_function_string = "n != 1";
+    }
+    return "";
+  }
+
+  // check if this metadata is compatible with another metadata (number of plural forms, maybe other criteria)
+  // return a nonempty string containing error message if they are not compatible.
+  std::string check_compatibility(const catalog_metadata & other) const {
+    if (num_plural_forms != other.num_plural_forms) {
+      return std::string{"Num plural forms mismatch. this = "} + std::to_string(num_plural_forms) + " other = " + std::to_string(other.num_plural_forms);
+    }
+    return "";
+  }
+};
+
+} // end namespace spirit_po
diff --git a/src/spirit_po/default_plural_forms_compiler.hpp b/src/spirit_po/default_plural_forms_compiler.hpp
new file mode 100644
index 000000000000..a5c5e4dd312e
--- /dev/null
+++ b/src/spirit_po/default_plural_forms_compiler.hpp
@@ -0,0 +1,117 @@
+// (C) Copyright 2015 - 2016 Christopher Beck
+
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#pragma once
+
+/***
+ * In GNU gettext, a language is permitted to define any number of 'plural forms'.
+ * For instance, in English and most romance languages there are only two forms,
+ * singular and plural. However in many other languages, there may be only one
+ * form, or there may be many plural forms reserved for various numbers of items.
+ *
+ * In the header of a po file, as part of the metadata, translators are expected
+ * to specify exactly how many plural forms there are, (how many different
+ * variations of a pluralized string they will provide), and also a function that
+ * computes which form (the appropriate index) should be used when the number of
+ * items is a number "n".
+ *
+ * Traditionally, this function is specified as a single line of pseudo C code.
+ *
+ * Examples:
+ *
+ * Russian:
+ *   Po header:
+ *     num_plurals = 3
+ *     plural=n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 
1 : 2; + * Messages: + * + * + * There are many many more examples shown here: + * http://localization-guide.readthedocs.org/en/latest/l10n/pluralforms.html + * + * The code in *this* file is concerned with converting these strings into + * function objects implementing a function uint -> uint. + * + * These function objects are then associated to each catalog and used when + * looking up plurals. + * + * In spirit-po, we provide support for the standard gettext pseudo-C language + * using the 'default_plural_forms_compiler', which compiles these run-time + * pseudo-C expressions into expression trees which can be evaluated. + * + * By using non-default template parameters and providing an appropriate + * function object, you can make spirit-po use your favorite programming + * language for these instead. (Or, your translators' favorite?) + * + * The 'plural_forms_compiler' concept must be a class/struct and provide: + * - The plural_forms_compiler must be default constructible. + * - It must have a typedef 'result_type' which is the type of the function + * object it produces. + * - An operator() overload which takes const std::string &, and return an + * instance of 'result_type'. + * - result_type must be default constructible and move constructible. + * - result_type must have an operator() overload which takes and yields + * unsigned int. + * - result_type must have an explicit operator bool() const overload which + * returns whether the function object is valid (compilation succeeded) + * - result_type must have a function `error()` which returns a std::string + * representing a compilation error message in the case of failure. 
+ */
+
+#ifndef BOOST_SPIRIT_USE_PHOENIX_V3
+#define BOOST_SPIRIT_USE_PHOENIX_V3
+#endif
+
+// NOTE(review): the #include lines below have lost their header names (the
+// text between '<' and '>' appears to have been stripped from this patch).
+#include
+#include
+#include
+#include
+#include
+
+namespace spirit_po {
+
+namespace qi = boost::spirit::qi;
+typedef unsigned int uint;
+
+namespace default_plural_forms {
+
+/***
+ * Result of compiling a plural-forms expression: either a stack machine
+ * built from the parsed expression, or a parse error message.
+ */
+class function_object {
+  mutable stack_machine machine_;
+  // NOTE(review): boost::optional is missing its template argument here;
+  // it is initialised from a std::string and returned as one by error().
+  boost::optional parse_error_;
+
+public:
+  // Valid function object evaluating the expression _e.
+  function_object(const expr & _e) : machine_(_e), parse_error_() {}
+  // Invalid function object carrying the error message s. The machine is
+  // still initialised (from n_var()) so that the member is constructed.
+  function_object(const std::string & s) : machine_(n_var()), parse_error_(s) {}
+  // Default state is invalid, with the error text "uninitialized".
+  function_object() : function_object(std::string{"uninitialized"}) {}
+
+  // Evaluates the expression for n items.
+  uint operator()(uint n) const {
+    return machine_.compute(n);
+  }
+
+  // True when no parse error is stored.
+  explicit operator bool() const { return !parse_error_; }
+  // Returns the stored error text; dereferences parse_error_ unchecked, so
+  // only call it when operator bool() returned false.
+  std::string error() const { return *parse_error_; }
+};
+
+/***
+ * The default plural forms compiler: parses a pseudo-C expression string
+ * into a function_object.
+ */
+struct compiler {
+  typedef function_object result_type;
+  // Succeeds only if the whole string is consumed by the grammar; otherwise
+  // the returned function_object carries an error message that includes the
+  // position where parsing stopped.
+  result_type operator()(const std::string & str) const {
+    expr e;
+
+    typedef std::string::const_iterator str_it;
+    str_it it = str.begin();
+    str_it end = str.end();
+    // NOTE(review): the template argument of op_grammar appears to be
+    // missing here.
+    op_grammar grammar;
+
+    if (qi::phrase_parse(it, end, grammar, qi::space, e) && it == end) {
+      return function_object(std::move(e));
+    } else {
+      return function_object("Plural-Forms expression reader: Could not parse expression, stopped parsing at:\n" + string_iterator_context(str, it));
+    }
+  }
+};
+
+} // end namespace default_plural_forms
+
+} // end namespace spirit_po
diff --git a/src/spirit_po/default_plural_forms_expressions.hpp b/src/spirit_po/default_plural_forms_expressions.hpp
new file mode 100644
index 000000000000..acdea623215a
--- /dev/null
+++ b/src/spirit_po/default_plural_forms_expressions.hpp
@@ -0,0 +1,574 @@
+// (C) Copyright 2015 - 2016 Christopher Beck
+
+// Distributed under the Boost Software License, Version 1.0. 
(See accompanying
+// file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#pragma once
+
+/***
+ * The namespace default_plural_forms contains all the details to implement
+ * the subset of the C grammar used by standard GNU gettext po headers.
+ *
+ * Boolean expressions return uint 0 or 1.
+ *
+ * The 'compiler' is a spirit grammar which parses a string into an expression
+ * object. The expressions are evaluated by a simple stack machine.
+ */
+
+#ifndef BOOST_SPIRIT_USE_PHOENIX_V3
+#define BOOST_SPIRIT_USE_PHOENIX_V3
+#endif
+
+// NOTE(review): the #include lines below have lost their header names (the
+// text between '<' and '>' appears to have been stripped from this patch).
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifdef SPIRIT_PO_DEBUG
+#include
+#include
+#endif
+
+namespace spirit_po {
+
+namespace qi = boost::spirit::qi;
+typedef unsigned int uint;
+
+namespace default_plural_forms {
+
+// X Macro for repetitive binary ops declarations
+// Each entry is X_(struct name, C operator).
+
+#define FOREACH_SPIRIT_PO_BINARY_OP(X_) \
+  X_(eq_op, ==) X_(neq_op, !=) X_(ge_op, >=) X_(le_op, <=) X_(gt_op, >) X_(lt_op, <) X_(mod_op, %)
+
+// && and || are treated slightly differently from other binary ops
+
+#define FOREACH_SPIRIT_PO_CONJUNCTION(X_) \
+  X_(and_op, &&) X_(or_op, ||)
+
+/***
+ * Declare / forward declare expr struct types
+ */
+
+struct constant { uint value; };
+struct n_var { n_var() = default; explicit n_var(char) {}}; // work around a quirk in spirit
+struct not_op;
+struct ternary_op;
+
+#define FWD_DECL_(name, op) \
+struct name ; \
+
+FOREACH_SPIRIT_PO_BINARY_OP(FWD_DECL_)
+FOREACH_SPIRIT_PO_CONJUNCTION(FWD_DECL_)
+
+#undef FWD_DECL_
+
+/***
+ * Define expr variant type
+ *
+ * NOTE(review): the typedef below is incomplete in this patch text -- the
+ * alternatives listed between '<' and '>' around the two macro expansions
+ * appear to have been stripped. Restore it from upstream.
+ */
+
+#define WRAP_(name, op) boost::recursive_wrapper< name >, \
+
+typedef boost::variant,
+FOREACH_SPIRIT_PO_BINARY_OP(WRAP_)
+FOREACH_SPIRIT_PO_CONJUNCTION(WRAP_)
+boost::recursive_wrapper> expr;
+
+#undef WRAP_
+
+/***
+ * Define structs
+ *
+ * not_op has one operand, ternary_op three, and every op generated from the
+ * X macros two (e1, e2).
+ */
+
+struct not_op { expr e1; };
+struct ternary_op { expr e1, e2, e3; };
+
+#define DECL_(name, op) \
+struct name { expr e1, e2; }; \
+
+FOREACH_SPIRIT_PO_BINARY_OP(DECL_)
+FOREACH_SPIRIT_PO_CONJUNCTION(DECL_)
+
+#undef DECL_
+
+/***
+ * Visitor that naively evaluates expressions
+ *
+ * Evaluates an expr recursively for a fixed value of n (n_value_).
+ *
+ * NOTE(review): boost::static_visitor is missing its template argument here;
+ * every operator() below returns uint.
+ * NOTE(review): the generated mod_op overload applies '%' directly, so an
+ * expression such as "n % 0" would divide by zero -- confirm whether input
+ * is guaranteed to exclude that.
+ */
+struct evaluator : public boost::static_visitor {
+  uint n_value_;
+  explicit evaluator(uint n) : n_value_(n) {}
+
+  uint operator()(const constant & c) const { return c.value; }
+  uint operator()(n_var) const { return n_value_; }
+  uint operator()(const not_op & op) const { return !boost::apply_visitor(*this, op.e1); }
+
+#define EVAL_OP_(name, OPERATOR) \
+  uint operator()(const name & op) const { return (boost::apply_visitor(*this, op.e1)) OPERATOR (boost::apply_visitor(*this, op.e2)); } \
+
+FOREACH_SPIRIT_PO_BINARY_OP(EVAL_OP_)
+FOREACH_SPIRIT_PO_CONJUNCTION(EVAL_OP_)
+#undef EVAL_OP_
+
+  uint operator()(const ternary_op & op) const { return boost::apply_visitor(*this, op.e1) ? boost::apply_visitor(*this, op.e2) : boost::apply_visitor(*this, op.e3); }
+};
+
+} // end namespace default_plural_forms
+
+} // end namespace spirit_po
+
+/***
+ * Adapt structs for fusion / qi
+ */
+
+BOOST_FUSION_ADAPT_STRUCT(spirit_po::default_plural_forms::constant,
+                          (uint, value))
+BOOST_FUSION_ADAPT_STRUCT(spirit_po::default_plural_forms::not_op,
+                          (spirit_po::default_plural_forms::expr, e1))
+BOOST_FUSION_ADAPT_STRUCT(spirit_po::default_plural_forms::ternary_op,
+                          (spirit_po::default_plural_forms::expr, e1)
+                          (spirit_po::default_plural_forms::expr, e2)
+                          (spirit_po::default_plural_forms::expr, e3))
+
+#define ADAPT_STRUCT_(name, op) \
+BOOST_FUSION_ADAPT_STRUCT(spirit_po::default_plural_forms:: name, \
+                          (spirit_po::default_plural_forms::expr, e1) \
+                          (spirit_po::default_plural_forms::expr, e2)) \
+
+FOREACH_SPIRIT_PO_BINARY_OP(ADAPT_STRUCT_)
+FOREACH_SPIRIT_PO_CONJUNCTION(ADAPT_STRUCT_)
+
+#undef ADAPT_STRUCT_
+
+namespace spirit_po {
+
+namespace default_plural_forms {
+
+/***
+ * Pseudo-C Grammar
+ *
+ * Note that the grammar has been somewhat optimized by using local variables
+ * and inherited attributes, in order to avoid exponential backtracking overhead. 
+ * This makes it a little harder to read than if we got rid of all local variables, + * but then it is too slow to parse the expressions for certain languages. + * + * The main idea is that instead of parsing things like + * + * BINARY_OP = LOWER_PRECENDENCE >> BINARY_OP_LITERAL >> CURRENT_PRECEDENCE + * CURRENT_PRECEDENCE = BINARY_OP | OTHER_OP | YET_ANOTHER_OP | LOWER_PRECEDENCE + * + * (which is bad because if the binary op literal is not there then we have to + * backtrack through an entire subexpression) + * + * we make BINARY_OP take the subexpression as a parameter, and in each + * precedence level, we capture the subexpression first and store it in a local + * variable, so that it does not get reparsed when we backtrack. + * + * BINARY_OP = BINARY_OP_LITERAL >> qi::attr(parameter) >> CURRENT_PRECEDENCE + * + * CURRENT_PRECEDENCE = LOWER_PRECEDENCE[local_var = result] >> + * (BINARY_OP(local_var) | OTHER_OP(local_var) | YET_ANOTHER_OP(local_var) | qi::attr(local_var) + * + */ + +template +struct op_grammar : qi::grammar { + qi::rule constant_; + qi::rule n_; + qi::rule not_; + qi::rule and_; + qi::rule or_; + qi::rule eq_; + qi::rule neq_; + qi::rule ge_; + qi::rule le_; + qi::rule gt_; + qi::rule lt_; + qi::rule mod_; + qi::rule ternary_; + qi::rule paren_expr_; + + // expression precedence levels + qi::rule> ternary_level_; + qi::rule> or_level_; + qi::rule> and_level_; + qi::rule> eq_level_; + qi::rule> rel_level_; + qi::rule> mod_level_; + qi::rule atom_level_; + qi::rule expr_; + + // handle optional ';' at end + qi::rule main_; + + op_grammar() : op_grammar::base_type(main_) { + using qi::attr; + using qi::lit; + + constant_ = qi::uint_; + n_ = qi::char_('n'); + paren_expr_ = lit('(') >> expr_ >> lit(')'); + not_ = lit('!') >> atom_level_; + atom_level_ = paren_expr_ | not_ | n_ | constant_; + + mod_ = lit('%') >> attr(qi::_r1) >> atom_level_; + mod_level_ = qi::omit[atom_level_[qi::_a = qi::_1]] >> (mod_(qi::_a) | attr(qi::_a)); + + ge_ = lit(">=") 
>> attr(qi::_r1) >> mod_level_; + le_ = lit("<=") >> attr(qi::_r1) >> mod_level_; + gt_ = lit('>') >> attr(qi::_r1) >> mod_level_; + lt_ = lit('<') >> attr(qi::_r1) >> mod_level_; + rel_level_ = qi::omit[mod_level_[qi::_a = qi::_1]] >> (ge_(qi::_a) | le_(qi::_a) | gt_(qi::_a) | lt_(qi::_a) | attr(qi::_a)); + + eq_ = lit("==") >> attr(qi::_r1) >> rel_level_; + neq_ = lit("!=") >> attr(qi::_r1) >> rel_level_; + eq_level_ = qi::omit[rel_level_[qi::_a = qi::_1]] >> (eq_(qi::_a) | neq_(qi::_a) | attr(qi::_a)); + + and_ = lit("&&") >> attr(qi::_r1) >> and_level_; + and_level_ = qi::omit[eq_level_[qi::_a = qi::_1]] >> (and_(qi::_a) | attr(qi::_a)); + + or_ = lit("||") >> attr(qi::_r1) >> or_level_; + or_level_ = qi::omit[and_level_[qi::_a = qi::_1]] >> (or_(qi::_a) | attr(qi::_a)); + + ternary_ = lit('?') >> attr(qi::_r1) >> ternary_level_ >> lit(':') >> ternary_level_; + ternary_level_ = qi::omit[or_level_[qi::_a = qi::_1]] >> (ternary_(qi::_a) | attr(qi::_a)); + + expr_ = ternary_level_; + + main_ = expr_ >> -lit(';'); + } +}; + +/*** + * Now define a simple stack machine to evaluate the expressions efficiently. + * + * First define op_codes + */ + +#define ENUMERATE(X, Y) X, + +enum class op_code { n_var, FOREACH_SPIRIT_PO_BINARY_OP(ENUMERATE) not_op }; + +#undef ENUMERATE + +/// Instruction that causes us to skip upcoming instructions +struct skip { + uint distance; +}; + +/// Instructions that conditionally cause us to skip upcoming instructions +struct skip_if { + uint distance; +}; + +struct skip_if_not { + uint distance; +}; + +/*** + * Instruction is a variant type that represents either a push_constant, branch, jump, or arithmetic op. + */ +typedef boost::variant instruction; + +/*** + * Debug strings for instruction set + */ +#ifdef SPIRIT_PO_DEBUG +inline std::string op_code_string(op_code oc) { + std::string result = "[ "; + switch (oc) { + case op_code::n_var: { + result += "n "; + break; + } + case op_code::not_op: { + result += "! 
"; + break; + } +#define OP_CODE_STR_CASE_(X, Y) \ + case op_code::X: { \ + result += #Y; \ + break; \ + } + +FOREACH_SPIRIT_PO_BINARY_OP(OP_CODE_STR_CASE_) + +#undef OP_CODE_STR_CASE_ + } + + if (result.size() < 5) { result += ' '; } \ + result += " : ]"; + return result; +} + +struct instruction_debug_string_maker : boost::static_visitor { + std::string operator()(const constant & c) const { + return "[ push : " + std::to_string(c.value) + " ]"; + } + std::string operator()(const skip & s) const { + return "[ skip : " + std::to_string(s.distance) + " ]"; + } + std::string operator()(const skip_if & s) const { + return "[ sif : " + std::to_string(s.distance) + " ]"; + } + std::string operator()(const skip_if_not & s) const { + return "[ sifn : " + std::to_string(s.distance) + " ]"; + } + std::string operator()(const op_code & oc) const { + return op_code_string(oc); + } +}; + +inline std::string debug_string(const instruction & i) { + return boost::apply_visitor(instruction_debug_string_maker{}, i); +} + +#endif // SPIRIT_PO_DEBUG + +/*** + * Helper: Check if an expression obviously is zero-one valued + */ +struct is_boolean : public boost::static_visitor { + bool operator()(const and_op &) const { return true; } + bool operator()(const or_op &) const { return true; } + bool operator()(const not_op &) const { return true; } + bool operator()(const eq_op &) const { return true; } + bool operator()(const neq_op &) const { return true; } + bool operator()(const ge_op &) const { return true; } + bool operator()(const le_op &) const { return true; } + bool operator()(const gt_op &) const { return true; } + bool operator()(const lt_op &) const { return true; } + bool operator()(const n_var &) const { return false; } + bool operator()(const constant & c) const { return (c.value == 0 || c.value == 1); } + bool operator()(const mod_op & m) const { return boost::apply_visitor(*this, m.e1); } + bool operator()(const ternary_op & t) const { return boost::apply_visitor(*this, 
t.e2) && boost::apply_visitor(*this, t.e3); } +}; + + +/*** + * Visitor that maps expressions to instruction sequences + */ +struct emitter : public boost::static_visitor> { + std::vector operator()(const constant & c) const { + return std::vector{instruction{c}}; + } + std::vector operator()(const n_var &) const { + return std::vector{instruction{op_code::n_var}}; + } + std::vector operator()(const not_op & o) const { + auto result = boost::apply_visitor(*this, o.e1); + result.emplace_back(op_code::not_op); + return result; + } +#define EMIT_OP_(name, op) \ + std::vector operator()(const name & o) const { \ + auto result = boost::apply_visitor(*this, o.e1); \ + auto temp = boost::apply_visitor(*this, o.e2); \ + std::move(temp.begin(), temp.end(), std::back_inserter(result)); \ + result.emplace_back(op_code::name); \ + return result; \ + } + +FOREACH_SPIRIT_PO_BINARY_OP(EMIT_OP_) + +#undef EMIT_OP_ + + /*** + * We make &&, ||, and ? shortcut + */ + std::vector operator()(const and_op & o) const { + auto result = boost::apply_visitor(*this, o.e1); + auto second = boost::apply_visitor(*this, o.e2); + bool second_is_boolean = boost::apply_visitor(is_boolean{}, o.e2); + + uint sec_size = static_cast(second.size()); + if (!second_is_boolean) { sec_size += 2; } + + result.emplace_back(skip_if{2}); + result.emplace_back(constant{0}); + result.emplace_back(skip{sec_size}); + + std::move(second.begin(), second.end(), std::back_inserter(result)); + if (!second_is_boolean) { + result.emplace_back(op_code::not_op); + result.emplace_back(op_code::not_op); + } + + return result; + } + + std::vector operator()(const or_op & o) const { + auto result = boost::apply_visitor(*this, o.e1); + auto second = boost::apply_visitor(*this, o.e2); + bool second_is_boolean = boost::apply_visitor(is_boolean{}, o.e2); + + uint sec_size = static_cast(second.size()); + if (!second_is_boolean) { sec_size += 2; } + + result.emplace_back(skip_if_not{2}); + result.emplace_back(constant{1}); + 
result.emplace_back(skip{sec_size}); + + std::move(second.begin(), second.end(), std::back_inserter(result)); + if (!second_is_boolean) { + result.emplace_back(op_code::not_op); + result.emplace_back(op_code::not_op); + } + + return result; + } + + std::vector operator()(const ternary_op & o) const { + auto result = boost::apply_visitor(*this, o.e1); + auto tbranch = boost::apply_visitor(*this, o.e2); + auto fbranch = boost::apply_visitor(*this, o.e3); + + uint tsize = static_cast(tbranch.size()); + uint fsize = static_cast(fbranch.size()); + + // We use jump if / jump if not in the way that will let us put the shorter branch first. + if (tbranch.size() > fbranch.size()) { + // + 1 to size because we have to put a jump at end of this branch also + result.emplace_back(skip_if{fsize + 1}); + std::move(fbranch.begin(), fbranch.end(), std::back_inserter(result)); + result.emplace_back(skip{tsize}); + std::move(tbranch.begin(), tbranch.end(), std::back_inserter(result)); + } else { + result.emplace_back(skip_if_not{tsize + 1}); + std::move(tbranch.begin(), tbranch.end(), std::back_inserter(result)); + result.emplace_back(skip{fsize}); + std::move(fbranch.begin(), fbranch.end(), std::back_inserter(result)); + } + return result; + } +}; + +/*** + * Actual stack machine + */ + +class stack_machine : public boost::static_visitor { + std::vector instruction_seq_; + std::vector stack_; + uint n_value_; + +#ifdef SPIRIT_PO_DEBUG +public: + void debug_print_instructions() const { + std::cerr << "Instruction sequence:\n"; + for (const auto & i : instruction_seq_) { + std::cerr << debug_string(i) << std::endl; + } + } +private: + +#define MACHINE_ASSERT(X) \ + do { \ + if (!(X)) { \ + std::cerr << "Stack machine failure:\n"; \ + debug_print_instructions(); \ + assert(false && #X); \ + } \ + } while(0) + +#else + +#define MACHINE_ASSERT(...) 
do {} while(0) + +#endif + + uint pop_one() { + MACHINE_ASSERT(stack_.size()); + + uint result = stack_.back(); + stack_.resize(stack_.size() - 1); + return result; + } + +public: + explicit stack_machine(const expr & e) + : instruction_seq_(boost::apply_visitor(emitter{}, e)) + , stack_() + , n_value_() + {} + + /*** + * operator() takes the instruction that we should execute + * It should perform the operation adjusting the stack + * It returns the amount by which we should increment the + * program counter. + */ + uint operator()(const constant & c) { + stack_.emplace_back(c.value); + return 1; + } + + uint operator()(const skip & s) { + return 1 + s.distance; + } + + uint operator()(const skip_if & s) { + return 1 + (pop_one() ? s.distance : 0); + } + + uint operator()(const skip_if_not & s) { + return 1 + (pop_one() ? 0 : s.distance); + } + + uint operator()(op_code oc) { + switch (oc) { + case op_code::n_var: { + stack_.emplace_back(n_value_); + return 1; + } + case op_code::not_op: { + MACHINE_ASSERT(stack_.size()); + stack_.back() = !stack_.back(); + return 1; + } +#define STACK_MACHINE_CASE_(name, op) \ + case op_code::name: { \ + MACHINE_ASSERT(stack_.size() >= 2); \ + uint parm2 = pop_one(); \ + \ + if (op_code::name == op_code::mod_op) { \ + MACHINE_ASSERT(parm2 && "Division by zero when evaluating gettext plural form expression"); \ + } \ + \ + stack_.back() = (stack_.back() op parm2); \ + return 1; \ + } + +FOREACH_SPIRIT_PO_BINARY_OP(STACK_MACHINE_CASE_) + +#undef STACK_MACHINE_CASE_ + } + MACHINE_ASSERT(false); + return 1; + } + + uint compute(uint arg) { + n_value_ = arg; + stack_.resize(0); + uint pc = 0; + while (pc < instruction_seq_.size()) { + pc += boost::apply_visitor(*this, instruction_seq_[pc]); + } + MACHINE_ASSERT(pc == instruction_seq_.size()); + MACHINE_ASSERT(stack_.size() == 1); + return stack_[0]; + } +}; + +#undef MACHINE_ASSERT + +// X macros not used anymore +#undef FOREACH_SPIRIT_PO_BINARY_OP +#undef 
FOREACH_SPIRIT_PO_CONJUNCTION + +} // end namespace default_plural_forms + +} // end namespace spirit_po diff --git a/src/spirit_po/exceptions.hpp b/src/spirit_po/exceptions.hpp new file mode 100644 index 000000000000..06956d73789c --- /dev/null +++ b/src/spirit_po/exceptions.hpp @@ -0,0 +1,76 @@ +// (C) Copyright 2015 - 2016 Christopher Beck + +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt) + +#pragma once + +#include +#include + +namespace spirit_po { + +// Show the next 80 characters from some iterator position. +// Intended to be used for parser error messages +template +std::string iterator_context(Iterator & it, Iterator & end) { + std::string result; + std::size_t line_no = boost::spirit::get_line(it); + if (line_no != static_cast(-1)) { + result = "Line " + std::to_string(line_no) + ":\n"; + } + + uint count = 80; + while (it != end && count) { + result += *it; + ++it; + --count; + } + return result; +} + +// When the thing being parsed is a short string, we can give +// a better context report +std::string string_iterator_context(const std::string & str, + std::string::const_iterator it) { + std::string result{str}; + result += "\n"; + + for (auto temp = str.begin(); temp != it; ++temp) { + result += ' '; + } + result += "^\n"; + return result; +} + +} // end namespace spirit_po + + +#ifdef SPIRIT_PO_NOEXCEPT + +#define SPIRIT_PO_CATALOG_FAIL(Message) \ +do { \ + error_message_ = (Message); \ + return ; \ +} while(0) + +#else + +#include + +namespace spirit_po { + +struct catalog_exception : std::runtime_error { + catalog_exception(const char * what) : runtime_error(what) {} + catalog_exception(const std::string & what) : runtime_error(what) {} +}; + +} // end namespace spirit_po + +#define SPIRIT_PO_CATALOG_FAIL(Message) \ +do { \ + throw spirit_po::catalog_exception(( Message )); \ +} while(0) + + +#endif // SPIRIT_PO_NOEXCEPT diff --git 
a/src/spirit_po/po_grammar.hpp b/src/spirit_po/po_grammar.hpp new file mode 100644 index 000000000000..bc78f77a2b51 --- /dev/null +++ b/src/spirit_po/po_grammar.hpp @@ -0,0 +1,125 @@ +// (C) Copyright 2015 - 2016 Christopher Beck + +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt) + +#pragma once + +#ifndef BOOST_SPIRIT_USE_PHOENIX_V3 +#define BOOST_SPIRIT_USE_PHOENIX_V3 +#endif + +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +namespace spirit_po { + +typedef unsigned int uint; +namespace qi = boost::spirit::qi; +namespace phoenix = boost::phoenix; + +template +struct po_grammar : qi::grammar { + qi::rule white_line; + qi::rule comment_line; + qi::rule skipped_line; + qi::rule skipped_block; + + qi::rule escaped_character; + qi::rule single_line_string; + qi::rule multiline_string; + + qi::rule message_id; + qi::rule message_id_plural; + qi::rule message_context; + qi::rule message_str; + qi::rule message_str_plural; + + qi::rule()> message_single_str; + qi::rule(uint)> message_strs; + + qi::rule message_singular; + qi::rule message_plural; + + qi::rule message; + + // Related to parsing "fuzzy" po comment + qi::rule> fuzzy; + qi::rule preamble_comment_line; + qi::rule preamble_comment_block; + + /// consume any number of blocks, consisting of any number of comments followed by a white line + qi::rule ignored_comments; + /// consume any number of non-white comment line (using #). 
bool result represents if we saw #, fuzzy comment + qi::rule message_preamble; + + po_grammar() : po_grammar::base_type(message) { + using qi::attr; + using qi::char_; + using qi::eoi; + using qi::lit; + using qi::omit; + using qi::uint_; + + white_line = *char_(" \t\r"); // nullable + comment_line = char_('#') >> *(char_ - '\n'); // not nullable + skipped_line = (comment_line | white_line) >> lit('\n'); // not nullable + skipped_block = *skipped_line; // nullable + + // TODO: Do we need to handle other escaped characters? + escaped_character = lit('\\') >> (char_("\'\"\\") | (lit('n') >> attr('\n')) | (lit('t') >> attr('\t'))); + single_line_string = lit('"') >> *(escaped_character | (char_ - '\\' - '"')) >> lit('"'); + multiline_string = single_line_string % skipped_block; // ^ this is important, if we don't have this then \\ does not have to be escaped in po string, just form an illegal escape code + + message_context = skipped_block >> lit("msgctxt ") >> multiline_string; + message_id = skipped_block >> lit("msgid ") >> multiline_string; + message_str = skipped_block >> lit("msgstr ") >> multiline_string; + message_id_plural = skipped_block >> lit("msgid_plural ") >> multiline_string; + message_str_plural = skipped_block >> lit("msgstr[") >> omit[ uint_(qi::_r1) ] >> lit("] ") >> multiline_string; + // ^ the index in the po file must match what we expect + + // qi::repeat converts it from a std::string, to a singleton vector, as required + message_single_str = qi::repeat(1)[message_str]; + message_strs = message_str_plural(qi::_r1) >> -message_strs(qi::_r1 + 1); + // ^ enforces that indices must count up + + // Detect whether we should read multiple messages or a single message by presence of `msgid_plural` + message_plural = message_id_plural >> message_strs(0); // first line should be msgstr[0] + message_singular = attr("") >> message_single_str; + message = -message_context >> message_id >> (message_plural | message_singular); + + /*** + * The remaining 
rules are not contributing to message -- their job is to consume comments leading up to the message, + * keep track of if we saw a fuzzy marker, and to consume the entire file if only whitespace lines remain, whether or + * not it ends in new-line. + * + * First, parse "ignored_comments", + * message_preamble is the main rule of this section + */ + + /// Fuzzy: Expect comment of the form #, with literal `, fuzzy` in the list somewhere. + /// We use a qi local to keep track of if we saw it, this avoids excessive backtracking + fuzzy = lit('#') >> (&lit(','))[qi::_a = false] >> *(lit(',') >> -(lit(" fuzzy")[qi::_a = true]) >> *(char_ - '\n' - ',')) >> lit('\n') >> qi::eps(qi::_a); + preamble_comment_line = comment_line >> lit('\n'); + + ignored_comments = *(*preamble_comment_line >> white_line >> lit('\n')); + preamble_comment_block = *preamble_comment_line >> -comment_line; + // ^ if po-file ends in a comment without eol we should still consume it + message_preamble = (fuzzy >> preamble_comment_block >> attr(true)) | (preamble_comment_line >> message_preamble) | (-comment_line >> attr(false)); + // ^ if we find fuzzy, short cut out of this test ^ consume one comment line and repeat ^ didn't find fuzzy, return false + // ^ note: no backtrack after fuzzy... ^ note: no backtrack after comment line... and consume trailing comment + // preamble_comment_block is nullable message_preamble is nullable + } +}; + +} // end namespace spirit_po diff --git a/src/spirit_po/po_message.hpp b/src/spirit_po/po_message.hpp new file mode 100644 index 000000000000..48cc84e2f3bb --- /dev/null +++ b/src/spirit_po/po_message.hpp @@ -0,0 +1,59 @@ +// (C) Copyright 2015 - 2016 Christopher Beck + +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt) + +#pragma once + +#include +#include +#include +#include + +namespace spirit_po { + +typedef std::pair> plural_and_strings_type; + +struct po_message { + boost::optional context; + std::string id; + plural_and_strings_type plural_and_strings; + + std::size_t line_no; + + // Get the 'id_plural', 'strings' fields from the pair. + // It is arranged as a pair here to allow for simpler parsing with spirit attributes. + std::string & id_plural() { return plural_and_strings.first; } + const std::string & id_plural() const { return plural_and_strings.first; } + + std::vector & strings() { return plural_and_strings.second; } + const std::vector & strings() const { return plural_and_strings.second; } + + // Check if message is plural. We do this for now by testing msgid_plural.size(). + // Recommended to use this method in case we change it in the future. + bool is_plural() const { return static_cast(id_plural().size()); } +}; + +/*** + * Debug printer + */ +#ifdef SPIRIT_PO_DEBUG +std::string debug_string(const po_message & msg) { + std::string result = "{\n"; + if (msg.context) { + result += " context: \"" + *msg.context + "\"\n"; + } + result += " id: \"" + msg.id + "\"\n"; + result += " id_plural: \"" + msg.id_plural() + "\"\n"; + result += " strings: { "; + for (uint i = 0; i < msg.strings().size(); ++i) { + if (i) { result += ", "; } + result += '"' + msg.strings()[i] + '"'; + } + result += " }\n"; + result += "}"; + return result; +} +#endif // SPIRIT_PO_DEBUG + +} // end namespace spirit_po diff --git a/src/spirit_po/po_message_adapted.hpp b/src/spirit_po/po_message_adapted.hpp new file mode 100644 index 000000000000..2273fadb6a45 --- /dev/null +++ b/src/spirit_po/po_message_adapted.hpp @@ -0,0 +1,23 @@ +// (C) Copyright 2015 - 2016 Christopher Beck + +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt) + +#pragma once + +#include +#include +#include + +#include +#include +#include +#include + +#include + +BOOST_FUSION_ADAPT_STRUCT( + spirit_po::po_message, + (boost::optional, context) + (std::string, id) + (spirit_po::plural_and_strings_type, plural_and_strings)) diff --git a/src/spirit_po/version.hpp b/src/spirit_po/version.hpp new file mode 100644 index 000000000000..88e8decf19ef --- /dev/null +++ b/src/spirit_po/version.hpp @@ -0,0 +1,10 @@ +// (C) Copyright 2015 - 2016 Christopher Beck + +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt) + +#pragma once + +#define SPIRIT_PO_VERSION_MAJOR 1 +#define SPIRIT_PO_VERSION_MINOR 0 +#define SPIRIT_PO_VERSION_PATCH 0