data/tools/wmlvalidator

#!/usr/bin/env python
"""
wmlvalidator -- tool to validate the syntax and semantics of WML.

Use --help to see usage.
"""
#TODO:
#-define verbosity levels better

from __future__ import print_function
import wesnoth.wmldata as wmldata
import wesnoth.wmlparser as wmlparser
import wesnoth.wmlgrammar as wmlgrammar
import re

def print_indent(string, depth, char=' '):
    """
    Print string on its own line, preceded by depth copies of char.

    string is formatted with %s, so any printable object is accepted.
    """
    prefix = depth * char
    print("%s%s" % (prefix, string))

class Validator:
    """
    The class that takes a wmlgrammar object to validate wml trees with.

    Problems are never raised as exceptions: they are collected per source
    file in self.validate_result (file -> list of dicts with the keys
    'line', 'origin' and 'message') and shown by validate_result_print().
    """

    # Matches an optional such as "limit(0,10)" or "limit(0.5,2.5)"; the two
    # groups are the lower and the upper bound.  Compiled once instead of once
    # per checked value, and the decimal point is escaped so that something
    # like "limit(1x5,10)" is no longer accepted.
    _LIMIT_RE = re.compile(r'^limit\((\d+\.\d+|\d+),(\d+\.\d+|\d+)\)$')

    def __init__(self, schema, verbosity=0):
        """
        schema is handed to wmlgrammar.Grammar to build the lookup schema.
        verbosity controls the debug output; None is treated as 0 so that
        the later "verbosity > 1" comparison is always between numbers.
        """
        self.schema = wmlgrammar.Grammar(schema)
        self.verbosity = verbosity or 0
        self.validate_result = {}

    def validate_result_add(self, from_file, line, origin, message):
        """Record one problem found at the given line of from_file."""
        self.validate_result.setdefault(from_file, []).append(
            {'line': line, 'origin': origin, 'message': message})

    def validate_result_print(self):
        """Print every recorded problem, grouped by file, using ANSI styling."""
        normal = '\033[0m'
        bold = '\033[1m'
        underline = '\033[4m'
        # items() instead of iteritems(): available on Python 2 and 3 alike.
        for from_file, problems in self.validate_result.items():
            print("%s%s%s" % (bold, from_file, normal))
            for problem in problems:
                print("%s#%d: %s%s %s" % (underline, problem['line'], problem['origin'], normal, problem['message']))

    @staticmethod
    def _split_list(data):
        """
        Split a comma separated value into its items, dropping one leading
        space from each item.  Empty items (as in "a,,b" or "") are kept as
        empty strings instead of raising IndexError.
        """
        return [item[1:] if item.startswith(' ') else item
                for item in data.split(",")]

    def _check_frequency(self, node, origin, freq, nummatches):
        """Record a problem if nummatches violates the frequency rule freq."""
        if freq == wmlgrammar.REQUIRED and nummatches != 1:
            message = "Should appear exactly once, not %d times" % nummatches
        elif freq == wmlgrammar.OPTIONAL and nummatches > 1:
            message = "Should appear at most once, not %d times" % nummatches
        elif freq == wmlgrammar.FORBIDDEN and nummatches > 0:
            message = "Should not appear. It appears %d times" % nummatches
        else:
            return
        self.validate_result_add(node.file, node.line, origin, message)

    def _check_attribute_value(self, node, verbosename, attribute, value, pos=None):
        """
        Check a single value against the attribute's type and, if one of the
        attribute's optionals is a limit(min,max), against that range.
        pos is the 1-based position inside a list value, None otherwise.
        """
        where = "" if pos is None else " (At position %d)" % pos
        origin = "Attribute [%s] %s%s" % (verbosename, attribute.name, where)

        if not attribute.validate(value):
            self.validate_result_add(node.file, node.line, origin, "Value should be %s, found: %s" % (attribute.type, value))

        # Only the first limit(...) optional is honoured.
        for optional in attribute.optionals:
            limit = self._LIMIT_RE.search(optional)
            if limit:
                break
        else:
            return

        number_min, number_max = limit.groups()
        try:
            number = float(value)
        except ValueError:
            # A value that is not a number cannot lie inside the range;
            # report it instead of aborting the whole validation run.
            out_of_range = True
        else:
            out_of_range = number > float(number_max) or number < float(number_min)

        if out_of_range:
            self.validate_result_add(node.file, node.line, origin, "Value must be between %s and %s, found : %s" % (number_min, number_max, value))

    def _validate_attributes(self, node, schema, verbosename):
        """Check all attributes of node that the schema knows, then report the rest."""
        for attribute in schema.get_attributes():
            matches = node.get_texts(attribute.name)
            origin = "Attribute [%s] %s" % (verbosename, attribute.name)

            # Check frequency
            self._check_frequency(node, origin, attribute.freq, len(matches))

            # Check use
            optionals = attribute.optionals
            for match in matches:
                # is_translatable() is defined outside this file, so the
                # explicit comparisons with False/True are kept as they were.
                translatable = match.is_translatable()
                if 'translatable' in optionals:
                    if translatable == False:
                        self.validate_result_add(node.file, node.line, origin, "Value is translatable, but haven't _ at the beginning")
                elif 'optional-translatable' not in optionals and translatable == True:
                    self.validate_result_add(node.file, node.line, origin, "Value isn't translatable, but have a _ at the beginning")

                if 'list' in optionals:
                    for pos, item in enumerate(self._split_list(match.data), 1):
                        self._check_attribute_value(node, verbosename, attribute, item, pos=pos)
                else:
                    self._check_attribute_value(node, verbosename, attribute, match.data)
                node.remove(match) # Get rid of these so we can see what's left

        # Whatever is still attached to the node was not claimed by the schema.
        for attribute in node.get_all_text():
            self.validate_result_add(node.file, node.line, "Attribute [%s] %s" % (verbosename, attribute.name), "Found, which has no meaning there")

    def _validate_elements(self, node, schema, verbosename, depth):
        """Check all sub-elements of node that the schema knows, then report the rest."""
        for element in schema.get_elements():
            matches = node.get_subs(element.name)
            origin = "Element [%s] [%s]" % (verbosename, element.name)

            # Check frequency
            self._check_frequency(node, origin, element.freq, len(matches))

            # Check sub
            for match in matches:
                self.validate(match, depth + 1, element.subname)
                node.remove(match)

        # Whatever is still attached to the node was not claimed by the schema.
        for element in node.get_all_subs():
            self.validate_result_add(node.file, node.line, "Element [%s] [%s]" % (verbosename, element.name), "Found, which has no meaning there")

    def validate(self, node, depth=0, name=None):
        """
        Validate the given DataSub node.
        depth indicates how deep we've recursed into the tree,
        name is a mechanism for overwriting the node name to look up in the schema, used for overloaded names.

        Validated attributes and sub-elements are removed from node, so the
        node is consumed by this call.  Problems are recorded through
        validate_result_add(); nothing is returned.
        """
        if not name or name == node.name:
            name = node.name
            verbosename = name
        else:
            verbosename = "%s (%s)" % (node.name, name)

        if self.verbosity > 1:
            print_indent(node.name, depth)

        try:
            schema = self.schema.get_element(name)
        except KeyError:
            print("No valid schema found for %s" % verbosename)
            return

        self._validate_attributes(node, schema, verbosename)
        self._validate_elements(node, schema, verbosename, depth)

if __name__ == '__main__':
    # Command line entry point: parse the options, load the schema, parse the
    # requested WML files and print every problem the Validator recorded.
    import argparse
    import codecs
    import os
    import subprocess
    import sys

    # Ugly hack to force the output of UTF-8.
    # This prevents us from crashing when we're being verbose
    #  and encounter a non-ascii character.
    # Only needed (and only valid) on Python 2, where sys.stdout is a byte
    # stream; wrapping Python 3's text stream would break every print().
    if sys.version_info[0] < 3:
        sys.stdout = codecs.getwriter('utf-8')(sys.stdout)

    # The first positional parameter of ArgumentParser is `prog`, not the
    # usage text, so let argparse generate the usage line itself.
    ap = argparse.ArgumentParser(
        description = "Validate the syntax and semantics of WML.")
    ap.add_argument("-p", "--path",
        help = "Specify Wesnoth's data dir",
        dest = "path")
    ap.add_argument("-u", "--userpath",
        help = "Specify user data dir",
        dest = "userpath")
    ap.add_argument("-s", "--schema",
        help = "Specify WML schema",
        dest = "schema")
    # default = 0: without it the count action yields None when -v is absent,
    # and the "args.verbose > 1" comparisons below fail on Python 3.
    ap.add_argument("-v", "--verbose",
        action = "count",
        dest = "verbose",
        default = 0,
        help = "Increase verbosity, 4 is the maximum.")
    ap.add_argument("-D", "--define",
        action = "append",
        dest = "defines",
        default = [],
        help = "Define (empty) preprocessor macros, for campaign/multiplayer inclusion.")
    ap.add_argument("filename",
        nargs = "*",
        help = "Files to validate or directory. If it is a directory, get all the cfg files from directory")
    args = ap.parse_args()
    if args.path:
        args.path = os.path.expanduser(args.path)
    if args.userpath:
        args.userpath = os.path.expanduser(args.userpath)
    if args.filename:
        args.filename = [os.path.expanduser(name) for name in args.filename]

    if not args.path:
        try:
            # Ask the game itself where it is installed.  communicate() reads
            # the output and waits for the child, so it is always reaped;
            # universal_newlines makes the output text rather than bytes.
            p = subprocess.Popen(["wesnoth", "--path"],
                stdout = subprocess.PIPE,
                universal_newlines = True)
            path = p.communicate()[0].strip()
            args.path = os.path.join(path, "data")
            sys.stderr.write("No Wesnoth path given.\nAutomatically found '%s'\n" % (args.path, ) )
        except OSError:
            args.path = '.'
            sys.stderr.write("Could not determine Wesnoth path.\nAssuming '%s'\n" % (args.path, ) )

    # Without explicit files, validate the main configuration of the data dir.
    list_files_analyze = []
    if not args.filename:
        list_files_analyze.append(os.path.join(args.path, '_main.cfg'))

    for entry in args.filename:
        if os.path.isdir(entry):
            # Match the real ".cfg" extension (the old "[-3:] == 'cfg'" test
            # also accepted names such as "foocfg") and let os.path.join
            # take care of the separator.
            list_files_analyze.extend(
                os.path.join(entry, cfg)
                for cfg in os.listdir(entry)
                if cfg.endswith('.cfg'))
        else:
            list_files_analyze.append(entry)

    if args.verbose > 1:
        print("Args: %s\n"% (args, ))

    if not args.schema:
        args.schema = os.path.join(args.path, 'schema.cfg')

    # Parse the schema
    parser = wmlparser.Parser(args.path)

    if args.verbose > 3:
        parser.verbose = True
    parser.parse_file(args.schema)

    schema = wmldata.DataSub("schema")
    parser.parse_top(schema)

    # Construct the validator
    validator = Validator(schema, args.verbose)

    # Parse the WML
    parser = wmlparser.Parser(args.path, args.userpath)

    if args.verbose > 3:
        parser.verbose = True

    if args.userpath:
        parser.parse_text("{~add-ons}")
    for cfg_file in list_files_analyze:
        parser.parse_file(cfg_file)
    for macro in args.defines:
        parser.parse_text("#define %s \n#enddef" % (macro, ) )

    data = wmldata.DataSub("root")
    parser.parse_top(data)

    # Validate
    validator.validate(data)
    validator.validate_result_print()

# vim: tabstop=4: shiftwidth=4: expandtab: softtabstop=4: autoindent: