From 4661dada0cb0d3fa6e38bc87d370c56eb062eac8 Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Mon, 11 Jul 2016 01:35:47 +0300 Subject: [PATCH] Change grammar to support priority of operation Build priority of OR > AND > NOT > () right into the grammar. Avoid left recursion - PEG parsers don't like it and grako behaves strange. --- parser/build_ast.py | 24 ++++----- parser/grammar.ebnf | 30 +++++++---- parser/parser.py | 115 ++++++++++++++++++++++++++++++++---------- parser/test_parser.py | 5 +- 4 files changed, 120 insertions(+), 54 deletions(-) diff --git a/parser/build_ast.py b/parser/build_ast.py index de87f84..42bf920 100644 --- a/parser/build_ast.py +++ b/parser/build_ast.py @@ -2,20 +2,16 @@ from parser import TagsSemantics as _TagsSemantics class TagsSemantics(_TagsSemantics): - def expression(self, ast): - if isinstance(ast, str): # name - return ast - if isinstance(ast, unicode): # name - return str(ast) - if isinstance(ast, list): - ast = tuple(ast) - elif not isinstance(ast, tuple): - raise TypeError("Expected a list, got %r %s" % (type(ast), ast)) - if len(ast) == 3: - return (ast[1], ast[0], ast[2]) - return ast - - def expression_parens(self, ast): + def or_expression(self, ast): + return ('OR', ast[0], ast[2]) + + def and_expression(self, ast): + return ('AND', ast[0], ast[2]) + + def not_expression(self, ast): + return ('NOT', ast[1]) + + def parens_expression(self, ast): return ('PARENS', ast[1]) def name(self, ast): diff --git a/parser/grammar.ebnf b/parser/grammar.ebnf index b657738..4a702ed 100644 --- a/parser/grammar.ebnf +++ b/parser/grammar.ebnf @@ -11,26 +11,34 @@ # Allowed operators: conjunction - & && AND and # disjunction - | || OR or # negation - ! NOT not +# Usual priority: NOT recognized before AND, AND before OR. # This is a simple version of the grammar and it allows -# rather stupid expressions, like !!TAG or ((TAG)) +# rather stupid expressions, like (TAG) or ((TAG)) or !(!(TAG)). @@grammar :: Tags start = expression $ ; -expression = ( - | expression and_op expression - | expression or_op expression - | not_op expression - | expression_parens - | name ) ; +expression = expression1 !&or_op | or_expression ; -expression_parens = '(' expression ')' ; +or_expression = expression1 or_op expression ; -name = /[a-z][a-z0-9_]+/ ; +and_expression = expression2 and_op expression1 ; + +not_expression = not_op expression3 ; + +parens_expression = '(' expression ')' ; + +expression1 = expression2 !&and_op | and_expression ; -and_op = '&' | '&&' | 'AND' | 'and' ; +expression2 = !¬_op expression3 | not_expression ; -or_op = '|' | '||' | 'OR' | 'or' ; +expression3 = parens_expression | name ; + +and_op = '&&' | '&' | 'AND' | 'and' ; + +or_op = '||' | '|' | 'OR' | 'or' ; not_op = '!' | 'NOT' | 'not' ; + +name = /[a-z][a-z0-9_]+/ ; diff --git a/parser/parser.py b/parser/parser.py index 460a552..4984f18 100755 --- a/parser/parser.py +++ b/parser/parser.py @@ -17,7 +17,7 @@ from grako.parsing import graken, Parser from grako.util import re, RE_FLAGS, generic_main # noqa -__version__ = (2016, 7, 9, 18, 10, 4, 5) +__version__ = (2016, 7, 10, 22, 31, 57, 6) __all__ = [ 'TagsParser', @@ -58,42 +58,79 @@ class TagsParser(Parser): @graken() def _expression_(self): - with self._group(): - with self._choice(): - with self._option(): - self._expression_() - self._and_op_() - self._expression_() - with self._option(): - self._expression_() - self._or_op_() - self._expression_() - with self._option(): - self._not_op_() - self._expression_() - with self._option(): - self._expression_parens_() - with self._option(): - self._name_() - self._error('no available options') + with self._choice(): + with self._option(): + self._expression1_() + with self._ifnot(): + with self._if(): + self._or_op_() + with self._option(): + self._or_expression_() + self._error('no available options') @graken() - def _expression_parens_(self): + def _or_expression_(self): + self._expression1_() + self._or_op_() + self._expression_() + + @graken() + def _and_expression_(self): + self._expression2_() + self._and_op_() + self._expression1_() + + @graken() + def _not_expression_(self): + self._not_op_() + self._expression3_() + + @graken() + def _parens_expression_(self): self._token('(') self._expression_() self._token(')') @graken() - def _name_(self): - self._pattern(r'[a-z][a-z0-9_]+') + def _expression1_(self): + with self._choice(): + with self._option(): + self._expression2_() + with self._ifnot(): + with self._if(): + self._and_op_() + with self._option(): + self._and_expression_() + self._error('no available options') @graken() - def _and_op_(self): + def _expression2_(self): with self._choice(): with self._option(): - self._token('&') + with self._ifnot(): + with self._if(): + self._not_op_() + self._expression3_() + with self._option(): + self._not_expression_() + self._error('no available options') + + @graken() + def _expression3_(self): + with self._choice(): + with self._option(): + self._parens_expression_() + with self._option(): + self._name_() + self._error('no available options') + + @graken() + def _and_op_(self): + with self._choice(): with self._option(): self._token('&&') + with self._option(): + self._token('&') with self._option(): self._token('AND') with self._option(): @@ -103,10 +140,10 @@ class TagsParser(Parser): @graken() def _or_op_(self): with self._choice(): - with self._option(): - self._token('|') with self._option(): self._token('||') + with self._option(): + self._token('|') with self._option(): self._token('OR') with self._option(): @@ -124,6 +161,10 @@ class TagsParser(Parser): self._token('not') self._error('expecting one of: ! NOT not') + @graken() + def _name_(self): + self._pattern(r'[a-z][a-z0-9_]+') + class TagsSemantics(object): def start(self, ast): @@ -132,10 +173,25 @@ class TagsSemantics(object): def expression(self, ast): return ast - def expression_parens(self, ast): + def or_expression(self, ast): return ast - def name(self, ast): + def and_expression(self, ast): + return ast + + def not_expression(self, ast): + return ast + + def parens_expression(self, ast): + return ast + + def expression1(self, ast): + return ast + + def expression2(self, ast): + return ast + + def expression3(self, ast): return ast def and_op(self, ast): @@ -147,6 +203,9 @@ class TagsSemantics(object): def not_op(self, ast): return ast + def name(self, ast): + return ast + def main( filename, diff --git a/parser/test_parser.py b/parser/test_parser.py index 046c31c..029fbd6 100755 --- a/parser/test_parser.py +++ b/parser/test_parser.py @@ -29,7 +29,10 @@ class TestParser(unittest.TestCase): ('NOT', ('PARENS', ('AND', ('NAME', 'xxx'), ('NAME', 'yyy')))) ) self.assertEqual(self._parse('!xxx&yyy&zzz|ooo'), - ('OR', ('AND', ('AND', ('NOT', ('NAME', 'xxx')), ('NAME', 'yyy')), ('NAME', 'zzz')), ('NAME', 'ooo')) + ('OR', ('AND', + ('NOT', ('NAME', 'xxx')), + ('AND', ('NAME', 'yyy'), ('NAME', 'zzz'))), + ('NAME', 'ooo')) ) self.assertEqual(self._parse('!(xxx && yyy)'), ('NOT', ('PARENS', ('AND', ('NAME', 'xxx'), ('NAME', 'yyy')))) -- 2.39.2