]> git.phdru.name Git - phdru.name/cgi-bin/blog-ru/search-tags.git/commitdiff
Change grammar to support priority of operation
authorOleg Broytman <phd@phdru.name>
Sun, 10 Jul 2016 22:35:47 +0000 (01:35 +0300)
committerOleg Broytman <phd@phdru.name>
Sun, 10 Jul 2016 22:51:26 +0000 (01:51 +0300)
Build priority of OR > AND > NOT > () right into the grammar.

Avoid left recursion - PEG parsers don't like it and grako behaves
strange.

parser/build_ast.py
parser/grammar.ebnf
parser/parser.py
parser/test_parser.py

index de87f84b3a08c8784da9a9f4c31d272fe9cd6644..42bf9207b14e67314d9f0f31f1e94dd6e420caa8 100644 (file)
@@ -2,20 +2,16 @@
 from parser import TagsSemantics as _TagsSemantics
 
 class TagsSemantics(_TagsSemantics):
-    def expression(self, ast):
-        if isinstance(ast, str):  # name
-            return ast
-        if isinstance(ast, unicode):  # name
-            return str(ast)
-        if isinstance(ast, list):
-            ast = tuple(ast)
-        elif not isinstance(ast, tuple):
-            raise TypeError("Expected a list, got %r %s" % (type(ast), ast))
-        if len(ast) == 3:
-            return (ast[1], ast[0], ast[2])
-        return ast
-
-    def expression_parens(self, ast):
+    def or_expression(self, ast):
+        return ('OR', ast[0], ast[2])
+
+    def and_expression(self, ast):
+        return ('AND', ast[0], ast[2])
+
+    def not_expression(self, ast):
+        return ('NOT', ast[1])
+
+    def parens_expression(self, ast):
         return ('PARENS', ast[1])
 
     def name(self, ast):
index b657738ad5f30a6ab29508f8364b212faffa4708..4a702edf2bbcfa7f0bfcecb1c47171580cefacb9 100644 (file)
 # Allowed operators: conjunction - & && AND and
 #                    disjunction - | || OR or
 #                    negation    - ! NOT not
+# Usual priority: NOT recognized before AND, AND before OR.
 # This  is a simple version of the grammar and it allows
-# rather stupid expressions, like !!TAG or ((TAG))
+# rather stupid expressions, like (TAG) or ((TAG)) or !(!(TAG)).
 
 @@grammar :: Tags
 
 start = expression $ ;
 
-expression = (
-           | expression and_op expression
-           | expression or_op expression
-           | not_op expression
-           | expression_parens
-           | name ) ;
+expression = expression1 !&or_op | or_expression ;
 
-expression_parens = '(' expression ')' ;
+or_expression = expression1 or_op expression ;
 
-name = /[a-z][a-z0-9_]+/ ;
+and_expression = expression2 and_op expression1 ;
+
+not_expression = not_op expression3 ;
+
+parens_expression = '(' expression ')' ;
+
+expression1 = expression2 !&and_op | and_expression ;
 
-and_op = '&' | '&&' | 'AND' | 'and' ;
+expression2 = !&not_op expression3 | not_expression ;
 
-or_op = '|' | '||' | 'OR' | 'or' ;
+expression3 = parens_expression | name ;
+
+and_op = '&&' | '&' | 'AND' | 'and' ;
+
+or_op = '||' | '|' | 'OR' | 'or' ;
 
 not_op = '!' | 'NOT' | 'not' ;
+
+name = /[a-z][a-z0-9_]+/ ;
index 460a552e43691262d2fac6ba192c00c69fa783b2..4984f1862f4a4ef165dfc7ba05a3c263e2936056 100755 (executable)
@@ -17,7 +17,7 @@ from grako.parsing import graken, Parser
 from grako.util import re, RE_FLAGS, generic_main  # noqa
 
 
-__version__ = (2016, 7, 9, 18, 10, 4, 5)
+__version__ = (2016, 7, 10, 22, 31, 57, 6)
 
 __all__ = [
     'TagsParser',
@@ -58,42 +58,79 @@ class TagsParser(Parser):
 
     @graken()
     def _expression_(self):
-        with self._group():
-            with self._choice():
-                with self._option():
-                    self._expression_()
-                    self._and_op_()
-                    self._expression_()
-                with self._option():
-                    self._expression_()
-                    self._or_op_()
-                    self._expression_()
-                with self._option():
-                    self._not_op_()
-                    self._expression_()
-                with self._option():
-                    self._expression_parens_()
-                with self._option():
-                    self._name_()
-                self._error('no available options')
+        with self._choice():
+            with self._option():
+                self._expression1_()
+                with self._ifnot():
+                    with self._if():
+                        self._or_op_()
+            with self._option():
+                self._or_expression_()
+            self._error('no available options')
 
     @graken()
-    def _expression_parens_(self):
+    def _or_expression_(self):
+        self._expression1_()
+        self._or_op_()
+        self._expression_()
+
+    @graken()
+    def _and_expression_(self):
+        self._expression2_()
+        self._and_op_()
+        self._expression1_()
+
+    @graken()
+    def _not_expression_(self):
+        self._not_op_()
+        self._expression3_()
+
+    @graken()
+    def _parens_expression_(self):
         self._token('(')
         self._expression_()
         self._token(')')
 
     @graken()
-    def _name_(self):
-        self._pattern(r'[a-z][a-z0-9_]+')
+    def _expression1_(self):
+        with self._choice():
+            with self._option():
+                self._expression2_()
+                with self._ifnot():
+                    with self._if():
+                        self._and_op_()
+            with self._option():
+                self._and_expression_()
+            self._error('no available options')
 
     @graken()
-    def _and_op_(self):
+    def _expression2_(self):
         with self._choice():
             with self._option():
-                self._token('&')
+                with self._ifnot():
+                    with self._if():
+                        self._not_op_()
+                self._expression3_()
+            with self._option():
+                self._not_expression_()
+            self._error('no available options')
+
+    @graken()
+    def _expression3_(self):
+        with self._choice():
+            with self._option():
+                self._parens_expression_()
+            with self._option():
+                self._name_()
+            self._error('no available options')
+
+    @graken()
+    def _and_op_(self):
+        with self._choice():
             with self._option():
                 self._token('&&')
+            with self._option():
+                self._token('&')
             with self._option():
                 self._token('AND')
             with self._option():
@@ -103,10 +140,10 @@ class TagsParser(Parser):
     @graken()
     def _or_op_(self):
         with self._choice():
-            with self._option():
-                self._token('|')
             with self._option():
                 self._token('||')
+            with self._option():
+                self._token('|')
             with self._option():
                 self._token('OR')
             with self._option():
@@ -124,6 +161,10 @@ class TagsParser(Parser):
                 self._token('not')
             self._error('expecting one of: ! NOT not')
 
+    @graken()
+    def _name_(self):
+        self._pattern(r'[a-z][a-z0-9_]+')
+
 
 class TagsSemantics(object):
     def start(self, ast):
@@ -132,10 +173,25 @@ class TagsSemantics(object):
     def expression(self, ast):
         return ast
 
-    def expression_parens(self, ast):
+    def or_expression(self, ast):
         return ast
 
-    def name(self, ast):
+    def and_expression(self, ast):
+        return ast
+
+    def not_expression(self, ast):
+        return ast
+
+    def parens_expression(self, ast):
+        return ast
+
+    def expression1(self, ast):
+        return ast
+
+    def expression2(self, ast):
+        return ast
+
+    def expression3(self, ast):
         return ast
 
     def and_op(self, ast):
@@ -147,6 +203,9 @@ class TagsSemantics(object):
     def not_op(self, ast):
         return ast
 
+    def name(self, ast):
+        return ast
+
 
 def main(
         filename,
index 046c31c056b0f1bd7d11184597c8e64e55121f46..029fbd6fe576dd84e126f18739280b8e83b886d0 100755 (executable)
@@ -29,7 +29,10 @@ class TestParser(unittest.TestCase):
             ('NOT', ('PARENS', ('AND', ('NAME', 'xxx'), ('NAME', 'yyy'))))
         )
         self.assertEqual(self._parse('!xxx&yyy&zzz|ooo'),
-            ('OR', ('AND', ('AND', ('NOT', ('NAME', 'xxx')), ('NAME', 'yyy')), ('NAME', 'zzz')), ('NAME', 'ooo'))
+            ('OR', ('AND',
+                ('NOT', ('NAME', 'xxx')),
+                ('AND', ('NAME', 'yyy'), ('NAME', 'zzz'))),
+            ('NAME', 'ooo'))
         )
         self.assertEqual(self._parse('!(xxx &&  yyy)'),
             ('NOT', ('PARENS', ('AND', ('NAME', 'xxx'), ('NAME', 'yyy'))))