git.phdru.name Git - phdru.name/cgi-bin/blog-ru/search-tags.git/commitdiff
Use parsimonious instead of grako
author Oleg Broytman <phd@phdru.name>
Fri, 21 Apr 2017 22:08:51 +0000 (01:08 +0300)
committer Oleg Broytman <phd@phdru.name>
Fri, 21 Apr 2017 22:45:35 +0000 (01:45 +0300)
parser/Makefile
parser/build_ast.py [deleted file]
parser/grammar.ebnf
parser/parser.py [changed mode: 0755->0644]
parser/test_parser.py

index cb441842e779254000adcc258c9f15d55b1dfe1d..e45b1516a65d79d3658a45c22dc18503bf513c2f 100644 (file)
@@ -1,14 +1,3 @@
-# Makefile.
-#
-# __author__ = "Oleg Broytman <phd@phdru.name>"
-# __copyright__ = "Copyright (C) 2016 PhiloSoft Design"
-
-parser.py: grammar.ebnf
-       grako -o $@ $<
-       python -m compileall $@
-       chmod +x $@
-
-
 .PHONY: test
 test: parser.py
        ./test_parser.py
diff --git a/parser/build_ast.py b/parser/build_ast.py
deleted file mode 100644 (file)
index 42bf920..0000000
+++ /dev/null
@@ -1,27 +0,0 @@
-
-from parser import TagsSemantics as _TagsSemantics
-
-class TagsSemantics(_TagsSemantics):
-    def or_expression(self, ast):
-        return ('OR', ast[0], ast[2])
-
-    def and_expression(self, ast):
-        return ('AND', ast[0], ast[2])
-
-    def not_expression(self, ast):
-        return ('NOT', ast[1])
-
-    def parens_expression(self, ast):
-        return ('PARENS', ast[1])
-
-    def name(self, ast):
-        return ('NAME', str(ast))
-
-    def and_op(self, ast):
-        return 'AND'
-
-    def or_op(self, ast):
-        return 'OR'
-
-    def not_op(self, ast):
-        return 'NOT'
index ce21b642621acec201493329aa4b1126d6dbfd01..935e6a4d3477f7bfa557fde4a28c5dca83786eaa 100644 (file)
@@ -1,4 +1,4 @@
-# Grammar rules for tag searching; EBNF.
+# Grammar rules for tag searching
 
 # The grammar defines expressions in the following forms:
 #  TAG - search blog posts that contain the tag;
 # This  is a simple version of the grammar and it allows
 # rather stupid expressions, like (TAG) or ((TAG)) or !(!(TAG)).
 
-@@grammar :: Tags
+expression = or_expression / aterm_expression
 
-start = expression $ ;
+or_expression = aterm_expression or_op expression
 
-expression = expression1 !or_op | or_expression ;
+and_expression = term_expression and_op aterm_expression
 
-or_expression = expression1 or_op expression ;
+not_expression = not_op space0 (parens_expression / name)
 
-and_expression = expression2 and_op expression1 ;
+aterm_expression = and_expression / term_expression
 
-not_expression = not_op expression3 ;
+term_expression = not_expression / parens_expression / (name space_b4letter)
 
-parens_expression = '(' expression ')' ;
+parens_expression = "(" space0 expression space0 ")"
 
-expression1 = expression2 !and_op | and_expression ;
+and_op = (space0 ("&&" / "&") space0) / (space0 ("AND" / "and") space_b4letter)
 
-expression2 = !not_op expression3 | not_expression ;
+or_op = (space0 ("||" / "|") space0) / (space0 ("OR" / "or") space_b4letter)
 
-expression3 = parens_expression | name ;
+not_op = (space0 "!" space0) / (space0 ("NOT" / "not") space_b4letter)
 
-and_op = '&&' | '&' | 'AND' | 'and' ;
+letter = ~"[a-z]"i
 
-or_op = '||' | '|' | 'OR' | 'or' ;
+name = ~"[a-z][a-z0-9_]*"
 
-not_op = '!' | 'NOT' | 'not' ;
+space_b4letter = (space1 &letter) / space0
 
-name = /[a-z][a-z0-9_]+/ ;
+space0 = ~" *"
+
+space1 = ~" +"
+
+# vim: set ft=text :
old mode 100755 (executable)
new mode 100644 (file)
index a1dd034..dc562dd
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-# CAVEAT UTILITOR
-#
-# This file was automatically generated by Grako.
-#
-#    https://pypi.python.org/pypi/grako/
-#
-# Any changes you make to it will be overwritten the next time
-# the file is generated.
-
-
-from __future__ import print_function, division, absolute_import, unicode_literals
-
-from grako.parsing import graken, Parser
-from grako.util import re, RE_FLAGS, generic_main  # noqa
-
-
-__version__ = (2016, 7, 11, 18, 15, 20, 0)
-
-__all__ = [
-    'TagsParser',
-    'TagsSemantics',
-    'main'
-]
-
-KEYWORDS = set([])
-
-
-class TagsParser(Parser):
-    def __init__(self,
-                 whitespace=None,
-                 nameguard=None,
-                 comments_re=None,
-                 eol_comments_re=None,
-                 ignorecase=None,
-                 left_recursion=True,
-                 keywords=KEYWORDS,
-                 namechars='',
-                 **kwargs):
-        super(TagsParser, self).__init__(
-            whitespace=whitespace,
-            nameguard=nameguard,
-            comments_re=comments_re,
-            eol_comments_re=eol_comments_re,
-            ignorecase=ignorecase,
-            left_recursion=left_recursion,
-            keywords=keywords,
-            namechars=namechars,
-            **kwargs
-        )
-
-    @graken()
-    def _start_(self):
-        self._expression_()
-        self._check_eof()
-
-    @graken()
-    def _expression_(self):
-        with self._choice():
-            with self._option():
-                self._expression1_()
-                with self._ifnot():
-                    self._or_op_()
-            with self._option():
-                self._or_expression_()
-            self._error('no available options')
-
-    @graken()
-    def _or_expression_(self):
-        self._expression1_()
-        self._or_op_()
-        self._expression_()
-
-    @graken()
-    def _and_expression_(self):
-        self._expression2_()
-        self._and_op_()
-        self._expression1_()
-
-    @graken()
-    def _not_expression_(self):
-        self._not_op_()
-        self._expression3_()
-
-    @graken()
-    def _parens_expression_(self):
-        self._token('(')
-        self._expression_()
-        self._token(')')
-
-    @graken()
-    def _expression1_(self):
-        with self._choice():
-            with self._option():
-                self._expression2_()
-                with self._ifnot():
-                    self._and_op_()
-            with self._option():
-                self._and_expression_()
-            self._error('no available options')
-
-    @graken()
-    def _expression2_(self):
-        with self._choice():
-            with self._option():
-                with self._ifnot():
-                    self._not_op_()
-                self._expression3_()
-            with self._option():
-                self._not_expression_()
-            self._error('no available options')
-
-    @graken()
-    def _expression3_(self):
-        with self._choice():
-            with self._option():
-                self._parens_expression_()
-            with self._option():
-                self._name_()
-            self._error('no available options')
-
-    @graken()
-    def _and_op_(self):
-        with self._choice():
-            with self._option():
-                self._token('&&')
-            with self._option():
-                self._token('&')
-            with self._option():
-                self._token('AND')
-            with self._option():
-                self._token('and')
-            self._error('expecting one of: & && AND and')
-
-    @graken()
-    def _or_op_(self):
-        with self._choice():
-            with self._option():
-                self._token('||')
-            with self._option():
-                self._token('|')
-            with self._option():
-                self._token('OR')
-            with self._option():
-                self._token('or')
-            self._error('expecting one of: OR or | ||')
-
-    @graken()
-    def _not_op_(self):
-        with self._choice():
-            with self._option():
-                self._token('!')
-            with self._option():
-                self._token('NOT')
-            with self._option():
-                self._token('not')
-            self._error('expecting one of: ! NOT not')
-
-    @graken()
-    def _name_(self):
-        self._pattern(r'[a-z][a-z0-9_]+')
-
-
-class TagsSemantics(object):
-    def start(self, ast):
-        return ast
-
-    def expression(self, ast):
-        return ast
-
-    def or_expression(self, ast):
-        return ast
-
-    def and_expression(self, ast):
-        return ast
-
-    def not_expression(self, ast):
-        return ast
-
-    def parens_expression(self, ast):
-        return ast
-
-    def expression1(self, ast):
-        return ast
-
-    def expression2(self, ast):
-        return ast
-
-    def expression3(self, ast):
-        return ast
-
-    def and_op(self, ast):
-        return ast
-
-    def or_op(self, ast):
-        return ast
-
-    def not_op(self, ast):
-        return ast
-
-    def name(self, ast):
-        return ast
-
-
-def main(
-        filename,
-        startrule,
-        trace=False,
-        whitespace=None,
-        nameguard=None,
-        comments_re=None,
-        eol_comments_re=None,
-        ignorecase=None,
-        left_recursion=True,
-        **kwargs):
-
-    with open(filename) as f:
-        text = f.read()
-    whitespace = whitespace or None
-    parser = TagsParser(parseinfo=False)
-    ast = parser.parse(
-        text,
-        startrule,
-        filename=filename,
-        trace=trace,
-        whitespace=whitespace,
-        nameguard=nameguard,
-        ignorecase=ignorecase,
-        **kwargs)
-    return ast
-
-if __name__ == '__main__':
-    import json
-    ast = generic_main(main, TagsParser, name='Tags')
-    print('AST:')
-    print(ast)
-    print()
-    print('JSON:')
-    print(json.dumps(ast, indent=2))
-    print()
+import os
+from parsimonious import Grammar, NodeVisitor
+
+
+# cache
+_grammar = None
+
+
+def load_grammar():
+    """Read ``grammar.ebnf`` and cache the resulting grammar object.
+
+    The file is looked up in ``os.path.dirname(__file__)``, i.e. next to
+    this module.  Its text is passed to ``parsimonious.Grammar`` and the
+    result is stored in the module-level ``_grammar``.  Every call re-reads
+    the file and rebinds the cache; nothing is returned.
+    """
+    global _grammar
+    parser_dir = os.path.dirname(__file__)
+    with open(os.path.join(parser_dir, 'grammar.ebnf'), 'rt') as grammar_file:
+        grammar_text = grammar_file.read()
+    _grammar = Grammar(grammar_text)
+
+
+def parse(input):
+    """Parse ``input`` with the cached grammar and return the parse tree.
+
+    The grammar is loaded lazily: ``load_grammar()`` runs only while the
+    module-level ``_grammar`` is still ``None``.  Any exception raised by
+    ``Grammar.parse`` is propagated unchanged (there is no handler here).
+    """
+    if _grammar is None:
+        load_grammar()
+    return _grammar.parse(input)
+
+
+def cleanup_children(visited_children):
+    """Collapse a list of visited children to its meaningful part.
+
+    Falsy entries are dropped.  If exactly one entry remains it is returned
+    itself; otherwise the filtered list is returned (it may be empty).
+    """
+    children = [c for c in visited_children if c]
+    if len(children) == 1:
+        return children[0]
+    else:
+        return children
+
+
+class Compiler(NodeVisitor):
+    """Visitor that turns a parse tree into nested tuples.
+
+    Produced node shapes: ``('OR', left, right)``, ``('AND', left, right)``,
+    ``('NOT', operand)``, ``('PARENS', inner)`` and ``('NAME', text)``.
+    The child indices used below follow the element order of the
+    corresponding rules in ``grammar.ebnf``.
+    """
+
+    def generic_visit(self, node, visited_children):
+        # Rules without a dedicated visit_* method just pass their
+        # non-falsy children upwards (see cleanup_children).
+        return cleanup_children(visited_children)
+
+    def visit_or_expression(self, node, visited_children):
+        # or_expression = aterm_expression or_op expression
+        # -> operands are children 0 and 2, child 1 is the operator.
+        return ('OR', visited_children[0], visited_children[2])
+
+    def visit_and_expression(self, node, visited_children):
+        # and_expression = term_expression and_op aterm_expression
+        # -> operands are children 0 and 2, child 1 is the operator.
+        return ('AND', visited_children[0], visited_children[2])
+
+    def visit_not_expression(self, node, visited_children):
+        # not_expression = not_op space0 (parens_expression / name)
+        # -> the negated operand is child 2.
+        return ('NOT', visited_children[2])
+
+    def visit_parens_expression(self, node, visited_children):
+        # parens_expression = "(" space0 expression space0 ")"
+        # -> the wrapped expression is child 2.
+        return ('PARENS', visited_children[2])
+
+    def visit_name(self, node, visited_children):
+        # The tag name is the text matched by the node.
+        return ('NAME', node.text)
+
+
+def compile(tree):
+    """Compile a tag expression into nested tuples.
+
+    ``tree`` may be a ``str``, in which case it is parsed with ``parse()``
+    first; any other value is handed to the visitor as is.  Returns the
+    result of ``Compiler().visit(tree)``.
+
+    NOTE: within this module the name shadows the builtin ``compile``.
+    """
+    if isinstance(tree, str):
+        tree = parse(tree)
+    return Compiler().visit(tree)
index 029fbd6fe576dd84e126f18739280b8e83b886d0..0a30eda4217dd655b99c272b6001151c598fd6e6 100755 (executable)
@@ -1,25 +1,18 @@
 #! /usr/bin/env python
 
-
 import unittest
-from grako.exceptions import FailedParse
-
+from parsimonious import ParseError
+from parser import compile
 
 class TestParser(unittest.TestCase):
-    def test_01_import(self):
-        global parser, TagsSemantics
-        from parser import TagsParser
-        from build_ast import TagsSemantics
-        parser = TagsParser(parseinfo=False)
-
     def _parse(self, input):
-        return parser.parse(input, semantics=TagsSemantics())
+        return compile(input)
 
     def test_02_tag(self):
         self.assertEqual(self._parse('xxx'), ('NAME', 'xxx'))
 
     def test_03_bad_tag(self):
-        self.assertRaises(FailedParse, self._parse, 'XXX')
+        self.assertRaises(ParseError, self._parse, 'XXX')
 
     def test_04_expression(self):
         self.assertEqual(self._parse('!(xxx&yyy)'),
@@ -55,9 +48,22 @@ class TestParser(unittest.TestCase):
         self.assertEqual(self._parse('NOT xxx'),
             ('NOT', ('NAME', 'xxx'))
         )
+        self.assertEqual(self._parse('NOT (xxx & yyy) AND zzz | ooo'),
+            ('OR',
+                ('AND',
+                    ('NOT',
+                        ('PARENS',
+                            ('AND', ('NAME', 'xxx'), ('NAME', 'yyy'))
+                        )
+                    ),
+                    ('NAME', 'zzz')
+                ),
+                ('NAME', 'ooo')
+            )
+        )
 
     def test_05_bad_expression(self):
-        self.assertRaises(FailedParse, self._parse, '!(xxx&yyy')
+        self.assertRaises(ParseError, self._parse, '!(xxx&yyy')
 
 if __name__ == "__main__":
     unittest.main()