-# Grammar rules for tag searching; EBNF.
+# Grammar rules for tag searching
# The grammar defines expressions in the following forms:
# TAG - search blog posts that contain the tag;
# This is a simple version of the grammar and it allows
# rather stupid expressions, like (TAG) or ((TAG)) or !(!(TAG)).
-@@grammar :: Tags
+expression = or_expression / aterm_expression
-start = expression $ ;
+or_expression = aterm_expression or_op expression
-expression = expression1 !or_op | or_expression ;
+and_expression = term_expression and_op aterm_expression
-or_expression = expression1 or_op expression ;
+not_expression = not_op space0 (parens_expression / name)
-and_expression = expression2 and_op expression1 ;
+aterm_expression = and_expression / term_expression
-not_expression = not_op expression3 ;
+term_expression = not_expression / parens_expression / (name space_b4letter)
-parens_expression = '(' expression ')' ;
+parens_expression = "(" space0 expression space0 ")"
-expression1 = expression2 !and_op | and_expression ;
+and_op = (space0 ("&&" / "&") space0) / (space0 ("AND" / "and") space_b4letter)
-expression2 = !not_op expression3 | not_expression ;
+or_op = (space0 ("||" / "|") space0) / (space0 ("OR" / "or") space_b4letter)
-expression3 = parens_expression | name ;
+not_op = (space0 "!" space0) / (space0 ("NOT" / "not") space_b4letter)
-and_op = '&&' | '&' | 'AND' | 'and' ;
+letter = ~"[a-z]"i
-or_op = '||' | '|' | 'OR' | 'or' ;
+name = ~"[a-z][a-z0-9_]*"
-not_op = '!' | 'NOT' | 'not' ;
+# Separator after a name or a word operator (AND/OR/NOT): either spaces
+# followed by a letter, or optional spaces followed by a non-letter (or the
+# end of input).  The "!letter" guard keeps a word operator from matching
+# the prefix of a tag: "notebook" must be a name, not "not" + "ebook".
+space_b4letter = (space1 &letter) / (space0 !letter)
-name = /[a-z][a-z0-9_]+/ ;
+space0 = ~" *"
+
+space1 = ~" +"
+
+# vim: set ft=text :
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-# CAVEAT UTILITOR
-#
-# This file was automatically generated by Grako.
-#
-# https://pypi.python.org/pypi/grako/
-#
-# Any changes you make to it will be overwritten the next time
-# the file is generated.
-
-
-from __future__ import print_function, division, absolute_import, unicode_literals
-
-from grako.parsing import graken, Parser
-from grako.util import re, RE_FLAGS, generic_main # noqa
-
-
-__version__ = (2016, 7, 11, 18, 15, 20, 0)
-
-__all__ = [
- 'TagsParser',
- 'TagsSemantics',
- 'main'
-]
-
-KEYWORDS = set([])
-
-
-class TagsParser(Parser):
- def __init__(self,
- whitespace=None,
- nameguard=None,
- comments_re=None,
- eol_comments_re=None,
- ignorecase=None,
- left_recursion=True,
- keywords=KEYWORDS,
- namechars='',
- **kwargs):
- super(TagsParser, self).__init__(
- whitespace=whitespace,
- nameguard=nameguard,
- comments_re=comments_re,
- eol_comments_re=eol_comments_re,
- ignorecase=ignorecase,
- left_recursion=left_recursion,
- keywords=keywords,
- namechars=namechars,
- **kwargs
- )
-
- @graken()
- def _start_(self):
- self._expression_()
- self._check_eof()
-
- @graken()
- def _expression_(self):
- with self._choice():
- with self._option():
- self._expression1_()
- with self._ifnot():
- self._or_op_()
- with self._option():
- self._or_expression_()
- self._error('no available options')
-
- @graken()
- def _or_expression_(self):
- self._expression1_()
- self._or_op_()
- self._expression_()
-
- @graken()
- def _and_expression_(self):
- self._expression2_()
- self._and_op_()
- self._expression1_()
-
- @graken()
- def _not_expression_(self):
- self._not_op_()
- self._expression3_()
-
- @graken()
- def _parens_expression_(self):
- self._token('(')
- self._expression_()
- self._token(')')
-
- @graken()
- def _expression1_(self):
- with self._choice():
- with self._option():
- self._expression2_()
- with self._ifnot():
- self._and_op_()
- with self._option():
- self._and_expression_()
- self._error('no available options')
-
- @graken()
- def _expression2_(self):
- with self._choice():
- with self._option():
- with self._ifnot():
- self._not_op_()
- self._expression3_()
- with self._option():
- self._not_expression_()
- self._error('no available options')
-
- @graken()
- def _expression3_(self):
- with self._choice():
- with self._option():
- self._parens_expression_()
- with self._option():
- self._name_()
- self._error('no available options')
-
- @graken()
- def _and_op_(self):
- with self._choice():
- with self._option():
- self._token('&&')
- with self._option():
- self._token('&')
- with self._option():
- self._token('AND')
- with self._option():
- self._token('and')
- self._error('expecting one of: & && AND and')
-
- @graken()
- def _or_op_(self):
- with self._choice():
- with self._option():
- self._token('||')
- with self._option():
- self._token('|')
- with self._option():
- self._token('OR')
- with self._option():
- self._token('or')
- self._error('expecting one of: OR or | ||')
-
- @graken()
- def _not_op_(self):
- with self._choice():
- with self._option():
- self._token('!')
- with self._option():
- self._token('NOT')
- with self._option():
- self._token('not')
- self._error('expecting one of: ! NOT not')
-
- @graken()
- def _name_(self):
- self._pattern(r'[a-z][a-z0-9_]+')
-
-
-class TagsSemantics(object):
- def start(self, ast):
- return ast
-
- def expression(self, ast):
- return ast
-
- def or_expression(self, ast):
- return ast
-
- def and_expression(self, ast):
- return ast
-
- def not_expression(self, ast):
- return ast
-
- def parens_expression(self, ast):
- return ast
-
- def expression1(self, ast):
- return ast
-
- def expression2(self, ast):
- return ast
-
- def expression3(self, ast):
- return ast
-
- def and_op(self, ast):
- return ast
-
- def or_op(self, ast):
- return ast
-
- def not_op(self, ast):
- return ast
-
- def name(self, ast):
- return ast
-
-
-def main(
- filename,
- startrule,
- trace=False,
- whitespace=None,
- nameguard=None,
- comments_re=None,
- eol_comments_re=None,
- ignorecase=None,
- left_recursion=True,
- **kwargs):
-
- with open(filename) as f:
- text = f.read()
- whitespace = whitespace or None
- parser = TagsParser(parseinfo=False)
- ast = parser.parse(
- text,
- startrule,
- filename=filename,
- trace=trace,
- whitespace=whitespace,
- nameguard=nameguard,
- ignorecase=ignorecase,
- **kwargs)
- return ast
-
-if __name__ == '__main__':
- import json
- ast = generic_main(main, TagsParser, name='Tags')
- print('AST:')
- print(ast)
- print()
- print('JSON:')
- print(json.dumps(ast, indent=2))
- print()
+import os
+from parsimonious import Grammar, NodeVisitor
+
+
+# Module-level cache for the compiled grammar; stays None until
+# load_grammar() stores a Grammar instance here.
+_grammar = None
+
+
+def load_grammar():
+ global _grammar
+ parser_dir = os.path.dirname(__file__)
+ with open(os.path.join(parser_dir, 'grammar.ebnf'), 'rt') as grammar_file:
+ grammar_text = grammar_file.read()
+ _grammar = Grammar(grammar_text)
+
+
+def parse(input):
+ if _grammar is None:
+ load_grammar()
+ return _grammar.parse(input)
+
+
+def cleanup_children(visited_children):
+ children = [c for c in visited_children if c]
+ if len(children) == 1:
+ return children[0]
+ else:
+ return children
+
+
+class Compiler(NodeVisitor):
+ def generic_visit(self, node, visited_children):
+ return cleanup_children(visited_children)
+
+ def visit_or_expression(self, node, visited_children):
+ return ('OR', visited_children[0], visited_children[2])
+
+ def visit_and_expression(self, node, visited_children):
+ return ('AND', visited_children[0], visited_children[2])
+
+ def visit_not_expression(self, node, visited_children):
+ return ('NOT', visited_children[2])
+
+ def visit_parens_expression(self, node, visited_children):
+ return ('PARENS', visited_children[2])
+
+ def visit_name(self, node, visited_children):
+ return ('NAME', node.text)
+
+
+def compile(tree):
+ if isinstance(tree, str):
+ tree = parse(tree)
+ return Compiler().visit(tree)
#! /usr/bin/env python
-
import unittest
-from grako.exceptions import FailedParse
-
+from parsimonious import ParseError
+from parser import compile
class TestParser(unittest.TestCase):
- def test_01_import(self):
- global parser, TagsSemantics
- from parser import TagsParser
- from build_ast import TagsSemantics
- parser = TagsParser(parseinfo=False)
-
def _parse(self, input):
- return parser.parse(input, semantics=TagsSemantics())
+ return compile(input)
def test_02_tag(self):
self.assertEqual(self._parse('xxx'), ('NAME', 'xxx'))
def test_03_bad_tag(self):
- self.assertRaises(FailedParse, self._parse, 'XXX')
+ self.assertRaises(ParseError, self._parse, 'XXX')
def test_04_expression(self):
self.assertEqual(self._parse('!(xxx&yyy)'),
self.assertEqual(self._parse('NOT xxx'),
('NOT', ('NAME', 'xxx'))
)
+ self.assertEqual(self._parse('NOT (xxx & yyy) AND zzz | ooo'),
+ ('OR',
+ ('AND',
+ ('NOT',
+ ('PARENS',
+ ('AND', ('NAME', 'xxx'), ('NAME', 'yyy'))
+ )
+ ),
+ ('NAME', 'zzz')
+ ),
+ ('NAME', 'ooo')
+ )
+ )
def test_05_bad_expression(self):
- self.assertRaises(FailedParse, self._parse, '!(xxx&yyy')
+ self.assertRaises(ParseError, self._parse, '!(xxx&yyy')
if __name__ == "__main__":
unittest.main()