From fcd51c190450165c3315ed7e4f77bc1415db3d48 Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Sat, 22 Apr 2017 01:08:51 +0300 Subject: [PATCH] Use parsimonious instead of grako --- parser/Makefile | 11 -- parser/build_ast.py | 27 ---- parser/grammar.ebnf | 34 ++--- parser/parser.py | 296 ++++++++---------------------------------- parser/test_parser.py | 30 +++-- 5 files changed, 91 insertions(+), 307 deletions(-) delete mode 100644 parser/build_ast.py mode change 100755 => 100644 parser/parser.py diff --git a/parser/Makefile b/parser/Makefile index cb44184..e45b151 100644 --- a/parser/Makefile +++ b/parser/Makefile @@ -1,14 +1,3 @@ -# Makefile. -# -# __author__ = "Oleg Broytman " -# __copyright__ = "Copyright (C) 2016 PhiloSoft Design" - -parser.py: grammar.ebnf - grako -o $@ $< - python -m compileall $@ - chmod +x $@ - - .PHONY: test test: parser.py ./test_parser.py diff --git a/parser/build_ast.py b/parser/build_ast.py deleted file mode 100644 index 42bf920..0000000 --- a/parser/build_ast.py +++ /dev/null @@ -1,27 +0,0 @@ - -from parser import TagsSemantics as _TagsSemantics - -class TagsSemantics(_TagsSemantics): - def or_expression(self, ast): - return ('OR', ast[0], ast[2]) - - def and_expression(self, ast): - return ('AND', ast[0], ast[2]) - - def not_expression(self, ast): - return ('NOT', ast[1]) - - def parens_expression(self, ast): - return ('PARENS', ast[1]) - - def name(self, ast): - return ('NAME', str(ast)) - - def and_op(self, ast): - return 'AND' - - def or_op(self, ast): - return 'OR' - - def not_op(self, ast): - return 'NOT' diff --git a/parser/grammar.ebnf b/parser/grammar.ebnf index ce21b64..935e6a4 100644 --- a/parser/grammar.ebnf +++ b/parser/grammar.ebnf @@ -1,4 +1,4 @@ -# Grammar rules for tag searching; EBNF. +# Grammar rules for tag searching # The grammar defines expressions in the following forms: # TAG - search blog posts that contain the tag; @@ -15,30 +15,34 @@ # This is a simple version of the grammar and it allows # rather stupid expressions, like (TAG) or ((TAG)) or !(!(TAG)). -@@grammar :: Tags +expression = or_expression / aterm_expression -start = expression $ ; +or_expression = aterm_expression or_op expression -expression = expression1 !or_op | or_expression ; +and_expression = term_expression and_op aterm_expression -or_expression = expression1 or_op expression ; +not_expression = not_op space0 (parens_expression / name) -and_expression = expression2 and_op expression1 ; +aterm_expression = and_expression / term_expression -not_expression = not_op expression3 ; +term_expression = not_expression / parens_expression / (name space_b4letter) -parens_expression = '(' expression ')' ; +parens_expression = "(" space0 expression space0 ")" -expression1 = expression2 !and_op | and_expression ; +and_op = (space0 ("&&" / "&") space0) / (space0 ("AND" / "and") space_b4letter) -expression2 = !not_op expression3 | not_expression ; +or_op = (space0 ("||" / "|") space0) / (space0 ("OR" / "or") space_b4letter) -expression3 = parens_expression | name ; +not_op = (space0 "!" space0) / (space0 ("NOT" / "not") space_b4letter) -and_op = '&&' | '&' | 'AND' | 'and' ; +letter = ~"[a-z]"i -or_op = '||' | '|' | 'OR' | 'or' ; +name = ~"[a-z][a-z0-9_]*" -not_op = '!' | 'NOT' | 'not' ; +space_b4letter = (space1 &letter) / space0 -name = /[a-z][a-z0-9_]+/ ; +space0 = ~" *" + +space1 = ~" +" + +# vim: set ft=text : diff --git a/parser/parser.py b/parser/parser.py old mode 100755 new mode 100644 index a1dd034..dc562dd --- a/parser/parser.py +++ b/parser/parser.py @@ -1,242 +1,54 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -# CAVEAT UTILITOR -# -# This file was automatically generated by Grako. -# -# https://pypi.python.org/pypi/grako/ -# -# Any changes you make to it will be overwritten the next time -# the file is generated. - - -from __future__ import print_function, division, absolute_import, unicode_literals - -from grako.parsing import graken, Parser -from grako.util import re, RE_FLAGS, generic_main # noqa - - -__version__ = (2016, 7, 11, 18, 15, 20, 0) - -__all__ = [ - 'TagsParser', - 'TagsSemantics', - 'main' -] - -KEYWORDS = set([]) - - -class TagsParser(Parser): - def __init__(self, - whitespace=None, - nameguard=None, - comments_re=None, - eol_comments_re=None, - ignorecase=None, - left_recursion=True, - keywords=KEYWORDS, - namechars='', - **kwargs): - super(TagsParser, self).__init__( - whitespace=whitespace, - nameguard=nameguard, - comments_re=comments_re, - eol_comments_re=eol_comments_re, - ignorecase=ignorecase, - left_recursion=left_recursion, - keywords=keywords, - namechars=namechars, - **kwargs - ) - - @graken() - def _start_(self): - self._expression_() - self._check_eof() - - @graken() - def _expression_(self): - with self._choice(): - with self._option(): - self._expression1_() - with self._ifnot(): - self._or_op_() - with self._option(): - self._or_expression_() - self._error('no available options') - - @graken() - def _or_expression_(self): - self._expression1_() - self._or_op_() - self._expression_() - - @graken() - def _and_expression_(self): - self._expression2_() - self._and_op_() - self._expression1_() - - @graken() - def _not_expression_(self): - self._not_op_() - self._expression3_() - - @graken() - def _parens_expression_(self): - self._token('(') - self._expression_() - self._token(')') - - @graken() - def _expression1_(self): - with self._choice(): - with self._option(): - self._expression2_() - with self._ifnot(): - self._and_op_() - with self._option(): - self._and_expression_() - self._error('no available options') - - @graken() - def _expression2_(self): - with self._choice(): - with self._option(): - with self._ifnot(): - self._not_op_() - self._expression3_() - with self._option(): - self._not_expression_() - self._error('no available options') - - @graken() - def _expression3_(self): - with self._choice(): - with self._option(): - self._parens_expression_() - with self._option(): - self._name_() - self._error('no available options') - - @graken() - def _and_op_(self): - with self._choice(): - with self._option(): - self._token('&&') - with self._option(): - self._token('&') - with self._option(): - self._token('AND') - with self._option(): - self._token('and') - self._error('expecting one of: & && AND and') - - @graken() - def _or_op_(self): - with self._choice(): - with self._option(): - self._token('||') - with self._option(): - self._token('|') - with self._option(): - self._token('OR') - with self._option(): - self._token('or') - self._error('expecting one of: OR or | ||') - - @graken() - def _not_op_(self): - with self._choice(): - with self._option(): - self._token('!') - with self._option(): - self._token('NOT') - with self._option(): - self._token('not') - self._error('expecting one of: ! NOT not') - - @graken() - def _name_(self): - self._pattern(r'[a-z][a-z0-9_]+') - - -class TagsSemantics(object): - def start(self, ast): - return ast - - def expression(self, ast): - return ast - - def or_expression(self, ast): - return ast - - def and_expression(self, ast): - return ast - - def not_expression(self, ast): - return ast - - def parens_expression(self, ast): - return ast - - def expression1(self, ast): - return ast - - def expression2(self, ast): - return ast - - def expression3(self, ast): - return ast - - def and_op(self, ast): - return ast - - def or_op(self, ast): - return ast - - def not_op(self, ast): - return ast - - def name(self, ast): - return ast - - -def main( - filename, - startrule, - trace=False, - whitespace=None, - nameguard=None, - comments_re=None, - eol_comments_re=None, - ignorecase=None, - left_recursion=True, - **kwargs): - - with open(filename) as f: - text = f.read() - whitespace = whitespace or None - parser = TagsParser(parseinfo=False) - ast = parser.parse( - text, - startrule, - filename=filename, - trace=trace, - whitespace=whitespace, - nameguard=nameguard, - ignorecase=ignorecase, - **kwargs) - return ast - -if __name__ == '__main__': - import json - ast = generic_main(main, TagsParser, name='Tags') - print('AST:') - print(ast) - print() - print('JSON:') - print(json.dumps(ast, indent=2)) - print() +import os +from parsimonious import Grammar, NodeVisitor + + +# cache +_grammar = None + + +def load_grammar(): + global _grammar + parser_dir = os.path.dirname(__file__) + with open(os.path.join(parser_dir, 'grammar.ebnf'), 'rt') as grammar_file: + grammar_text = grammar_file.read() + _grammar = Grammar(grammar_text) + + +def parse(input): + if _grammar is None: + load_grammar() + return _grammar.parse(input) + + +def cleanup_children(visited_children): + children = [c for c in visited_children if c] + if len(children) == 1: + return children[0] + else: + return children + + +class Compiler(NodeVisitor): + def generic_visit(self, node, visited_children): + return cleanup_children(visited_children) + + def visit_or_expression(self, node, visited_children): + return ('OR', visited_children[0], visited_children[2]) + + def visit_and_expression(self, node, visited_children): + return ('AND', visited_children[0], visited_children[2]) + + def visit_not_expression(self, node, visited_children): + return ('NOT', visited_children[2]) + + def visit_parens_expression(self, node, visited_children): + return ('PARENS', visited_children[2]) + + def visit_name(self, node, visited_children): + return ('NAME', node.text) + + +def compile(tree): + if isinstance(tree, str): + tree = parse(tree) + return Compiler().visit(tree) diff --git a/parser/test_parser.py b/parser/test_parser.py index 029fbd6..0a30eda 100755 --- a/parser/test_parser.py +++ b/parser/test_parser.py @@ -1,25 +1,18 @@ #! /usr/bin/env python - import unittest -from grako.exceptions import FailedParse - +from parsimonious import ParseError +from parser import compile class TestParser(unittest.TestCase): - def test_01_import(self): - global parser, TagsSemantics - from parser import TagsParser - from build_ast import TagsSemantics - parser = TagsParser(parseinfo=False) - def _parse(self, input): - return parser.parse(input, semantics=TagsSemantics()) + return compile(input) def test_02_tag(self): self.assertEqual(self._parse('xxx'), ('NAME', 'xxx')) def test_03_bad_tag(self): - self.assertRaises(FailedParse, self._parse, 'XXX') + self.assertRaises(ParseError, self._parse, 'XXX') def test_04_expression(self): self.assertEqual(self._parse('!(xxx&yyy)'), @@ -55,9 +48,22 @@ class TestParser(unittest.TestCase): self.assertEqual(self._parse('NOT xxx'), ('NOT', ('NAME', 'xxx')) ) + self.assertEqual(self._parse('NOT (xxx & yyy) AND zzz | ooo'), + ('OR', + ('AND', + ('NOT', + ('PARENS', + ('AND', ('NAME', 'xxx'), ('NAME', 'yyy')) + ) + ), + ('NAME', 'zzz') + ), + ('NAME', 'ooo') + ) + ) def test_05_bad_expression(self): - self.assertRaises(FailedParse, self._parse, '!(xxx&yyy') + self.assertRaises(ParseError, self._parse, '!(xxx&yyy') if __name__ == "__main__": unittest.main() -- 2.39.2