X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;f=parser%2Fparser.py;h=dc562dd5043783f632499d180d283591ccf46ccf;hb=7ddae4701a4608b91ac32bce958468682c1b7960;hp=cf463da5b63433082a0fd360b23586871232506e;hpb=710a8bc3e3cfc6a7497d5e71a1e53fe07d2bd927;p=phdru.name%2Fcgi-bin%2Fblog-ru%2Fsearch-tags.git diff --git a/parser/parser.py b/parser/parser.py index cf463da..dc562dd 100644 --- a/parser/parser.py +++ b/parser/parser.py @@ -1,110 +1,54 @@ -# Parse query +import os +from parsimonious import Grammar, NodeVisitor -from ply import lex, yacc -literals = '()' +# cache +_grammar = None -tokens = ('NAME', 'AND_OP', 'OR_OP', 'NOT_OP', 'SP1') -t_NAME = '([a-z][a-z0-9_]*)|AND|OR|NOT' +def load_grammar(): + global _grammar + parser_dir = os.path.dirname(__file__) + with open(os.path.join(parser_dir, 'grammar.ebnf'), 'rt') as grammar_file: + grammar_text = grammar_file.read() + _grammar = Grammar(grammar_text) -t_AND_OP = '&' -t_OR_OP = r'\|' +def parse(input): + if _grammar is None: + load_grammar() + return _grammar.parse(input) -t_NOT_OP = '!' -t_SP1 = '[ \t]+' - -def t_error(t): - """Avoid warnings on stderr""" - -lexer = lex.lex() - -def p_expression_name(p): - """expression : NAME""" - p[0] = ('NAME', p[1]) - -def p_expression_and_and(p): - """expression : expression SP0 AND_OP AND_OP SP0 expression""" - p[0] = ('AND', p[1], p[6]) - -def p_expression_and(p): - """expression : expression SP0 AND_OP SP0 expression""" - p[0] = ('AND', p[1], p[5]) - -def p_expression_op_word(p): - """expression : l_expression op_word r_expression""" - if p[2] in ('AND', 'and'): - p[0] = ('AND', p[1], p[3]) - elif p[2] in ('OR', 'or'): - p[0] = ('OR', p[1], p[3]) - -def p_expression_or_or(p): - """expression : expression SP0 OR_OP OR_OP SP0 expression""" - p[0] = ('OR', p[1], p[6]) - -def p_expression_or(p): - """expression : expression SP0 OR_OP SP0 expression""" - p[0] = ('OR', p[1], p[5]) - -def p_expression_not(p): - """expression : NOT_OP SP0 expression""" - p[0] = ('NOT', p[3]) +def cleanup_children(visited_children): + children = [c for c in visited_children if c] + if len(children) == 1: + return children[0] + else: + return children -def p_expression_in_parens(p): - """expression : expression_parens""" - p[0] = p[1] -def p_l_expression(p): - """l_expression : expression_parens - | expression SP1 - """ - if len(p) == 2: - p[0] = p[1] - elif len(p) == 3: - p[0] = p[1] - else: - raise ValueError(p) - -def p_r_expression(p): - """r_expression : expression_parens - | SP1 expression - """ - if len(p) == 2: - p[0] = p[1] - elif len(p) == 3: - p[0] = p[2] - else: - raise ValueError(p) +class Compiler(NodeVisitor): + def generic_visit(self, node, visited_children): + return cleanup_children(visited_children) -def p_expression_parens(p): - """expression_parens : '(' SP0 expression SP0 ')'""" - p[0] = ('PARENS', p[3]) + def visit_or_expression(self, node, visited_children): + return ('OR', visited_children[0], visited_children[2]) -def p_op_word(p): - """op_word : NAME""" - if p[1] in ('AND', 'and', 'OR', 'or'): - p[0] = p[1] - else: - raise SyntaxError + def visit_and_expression(self, node, visited_children): + return ('AND', visited_children[0], visited_children[2]) -def p_SP0(p): - """SP0 : SP1 - | empty - """ + def visit_not_expression(self, node, visited_children): + return ('NOT', visited_children[2]) -def p_empty(p): - """empty :""" + def visit_parens_expression(self, node, visited_children): + return ('PARENS', visited_children[2]) -def p_error(p): - """Avoid warnings on stderr""" - yacc.restart() + def visit_name(self, node, visited_children): + return ('NAME', node.text) -precedence = ( - ('left', 'OR_OP'), - ('left', 'AND_OP'), - ('right', 'NOT_OP'), -) -parser = yacc.yacc() +def compile(tree): + if isinstance(tree, str): + tree = parse(tree) + return Compiler().visit(tree)