X-Git-Url: https://git.phdru.name/?a=blobdiff_plain;f=parser%2Fparser.py;h=dbd054964820852fd530b4cfab0f1da2c8e79e30;hb=2e7b7f0cd4c51eb465780317446a81401eb71faa;hp=d278e4b210681817be55a9624083d3b7d62747a7;hpb=02dbb2a7f4d0804c56122cc7ca681224fe659abc;p=phdru.name%2Fcgi-bin%2Fblog-ru%2Fsearch-tags.git

# Summary of this patch (commentary only; everything from "diff --git" on is
# the patch itself):
#
# * parser/parser.py is rewritten in full.  The removed side is a PLY
#   (lex/yacc) implementation: token regexes, p_* production functions, a
#   precedence table and module-level `lexer` / `parser` objects.  The added
#   side is a Lark implementation.
# * The added module builds the same tuple shapes the removed p_* functions
#   built: ('NAME', str), ('AND', l, r), ('OR', l, r), ('NOT', x) and
#   ('PARENS', x).  TagsTransformer produces them from the Lark parse tree.
# * and_expression / or_expression use items[0] as the left operand and
#   items[2] as the right one; with more than three items they recurse on
#   items[2:], so a chain such as `a && b && c` nests to the right.
#   Index 1 is skipped -- presumably the operator token; grammar.ebnf is not
#   part of this patch, so confirm against it.
# * load_grammar() reads grammar.ebnf from the directory of this module,
#   drops every line that starts with '#', and stores the resulting Lark
#   object in the module-global `_grammar`.  parse() calls it only while
#   `_grammar` is still None.
# * The file mode changes from 100644 to 100755 and a shebang line is added;
#   the `__main__` section prints the parse result of a few sample queries.
#
# NOTE(review): the `__main__` section uses Python 2 `print` statements, and
# the parameter of parse() is named `input`, which shadows the builtin.
#
# NOTE(review): line breaks and indentation were collapsed in the copy this
# was restored from.  They are reconstructed from the hunk header (107 removed
# / 66 added lines) and from Python syntax; the alignment of continuation
# lines inside multi-line docstrings and the list comprehension is a best
# guess.

diff --git a/parser/parser.py b/parser/parser.py
old mode 100644
new mode 100755
index d278e4b..dbd0549
--- a/parser/parser.py
+++ b/parser/parser.py
@@ -1,107 +1,66 @@
-# Parse query
-
-from ply import lex, yacc
-
-literals = '()'
-
-tokens = ('NAME', 'AND_OP', 'OR_OP', 'NOT_OP', 'SP1')
-
-t_NAME = '[a-z][a-z0-9_]*'
-
-t_AND_OP = '&'
-
-t_OR_OP = r'\|'
-
-t_NOT_OP = '!'
-
-t_SP1 = '[ \t]+'
-
-def t_error(t):
-    """Avoid warnings on stderr"""
-
-lexer = lex.lex()
-
-def p_expression_name(p):
-    """expression : NAME"""
-    p[0] = ('NAME', p[1])
-
-def p_expression_and_and(p):
-    """expression : expression SP0 AND_OP AND_OP SP0 expression"""
-    p[0] = ('AND', p[1], p[6])
-
-def p_expression_and(p):
-    """expression : expression SP0 AND_OP SP0 expression"""
-    p[0] = ('AND', p[1], p[5])
-
-def p_expression_and_word(p):
-    """expression : l_expression and_word r_expression"""
-    p[0] = ('AND', p[1], p[3])
-
-def p_expression_not(p):
-    """expression : NOT_OP SP0 expression"""
-    p[0] = ('NOT', p[3])
-
-def p_expression_or_or(p):
-    """expression : expression SP0 OR_OP OR_OP SP0 expression"""
-    p[0] = ('OR', p[1], p[6])
-
-def p_expression_or(p):
-    """expression : expression SP0 OR_OP SP0 expression"""
-    p[0] = ('OR', p[1], p[5])
-
-def p_expression_in_parens(p):
-    """expression : expression_parens"""
-    p[0] = p[1]
-
-def p_l_expression(p):
-    """l_expression : expression_parens
-                    | expression SP1
-    """
-    if len(p) == 2:
-        p[0] = p[1]
-    elif len(p) == 3:
-        p[0] = p[1]
-    else:
-        raise ValueError(p)
-
-def p_r_expression(p):
-    """r_expression : expression_parens
-                    | SP1 expression
-    """
-    if len(p) == 2:
-        p[0] = p[1]
-    elif len(p) == 3:
-        p[0] = p[2]
-    else:
-        raise ValueError(p)
-
-def p_expression_parens(p):
-    """expression_parens : '(' SP0 expression SP0 ')'"""
-    p[0] = ('PARENS', p[3])
-
-def p_and_word(p):
-    """and_word : NAME"""
-    if p[1] in ('AND', 'and'):
-        p[0] = p[1]
-    else:
-        raise SyntaxError
-
-def p_SP0(p):
-    """SP0 : SP1
-           | empty
-    """
-
-def p_empty(p):
-    """empty :"""
-
-def p_error(p):
-    """Avoid warnings on stderr"""
-    yacc.restart()
-
-precedence = (
-    ('left', 'OR_OP'),
-    ('left', 'AND_OP'),
-    ('right', 'NOT_OP'),
-)
-
-parser = yacc.yacc()
+#! /usr/bin/env python
+
+import os
+from lark import Lark, Transformer
+
+
+class TagsTransformer(Transformer):
+    def and_expression(self, items):
+        left = items[0]
+        right = items[2]
+        if len(items) > 3:
+            right = self.and_expression(items[2:])
+        return ('AND', left, right)
+
+    def or_expression(self, items):
+        left = items[0]
+        right = items[2]
+        if len(items) > 3:
+            right = self.or_expression(items[2:])
+        return ('OR', left, right)
+
+    def not_expression(self, items):
+        return ('NOT', items[1])
+
+    def expression_parens(self, items):
+        return ('PARENS', items[0])
+
+    def name(self, name):
+        return ('NAME', name[0].value)
+
+
+# cache
+_grammar = None
+
+
+def load_grammar():
+    global _grammar
+    parser_dir = os.path.dirname(__file__)
+    with open(os.path.join(parser_dir, 'grammar.ebnf'), 'rt') as grammar_file:
+        grammar_text = grammar_file.read()
+    grammar_lines = [line for line in grammar_text.splitlines()
+                     if not line.startswith('#')]
+    grammar_text = '\n'.join(grammar_lines)
+    _grammar = Lark(grammar_text)
+
+
+def parse(input):
+    if _grammar is None:
+        load_grammar()
+    tree = _grammar.parse(input)
+    return TagsTransformer().transform(tree)
+
+
+if __name__ == '__main__':
+    print '----------'
+    print parse('test')
+    print parse('!test')
+    print parse('not test')
+    print parse('foo or bar')
+    print parse('foo && bar')
+    print parse('foo && bar && baz')
+    print parse('!foo && bar && baz')
+    print parse('(test)')
+    print parse('(foo || bar)')
+    print parse('(foo and !bar)')
+    print '----------'