#! /usr/bin/env python
"""Parse tag search expressions into nested tuples using Lark.

The grammar is read from ``grammar.ebnf`` located next to this module and
is loaded lazily on the first call to :func:`parse`.  The parse tree is
then rewritten by :class:`TagsTransformer` into plain tuples such as
``('AND', left, right)`` or ``('NAME', 'foo')``.
"""
# Reconstructed from the flattened diff of parser/parser.py
# (index a1dd034..dbd0549), which replaced the Grako-generated parser
# with this Lark-based one.

import os

from lark import Lark, Transformer


class TagsTransformer(Transformer):
    """Turn the Lark parse tree of a tag expression into nested tuples.

    Each public method is named after a grammar rule and receives that
    rule's children as a list.  The exact children layout is defined by
    ``grammar.ebnf`` (not visible here); the index choices below are the
    ones the original code relied on.
    """

    def _binary(self, tag, items):
        """Build a right-nested ``(tag, left, right)`` tuple from *items*.

        *items* is read as ``[left, <skipped>, right, ...]``; element 1 is
        ignored (presumably the operator -- confirm against the grammar).
        When more than three elements are present, everything from index 2
        onwards is folded recursively into the right operand, so
        ``a op b op c`` becomes ``(tag, a, (tag, b, c))``.
        """
        left = items[0]
        right = items[2]
        if len(items) > 3:
            right = self._binary(tag, items[2:])
        return (tag, left, right)

    def and_expression(self, items):
        """Return ``('AND', left, right)`` for an ``and_expression`` rule."""
        return self._binary('AND', items)

    def or_expression(self, items):
        """Return ``('OR', left, right)`` for an ``or_expression`` rule."""
        return self._binary('OR', items)

    def not_expression(self, items):
        """Return ``('NOT', operand)``; the operand is the second child."""
        return ('NOT', items[1])

    def expression_parens(self, items):
        """Return ``('PARENS', inner)``; the inner node is the first child."""
        return ('PARENS', items[0])

    def name(self, name):
        """Return ``('NAME', text)`` using the ``.value`` of the first child.

        The first child is presumably a lark ``Token`` -- confirm against
        the grammar.
        """
        return ('NAME', name[0].value)


# Cache for the compiled grammar; filled in by load_grammar().
_grammar = None


def load_grammar():
    """Read ``grammar.ebnf`` next to this module and compile it with Lark.

    Lines that start with ``#`` are dropped before the text is handed to
    Lark.  The compiled grammar is stored in the module-level ``_grammar``
    cache; nothing is returned.
    """
    global _grammar
    parser_dir = os.path.dirname(__file__)
    with open(os.path.join(parser_dir, 'grammar.ebnf'), 'rt') as grammar_file:
        grammar_text = grammar_file.read()
    # Only lines whose very first character is '#' are filtered out;
    # indented '#' lines are kept, exactly as before.
    grammar_lines = [line for line in grammar_text.splitlines()
                     if not line.startswith('#')]
    grammar_text = '\n'.join(grammar_lines)
    _grammar = Lark(grammar_text)


def parse(input):
    """Parse the tag expression *input* and return the transformed tree.

    The grammar is loaded on first use.  The return value is whatever
    ``TagsTransformer().transform()`` produces for the parse tree.

    The parameter name shadows the ``input`` builtin; it is kept unchanged
    so that existing keyword callers keep working.
    """
    if _grammar is None:
        load_grammar()
    tree = _grammar.parse(input)
    return TagsTransformer().transform(tree)


if __name__ == '__main__':
    # Smoke test: print the transformed tree for a few sample queries.
    # Single-argument print(...) behaves the same on Python 2 and 3.
    samples = (
        'test',
        '!test',
        'not test',
        'foo or bar',
        'foo && bar',
        'foo && bar && baz',
        '!foo && bar && baz',
        '(test)',
        '(foo || bar)',
        '(foo and !bar)',
    )
    print('----------')
    for sample in samples:
        print(parse(sample))
    print('----------')