From: Oleg Broytman Date: Thu, 14 Dec 2017 11:27:22 +0000 (+0300) Subject: Version 0.7: Use lark instead of parsley X-Git-Tag: v0.7^0 X-Git-Url: https://git.phdru.name/?p=phdru.name%2Fcgi-bin%2Fblog-ru%2Fsearch-tags.git;a=commitdiff_plain;h=2e7b7f0cd4c51eb465780317446a81401eb71faa Version 0.7: Use lark instead of parsley --- diff --git a/ChangeLog b/ChangeLog index 9b7a3dc..79857bb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +Version 0.7 (2017-12-14) + + Use lark instead of parsley. + Version 0.6 (2017-05-20) Use parsley instead of parsimonious. diff --git a/TODO b/TODO index eda6989..06ad45b 100644 --- a/TODO +++ b/TODO @@ -1,6 +1,3 @@ -Use lark instead of parsley. - - Sort posts by date or tags. diff --git a/parser/grammar.ebnf b/parser/grammar.ebnf index 4375257..3be4d16 100644 --- a/parser/grammar.ebnf +++ b/parser/grammar.ebnf @@ -1,4 +1,4 @@ -# Grammar rules for tag searching +# Grammar rules for tag searching; EBNF. # The grammar defines expressions in the following forms: # TAG - search blog posts that contain the tag; @@ -11,41 +11,58 @@ # Allowed operators: conjunction - & && AND and # disjunction - | || OR or # negation - ! NOT not -# Usual priority: NOT recognized before AND, AND before OR. # This is a simple version of the grammar and it allows -# rather stupid expressions, like (TAG) or ((TAG)) or !(!(TAG)). +# rather stupid expressions, like !!TAG or ((TAG)); in the future +# it will be fixed by making the grammar more complex and stricter. -expression = inner_expression:e end -> e +?start : expression -inner_expression = (or_expression | aterm_expression):e -> e +?expression : or_expression + | and_expression + | and_sub_expression -or_expression = aterm_expression:a or_op inner_expression:e -> ('OR', a, e) +or_expression : or_sub_expression (or or_sub_expression)+ -and_expression = term_expression:t and_op aterm_expression:a -> ('AND', -t, a) +?or_sub_expression : and_expression + | and_sub_expression -not_expression = not_op ws (parens_expression | name):n -> ('NOT', n) +and_expression : and_sub_expression (and and_sub_expression)+ -aterm_expression = (and_expression | term_expression):e -> e +?and_sub_expression : not_expression + | expression_parens + | name -term_expression = (not_expression:e -> e) | (parens_expression:p -> p) | (name:n space_b4letter -> n) +not_expression: not and_sub_expression -parens_expression = '(' ws inner_expression:e ws ')' -> ('PARENS', e) +expression_parens : "(" expression ")" -and_op = (ws ('&&' | '&') ws) | (ws ('AND' | 'and') space_b4letter) +name : /[a-z][a-z0-9_]+/ -or_op = (ws ('||' | '|') ws) | (ws ('OR' | 'or') space_b4letter) +?and : and_op + | and_op and_op + | and_word -not_op = (ws '!' ws) | (ws ('NOT' | 'not') space_b4letter) +?or : or_op + | or_op or_op + | or_word -name = :n -> ('NAME', n) +?not : not_op + | not_word -lletter = :l ?(l in 'abcdefghijklmnopqrstuvwxyz') -> l +?and_op : "&" -digit = :d ?(d in '0123456789') -> d +?or_op : "|" -lletterOrDigit = (lletter | digit):c -> c +?not_op : "!" -space_b4letter = (' '+ ~~letter) | ws +?and_word : "AND" + | "and" -# vim: set ft=text : +?or_word : "OR" + | "or" + +?not_word : "NOT" + | "not" + +%import common.WS +%ignore WS diff --git a/parser/parser.py b/parser/parser.py index 3f51d3f..dbd0549 100755 --- a/parser/parser.py +++ b/parser/parser.py @@ -1,7 +1,32 @@ #! /usr/bin/env python import os -from parsley import makeGrammar +from lark import Lark, Transformer + + +class TagsTransformer(Transformer): + def and_expression(self, items): + left = items[0] + right = items[2] + if len(items) > 3: + right = self.and_expression(items[2:]) + return ('AND', left, right) + + def or_expression(self, items): + left = items[0] + right = items[2] + if len(items) > 3: + right = self.or_expression(items[2:]) + return ('OR', left, right) + + def not_expression(self, items): + return ('NOT', items[1]) + + def expression_parens(self, items): + return ('PARENS', items[0]) + + def name(self, name): + return ('NAME', name[0].value) # cache @@ -13,21 +38,29 @@ def load_grammar(): parser_dir = os.path.dirname(__file__) with open(os.path.join(parser_dir, 'grammar.ebnf'), 'rt') as grammar_file: grammar_text = grammar_file.read() - _grammar = makeGrammar(grammar_text, {}, 'Tags') + grammar_lines = [line for line in grammar_text.splitlines() + if not line.startswith('#')] + grammar_text = '\n'.join(grammar_lines) + _grammar = Lark(grammar_text) def parse(input): if _grammar is None: load_grammar() - return _grammar(input).expression() + tree = _grammar.parse(input) + return TagsTransformer().transform(tree) if __name__ == '__main__': + print '----------' print parse('test') print parse('!test') print parse('not test') print parse('foo or bar') print parse('foo && bar') + print parse('foo && bar && baz') + print parse('!foo && bar && baz') print parse('(test)') print parse('(foo || bar)') print parse('(foo and !bar)') + print '----------' diff --git a/parser/test_parser.py b/parser/test_parser.py index 67c0ae3..8783de5 100755 --- a/parser/test_parser.py +++ b/parser/test_parser.py @@ -1,7 +1,7 @@ #! /usr/bin/env python import unittest -from ometa.runtime import ParseError +from lark import ParseError from parser import parse class TestParser(unittest.TestCase): diff --git a/search-tags.py b/search-tags.py index 4f6810a..c2884fa 100755 --- a/search-tags.py +++ b/search-tags.py @@ -7,7 +7,7 @@ __copyright__ = "Copyright (C) 2014-2017 PhiloSoft Design" __license__ = "GNU GPL" import cgi, sys -from ometa.runtime import ParseError +from lark import ParseError from html.response import redirect, response from parser import parser