From e329b871b1f4b97b62eaa2aa33c638886c5834df Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Tue, 20 May 2014 01:02:04 +0400 Subject: [PATCH] Add parser --- .gitignore | 2 ++ grammar | 27 +++++++++++++++++++++++++++ parse_query.py | 32 ++++++++++++++++++++++++++++++++ test_parser.py | 31 +++++++++++++++++++++++++++++++ 4 files changed, 92 insertions(+) create mode 100644 grammar create mode 100755 test_parser.py diff --git a/.gitignore b/.gitignore index 8d44156..bf5a5a2 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ /*.py[co] +/parser.out +/parsetab.py diff --git a/grammar b/grammar new file mode 100644 index 0000000..7ded1c2 --- /dev/null +++ b/grammar @@ -0,0 +1,27 @@ +# Grammar rules for tag searching; BNF. + +# The grammar defines expressions in the following forms: +# TAG - search blog posts that contain the tag; +# !TAG - search blog posts that don't contain the tag; +# TAG & TAG - search blog posts that contain both tags; +# TAG | TAG - search blog posts that contain one of the tags or both; +# Parentheses are allowed to group expressions: +# TAG & (TAG | TAG) +# !(TAG | TAG) +# and so on. This is the first version of the grammar and it allows +# rather stupid expressions, like !!TAG or ((TAG)); in the future +# it will be fixed by making the grammar stricter and more complex. + +NAME : '[a-z][a-z0-9_]+' + +AND_OP : '&' + +OR_OP : '|' + +NOT_OP : '!' + +expression : NAME + | expression AND_OP expression + | NOT_OP expression + | expression OR_OP expression + | '(' expression ')' diff --git a/parse_query.py b/parse_query.py index 4533413..c74eb78 100644 --- a/parse_query.py +++ b/parse_query.py @@ -18,3 +18,35 @@ def t_error(t): """Avoid warnings on stderr""" lexer = lex.lex() + +def p_expression_name(p): + """expression : NAME""" + p[0] = ('NAME', p[1]) + +def p_expression_and(p): + """expression : expression AND_OP expression""" + p[0] = ('AND', p[1], p[3]) + +def p_expression_not(p): + """expression : NOT_OP expression""" + p[0] = ('NOT', p[2]) + +def p_expression_or(p): + """expression : expression OR_OP expression""" + p[0] = ('OR', p[1], p[3]) + +def p_expression_parens(p): + """expression : '(' expression ')'""" + p[0] = ('PARENS', p[2]) + +def p_error(p): + """Avoid warnings on stderr""" + yacc.restart() + +precedence = ( + ('left', 'OR_OP'), + ('left', 'AND_OP'), + ('right', 'NOT_OP'), +) + +parser = yacc.yacc() diff --git a/test_parser.py b/test_parser.py new file mode 100755 index 0000000..e8f213a --- /dev/null +++ b/test_parser.py @@ -0,0 +1,31 @@ +#! /usr/bin/env python + +import unittest + +class TestParser(unittest.TestCase): + def test_01_import(self): + global parser + from parse_query import parser + + def test_02_tag(self): + self.assertEqual(parser.parse('xxx'), ('NAME', 'xxx')) + + def test_03_bad_tag(self): + from ply.lex import LexError + self.assertRaises(LexError, parser.parse, 'XXX') + + def test_04_expression(self): + self.assertEqual(parser.parse('!(xxx&yyy)'), + ('NOT', ('PARENS', ('AND', ('NAME', 'xxx'), ('NAME', 'yyy')))) + ) + + def test_05_bad_expression(self): + self.assertIs(parser.parse('!(xxx&yyy'), None) + + def test_06_expression2(self): + self.assertEqual(parser.parse('!xxx&yyy&zzz|ooo'), + ('OR', ('AND', ('AND', ('NOT', ('NAME', 'xxx')), ('NAME', 'yyy')), ('NAME', 'zzz')), ('NAME', 'ooo')) + ) + +if __name__ == "__main__": + unittest.main() -- 2.39.2