First try at the Grako-based parser. Most tests work.
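The commit replaces the hand-written PLY parser with one generated by Grako: grammar.ebnf holds the new EBNF grammar, the Makefile regenerates parser.py from it (grako -o parser.py grammar.ebnf), and build_ast.py subclasses the generated TagsSemantics so the new parser builds the same tuple-based AST the PLY version did. A minimal usage sketch, mirroring the updated test_parser.py and assuming grako is installed and make has already been run:

    from parser import TagsParser        # generated from grammar.ebnf by grako
    from build_ast import TagsSemantics  # rebuilds the old tuple-based AST

    parser = TagsParser(parseinfo=False)
    ast = parser.parse('!(xxx&yyy)', semantics=TagsSemantics())
    # As in test_04_expression below, this is expected to yield:
    #   ('NOT', ('PARENS', ('AND', ('NAME', 'xxx'), ('NAME', 'yyy'))))
    print(ast)

The diff follows.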
--- /dev/null
+# Makefile.
+#
+# __author__ = "Oleg Broytman <phd@phdru.name>"
+# __copyright__ = "Copyright (C) 2016 PhiloSoft Design"
+
+parser.py: grammar.ebnf
+ grako -o $@ $<
+ chmod +x $@
+
+
+.PHONY: test
+test: parser.py
+ ./test_parser.py
--- /dev/null
+
+from parser import TagsSemantics as _TagsSemantics
+
+class TagsSemantics(_TagsSemantics):
+ def expression(self, ast):
+ if isinstance(ast, str): # name
+ return ast
+ if isinstance(ast, unicode): # name
+ return str(ast)
+ if isinstance(ast, list):
+ ast = tuple(ast)
+ elif not isinstance(ast, tuple):
+ raise TypeError("Expected a list, got %r %s" % (type(ast), ast))
+ if len(ast) == 3:
+ return (ast[1], ast[0], ast[2])
+ return ast
+
+ def expression_parens(self, ast):
+ return ('PARENS', ast[1])
+
+ def name(self, ast):
+ return ('NAME', str(ast))
+
+ def and_op(self, ast):
+ return 'AND'
+
+ def or_op(self, ast):
+ return 'OR'
+
+ def not_op(self, ast):
+ return 'NOT'
+++ /dev/null
-# Grammar rules for tag searching; BNF.
-
-# The grammar defines expressions in the following forms:
-# TAG - search blog posts that contain the tag;
-# !TAG - search blog posts that don't contain the tag;
-# TAG & TAG - search blog posts that contain both tags;
-# TAG | TAG - search blog posts that contain any of the tags;
-# Parentheses are allowed to group expressions; for example:
-# TAG & (TAG | TAG)
-# !(TAG | TAG)
-# Allowed operators: conjunction - & && AND and
-# disjunction - | || OR or
-# negation - ! NOT not
-# This is a simple version of the grammar and it allows
-# rather stupid expressions, like !!TAG or ((TAG)); in the future
-# it will be fixed by making the grammar more complex and stricter.
-
-NAME : '[a-z][a-z0-9_]+'
-
-AND_OP : '&'
-
-OR_OP : '|'
-
-NOT_OP : '!'
-
-SP1 : '[ \t]+'
-
-expression : NAME
- | expression SP0 AND_OP AND_OP SP0 expression
- | expression SP0 AND_OP SP0 expression
- | l_expression and_word r_expression
- | expression SP0 OR_OP OR_OP SP0 expression
- | expression SP0 OR_OP SP0 expression
- | l_expression or_word r_expression
- | NOT_OP SP0 expression
- | not_word r_expression
- | expression_parens
-
-l_expression : expression_parens
- | expression_sp
-
-r_expression : expression_parens
- | sp_expression
-
-expression_parens : '(' SP0 expression SP0 ')'
-
-sp_expression : SP1 expression
-
-expression_sp : expression SP1
-
-and_word : 'A' 'N' 'D'
- | 'a' 'n' 'd'
-
-or_word : 'O' 'R'
- | 'o' 'r'
-
-not_word : 'N' 'O' 'T'
- | 'n' 'o' 't'
-
-SP0 : SP1 | empty
-
-empty :
--- /dev/null
+# Grammar rules for tag searching; EBNF.
+
+# The grammar defines expressions in the following forms:
+# TAG - search blog posts that contain the tag;
+# !TAG - search blog posts that don't contain the tag;
+# TAG & TAG - search blog posts that contain both tags;
+# TAG | TAG - search blog posts that contain any of the tags;
+# Parentheses are allowed to group expressions; for example:
+# TAG & (TAG | TAG)
+# !(TAG | TAG)
+# Allowed operators: conjunction - & && AND and
+# disjunction - | || OR or
+# negation - ! NOT not
+# This is a simple version of the grammar and it allows
+# rather stupid expressions, like !!TAG or ((TAG))
+
+@@grammar :: Tags
+
+start = expression $ ;
+
+expression = (
+ | expression and_op expression
+ | expression or_op expression
+ | not_op expression
+ | expression_parens
+ | name ) ;
+
+expression_parens = '(' expression ')' ;
+
+name = /[a-z][a-z0-9_]+/ ;
+
+and_op = '&' | '&&' | 'AND' | 'and' ;
+
+or_op = '|' | '||' | 'OR' | 'or' ;
+
+not_op = '!' | 'NOT' | 'not' ;
-# Parse query
-
-from ply import lex, yacc
-
-literals = '()'
-
-tokens = ('OP_WORD', 'NAME', 'AND_OP', 'OR_OP', 'NOT_OP', 'SP1')
-
-t_OP_WORD = '(AND|and|OR|or|NOT|not)'
-
-t_NAME = '([a-z][a-z0-9_]*)'
-
-t_AND_OP = '&'
-
-t_OR_OP = r'\|'
-
-t_NOT_OP = '!'
-
-t_SP1 = '[ \t]+'
-
-def t_error(t):
- """Avoid warnings on stderr"""
-
-lexer = lex.lex()
-
-def p_expression_name(p):
- """expression : NAME"""
- p[0] = ('NAME', p[1])
-
-def p_expression_and_and(p):
- """expression : expression SP0 AND_OP AND_OP SP0 expression"""
- p[0] = ('AND', p[1], p[6])
-
-def p_expression_and(p):
- """expression : expression SP0 AND_OP SP0 expression"""
- p[0] = ('AND', p[1], p[5])
-
-def p_expression_op_word(p):
- """expression : l_expression op_word r_expression"""
- if p[2] in ('AND', 'and'):
- p[0] = ('AND', p[1], p[3])
- elif p[2] in ('OR', 'or'):
- p[0] = ('OR', p[1], p[3])
- else:
- raise ValueError(p)
-
-def p_expression_or_or(p):
- """expression : expression SP0 OR_OP OR_OP SP0 expression"""
- p[0] = ('OR', p[1], p[6])
-
-def p_expression_or(p):
- """expression : expression SP0 OR_OP SP0 expression"""
- p[0] = ('OR', p[1], p[5])
-
-def p_expression_not(p):
- """expression : NOT_OP SP0 expression"""
- p[0] = ('NOT', p[3])
-
-def p_expression_not_word(p):
- """expression : op_word r_expression"""
- if p[1] in ('NOT', 'not'):
- p[0] = ('NOT', p[2])
- else:
- raise ValueError(p)
-
-def p_expression_in_parens(p):
- """expression : expression_parens"""
- p[0] = p[1]
-
-def p_l_expression(p):
- """l_expression : expression_parens
- | expression SP1
- """
- if len(p) == 2:
- p[0] = p[1]
- elif len(p) == 3:
- p[0] = p[1]
- else:
- raise ValueError(p)
-
-def p_r_expression(p):
- """r_expression : expression_parens
- | SP1 expression
- """
- if len(p) == 2:
- p[0] = p[1]
- elif len(p) == 3:
- p[0] = p[2]
- else:
- raise ValueError(p)
-
-def p_expression_parens(p):
- """expression_parens : '(' SP0 expression SP0 ')'"""
- p[0] = ('PARENS', p[3])
-
-def p_op_word(p):
- """op_word : OP_WORD"""
- if p[1] in ('AND', 'and', 'OR', 'or', 'NOT', 'not'):
- p[0] = p[1]
- else:
- raise SyntaxError
-
-def p_SP0(p):
- """SP0 : SP1
- | empty
- """
-
-def p_empty(p):
- """empty :"""
-
-def p_error(p):
- """Avoid warnings on stderr"""
- yacc.restart()
-
-precedence = (
- ('left', 'OR_OP'),
- ('left', 'AND_OP'),
- ('right', 'NOT_OP'),
-)
-
-parser = yacc.yacc()
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# CAVEAT UTILITOR
+#
+# This file was automatically generated by Grako.
+#
+# https://pypi.python.org/pypi/grako/
+#
+# Any changes you make to it will be overwritten the next time
+# the file is generated.
+
+
+from __future__ import print_function, division, absolute_import, unicode_literals
+
+from grako.parsing import graken, Parser
+from grako.util import re, RE_FLAGS, generic_main # noqa
+
+
+__version__ = (2016, 7, 9, 18, 10, 4, 5)
+
+__all__ = [
+ 'TagsParser',
+ 'TagsSemantics',
+ 'main'
+]
+
+KEYWORDS = set([])
+
+
+class TagsParser(Parser):
+ def __init__(self,
+ whitespace=None,
+ nameguard=None,
+ comments_re=None,
+ eol_comments_re=None,
+ ignorecase=None,
+ left_recursion=True,
+ keywords=KEYWORDS,
+ namechars='',
+ **kwargs):
+ super(TagsParser, self).__init__(
+ whitespace=whitespace,
+ nameguard=nameguard,
+ comments_re=comments_re,
+ eol_comments_re=eol_comments_re,
+ ignorecase=ignorecase,
+ left_recursion=left_recursion,
+ keywords=keywords,
+ namechars=namechars,
+ **kwargs
+ )
+
+ @graken()
+ def _start_(self):
+ self._expression_()
+ self._check_eof()
+
+ @graken()
+ def _expression_(self):
+ with self._group():
+ with self._choice():
+ with self._option():
+ self._expression_()
+ self._and_op_()
+ self._expression_()
+ with self._option():
+ self._expression_()
+ self._or_op_()
+ self._expression_()
+ with self._option():
+ self._not_op_()
+ self._expression_()
+ with self._option():
+ self._expression_parens_()
+ with self._option():
+ self._name_()
+ self._error('no available options')
+
+ @graken()
+ def _expression_parens_(self):
+ self._token('(')
+ self._expression_()
+ self._token(')')
+
+ @graken()
+ def _name_(self):
+ self._pattern(r'[a-z][a-z0-9_]+')
+
+ @graken()
+ def _and_op_(self):
+ with self._choice():
+ with self._option():
+ self._token('&')
+ with self._option():
+ self._token('&&')
+ with self._option():
+ self._token('AND')
+ with self._option():
+ self._token('and')
+ self._error('expecting one of: & && AND and')
+
+ @graken()
+ def _or_op_(self):
+ with self._choice():
+ with self._option():
+ self._token('|')
+ with self._option():
+ self._token('||')
+ with self._option():
+ self._token('OR')
+ with self._option():
+ self._token('or')
+ self._error('expecting one of: OR or | ||')
+
+ @graken()
+ def _not_op_(self):
+ with self._choice():
+ with self._option():
+ self._token('!')
+ with self._option():
+ self._token('NOT')
+ with self._option():
+ self._token('not')
+ self._error('expecting one of: ! NOT not')
+
+
+class TagsSemantics(object):
+ def start(self, ast):
+ return ast
+
+ def expression(self, ast):
+ return ast
+
+ def expression_parens(self, ast):
+ return ast
+
+ def name(self, ast):
+ return ast
+
+ def and_op(self, ast):
+ return ast
+
+ def or_op(self, ast):
+ return ast
+
+ def not_op(self, ast):
+ return ast
+
+
+def main(
+ filename,
+ startrule,
+ trace=False,
+ whitespace=None,
+ nameguard=None,
+ comments_re=None,
+ eol_comments_re=None,
+ ignorecase=None,
+ left_recursion=True,
+ **kwargs):
+
+ with open(filename) as f:
+ text = f.read()
+ whitespace = whitespace or None
+ parser = TagsParser(parseinfo=False)
+ ast = parser.parse(
+ text,
+ startrule,
+ filename=filename,
+ trace=trace,
+ whitespace=whitespace,
+ nameguard=nameguard,
+ ignorecase=ignorecase,
+ **kwargs)
+ return ast
+
+if __name__ == '__main__':
+ import json
+ ast = generic_main(main, TagsParser, name='Tags')
+ print('AST:')
+ print(ast)
+ print()
+ print('JSON:')
+ print(json.dumps(ast, indent=2))
+ print()
+++ /dev/null
-#! /usr/bin/env python
-
-import unittest
-
-class TestLexer(unittest.TestCase):
- def test_01_import(self):
- global lexer
- from parser import lexer
-
- def test_02_tag(self):
- global LexToken
- from ply.lex import LexToken
- lexer.input('xxx')
- tokens = list(lexer)
- self.assertEqual(len(tokens), 1)
- lextoken = tokens[0]
- self.assertEqual(lextoken.type, 'NAME')
- self.assertEqual(lextoken.value, 'xxx')
- self.assertEqual(lextoken.lineno, 1)
- self.assertEqual(lextoken.lexpos, 0)
-
- def test_03_bad_tag(self):
- from ply.lex import LexError
- lexer.input('XXX')
- self.assertRaises(LexError, list, lexer)
-
- def test_04_expression(self):
- lexer.input('!(xxx&yyy)')
- tokens = list(lexer)
- self.assertEqual(len(tokens), 6)
- lextoken = tokens[0]
- self.assertEqual(lextoken.type, 'NOT_OP')
- self.assertEqual(lextoken.value, '!')
- self.assertEqual(lextoken.lineno, 1)
- self.assertEqual(lextoken.lexpos, 0)
- lextoken = tokens[1]
- self.assertEqual(lextoken.type, '(')
- self.assertEqual(lextoken.value, '(')
- self.assertEqual(lextoken.lineno, 1)
- self.assertEqual(lextoken.lexpos, 1)
- lextoken = tokens[2]
- self.assertEqual(lextoken.type, 'NAME')
- self.assertEqual(lextoken.value, 'xxx')
- self.assertEqual(lextoken.lineno, 1)
- self.assertEqual(lextoken.lexpos, 2)
- lextoken = tokens[3]
- self.assertEqual(lextoken.type, 'AND_OP')
- self.assertEqual(lextoken.value, '&')
- self.assertEqual(lextoken.lineno, 1)
- self.assertEqual(lextoken.lexpos, 5)
- lextoken = tokens[4]
- self.assertEqual(lextoken.type, 'NAME')
- self.assertEqual(lextoken.value, 'yyy')
- self.assertEqual(lextoken.lineno, 1)
- self.assertEqual(lextoken.lexpos, 6)
- lextoken = tokens[5]
- self.assertEqual(lextoken.type, ')')
- self.assertEqual(lextoken.value, ')')
- self.assertEqual(lextoken.lineno, 1)
- self.assertEqual(lextoken.lexpos, 9)
-
- def test_05_expression_2(self):
- lexer.input('xxx and yyy')
- tokens = list(lexer)
- self.assertEqual(len(tokens), 5)
- lextoken = tokens[0]
- self.assertEqual(lextoken.type, 'NAME')
- self.assertEqual(lextoken.value, 'xxx')
- self.assertEqual(lextoken.lineno, 1)
- self.assertEqual(lextoken.lexpos, 0)
- lextoken = tokens[1]
- self.assertEqual(lextoken.type, 'SP1')
- self.assertEqual(lextoken.value, ' ')
- self.assertEqual(lextoken.lineno, 1)
- self.assertEqual(lextoken.lexpos, 3)
- lextoken = tokens[2]
- self.assertEqual(lextoken.type, 'OP_WORD')
- self.assertEqual(lextoken.value, 'and')
- self.assertEqual(lextoken.lineno, 1)
- self.assertEqual(lextoken.lexpos, 4)
- lextoken = tokens[3]
- self.assertEqual(lextoken.type, 'SP1')
- self.assertEqual(lextoken.value, ' ')
- self.assertEqual(lextoken.lineno, 1)
- self.assertEqual(lextoken.lexpos, 7)
- lextoken = tokens[4]
- self.assertEqual(lextoken.type, 'NAME')
- self.assertEqual(lextoken.value, 'yyy')
- self.assertEqual(lextoken.lineno, 1)
- self.assertEqual(lextoken.lexpos, 9)
-
-if __name__ == "__main__":
- unittest.main()
#! /usr/bin/env python
+
import unittest
+from grako.exceptions import FailedParse
+
class TestParser(unittest.TestCase):
def test_01_import(self):
- global parser
- from parser import parser
+ global parser, TagsSemantics
+ from parser import TagsParser
+ from build_ast import TagsSemantics
+ parser = TagsParser(parseinfo=False)
def _parse(self, input):
- return parser.parse(input)
+ return parser.parse(input, semantics=TagsSemantics())
def test_02_tag(self):
self.assertEqual(self._parse('xxx'), ('NAME', 'xxx'))
def test_03_bad_tag(self):
- from ply.lex import LexError
- self.assertRaises(LexError, self._parse, 'XXX')
+ self.assertRaises(FailedParse, self._parse, 'XXX')
def test_04_expression(self):
         self.assertEqual(self._parse('!(xxx&yyy)'),
                          ('NOT', ('PARENS', ('AND', ('NAME', 'xxx'), ('NAME', 'yyy'))))
                          )
def test_05_bad_expression(self):
- self.assertIs(self._parse('!(xxx&yyy'), None)
+ self.assertRaises(FailedParse, self._parse, '!(xxx&yyy')
if __name__ == "__main__":
unittest.main()
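
One behavioural difference is visible in the test changes above: the PLY-based parser signalled bad input by returning None (the old test_05 used assertIs(..., None)) and its lexer raised LexError, while the Grako parser raises grako.exceptions.FailedParse. A caller that wants the old "None on error" behaviour could wrap the parse; a small sketch (parse_tags is an illustrative helper, not part of the commit):

    from grako.exceptions import FailedParse

    from parser import TagsParser
    from build_ast import TagsSemantics

    _parser = TagsParser(parseinfo=False)

    def parse_tags(query):
        """Return the AST for a tag-search query, or None if it does not parse."""
        try:
            return _parser.parse(query, semantics=TagsSemantics())
        except FailedParse:
            return None

    print(parse_tags('xxx'))        # ('NAME', 'xxx')
    print(parse_tags('!(xxx&yyy'))  # None - unbalanced parenthesis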