From 102d30993b1698195ea44db3a6e3c2d9402564fa Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Sat, 20 May 2017 20:52:38 +0300 Subject: [PATCH 01/16] Use parsley: done --- TODO | 3 --- 1 file changed, 3 deletions(-) diff --git a/TODO b/TODO index c77d12f..eda6989 100644 --- a/TODO +++ b/TODO @@ -1,6 +1,3 @@ -Use parsley instead of parsimonious. - - Use lark instead of parsley. -- 2.39.2 From 2e7b7f0cd4c51eb465780317446a81401eb71faa Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Thu, 14 Dec 2017 14:27:22 +0300 Subject: [PATCH 02/16] Version 0.7: Use lark instead of parsley --- ChangeLog | 4 +++ TODO | 3 --- parser/grammar.ebnf | 59 ++++++++++++++++++++++++++++--------------- parser/parser.py | 39 +++++++++++++++++++++++++--- parser/test_parser.py | 2 +- search-tags.py | 2 +- 6 files changed, 80 insertions(+), 29 deletions(-) diff --git a/ChangeLog b/ChangeLog index 9b7a3dc..79857bb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +Version 0.7 (2017-12-14) + + Use lark instead of parsley. + Version 0.6 (2017-05-20) Use parsley instead of parsimonious. diff --git a/TODO b/TODO index eda6989..06ad45b 100644 --- a/TODO +++ b/TODO @@ -1,6 +1,3 @@ -Use lark instead of parsley. - - Sort posts by date or tags. diff --git a/parser/grammar.ebnf b/parser/grammar.ebnf index 4375257..3be4d16 100644 --- a/parser/grammar.ebnf +++ b/parser/grammar.ebnf @@ -1,4 +1,4 @@ -# Grammar rules for tag searching +# Grammar rules for tag searching; EBNF. # The grammar defines expressions in the following forms: # TAG - search blog posts that contain the tag; @@ -11,41 +11,58 @@ # Allowed operators: conjunction - & && AND and # disjunction - | || OR or # negation - ! NOT not -# Usual priority: NOT recognized before AND, AND before OR. # This is a simple version of the grammar and it allows -# rather stupid expressions, like (TAG) or ((TAG)) or !(!(TAG)). +# rather stupid expressions, like !!TAG or ((TAG)); in the future +# it will be fixed by making the grammar more complex and stricter. -expression = inner_expression:e end -> e +?start : expression -inner_expression = (or_expression | aterm_expression):e -> e +?expression : or_expression + | and_expression + | and_sub_expression -or_expression = aterm_expression:a or_op inner_expression:e -> ('OR', a, e) +or_expression : or_sub_expression (or or_sub_expression)+ -and_expression = term_expression:t and_op aterm_expression:a -> ('AND', -t, a) +?or_sub_expression : and_expression + | and_sub_expression -not_expression = not_op ws (parens_expression | name):n -> ('NOT', n) +and_expression : and_sub_expression (and and_sub_expression)+ -aterm_expression = (and_expression | term_expression):e -> e +?and_sub_expression : not_expression + | expression_parens + | name -term_expression = (not_expression:e -> e) | (parens_expression:p -> p) | (name:n space_b4letter -> n) +not_expression: not and_sub_expression -parens_expression = '(' ws inner_expression:e ws ')' -> ('PARENS', e) +expression_parens : "(" expression ")" -and_op = (ws ('&&' | '&') ws) | (ws ('AND' | 'and') space_b4letter) +name : /[a-z][a-z0-9_]+/ -or_op = (ws ('||' | '|') ws) | (ws ('OR' | 'or') space_b4letter) +?and : and_op + | and_op and_op + | and_word -not_op = (ws '!' ws) | (ws ('NOT' | 'not') space_b4letter) +?or : or_op + | or_op or_op + | or_word -name = :n -> ('NAME', n) +?not : not_op + | not_word -lletter = :l ?(l in 'abcdefghijklmnopqrstuvwxyz') -> l +?and_op : "&" -digit = :d ?(d in '0123456789') -> d +?or_op : "|" -lletterOrDigit = (lletter | digit):c -> c +?not_op : "!" -space_b4letter = (' '+ ~~letter) | ws +?and_word : "AND" + | "and" -# vim: set ft=text : +?or_word : "OR" + | "or" + +?not_word : "NOT" + | "not" + +%import common.WS +%ignore WS diff --git a/parser/parser.py b/parser/parser.py index 3f51d3f..dbd0549 100755 --- a/parser/parser.py +++ b/parser/parser.py @@ -1,7 +1,32 @@ #! /usr/bin/env python import os -from parsley import makeGrammar +from lark import Lark, Transformer + + +class TagsTransformer(Transformer): + def and_expression(self, items): + left = items[0] + right = items[2] + if len(items) > 3: + right = self.and_expression(items[2:]) + return ('AND', left, right) + + def or_expression(self, items): + left = items[0] + right = items[2] + if len(items) > 3: + right = self.or_expression(items[2:]) + return ('OR', left, right) + + def not_expression(self, items): + return ('NOT', items[1]) + + def expression_parens(self, items): + return ('PARENS', items[0]) + + def name(self, name): + return ('NAME', name[0].value) # cache @@ -13,21 +38,29 @@ def load_grammar(): parser_dir = os.path.dirname(__file__) with open(os.path.join(parser_dir, 'grammar.ebnf'), 'rt') as grammar_file: grammar_text = grammar_file.read() - _grammar = makeGrammar(grammar_text, {}, 'Tags') + grammar_lines = [line for line in grammar_text.splitlines() + if not line.startswith('#')] + grammar_text = '\n'.join(grammar_lines) + _grammar = Lark(grammar_text) def parse(input): if _grammar is None: load_grammar() - return _grammar(input).expression() + tree = _grammar.parse(input) + return TagsTransformer().transform(tree) if __name__ == '__main__': + print '----------' print parse('test') print parse('!test') print parse('not test') print parse('foo or bar') print parse('foo && bar') + print parse('foo && bar && baz') + print parse('!foo && bar && baz') print parse('(test)') print parse('(foo || bar)') print parse('(foo and !bar)') + print '----------' diff --git a/parser/test_parser.py b/parser/test_parser.py index 67c0ae3..8783de5 100755 --- a/parser/test_parser.py +++ b/parser/test_parser.py @@ -1,7 +1,7 @@ #! /usr/bin/env python import unittest -from ometa.runtime import ParseError +from lark import ParseError from parser import parse class TestParser(unittest.TestCase): diff --git a/search-tags.py b/search-tags.py index 4f6810a..c2884fa 100755 --- a/search-tags.py +++ b/search-tags.py @@ -7,7 +7,7 @@ __copyright__ = "Copyright (C) 2014-2017 PhiloSoft Design" __license__ = "GNU GPL" import cgi, sys -from ometa.runtime import ParseError +from lark import ParseError from html.response import redirect, response from parser import parser -- 2.39.2 From 240857e1b474ca2d8d3e97f0ac234ca3e4b3ad3a Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Thu, 14 Dec 2017 14:47:58 +0300 Subject: [PATCH 03/16] version: 0.7 --- version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version b/version index 5a2a580..eb49d7c 100644 --- a/version +++ b/version @@ -1 +1 @@ -0.6 +0.7 -- 2.39.2 From d6adb10a5a815c34a6c7b34aa8566fbc08cf908b Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Fri, 25 May 2018 10:40:02 +0300 Subject: [PATCH 04/16] Upgrade: http://phdru.name -> https://phdru.name --- devscripts/README.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devscripts/README.html b/devscripts/README.html index 31e1c53..8a6e214 100644 --- a/devscripts/README.html +++ b/devscripts/README.html @@ -1 +1 @@ -CGI-BIN scripts for http://phdru.name/Russian/blog/ - search tags. +CGI-BIN scripts for https://phdru.name/Russian/blog/ - search tags. -- 2.39.2 From ff69d533177ddc2bb4123b10e907d544e152cda9 Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Fri, 12 Jul 2019 21:56:58 +0300 Subject: [PATCH 05/16] Docs(TODO): Try Pyleri --- TODO | 3 +++ 1 file changed, 3 insertions(+) diff --git a/TODO b/TODO index 06ad45b..463dd64 100644 --- a/TODO +++ b/TODO @@ -1,3 +1,6 @@ +Try Pyleri: https://tomassetti.me/pyleri-tutorial/ + + Sort posts by date or tags. -- 2.39.2 From e34c70c04914c3884b0a474956edf36496038533 Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Tue, 4 Jun 2024 18:02:11 +0300 Subject: [PATCH 06/16] Tests: Update to latest `lark` --- parser/test_parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/parser/test_parser.py b/parser/test_parser.py index 8783de5..df24b06 100755 --- a/parser/test_parser.py +++ b/parser/test_parser.py @@ -1,7 +1,7 @@ #! /usr/bin/env python import unittest -from lark import ParseError +from lark import LexError, ParseError from parser import parse class TestParser(unittest.TestCase): @@ -12,7 +12,7 @@ class TestParser(unittest.TestCase): self.assertEqual(self._parse('xxx'), ('NAME', 'xxx')) def test_03_bad_tag(self): - self.assertRaises(ParseError, self._parse, 'XXX') + self.assertRaises(LexError, self._parse, 'XXX') def test_04_expression(self): self.assertEqual(self._parse('!(xxx&yyy)'), -- 2.39.2 From bee03d50e5a6c083962d5d855cea5445bc015472 Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Tue, 4 Jun 2024 17:56:41 +0300 Subject: [PATCH 07/16] Python 3 --- ChangeLog | 4 ++++ html/Makefile | 5 +++-- html/response.py | 42 +++++++++++++++++------------------ parser/parser.py | 26 +++++++++++----------- parser/test_parser.py | 51 +++++++++++++++++++++++++++---------------- search-tags.py | 23 ++++++++++++------- tags.py | 21 ++++++++++-------- 7 files changed, 100 insertions(+), 72 deletions(-) diff --git a/ChangeLog b/ChangeLog index 79857bb..d9dc455 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +Version 0.8 (2024-06-04) + + Python 3. + Version 0.7 (2017-12-14) Use lark instead of parsley. diff --git a/html/Makefile b/html/Makefile index 63b6cca..9d43041 100644 --- a/html/Makefile +++ b/html/Makefile @@ -1,10 +1,11 @@ # Makefile. # # __author__ = "Oleg Broytman " -# __copyright__ = "Copyright (C) 2014 PhiloSoft Design" +# __copyright__ = "Copyright (C) 2014-2024 PhiloSoft Design" %.py: %.tmpl - cheetah compile --nobackup $< && compyle $@ + python3 -m Cheetah.CheetahWrapper compile --nobackup $< && \ + exec python3 -m compileall $@ .PHONY: all all: html.py redirect.py diff --git a/html/response.py b/html/response.py index 071c5df..adfea5e 100644 --- a/html/response.py +++ b/html/response.py @@ -1,28 +1,28 @@ - import sys + def response(title, body, status=None): - from .html import html - result = html() + from .html import html + result = html() + + result.title = title + result.body = body - result.title = title - result.body = body + if status: + print("Status:", status) + sys.stdout.write(str(result.cgiHeaders())) + sys.stdout.write(str(result)) - if status: - print "Status:", status - sys.stdout.write(str(result.cgiHeaders())) - #sys.stdout.write(str(result)) - sys.stdout.write(unicode(result).encode('latin1')) def redirect(url, parameters=None, status=None): - import urllib - from .redirect import redirect - result = redirect() - if parameters: - result.url = url + '?' + urllib.urlencode(parameters) - else: - result.url = url - if status: - print "Status:", status - sys.stdout.write(str(result.cgiHeaders())) - sys.stdout.write(str(result)) + import urllib + from .redirect import redirect + result = redirect() + if parameters: + result.url = url + '?' + urllib.urlencode(parameters) + else: + result.url = url + if status: + print("Status:", status) + sys.stdout.write(str(result.cgiHeaders())) + sys.stdout.write(str(result)) diff --git a/parser/parser.py b/parser/parser.py index dbd0549..0d9351b 100755 --- a/parser/parser.py +++ b/parser/parser.py @@ -1,4 +1,4 @@ -#! /usr/bin/env python +#! /usr/bin/env python3 import os from lark import Lark, Transformer @@ -52,15 +52,15 @@ def parse(input): if __name__ == '__main__': - print '----------' - print parse('test') - print parse('!test') - print parse('not test') - print parse('foo or bar') - print parse('foo && bar') - print parse('foo && bar && baz') - print parse('!foo && bar && baz') - print parse('(test)') - print parse('(foo || bar)') - print parse('(foo and !bar)') - print '----------' + print('----------') + print(parse('test')) + print(parse('!test')) + print(parse('not test')) + print(parse('foo or bar')) + print(parse('foo && bar')) + print(parse('foo && bar && baz')) + print(parse('!foo && bar && baz')) + print(parse('(test)')) + print(parse('(foo || bar)')) + print(parse('(foo and !bar)')) + print('----------') diff --git a/parser/test_parser.py b/parser/test_parser.py index df24b06..cf053b8 100755 --- a/parser/test_parser.py +++ b/parser/test_parser.py @@ -1,9 +1,10 @@ -#! /usr/bin/env python +#! /usr/bin/env python3 import unittest from lark import LexError, ParseError from parser import parse + class TestParser(unittest.TestCase): def _parse(self, input): return parse(input) @@ -15,55 +16,67 @@ class TestParser(unittest.TestCase): self.assertRaises(LexError, self._parse, 'XXX') def test_04_expression(self): - self.assertEqual(self._parse('!(xxx&yyy)'), + self.assertEqual( + self._parse('!(xxx&yyy)'), ('NOT', ('PARENS', ('AND', ('NAME', 'xxx'), ('NAME', 'yyy')))) ) - self.assertEqual(self._parse('!(xxx & yyy)'), + self.assertEqual( + self._parse('!(xxx & yyy)'), ('NOT', ('PARENS', ('AND', ('NAME', 'xxx'), ('NAME', 'yyy')))) ) - self.assertEqual(self._parse('!xxx&yyy&zzz|ooo'), + self.assertEqual( + self._parse('!xxx&yyy&zzz|ooo'), ('OR', ('AND', - ('NOT', ('NAME', 'xxx')), - ('AND', ('NAME', 'yyy'), ('NAME', 'zzz'))), - ('NAME', 'ooo')) + ('NOT', ('NAME', 'xxx')), + ('AND', ('NAME', 'yyy'), ('NAME', 'zzz'))), + ('NAME', 'ooo')) ) - self.assertEqual(self._parse('!(xxx && yyy)'), + self.assertEqual( + self._parse('!(xxx && yyy)'), ('NOT', ('PARENS', ('AND', ('NAME', 'xxx'), ('NAME', 'yyy')))) ) - self.assertEqual(self._parse('!(xxx || yyy)'), + self.assertEqual( + self._parse('!(xxx || yyy)'), ('NOT', ('PARENS', ('OR', ('NAME', 'xxx'), ('NAME', 'yyy')))) ) - self.assertEqual(self._parse('xxx and yyy'), + self.assertEqual( + self._parse('xxx and yyy'), ('AND', ('NAME', 'xxx'), ('NAME', 'yyy')) ) - self.assertEqual(self._parse('xxx or yyy'), + self.assertEqual( + self._parse('xxx or yyy'), ('OR', ('NAME', 'xxx'), ('NAME', 'yyy')) ) - self.assertEqual(self._parse('xxx OR yyy'), + self.assertEqual( + self._parse('xxx OR yyy'), ('OR', ('NAME', 'xxx'), ('NAME', 'yyy')) ) - self.assertEqual(self._parse('not xxx'), + self.assertEqual( + self._parse('not xxx'), ('NOT', ('NAME', 'xxx')) ) - self.assertEqual(self._parse('NOT xxx'), + self.assertEqual( + self._parse('NOT xxx'), ('NOT', ('NAME', 'xxx')) ) - self.assertEqual(self._parse('NOT (xxx & yyy) AND zzz | ooo'), + self.assertEqual( + self._parse('NOT (xxx & yyy) AND zzz | ooo'), ('OR', ('AND', ('NOT', ('PARENS', ('AND', ('NAME', 'xxx'), ('NAME', 'yyy')) - ) - ), + ) + ), ('NAME', 'zzz') - ), + ), ('NAME', 'ooo') - ) + ) ) def test_05_bad_expression(self): self.assertRaises(ParseError, self._parse, '!(xxx&yyy') + if __name__ == "__main__": unittest.main() diff --git a/search-tags.py b/search-tags.py index c2884fa..a844960 100755 --- a/search-tags.py +++ b/search-tags.py @@ -1,18 +1,21 @@ -#! /usr/bin/env python +#! /usr/bin/env python3 # coding: koi8-r """Search tags CGI""" __author__ = "Oleg Broytman " -__copyright__ = "Copyright (C) 2014-2017 PhiloSoft Design" +__copyright__ = "Copyright (C) 2014-2024 PhiloSoft Design" __license__ = "GNU GPL" -import cgi, sys +import cgi +import sys + from lark import ParseError + from html.response import redirect, response from parser import parser form = cgi.FieldStorage() -if not form.has_key('q'): +if 'q' not in form: status = "400 Bad request" title = "Error!" body = "Required parameter is missing!" @@ -26,17 +29,18 @@ else: title = "Error!" body = "Bad query syntax!" else: - if tree[0] == 'NAME': # Single tag - just do redirect + if tree[0] == 'NAME': # Single tag - just do redirect tag = tree[1] assert isinstance(tag, str) from tags import tag_exists if tag_exists(tag): - redirect("/Russian/blog/tags/%s.html" % tag, status="301 Moved") + redirect( + "/Russian/blog/tags/%s.html" % tag, status="301 Moved") sys.exit() status = "404 Tag not found" title = "ïÛÉÂËÁ!" body = "ôÅÇ %s ÎÅ ÓÕÝÅÓÔ×ÕÅÔ!" % tag - else: # Process tree + else: # Process tree from tags import find_tags posts = find_tags(tree) status = None @@ -47,7 +51,10 @@ else:
    """] for year, month, day, suburl, _title in posts: - _posts.append('
  • %s-%s-%s %s
  • \n' % (year, month, day, suburl, _title)) + _posts.append( + '
  • %s-%s-%s ' + '%s
  • \n' + % (year, month, day, suburl, _title)) _posts .append("""\

diff --git a/tags.py b/tags.py index 44f1a07..3fe860b 100644 --- a/tags.py +++ b/tags.py @@ -1,29 +1,30 @@ blog_filename = "blog_dict.pickle" try: - import cPickle as pickle + import cPickle as pickle except ImportError: - import pickle + import pickle try: - blog_file = open('../../../../phdru.name/ru/' + blog_filename, "rb") + blog_file = open('../../../../phdru.name/ru/' + blog_filename, "rb") except IOError: - blog_dict = {} + blog_dict = {} else: - blog_dict = pickle.load(blog_file) - blog_file.close() + blog_dict = pickle.load(blog_file) + blog_file.close() # blog_dict is a mapping # (year, month, day) => [list of (file, title, lead, tags)] def tag_exists(tag): - for posts in blog_dict.itervalues(): + for posts in blog_dict.values(): for _file, _title, _lead, _tags in posts: if tag in _tags: return True return False + def _test_post(post, tree): """Test if the list of tags in the post satisfies condition @@ -50,6 +51,7 @@ def _test_post(post, tree): else: raise ValueError("Cannot get there") + def find_tags(tree): """Test every blog post against parsed expression @@ -57,12 +59,13 @@ def find_tags(tree): """ _posts = [] - for (year, month, day), posts in blog_dict.iteritems(): + for (year, month, day), posts in blog_dict.items(): for post in posts: if _test_post(post, tree): _posts.append(( year, month, day, - '/'.join((year, month, day, post[0][:-len("tmpl")] + "html")), + '/'.join( + (year, month, day, post[0][:-len("tmpl")] + "html")), post[1])) _posts.sort(reverse=True) return _posts -- 2.39.2 From 7affd5882b4c0af22bddb5916fae47da166987af Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Sun, 16 Jun 2024 02:10:58 +0300 Subject: [PATCH 08/16] Feat(Python3): Use `mycgi` --- requirements.txt | 3 +++ search-tags.py | 6 +++--- 2 files changed, 6 insertions(+), 3 deletions(-) create mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..913a44d --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +Cheetah3 +lark +mycgi diff --git a/search-tags.py b/search-tags.py index a844960..fec3705 100755 --- a/search-tags.py +++ b/search-tags.py @@ -6,15 +6,15 @@ __author__ = "Oleg Broytman " __copyright__ = "Copyright (C) 2014-2024 PhiloSoft Design" __license__ = "GNU GPL" -import cgi import sys from lark import ParseError +import mycgi from html.response import redirect, response from parser import parser -form = cgi.FieldStorage() +form = mycgi.Form() if 'q' not in form: status = "400 Bad request" title = "Error!" @@ -44,7 +44,7 @@ else: from tags import find_tags posts = find_tags(tree) status = None - title = "úÁÐÉÓÉ, ÎÁÊÄÅÎÎÙÅ ÄÌÑ ×ÙÒÁÖÅÎÉÑ " + cgi.escape(q) + title = "úÁÐÉÓÉ, ÎÁÊÄÅÎÎÙÅ ÄÌÑ ×ÙÒÁÖÅÎÉÑ " + mycgi.escape(q) if posts: _posts = ["""\

-- 2.39.2 From 6b8f72d09fb42173a895b00197fa1fb86009fc2f Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Sun, 16 Jun 2024 02:12:30 +0300 Subject: [PATCH 09/16] Feat(Python3): Rename `html` to `html_output` Do not shadow a module from the stdlib. --- html/.gitignore | 2 -- html_output/.gitignore | 2 ++ {html => html_output}/HTTPResponse.py | 0 {html => html_output}/Makefile | 2 +- {html => html_output}/Redirect.py | 0 {html => html_output}/__init__.py | 0 html/html.tmpl => html_output/html_output.tmpl | 2 +- {html => html_output}/redirect.tmpl | 2 +- {html => html_output}/response.py | 4 ++-- search-tags.py | 2 +- 10 files changed, 8 insertions(+), 8 deletions(-) delete mode 100644 html/.gitignore create mode 100644 html_output/.gitignore rename {html => html_output}/HTTPResponse.py (100%) rename {html => html_output}/Makefile (88%) rename {html => html_output}/Redirect.py (100%) rename {html => html_output}/__init__.py (100%) rename html/html.tmpl => html_output/html_output.tmpl (96%) rename {html => html_output}/redirect.tmpl (76%) rename {html => html_output}/response.py (89%) diff --git a/html/.gitignore b/html/.gitignore deleted file mode 100644 index f878ca4..0000000 --- a/html/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -/html.py -/redirect.py diff --git a/html_output/.gitignore b/html_output/.gitignore new file mode 100644 index 0000000..fd9c047 --- /dev/null +++ b/html_output/.gitignore @@ -0,0 +1,2 @@ +/html_output.py +/redirect.py diff --git a/html/HTTPResponse.py b/html_output/HTTPResponse.py similarity index 100% rename from html/HTTPResponse.py rename to html_output/HTTPResponse.py diff --git a/html/Makefile b/html_output/Makefile similarity index 88% rename from html/Makefile rename to html_output/Makefile index 9d43041..070b147 100644 --- a/html/Makefile +++ b/html_output/Makefile @@ -8,4 +8,4 @@ exec python3 -m compileall $@ .PHONY: all -all: html.py redirect.py +all: html_output.py redirect.py diff --git a/html/Redirect.py b/html_output/Redirect.py similarity index 100% rename from html/Redirect.py rename to html_output/Redirect.py diff --git a/html/__init__.py b/html_output/__init__.py similarity index 100% rename from html/__init__.py rename to html_output/__init__.py diff --git a/html/html.tmpl b/html_output/html_output.tmpl similarity index 96% rename from html/html.tmpl rename to html_output/html_output.tmpl index 9d46413..5701941 100644 --- a/html/html.tmpl +++ b/html_output/html_output.tmpl @@ -1,5 +1,5 @@ #encoding koi8-r -#extends HTTPResponse +#extends html_output.HTTPResponse #implements respond diff --git a/html/redirect.tmpl b/html_output/redirect.tmpl similarity index 76% rename from html/redirect.tmpl rename to html_output/redirect.tmpl index 02f3f37..97c2cef 100644 --- a/html/redirect.tmpl +++ b/html_output/redirect.tmpl @@ -1,4 +1,4 @@ -#extends Redirect +#extends html_output.Redirect #implements respond

Moved

diff --git a/html/response.py b/html_output/response.py similarity index 89% rename from html/response.py rename to html_output/response.py index adfea5e..9b0a0ab 100644 --- a/html/response.py +++ b/html_output/response.py @@ -2,8 +2,8 @@ import sys def response(title, body, status=None): - from .html import html - result = html() + from .html_output import html_output + result = html_output() result.title = title result.body = body diff --git a/search-tags.py b/search-tags.py index fec3705..2a0a0fa 100755 --- a/search-tags.py +++ b/search-tags.py @@ -11,7 +11,7 @@ import sys from lark import ParseError import mycgi -from html.response import redirect, response +from html_output.response import redirect, response from parser import parser form = mycgi.Form() -- 2.39.2 From ca6730a16664f67f3317766a35d52de7b3c3e870 Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Sun, 16 Jun 2024 02:24:22 +0300 Subject: [PATCH 10/16] Feat(Python3): Use `html.escape()` --- search-tags.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/search-tags.py b/search-tags.py index 2a0a0fa..6281a1b 100755 --- a/search-tags.py +++ b/search-tags.py @@ -6,6 +6,7 @@ __author__ = "Oleg Broytman " __copyright__ = "Copyright (C) 2014-2024 PhiloSoft Design" __license__ = "GNU GPL" +from html import escape import sys from lark import ParseError @@ -44,7 +45,7 @@ else: from tags import find_tags posts = find_tags(tree) status = None - title = "úÁÐÉÓÉ, ÎÁÊÄÅÎÎÙÅ ÄÌÑ ×ÙÒÁÖÅÎÉÑ " + mycgi.escape(q) + title = "úÁÐÉÓÉ, ÎÁÊÄÅÎÎÙÅ ÄÌÑ ×ÙÒÁÖÅÎÉÑ " + escape(q) if posts: _posts = ["""\

-- 2.39.2 From 117a079203b0b2edae63b654dca5decfc8da9c5c Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Sun, 16 Jun 2024 02:36:55 +0300 Subject: [PATCH 11/16] Feat(parser): Allow uppercase Latin letters and Russian letters --- parser/grammar.ebnf | 2 +- parser/test_parser.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/parser/grammar.ebnf b/parser/grammar.ebnf index 3be4d16..ccfe6eb 100644 --- a/parser/grammar.ebnf +++ b/parser/grammar.ebnf @@ -36,7 +36,7 @@ not_expression: not and_sub_expression expression_parens : "(" expression ")" -name : /[a-z][a-z0-9_]+/ +name : /[A-Za-zá-ñÁ-Ñ][A-Za-zá-ñÁ-Ñ0-9_]+/ ?and : and_op | and_op and_op diff --git a/parser/test_parser.py b/parser/test_parser.py index cf053b8..6027ce5 100755 --- a/parser/test_parser.py +++ b/parser/test_parser.py @@ -13,7 +13,7 @@ class TestParser(unittest.TestCase): self.assertEqual(self._parse('xxx'), ('NAME', 'xxx')) def test_03_bad_tag(self): - self.assertRaises(LexError, self._parse, 'XXX') + self.assertRaises(LexError, self._parse, '0XXX') def test_04_expression(self): self.assertEqual( -- 2.39.2 From f44018177d69b84a4e5aeb1ecf037f740dd1aaab Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Sun, 16 Jun 2024 02:37:48 +0300 Subject: [PATCH 12/16] Tests: Update tests --- test_tags | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test_tags b/test_tags index 754fe52..c7b1b8d 100755 --- a/test_tags +++ b/test_tags @@ -1,5 +1,5 @@ #! /bin/sh -QUERY_STRING='q=linux' ./search-tags.py | $PAGER -QUERY_STRING='q=linux%26%21audio' ./search-tags.py | $PAGER -QUERY_STRING='q=linux%26%21audio%26%21debian' ./search-tags.py | $PAGER +QUERY_STRING='q=Linux' ./search-tags.py | $PAGER +QUERY_STRING='q=Linux%26%21audio' ./search-tags.py | $PAGER +QUERY_STRING='q=Linux%26%21audio%26%21debian' ./search-tags.py | $PAGER -- 2.39.2 From ec1760a789b025060639ff9fae941a34e22390f9 Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Sun, 16 Jun 2024 03:02:12 +0300 Subject: [PATCH 13/16] Build: Add `Makefile` to manage a virtual env --- Makefile | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 Makefile diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..f07aa0f --- /dev/null +++ b/Makefile @@ -0,0 +1,26 @@ +.SUFFIXES: # Clear the suffix list + +PYTHON=python3 +VENVDIR=../../../../.search-tags-venv + +.PHONY: all +all: + @echo Nothing to do for \`all\' + +.PHONY: clean-venv +clean-venv: + rm -rf $(VENVDIR) + +.PHONY: venv +venv: + @if [ -d $(VENVDIR) ] ; then \ + echo "venv already exists."; \ + echo "To recreate it, remove it first with \`make clean-venv'."; \ + else \ + sudo apt-get install -y python3-pip python3-wheel python3-virtualenv; \ + $(PYTHON) -m virtualenv $(VENVDIR); \ + $(VENVDIR)/bin/python -m pip install -U --compile pip wheel; \ + $(VENVDIR)/bin/python -m pip install -U --compile -r requirements.txt; \ + echo "The venv has been created in the $(VENVDIR) directory"; \ + fi + -- 2.39.2 From a4b4cf32ed4f7a1037acab2f22da6e14010e5d0a Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Sun, 16 Jun 2024 15:01:39 +0300 Subject: [PATCH 14/16] =?utf8?q?Feat(parser):=20=D0=9E=D0=BF=D0=B5=D1=80?= =?utf8?q?=D0=B0=D1=82=D0=BE=D1=80=D1=8B=20=D0=98,=20=D0=98=D0=9B=D0=98,?= =?utf8?q?=20=D0=9D=D0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit --- parser/grammar.ebnf | 6 ++++++ parser/parser.py | 4 +++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/parser/grammar.ebnf b/parser/grammar.ebnf index ccfe6eb..2877e8f 100644 --- a/parser/grammar.ebnf +++ b/parser/grammar.ebnf @@ -57,12 +57,18 @@ name : /[A-Za-z ?and_word : "AND" | "and" + | "é" + | "É" ?or_word : "OR" | "or" + | "éìé" + | "ÉÌÉ" ?not_word : "NOT" | "not" + | "îå" + | "ÎÅ" %import common.WS %ignore WS diff --git a/parser/parser.py b/parser/parser.py index 0d9351b..da5c6e5 100755 --- a/parser/parser.py +++ b/parser/parser.py @@ -1,4 +1,5 @@ #! /usr/bin/env python3 +# coding: koi8-r import os from lark import Lark, Transformer @@ -58,9 +59,10 @@ if __name__ == '__main__': print(parse('not test')) print(parse('foo or bar')) print(parse('foo && bar')) - print(parse('foo && bar && baz')) + print(parse('Foo && bar && baz')) print(parse('!foo && bar && baz')) print(parse('(test)')) print(parse('(foo || bar)')) print(parse('(foo and !bar)')) + print(parse(u'(ôÅÓÔ ÉÌÉ ÔÅÓÔ)')) print('----------') -- 2.39.2 From 8b3def415a8d3a454f522238396864aaffbd93d5 Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Sun, 16 Jun 2024 16:21:53 +0300 Subject: [PATCH 15/16] Refactor(parser): Improve grammar: do not allow trailing `_` --- parser/grammar.ebnf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parser/grammar.ebnf b/parser/grammar.ebnf index 2877e8f..8e0a403 100644 --- a/parser/grammar.ebnf +++ b/parser/grammar.ebnf @@ -36,7 +36,7 @@ not_expression: not and_sub_expression expression_parens : "(" expression ")" -name : /[A-Za-zá-ñÁ-Ñ][A-Za-zá-ñÁ-Ñ0-9_]+/ +name : /[A-Za-zá-ñÁ-Ñ][A-Za-zá-ñÁ-Ñ0-9_]*[A-Za-zá-ñÁ-Ñ0-9]/ ?and : and_op | and_op and_op -- 2.39.2 From a269183254b03e72131d9060adad7ea0ee40ac56 Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Sun, 16 Jun 2024 16:22:33 +0300 Subject: [PATCH 16/16] Feat: Search tags ignoring case --- search-tags.py | 7 ++++--- tags.py | 27 ++++++++++++++++++++------- 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/search-tags.py b/search-tags.py index 6281a1b..63f4241 100755 --- a/search-tags.py +++ b/search-tags.py @@ -33,10 +33,11 @@ else: if tree[0] == 'NAME': # Single tag - just do redirect tag = tree[1] assert isinstance(tag, str) - from tags import tag_exists - if tag_exists(tag): + from tags import real_tag + rtag = real_tag(tag) + if rtag: redirect( - "/Russian/blog/tags/%s.html" % tag, status="301 Moved") + "/Russian/blog/tags/%s.html" % rtag, status="301 Moved") sys.exit() status = "404 Tag not found" title = "ïÛÉÂËÁ!" diff --git a/tags.py b/tags.py index 3fe860b..a4ae88b 100644 --- a/tags.py +++ b/tags.py @@ -17,12 +17,26 @@ else: # blog_dict is a mapping # (year, month, day) => [list of (file, title, lead, tags)] -def tag_exists(tag): +# Add lower-case tags +_new_dict = {} +for (year, month, day), posts in blog_dict.items(): + _new_dict[year, month, day] = _posts = [] + for _file, _title, _lead, _tags in posts: + tags_lower = [tag.lower() for tag in _tags] + _posts.append((_file, _title, _lead, _tags, tags_lower)) +blog_dict = _new_dict + + +def real_tag(tag): + ltag = tag.lower() for posts in blog_dict.values(): - for _file, _title, _lead, _tags in posts: - if tag in _tags: - return True - return False + for _file, _title, _lead, _tags, _tags_lower in posts: + try: + ix = _tags_lower.index(ltag) + except ValueError: + continue + else: + return _tags[ix] def _test_post(post, tree): @@ -35,8 +49,7 @@ def _test_post(post, tree): if op == 'NAME': tag = tree[1] assert isinstance(tag, str) - _tags = post[3] - return tag in _tags + return tag.lower() in post[4] elif op in ('AND', 'OR'): value1 = _test_post(post, tree[1]) value2 = _test_post(post, tree[2]) -- 2.39.2