From 37e27a836a08c1746612101ad128431ab95baafe Mon Sep 17 00:00:00 2001 From: Oleg Broytman Date: Wed, 25 Feb 2009 20:08:14 +0000 Subject: [PATCH] Extended URL regexp. Enhanced wikipedia quoting. git-svn-id: file:///home/phd/archive/SVN/phdru.name/scripts@81 7bb0bf08-9e0d-0410-b083-99cee3bf18b8 --- phd_pp.py | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/phd_pp.py b/phd_pp.py index 45b17aa..80331b0 100644 --- a/phd_pp.py +++ b/phd_pp.py @@ -1,19 +1,12 @@ import os, re, time, urllib from Cheetah.Template import Template - -# Copied from ZWiki and Gajim - -urlchars = r'[A-Za-z0-9/:@_%~#=&\.\-\?\+\$,]+' -urlendchar = r'[A-Za-z0-9/]' -url = r'["=]?((about|file|ftp|gopher|http|https|mailto|news|telnet|ed2k|irc|sip|magnet):%s)' % \ - (urlchars+urlendchar) +url_re = r"(((https?|ftp|gopher|telnet)://|(mailto|file|news|about|ed2k|irc|sip|magnet):)[^' \t<>\"]+|(www|web|w3)[A-Za-z0-9_-]*\.[A-Za-z0-9._-]+\.[^' \t<>\"]+)[A-Za-z0-9/]" def _url2href(match): url = match.group(0) return '<a href="%s">%s</a>' % (url, url) - full_dirs = len(os.getcwd().split('/')) + 1 class phd_pp(Template): @@ -50,7 +43,7 @@ class phd_pp(Template): return self.rst2html() def text2html(self): - body = re.sub(url, _url2href, self.body_text()) + body = re.sub(url_re, _url2href, self.body_text()) paragraphs = body.split("\n\n") @@ -102,11 +95,10 @@ class phd_pp(Template): return "http://www.yandex.ru/yandsearch?text=%s&rpt=rad" % quote_string(query, "cp1251") def wikipedia(self, query): - return "http://en.wikipedia.org/wiki/%s" % quote_string(query.replace(' ', '_')) + return "http://en.wikipedia.org/wiki/%s" % quote_string(query.replace(' ', '_'), ext_safe=',') def wikipedia_ru(self, query): - return "http://ru.wikipedia.org/wiki/%s" % quote_string(query.replace(' ', '_')) - + return "http://ru.wikipedia.org/wiki/%s" % quote_string(query.replace(' ', '_'), ext_safe=',') -def quote_string(s, to_encoding="utf-8"): - return urllib.quote(unicode(s, "koi8-r").encode(to_encoding)) 
+def quote_string(s, to_encoding="utf-8", ext_safe=''): + return urllib.quote(unicode(s, "koi8-r").encode(to_encoding), '/' + ext_safe) -- 2.39.2