diff --git a/ChangeLog b/ChangeLog index f30af2fdb..91f9efd05 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +2013-06-25 Werner Lemberg + + [docmaker] Code shuffling. + + * src/tools/docmaker/tohtml.py (re_url): Move regexp... + * src/tools/docmaker/sources.py: ... to this file. + 2013-06-25 Werner Lemberg [docmaker] Remove unused functions. diff --git a/src/tools/docmaker/sources.py b/src/tools/docmaker/sources.py index 490ba2506..df9620886 100644 --- a/src/tools/docmaker/sources.py +++ b/src/tools/docmaker/sources.py @@ -1,4 +1,4 @@ -# Sources (c) 2002-2004, 2006-2009, 2012 +# Sources (c) 2002-2004, 2006-2009, 2012, 2013 # David Turner # # @@ -140,6 +140,42 @@ re_crossref = re.compile( r'@((?:\w|-)*)(.*)' ) re_italic = re.compile( r"_(\w(\w|')*)_(.*)" ) # _italic_ re_bold = re.compile( r"\*(\w(\w|')*)\*(.*)" ) # *bold* +# +# this regular expression code to identify an URL has been taken from +# +# http://mail.python.org/pipermail/tutor/2002-September/017228.html +# +# (with slight modifications) +# + +urls = r'(?:https?|telnet|gopher|file|wais|ftp)' +ltrs = r'\w' +gunk = r'/#~:.?+=&%@!\-' +punc = r'.:?\-' +any = "%(ltrs)s%(gunk)s%(punc)s" % { 'ltrs' : ltrs, + 'gunk' : gunk, + 'punc' : punc } +url = r""" + ( + \b # start at word boundary + %(urls)s : # need resource and a colon + [%(any)s] +? # followed by one or more of any valid + # character, but be conservative and + # take only what you need to... + (?= # [look-ahead non-consumptive assertion] + [%(punc)s]* # either 0 or more punctuation + (?: # [non-grouping parentheses] + [^%(any)s] | $ # followed by a non-url char + # or end of the string + ) + ) + ) + """ % {'urls' : urls, + 'any' : any, + 'punc' : punc } + +re_url = re.compile( url, re.VERBOSE | re.MULTILINE ) + # # used to detect the end of commented source lines # diff --git a/src/tools/docmaker/tohtml.py b/src/tools/docmaker/tohtml.py index 4b0d0f9bd..7944f1c99 100644 --- a/src/tools/docmaker/tohtml.py +++ b/src/tools/docmaker/tohtml.py @@ -5,42 +5,7 @@ from sources import * from content import * from formatter import * -import time, re - - -# this regular expression code to identify an URL has been taken from -# -# http://mail.python.org/pipermail/tutor/2002-September/017228.html -# -# (with slight modifications) - -urls = r'(?:https?|telnet|gopher|file|wais|ftp)' -ltrs = r'\w' -gunk = r'/#~:.?+=&%@!\-' -punc = r'.:?\-' -any = "%(ltrs)s%(gunk)s%(punc)s" % { 'ltrs' : ltrs, - 'gunk' : gunk, - 'punc' : punc } -url = r""" - ( - \b # start at word boundary - %(urls)s : # need resource and a colon - [%(any)s] +? # followed by one or more of any valid - # character, but be conservative and - # take only what you need to... - (?= # [look-ahead non-consumptive assertion] - [%(punc)s]* # either 0 or more punctuation - (?: # [non-grouping parentheses] - [^%(any)s] | $ # followed by a non-url char - # or end of the string - ) - ) - ) - """ % {'urls' : urls, - 'any' : any, - 'punc' : punc } - -re_url = re.compile( url, re.VERBOSE | re.MULTILINE ) +import time # The following defines the HTML header used by all generated pages.