diff options
author | Chris Johns <chrisj@rtems.org> | 2018-02-02 10:59:21 +1100 |
---|---|---|
committer | Chris Johns <chrisj@rtems.org> | 2018-02-05 14:22:27 +1100 |
commit | 652a90f0a8613ee3bc4112a69137730b5abd9938 (patch) | |
tree | f462f74e3e2ce3758a1ae4d2cce87aee54429a78 /source-builder/sb/markdown/extensions | |
parent | sb: Backport from 4.11 the --rsb-file with releases fixes. (diff) | |
download | rtems-source-builder-652a90f0a8613ee3bc4112a69137730b5abd9938.tar.bz2 |
sb: Back port changes to support mailing list posting.
Close #3287
Diffstat (limited to 'source-builder/sb/markdown/extensions')
18 files changed, 2675 insertions, 0 deletions
diff --git a/source-builder/sb/markdown/extensions/__init__.py b/source-builder/sb/markdown/extensions/__init__.py new file mode 100644 index 0000000..6e7a08a --- /dev/null +++ b/source-builder/sb/markdown/extensions/__init__.py @@ -0,0 +1,100 @@ +""" +Extensions +----------------------------------------------------------------------------- +""" + +from __future__ import unicode_literals +from ..util import parseBoolValue +import warnings + + +class Extension(object): + """ Base class for extensions to subclass. """ + + # Default config -- to be overriden by a subclass + # Must be of the following format: + # { + # 'key': ['value', 'description'] + # } + # Note that Extension.setConfig will raise a KeyError + # if a default is not set here. + config = {} + + def __init__(self, *args, **kwargs): + """ Initiate Extension and set up configs. """ + + # check for configs arg for backward compat. + # (there only ever used to be one so we use arg[0]) + if len(args): + if args[0] is not None: + self.setConfigs(args[0]) + warnings.warn('Extension classes accepting positional args is ' + 'pending Deprecation. Each setting should be ' + 'passed into the Class as a keyword. Positional ' + 'args are deprecated and will raise ' + 'an error in version 2.7. See the Release Notes for ' + 'Python-Markdown version 2.6 for more info.', + DeprecationWarning) + # check for configs kwarg for backward compat. + if 'configs' in kwargs.keys(): + if kwargs['configs'] is not None: + self.setConfigs(kwargs.pop('configs', {})) + warnings.warn('Extension classes accepting a dict on the single ' + 'keyword "config" is pending Deprecation. Each ' + 'setting should be passed into the Class as a ' + 'keyword directly. The "config" keyword is ' + 'deprecated and raise an error in ' + 'version 2.7. See the Release Notes for ' + 'Python-Markdown version 2.6 for more info.', + DeprecationWarning) + # finally, use kwargs + self.setConfigs(kwargs) + + def getConfig(self, key, default=''): + """ Return a setting for the given key or an empty string. """ + if key in self.config: + return self.config[key][0] + else: + return default + + def getConfigs(self): + """ Return all configs settings as a dict. """ + return dict([(key, self.getConfig(key)) for key in self.config.keys()]) + + def getConfigInfo(self): + """ Return all config descriptions as a list of tuples. """ + return [(key, self.config[key][1]) for key in self.config.keys()] + + def setConfig(self, key, value): + """ Set a config setting for `key` with the given `value`. """ + if isinstance(self.config[key][0], bool): + value = parseBoolValue(value) + if self.config[key][0] is None: + value = parseBoolValue(value, preserve_none=True) + self.config[key][0] = value + + def setConfigs(self, items): + """ Set multiple config settings given a dict or list of tuples. """ + if hasattr(items, 'items'): + # it's a dict + items = items.items() + for key, value in items: + self.setConfig(key, value) + + def extendMarkdown(self, md, md_globals): + """ + Add the various proccesors and patterns to the Markdown Instance. + + This method must be overriden by every extension. + + Keyword arguments: + + * md: The Markdown instance. + + * md_globals: Global variables in the markdown module namespace. + + """ + raise NotImplementedError( + 'Extension "%s.%s" must define an "extendMarkdown"' + 'method.' % (self.__class__.__module__, self.__class__.__name__) + ) diff --git a/source-builder/sb/markdown/extensions/abbr.py b/source-builder/sb/markdown/extensions/abbr.py new file mode 100644 index 0000000..353d126 --- /dev/null +++ b/source-builder/sb/markdown/extensions/abbr.py @@ -0,0 +1,91 @@ +''' +Abbreviation Extension for Python-Markdown +========================================== + +This extension adds abbreviation handling to Python-Markdown. + +See <https://pythonhosted.org/Markdown/extensions/abbreviations.html> +for documentation. + +Oringinal code Copyright 2007-2008 [Waylan Limberg](http://achinghead.com/) and + [Seemant Kulleen](http://www.kulleen.org/) + +All changes Copyright 2008-2014 The Python Markdown Project + +License: [BSD](http://www.opensource.org/licenses/bsd-license.php) + +''' + +from __future__ import absolute_import +from __future__ import unicode_literals +from . import Extension +from ..preprocessors import Preprocessor +from ..inlinepatterns import Pattern +from ..util import etree, AtomicString +import re + +# Global Vars +ABBR_REF_RE = re.compile(r'[*]\[(?P<abbr>[^\]]*)\][ ]?:\s*(?P<title>.*)') + + +class AbbrExtension(Extension): + """ Abbreviation Extension for Python-Markdown. """ + + def extendMarkdown(self, md, md_globals): + """ Insert AbbrPreprocessor before ReferencePreprocessor. """ + md.preprocessors.add('abbr', AbbrPreprocessor(md), '<reference') + + +class AbbrPreprocessor(Preprocessor): + """ Abbreviation Preprocessor - parse text for abbr references. """ + + def run(self, lines): + ''' + Find and remove all Abbreviation references from the text. + Each reference is set as a new AbbrPattern in the markdown instance. + + ''' + new_text = [] + for line in lines: + m = ABBR_REF_RE.match(line) + if m: + abbr = m.group('abbr').strip() + title = m.group('title').strip() + self.markdown.inlinePatterns['abbr-%s' % abbr] = \ + AbbrPattern(self._generate_pattern(abbr), title) + else: + new_text.append(line) + return new_text + + def _generate_pattern(self, text): + ''' + Given a string, returns an regex pattern to match that string. + + 'HTML' -> r'(?P<abbr>[H][T][M][L])' + + Note: we force each char as a literal match (in brackets) as we don't + know what they will be beforehand. + + ''' + chars = list(text) + for i in range(len(chars)): + chars[i] = r'[%s]' % chars[i] + return r'(?P<abbr>\b%s\b)' % (r''.join(chars)) + + +class AbbrPattern(Pattern): + """ Abbreviation inline pattern. """ + + def __init__(self, pattern, title): + super(AbbrPattern, self).__init__(pattern) + self.title = title + + def handleMatch(self, m): + abbr = etree.Element('abbr') + abbr.text = AtomicString(m.group('abbr')) + abbr.set('title', self.title) + return abbr + + +def makeExtension(*args, **kwargs): + return AbbrExtension(*args, **kwargs) diff --git a/source-builder/sb/markdown/extensions/admonition.py b/source-builder/sb/markdown/extensions/admonition.py new file mode 100644 index 0000000..76e0fb5 --- /dev/null +++ b/source-builder/sb/markdown/extensions/admonition.py @@ -0,0 +1,96 @@ +""" +Admonition extension for Python-Markdown +======================================== + +Adds rST-style admonitions. Inspired by [rST][] feature with the same name. + +[rST]: http://docutils.sourceforge.net/docs/ref/rst/directives.html#specific-admonitions # noqa + +See <https://pythonhosted.org/Markdown/extensions/admonition.html> +for documentation. + +Original code Copyright [Tiago Serafim](http://www.tiagoserafim.com/). + +All changes Copyright The Python Markdown Project + +License: [BSD](http://www.opensource.org/licenses/bsd-license.php) + +""" + +from __future__ import absolute_import +from __future__ import unicode_literals +from . import Extension +from ..blockprocessors import BlockProcessor +from ..util import etree +import re + + +class AdmonitionExtension(Extension): + """ Admonition extension for Python-Markdown. """ + + def extendMarkdown(self, md, md_globals): + """ Add Admonition to Markdown instance. """ + md.registerExtension(self) + + md.parser.blockprocessors.add('admonition', + AdmonitionProcessor(md.parser), + '_begin') + + +class AdmonitionProcessor(BlockProcessor): + + CLASSNAME = 'admonition' + CLASSNAME_TITLE = 'admonition-title' + RE = re.compile(r'(?:^|\n)!!!\ ?([\w\-]+)(?:\ "(.*?)")?') + + def test(self, parent, block): + sibling = self.lastChild(parent) + return self.RE.search(block) or \ + (block.startswith(' ' * self.tab_length) and sibling is not None and + sibling.get('class', '').find(self.CLASSNAME) != -1) + + def run(self, parent, blocks): + sibling = self.lastChild(parent) + block = blocks.pop(0) + m = self.RE.search(block) + + if m: + block = block[m.end() + 1:] # removes the first line + + block, theRest = self.detab(block) + + if m: + klass, title = self.get_class_and_title(m) + div = etree.SubElement(parent, 'div') + div.set('class', '%s %s' % (self.CLASSNAME, klass)) + if title: + p = etree.SubElement(div, 'p') + p.text = title + p.set('class', self.CLASSNAME_TITLE) + else: + div = sibling + + self.parser.parseChunk(div, block) + + if theRest: + # This block contained unindented line(s) after the first indented + # line. Insert these lines as the first block of the master blocks + # list for future processing. + blocks.insert(0, theRest) + + def get_class_and_title(self, match): + klass, title = match.group(1).lower(), match.group(2) + if title is None: + # no title was provided, use the capitalized classname as title + # e.g.: `!!! note` will render + # `<p class="admonition-title">Note</p>` + title = klass.capitalize() + elif title == '': + # an explicit blank title should not be rendered + # e.g.: `!!! warning ""` will *not* render `p` with a title + title = None + return klass, title + + +def makeExtension(*args, **kwargs): + return AdmonitionExtension(*args, **kwargs) diff --git a/source-builder/sb/markdown/extensions/attr_list.py b/source-builder/sb/markdown/extensions/attr_list.py new file mode 100644 index 0000000..a7f92b6 --- /dev/null +++ b/source-builder/sb/markdown/extensions/attr_list.py @@ -0,0 +1,178 @@ +""" +Attribute List Extension for Python-Markdown +============================================ + +Adds attribute list syntax. Inspired by +[maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s +feature of the same name. + +See <https://pythonhosted.org/Markdown/extensions/attr_list.html> +for documentation. + +Original code Copyright 2011 [Waylan Limberg](http://achinghead.com/). + +All changes Copyright 2011-2014 The Python Markdown Project + +License: [BSD](http://www.opensource.org/licenses/bsd-license.php) + +""" + +from __future__ import absolute_import +from __future__ import unicode_literals +from . import Extension +from ..treeprocessors import Treeprocessor +from ..util import isBlockLevel +import re + +try: + Scanner = re.Scanner +except AttributeError: # pragma: no cover + # must be on Python 2.4 + from sre import Scanner + + +def _handle_double_quote(s, t): + k, v = t.split('=', 1) + return k, v.strip('"') + + +def _handle_single_quote(s, t): + k, v = t.split('=', 1) + return k, v.strip("'") + + +def _handle_key_value(s, t): + return t.split('=', 1) + + +def _handle_word(s, t): + if t.startswith('.'): + return '.', t[1:] + if t.startswith('#'): + return 'id', t[1:] + return t, t + + +_scanner = Scanner([ + (r'[^ =]+=".*?"', _handle_double_quote), + (r"[^ =]+='.*?'", _handle_single_quote), + (r'[^ =]+=[^ =]+', _handle_key_value), + (r'[^ =]+', _handle_word), + (r' ', None) +]) + + +def get_attrs(str): + """ Parse attribute list and return a list of attribute tuples. """ + return _scanner.scan(str)[0] + + +def isheader(elem): + return elem.tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'] + + +class AttrListTreeprocessor(Treeprocessor): + + BASE_RE = r'\{\:?([^\}\n]*)\}' + HEADER_RE = re.compile(r'[ ]+%s[ ]*$' % BASE_RE) + BLOCK_RE = re.compile(r'\n[ ]*%s[ ]*$' % BASE_RE) + INLINE_RE = re.compile(r'^%s' % BASE_RE) + NAME_RE = re.compile(r'[^A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff' + r'\u0370-\u037d\u037f-\u1fff\u200c-\u200d' + r'\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff' + r'\uf900-\ufdcf\ufdf0-\ufffd' + r'\:\-\.0-9\u00b7\u0300-\u036f\u203f-\u2040]+') + + def run(self, doc): + for elem in doc.iter(): + if isBlockLevel(elem.tag): + # Block level: check for attrs on last line of text + RE = self.BLOCK_RE + if isheader(elem) or elem.tag == 'dt': + # header or def-term: check for attrs at end of line + RE = self.HEADER_RE + if len(elem) and elem.tag == 'li': + # special case list items. children may include a ul or ol. + pos = None + # find the ul or ol position + for i, child in enumerate(elem): + if child.tag in ['ul', 'ol']: + pos = i + break + if pos is None and elem[-1].tail: + # use tail of last child. no ul or ol. + m = RE.search(elem[-1].tail) + if m: + self.assign_attrs(elem, m.group(1)) + elem[-1].tail = elem[-1].tail[:m.start()] + elif pos is not None and pos > 0 and elem[pos-1].tail: + # use tail of last child before ul or ol + m = RE.search(elem[pos-1].tail) + if m: + self.assign_attrs(elem, m.group(1)) + elem[pos-1].tail = elem[pos-1].tail[:m.start()] + elif elem.text: + # use text. ul is first child. + m = RE.search(elem.text) + if m: + self.assign_attrs(elem, m.group(1)) + elem.text = elem.text[:m.start()] + elif len(elem) and elem[-1].tail: + # has children. Get from tail of last child + m = RE.search(elem[-1].tail) + if m: + self.assign_attrs(elem, m.group(1)) + elem[-1].tail = elem[-1].tail[:m.start()] + if isheader(elem): + # clean up trailing #s + elem[-1].tail = elem[-1].tail.rstrip('#').rstrip() + elif elem.text: + # no children. Get from text. + m = RE.search(elem.text) + if not m and elem.tag == 'td': + m = re.search(self.BASE_RE, elem.text) + if m: + self.assign_attrs(elem, m.group(1)) + elem.text = elem.text[:m.start()] + if isheader(elem): + # clean up trailing #s + elem.text = elem.text.rstrip('#').rstrip() + else: + # inline: check for attrs at start of tail + if elem.tail: + m = self.INLINE_RE.match(elem.tail) + if m: + self.assign_attrs(elem, m.group(1)) + elem.tail = elem.tail[m.end():] + + def assign_attrs(self, elem, attrs): + """ Assign attrs to element. """ + for k, v in get_attrs(attrs): + if k == '.': + # add to class + cls = elem.get('class') + if cls: + elem.set('class', '%s %s' % (cls, v)) + else: + elem.set('class', v) + else: + # assign attr k with v + elem.set(self.sanitize_name(k), v) + + def sanitize_name(self, name): + """ + Sanitize name as 'an XML Name, minus the ":"'. + See http://www.w3.org/TR/REC-xml-names/#NT-NCName + """ + return self.NAME_RE.sub('_', name) + + +class AttrListExtension(Extension): + def extendMarkdown(self, md, md_globals): + md.treeprocessors.add( + 'attr_list', AttrListTreeprocessor(md), '>prettify' + ) + + +def makeExtension(*args, **kwargs): + return AttrListExtension(*args, **kwargs) diff --git a/source-builder/sb/markdown/extensions/codehilite.py b/source-builder/sb/markdown/extensions/codehilite.py new file mode 100644 index 0000000..20b889c --- /dev/null +++ b/source-builder/sb/markdown/extensions/codehilite.py @@ -0,0 +1,265 @@ +""" +CodeHilite Extension for Python-Markdown +======================================== + +Adds code/syntax highlighting to standard Python-Markdown code blocks. + +See <https://pythonhosted.org/Markdown/extensions/code_hilite.html> +for documentation. + +Original code Copyright 2006-2008 [Waylan Limberg](http://achinghead.com/). + +All changes Copyright 2008-2014 The Python Markdown Project + +License: [BSD](http://www.opensource.org/licenses/bsd-license.php) + +""" + +from __future__ import absolute_import +from __future__ import unicode_literals +from . import Extension +from ..treeprocessors import Treeprocessor + +try: + from pygments import highlight + from pygments.lexers import get_lexer_by_name, guess_lexer + from pygments.formatters import get_formatter_by_name + pygments = True +except ImportError: + pygments = False + + +def parse_hl_lines(expr): + """Support our syntax for emphasizing certain lines of code. + + expr should be like '1 2' to emphasize lines 1 and 2 of a code block. + Returns a list of ints, the line numbers to emphasize. + """ + if not expr: + return [] + + try: + return list(map(int, expr.split())) + except ValueError: + return [] + + +# ------------------ The Main CodeHilite Class ---------------------- +class CodeHilite(object): + """ + Determine language of source code, and pass it into pygments hilighter. + + Basic Usage: + >>> code = CodeHilite(src = 'some text') + >>> html = code.hilite() + + * src: Source string or any object with a .readline attribute. + + * linenums: (Boolean) Set line numbering to 'on' (True), + 'off' (False) or 'auto'(None). Set to 'auto' by default. + + * guess_lang: (Boolean) Turn language auto-detection + 'on' or 'off' (on by default). + + * css_class: Set class name of wrapper div ('codehilite' by default). + + * hl_lines: (List of integers) Lines to emphasize, 1-indexed. + + Low Level Usage: + >>> code = CodeHilite() + >>> code.src = 'some text' # String or anything with a .readline attr. + >>> code.linenos = True # Turns line numbering on or of. + >>> html = code.hilite() + + """ + + def __init__(self, src=None, linenums=None, guess_lang=True, + css_class="codehilite", lang=None, style='default', + noclasses=False, tab_length=4, hl_lines=None, use_pygments=True): + self.src = src + self.lang = lang + self.linenums = linenums + self.guess_lang = guess_lang + self.css_class = css_class + self.style = style + self.noclasses = noclasses + self.tab_length = tab_length + self.hl_lines = hl_lines or [] + self.use_pygments = use_pygments + + def hilite(self): + """ + Pass code to the [Pygments](http://pygments.pocoo.org/) highliter with + optional line numbers. The output should then be styled with css to + your liking. No styles are applied by default - only styling hooks + (i.e.: <span class="k">). + + returns : A string of html. + + """ + + self.src = self.src.strip('\n') + + if self.lang is None: + self._parseHeader() + + if pygments and self.use_pygments: + try: + lexer = get_lexer_by_name(self.lang) + except ValueError: + try: + if self.guess_lang: + lexer = guess_lexer(self.src) + else: + lexer = get_lexer_by_name('text') + except ValueError: + lexer = get_lexer_by_name('text') + formatter = get_formatter_by_name('html', + linenos=self.linenums, + cssclass=self.css_class, + style=self.style, + noclasses=self.noclasses, + hl_lines=self.hl_lines) + return highlight(self.src, lexer, formatter) + else: + # just escape and build markup usable by JS highlighting libs + txt = self.src.replace('&', '&') + txt = txt.replace('<', '<') + txt = txt.replace('>', '>') + txt = txt.replace('"', '"') + classes = [] + if self.lang: + classes.append('language-%s' % self.lang) + if self.linenums: + classes.append('linenums') + class_str = '' + if classes: + class_str = ' class="%s"' % ' '.join(classes) + return '<pre class="%s"><code%s>%s</code></pre>\n' % \ + (self.css_class, class_str, txt) + + def _parseHeader(self): + """ + Determines language of a code block from shebang line and whether said + line should be removed or left in place. If the sheband line contains a + path (even a single /) then it is assumed to be a real shebang line and + left alone. However, if no path is given (e.i.: #!python or :::python) + then it is assumed to be a mock shebang for language identifitation of + a code fragment and removed from the code block prior to processing for + code highlighting. When a mock shebang (e.i: #!python) is found, line + numbering is turned on. When colons are found in place of a shebang + (e.i.: :::python), line numbering is left in the current state - off + by default. + + Also parses optional list of highlight lines, like: + + :::python hl_lines="1 3" + """ + + import re + + # split text into lines + lines = self.src.split("\n") + # pull first line to examine + fl = lines.pop(0) + + c = re.compile(r''' + (?:(?:^::+)|(?P<shebang>^[#]!)) # Shebang or 2 or more colons + (?P<path>(?:/\w+)*[/ ])? # Zero or 1 path + (?P<lang>[\w#.+-]*) # The language + \s* # Arbitrary whitespace + # Optional highlight lines, single- or double-quote-delimited + (hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot))? + ''', re.VERBOSE) + # search first line for shebang + m = c.search(fl) + if m: + # we have a match + try: + self.lang = m.group('lang').lower() + except IndexError: + self.lang = None + if m.group('path'): + # path exists - restore first line + lines.insert(0, fl) + if self.linenums is None and m.group('shebang'): + # Overridable and Shebang exists - use line numbers + self.linenums = True + + self.hl_lines = parse_hl_lines(m.group('hl_lines')) + else: + # No match + lines.insert(0, fl) + + self.src = "\n".join(lines).strip("\n") + + +# ------------------ The Markdown Extension ------------------------------- + + +class HiliteTreeprocessor(Treeprocessor): + """ Hilight source code in code blocks. """ + + def run(self, root): + """ Find code blocks and store in htmlStash. """ + blocks = root.iter('pre') + for block in blocks: + if len(block) == 1 and block[0].tag == 'code': + code = CodeHilite( + block[0].text, + linenums=self.config['linenums'], + guess_lang=self.config['guess_lang'], + css_class=self.config['css_class'], + style=self.config['pygments_style'], + noclasses=self.config['noclasses'], + tab_length=self.markdown.tab_length, + use_pygments=self.config['use_pygments'] + ) + placeholder = self.markdown.htmlStash.store(code.hilite(), + safe=True) + # Clear codeblock in etree instance + block.clear() + # Change to p element which will later + # be removed when inserting raw html + block.tag = 'p' + block.text = placeholder + + +class CodeHiliteExtension(Extension): + """ Add source code hilighting to markdown codeblocks. """ + + def __init__(self, *args, **kwargs): + # define default configs + self.config = { + 'linenums': [None, + "Use lines numbers. True=yes, False=no, None=auto"], + 'guess_lang': [True, + "Automatic language detection - Default: True"], + 'css_class': ["codehilite", + "Set class name for wrapper <div> - " + "Default: codehilite"], + 'pygments_style': ['default', + 'Pygments HTML Formatter Style ' + '(Colorscheme) - Default: default'], + 'noclasses': [False, + 'Use inline styles instead of CSS classes - ' + 'Default false'], + 'use_pygments': [True, + 'Use Pygments to Highlight code blocks. ' + 'Disable if using a JavaScript library. ' + 'Default: True'] + } + + super(CodeHiliteExtension, self).__init__(*args, **kwargs) + + def extendMarkdown(self, md, md_globals): + """ Add HilitePostprocessor to Markdown instance. """ + hiliter = HiliteTreeprocessor(md) + hiliter.config = self.getConfigs() + md.treeprocessors.add("hilite", hiliter, "<inline") + + md.registerExtension(self) + + +def makeExtension(*args, **kwargs): + return CodeHiliteExtension(*args, **kwargs) diff --git a/source-builder/sb/markdown/extensions/def_list.py b/source-builder/sb/markdown/extensions/def_list.py new file mode 100644 index 0000000..77cca6e --- /dev/null +++ b/source-builder/sb/markdown/extensions/def_list.py @@ -0,0 +1,115 @@ +""" +Definition List Extension for Python-Markdown +============================================= + +Adds parsing of Definition Lists to Python-Markdown. + +See <https://pythonhosted.org/Markdown/extensions/definition_lists.html> +for documentation. + +Original code Copyright 2008 [Waylan Limberg](http://achinghead.com) + +All changes Copyright 2008-2014 The Python Markdown Project + +License: [BSD](http://www.opensource.org/licenses/bsd-license.php) + +""" + +from __future__ import absolute_import +from __future__ import unicode_literals +from . import Extension +from ..blockprocessors import BlockProcessor, ListIndentProcessor +from ..util import etree +import re + + +class DefListProcessor(BlockProcessor): + """ Process Definition Lists. """ + + RE = re.compile(r'(^|\n)[ ]{0,3}:[ ]{1,3}(.*?)(\n|$)') + NO_INDENT_RE = re.compile(r'^[ ]{0,3}[^ :]') + + def test(self, parent, block): + return bool(self.RE.search(block)) + + def run(self, parent, blocks): + + raw_block = blocks.pop(0) + m = self.RE.search(raw_block) + terms = [l.strip() for l in + raw_block[:m.start()].split('\n') if l.strip()] + block = raw_block[m.end():] + no_indent = self.NO_INDENT_RE.match(block) + if no_indent: + d, theRest = (block, None) + else: + d, theRest = self.detab(block) + if d: + d = '%s\n%s' % (m.group(2), d) + else: + d = m.group(2) + sibling = self.lastChild(parent) + if not terms and sibling is None: + # This is not a definition item. Most likely a paragraph that + # starts with a colon at the begining of a document or list. + blocks.insert(0, raw_block) + return False + if not terms and sibling.tag == 'p': + # The previous paragraph contains the terms + state = 'looselist' + terms = sibling.text.split('\n') + parent.remove(sibling) + # Aquire new sibling + sibling = self.lastChild(parent) + else: + state = 'list' + + if sibling is not None and sibling.tag == 'dl': + # This is another item on an existing list + dl = sibling + if not terms and len(dl) and dl[-1].tag == 'dd' and len(dl[-1]): + state = 'looselist' + else: + # This is a new list + dl = etree.SubElement(parent, 'dl') + # Add terms + for term in terms: + dt = etree.SubElement(dl, 'dt') + dt.text = term + # Add definition + self.parser.state.set(state) + dd = etree.SubElement(dl, 'dd') + self.parser.parseBlocks(dd, [d]) + self.parser.state.reset() + + if theRest: + blocks.insert(0, theRest) + + +class DefListIndentProcessor(ListIndentProcessor): + """ Process indented children of definition list items. """ + + ITEM_TYPES = ['dd'] + LIST_TYPES = ['dl'] + + def create_item(self, parent, block): + """ Create a new dd and parse the block with it as the parent. """ + dd = etree.SubElement(parent, 'dd') + self.parser.parseBlocks(dd, [block]) + + +class DefListExtension(Extension): + """ Add definition lists to Markdown. """ + + def extendMarkdown(self, md, md_globals): + """ Add an instance of DefListProcessor to BlockParser. """ + md.parser.blockprocessors.add('defindent', + DefListIndentProcessor(md.parser), + '>indent') + md.parser.blockprocessors.add('deflist', + DefListProcessor(md.parser), + '>ulist') + + +def makeExtension(*args, **kwargs): + return DefListExtension(*args, **kwargs) diff --git a/source-builder/sb/markdown/extensions/extra.py b/source-builder/sb/markdown/extensions/extra.py new file mode 100644 index 0000000..de5db03 --- /dev/null +++ b/source-builder/sb/markdown/extensions/extra.py @@ -0,0 +1,132 @@ +""" +Python-Markdown Extra Extension +=============================== + +A compilation of various Python-Markdown extensions that imitates +[PHP Markdown Extra](http://michelf.com/projects/php-markdown/extra/). + +Note that each of the individual extensions still need to be available +on your PYTHONPATH. This extension simply wraps them all up as a +convenience so that only one extension needs to be listed when +initiating Markdown. See the documentation for each individual +extension for specifics about that extension. + +There may be additional extensions that are distributed with +Python-Markdown that are not included here in Extra. Those extensions +are not part of PHP Markdown Extra, and therefore, not part of +Python-Markdown Extra. If you really would like Extra to include +additional extensions, we suggest creating your own clone of Extra +under a differant name. You could also edit the `extensions` global +variable defined below, but be aware that such changes may be lost +when you upgrade to any future version of Python-Markdown. + +See <https://pythonhosted.org/Markdown/extensions/extra.html> +for documentation. + +Copyright The Python Markdown Project + +License: [BSD](http://www.opensource.org/licenses/bsd-license.php) + +""" + +from __future__ import absolute_import +from __future__ import unicode_literals +from . import Extension +from ..blockprocessors import BlockProcessor +from .. import util +import re + +extensions = [ + 'markdown.extensions.smart_strong', + 'markdown.extensions.fenced_code', + 'markdown.extensions.footnotes', + 'markdown.extensions.attr_list', + 'markdown.extensions.def_list', + 'markdown.extensions.tables', + 'markdown.extensions.abbr' +] + + +class ExtraExtension(Extension): + """ Add various extensions to Markdown class.""" + + def __init__(self, *args, **kwargs): + """ config is a dumb holder which gets passed to actual ext later. """ + self.config = kwargs.pop('configs', {}) + self.config.update(kwargs) + + def extendMarkdown(self, md, md_globals): + """ Register extension instances. """ + md.registerExtensions(extensions, self.config) + if not md.safeMode: + # Turn on processing of markdown text within raw html + md.preprocessors['html_block'].markdown_in_raw = True + md.parser.blockprocessors.add('markdown_block', + MarkdownInHtmlProcessor(md.parser), + '_begin') + md.parser.blockprocessors.tag_counter = -1 + md.parser.blockprocessors.contain_span_tags = re.compile( + r'^(p|h[1-6]|li|dd|dt|td|th|legend|address)$', re.IGNORECASE) + + +def makeExtension(*args, **kwargs): + return ExtraExtension(*args, **kwargs) + + +class MarkdownInHtmlProcessor(BlockProcessor): + """Process Markdown Inside HTML Blocks.""" + def test(self, parent, block): + return block == util.TAG_PLACEHOLDER % \ + str(self.parser.blockprocessors.tag_counter + 1) + + def _process_nests(self, element, block): + """Process the element's child elements in self.run.""" + # Build list of indexes of each nest within the parent element. + nest_index = [] # a list of tuples: (left index, right index) + i = self.parser.blockprocessors.tag_counter + 1 + while len(self._tag_data) > i and self._tag_data[i]['left_index']: + left_child_index = self._tag_data[i]['left_index'] + right_child_index = self._tag_data[i]['right_index'] + nest_index.append((left_child_index - 1, right_child_index)) + i += 1 + + # Create each nest subelement. + for i, (left_index, right_index) in enumerate(nest_index[:-1]): + self.run(element, block[left_index:right_index], + block[right_index:nest_index[i + 1][0]], True) + self.run(element, block[nest_index[-1][0]:nest_index[-1][1]], # last + block[nest_index[-1][1]:], True) # nest + + def run(self, parent, blocks, tail=None, nest=False): + self._tag_data = self.parser.markdown.htmlStash.tag_data + + self.parser.blockprocessors.tag_counter += 1 + tag = self._tag_data[self.parser.blockprocessors.tag_counter] + + # Create Element + markdown_value = tag['attrs'].pop('markdown') + element = util.etree.SubElement(parent, tag['tag'], tag['attrs']) + + # Slice Off Block + if nest: + self.parser.parseBlocks(parent, tail) # Process Tail + block = blocks[1:] + else: # includes nests since a third level of nesting isn't supported + block = blocks[tag['left_index'] + 1: tag['right_index']] + del blocks[:tag['right_index']] + + # Process Text + if (self.parser.blockprocessors.contain_span_tags.match( # Span Mode + tag['tag']) and markdown_value != 'block') or \ + markdown_value == 'span': + element.text = '\n'.join(block) + else: # Block Mode + i = self.parser.blockprocessors.tag_counter + 1 + if len(self._tag_data) > i and self._tag_data[i]['left_index']: + first_subelement_index = self._tag_data[i]['left_index'] - 1 + self.parser.parseBlocks( + element, block[:first_subelement_index]) + if not nest: + block = self._process_nests(element, block) + else: + self.parser.parseBlocks(element, block) diff --git a/source-builder/sb/markdown/extensions/fenced_code.py b/source-builder/sb/markdown/extensions/fenced_code.py new file mode 100644 index 0000000..277bac4 --- /dev/null +++ b/source-builder/sb/markdown/extensions/fenced_code.py @@ -0,0 +1,113 @@ +""" +Fenced Code Extension for Python Markdown +========================================= + +This extension adds Fenced Code Blocks to Python-Markdown. + +See <https://pythonhosted.org/Markdown/extensions/fenced_code_blocks.html> +for documentation. + +Original code Copyright 2007-2008 [Waylan Limberg](http://achinghead.com/). + + +All changes Copyright 2008-2014 The Python Markdown Project + +License: [BSD](http://www.opensource.org/licenses/bsd-license.php) +""" + +from __future__ import absolute_import +from __future__ import unicode_literals +from . import Extension +from ..preprocessors import Preprocessor +from .codehilite import CodeHilite, CodeHiliteExtension, parse_hl_lines +import re + + +class FencedCodeExtension(Extension): + + def extendMarkdown(self, md, md_globals): + """ Add FencedBlockPreprocessor to the Markdown instance. """ + md.registerExtension(self) + + md.preprocessors.add('fenced_code_block', + FencedBlockPreprocessor(md), + ">normalize_whitespace") + + +class FencedBlockPreprocessor(Preprocessor): + FENCED_BLOCK_RE = re.compile(r''' +(?P<fence>^(?:~{3,}|`{3,}))[ ]* # Opening ``` or ~~~ +(\{?\.?(?P<lang>[\w#.+-]*))?[ ]* # Optional {, and lang +# Optional highlight lines, single- or double-quote-delimited +(hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot))?[ ]* +}?[ ]*\n # Optional closing } +(?P<code>.*?)(?<=\n) +(?P=fence)[ ]*$''', re.MULTILINE | re.DOTALL | re.VERBOSE) + CODE_WRAP = '<pre><code%s>%s</code></pre>' + LANG_TAG = ' class="%s"' + + def __init__(self, md): + super(FencedBlockPreprocessor, self).__init__(md) + + self.checked_for_codehilite = False + self.codehilite_conf = {} + + def run(self, lines): + """ Match and store Fenced Code Blocks in the HtmlStash. """ + + # Check for code hilite extension + if not self.checked_for_codehilite: + for ext in self.markdown.registeredExtensions: + if isinstance(ext, CodeHiliteExtension): + self.codehilite_conf = ext.config + break + + self.checked_for_codehilite = True + + text = "\n".join(lines) + while 1: + m = self.FENCED_BLOCK_RE.search(text) + if m: + lang = '' + if m.group('lang'): + lang = self.LANG_TAG % m.group('lang') + + # If config is not empty, then the codehighlite extension + # is enabled, so we call it to highlight the code + if self.codehilite_conf: + highliter = CodeHilite( + m.group('code'), + linenums=self.codehilite_conf['linenums'][0], + guess_lang=self.codehilite_conf['guess_lang'][0], + css_class=self.codehilite_conf['css_class'][0], + style=self.codehilite_conf['pygments_style'][0], + use_pygments=self.codehilite_conf['use_pygments'][0], + lang=(m.group('lang') or None), + noclasses=self.codehilite_conf['noclasses'][0], + hl_lines=parse_hl_lines(m.group('hl_lines')) + ) + + code = highliter.hilite() + else: + code = self.CODE_WRAP % (lang, + self._escape(m.group('code'))) + + placeholder = self.markdown.htmlStash.store(code, safe=True) + text = '%s\n%s\n%s' % (text[:m.start()], + placeholder, + text[m.end():]) + else: + break + return text.split("\n") + + def _escape(self, txt): + """ basic html escaping """ + txt = txt.replace('&', '&') + txt = txt.replace('<', '<') + txt = txt.replace('>', '>') + txt = txt.replace('"', '"') + return txt + + +def makeExtension(*args, **kwargs): + return FencedCodeExtension(*args, **kwargs) diff --git a/source-builder/sb/markdown/extensions/footnotes.py b/source-builder/sb/markdown/extensions/footnotes.py new file mode 100644 index 0000000..8bd8595 --- /dev/null +++ b/source-builder/sb/markdown/extensions/footnotes.py @@ -0,0 +1,416 @@ +""" +Footnotes Extension for Python-Markdown +======================================= + +Adds footnote handling to Python-Markdown. + +See <https://pythonhosted.org/Markdown/extensions/footnotes.html> +for documentation. + +Copyright The Python Markdown Project + +License: [BSD](http://www.opensource.org/licenses/bsd-license.php) + +""" + +from __future__ import absolute_import +from __future__ import unicode_literals +from . import Extension +from ..preprocessors import Preprocessor +from ..inlinepatterns import Pattern +from ..treeprocessors import Treeprocessor +from ..postprocessors import Postprocessor +from .. import util +from ..odict import OrderedDict +import re +import copy + +FN_BACKLINK_TEXT = util.STX + "zz1337820767766393qq" + util.ETX +NBSP_PLACEHOLDER = util.STX + "qq3936677670287331zz" + util.ETX +DEF_RE = re.compile(r'[ ]{0,3}\[\^([^\]]*)\]:\s*(.*)') +TABBED_RE = re.compile(r'((\t)|( ))(.*)') +RE_REF_ID = re.compile(r'(fnref)(\d+)') + + +class FootnoteExtension(Extension): + """ Footnote Extension. """ + + def __init__(self, *args, **kwargs): + """ Setup configs. """ + + self.config = { + 'PLACE_MARKER': + ["///Footnotes Go Here///", + "The text string that marks where the footnotes go"], + 'UNIQUE_IDS': + [False, + "Avoid name collisions across " + "multiple calls to reset()."], + "BACKLINK_TEXT": + ["↩", + "The text string that links from the footnote " + "to the reader's place."] + } + super(FootnoteExtension, self).__init__(*args, **kwargs) + + # In multiple invocations, emit links that don't get tangled. + self.unique_prefix = 0 + self.found_refs = {} + self.used_refs = set() + + self.reset() + + def extendMarkdown(self, md, md_globals): + """ Add pieces to Markdown. """ + md.registerExtension(self) + self.parser = md.parser + self.md = md + # Insert a preprocessor before ReferencePreprocessor + md.preprocessors.add( + "footnote", FootnotePreprocessor(self), "<reference" + ) + # Insert an inline pattern before ImageReferencePattern + FOOTNOTE_RE = r'\[\^([^\]]*)\]' # blah blah [^1] blah + md.inlinePatterns.add( + "footnote", FootnotePattern(FOOTNOTE_RE, self), "<reference" + ) + # Insert a tree-processor that would actually add the footnote div + # This must be before all other treeprocessors (i.e., inline and + # codehilite) so they can run on the the contents of the div. + md.treeprocessors.add( + "footnote", FootnoteTreeprocessor(self), "_begin" + ) + + # Insert a tree-processor that will run after inline is done. + # In this tree-processor we want to check our duplicate footnote tracker + # And add additional backrefs to the footnote pointing back to the + # duplicated references. + md.treeprocessors.add( + "footnote-duplicate", FootnotePostTreeprocessor(self), '>inline' + ) + + # Insert a postprocessor after amp_substitute oricessor + md.postprocessors.add( + "footnote", FootnotePostprocessor(self), ">amp_substitute" + ) + + def reset(self): + """ Clear footnotes on reset, and prepare for distinct document. """ + self.footnotes = OrderedDict() + self.unique_prefix += 1 + self.found_refs = {} + self.used_refs = set() + + def unique_ref(self, reference, found=False): + """ Get a unique reference if there are duplicates. """ + if not found: + return reference + + original_ref = reference + while reference in self.used_refs: + ref, rest = reference.split(self.get_separator(), 1) + m = RE_REF_ID.match(ref) + if m: + reference = '%s%d%s%s' % (m.group(1), int(m.group(2))+1, self.get_separator(), rest) + else: + reference = '%s%d%s%s' % (ref, 2, self.get_separator(), rest) + + self.used_refs.add(reference) + if original_ref in self.found_refs: + self.found_refs[original_ref] += 1 + else: + self.found_refs[original_ref] = 1 + return reference + + def findFootnotesPlaceholder(self, root): + """ Return ElementTree Element that contains Footnote placeholder. """ + def finder(element): + for child in element: + if child.text: + if child.text.find(self.getConfig("PLACE_MARKER")) > -1: + return child, element, True + if child.tail: + if child.tail.find(self.getConfig("PLACE_MARKER")) > -1: + return child, element, False + child_res = finder(child) + if child_res is not None: + return child_res + return None + + res = finder(root) + return res + + def setFootnote(self, id, text): + """ Store a footnote for later retrieval. """ + self.footnotes[id] = text + + def get_separator(self): + if self.md.output_format in ['html5', 'xhtml5']: + return '-' + return ':' + + def makeFootnoteId(self, id): + """ Return footnote link id. """ + if self.getConfig("UNIQUE_IDS"): + return 'fn%s%d-%s' % (self.get_separator(), self.unique_prefix, id) + else: + return 'fn%s%s' % (self.get_separator(), id) + + def makeFootnoteRefId(self, id, found=False): + """ Return footnote back-link id. """ + if self.getConfig("UNIQUE_IDS"): + return self.unique_ref('fnref%s%d-%s' % (self.get_separator(), self.unique_prefix, id), found) + else: + return self.unique_ref('fnref%s%s' % (self.get_separator(), id), found) + + def makeFootnotesDiv(self, root): + """ Return div of footnotes as et Element. """ + + if not list(self.footnotes.keys()): + return None + + div = util.etree.Element("div") + div.set('class', 'footnote') + util.etree.SubElement(div, "hr") + ol = util.etree.SubElement(div, "ol") + surrogate_parent = util.etree.Element("div") + + for id in self.footnotes.keys(): + li = util.etree.SubElement(ol, "li") + li.set("id", self.makeFootnoteId(id)) + # Parse footnote with surrogate parent as li cannot be used. + # List block handlers have special logic to deal with li. + # When we are done parsing, we will copy everything over to li. + self.parser.parseChunk(surrogate_parent, self.footnotes[id]) + for el in list(surrogate_parent): + li.append(el) + surrogate_parent.remove(el) + backlink = util.etree.Element("a") + backlink.set("href", "#" + self.makeFootnoteRefId(id)) + if self.md.output_format not in ['html5', 'xhtml5']: + backlink.set("rev", "footnote") # Invalid in HTML5 + backlink.set("class", "footnote-backref") + backlink.set( + "title", + "Jump back to footnote %d in the text" % + (self.footnotes.index(id)+1) + ) + backlink.text = FN_BACKLINK_TEXT + + if li.getchildren(): + node = li[-1] + if node.tag == "p": + node.text = node.text + NBSP_PLACEHOLDER + node.append(backlink) + else: + p = util.etree.SubElement(li, "p") + p.append(backlink) + return div + + +class FootnotePreprocessor(Preprocessor): + """ Find all footnote references and store for later use. """ + + def __init__(self, footnotes): + self.footnotes = footnotes + + def run(self, lines): + """ + Loop through lines and find, set, and remove footnote definitions. + + Keywords: + + * lines: A list of lines of text + + Return: A list of lines of text with footnote definitions removed. + + """ + newlines = [] + i = 0 + while True: + m = DEF_RE.match(lines[i]) + if m: + fn, _i = self.detectTabbed(lines[i+1:]) + fn.insert(0, m.group(2)) + i += _i-1 # skip past footnote + self.footnotes.setFootnote(m.group(1), "\n".join(fn)) + else: + newlines.append(lines[i]) + if len(lines) > i+1: + i += 1 + else: + break + return newlines + + def detectTabbed(self, lines): + """ Find indented text and remove indent before further proccesing. + + Keyword arguments: + + * lines: an array of strings + + Returns: a list of post processed items and the index of last line. + + """ + items = [] + blank_line = False # have we encountered a blank line yet? + i = 0 # to keep track of where we are + + def detab(line): + match = TABBED_RE.match(line) + if match: + return match.group(4) + + for line in lines: + if line.strip(): # Non-blank line + detabbed_line = detab(line) + if detabbed_line: + items.append(detabbed_line) + i += 1 + continue + elif not blank_line and not DEF_RE.match(line): + # not tabbed but still part of first par. + items.append(line) + i += 1 + continue + else: + return items, i+1 + + else: # Blank line: _maybe_ we are done. + blank_line = True + i += 1 # advance + + # Find the next non-blank line + for j in range(i, len(lines)): + if lines[j].strip(): + next_line = lines[j] + break + else: + break # There is no more text; we are done. + + # Check if the next non-blank line is tabbed + if detab(next_line): # Yes, more work to do. + items.append("") + continue + else: + break # No, we are done. + else: + i += 1 + + return items, i + + +class FootnotePattern(Pattern): + """ InlinePattern for footnote markers in a document's body text. """ + + def __init__(self, pattern, footnotes): + super(FootnotePattern, self).__init__(pattern) + self.footnotes = footnotes + + def handleMatch(self, m): + id = m.group(2) + if id in self.footnotes.footnotes.keys(): + sup = util.etree.Element("sup") + a = util.etree.SubElement(sup, "a") + sup.set('id', self.footnotes.makeFootnoteRefId(id, found=True)) + a.set('href', '#' + self.footnotes.makeFootnoteId(id)) + if self.footnotes.md.output_format not in ['html5', 'xhtml5']: + a.set('rel', 'footnote') # invalid in HTML5 + a.set('class', 'footnote-ref') + a.text = util.text_type(self.footnotes.footnotes.index(id) + 1) + return sup + else: + return None + + +class FootnotePostTreeprocessor(Treeprocessor): + """ Ammend footnote div with duplicates. """ + + def __init__(self, footnotes): + self.footnotes = footnotes + + def add_duplicates(self, li, duplicates): + """ Adjust current li and add the duplicates: fnref2, fnref3, etc. """ + for link in li.iter('a'): + # Find the link that needs to be duplicated. + if link.attrib.get('class', '') == 'footnote-backref': + ref, rest = link.attrib['href'].split(self.footnotes.get_separator(), 1) + # Duplicate link the number of times we need to + # and point the to the appropriate references. + links = [] + for index in range(2, duplicates + 1): + sib_link = copy.deepcopy(link) + sib_link.attrib['href'] = '%s%d%s%s' % (ref, index, self.footnotes.get_separator(), rest) + links.append(sib_link) + self.offset += 1 + # Add all the new duplicate links. + el = list(li)[-1] + for l in links: + el.append(l) + break + + def get_num_duplicates(self, li): + """ Get the number of duplicate refs of the footnote. """ + fn, rest = li.attrib.get('id', '').split(self.footnotes.get_separator(), 1) + link_id = '%sref%s%s' % (fn, self.footnotes.get_separator(), rest) + return self.footnotes.found_refs.get(link_id, 0) + + def handle_duplicates(self, parent): + """ Find duplicate footnotes and format and add the duplicates. """ + for li in list(parent): + # Check number of duplicates footnotes and insert + # additional links if needed. + count = self.get_num_duplicates(li) + if count > 1: + self.add_duplicates(li, count) + + def run(self, root): + """ Crawl the footnote div and add missing duplicate footnotes. """ + self.offset = 0 + for div in root.iter('div'): + if div.attrib.get('class', '') == 'footnote': + # Footnotes shoul be under the first orderd list under + # the footnote div. So once we find it, quit. + for ol in div.iter('ol'): + self.handle_duplicates(ol) + break + + +class FootnoteTreeprocessor(Treeprocessor): + """ Build and append footnote div to end of document. """ + + def __init__(self, footnotes): + self.footnotes = footnotes + + def run(self, root): + footnotesDiv = self.footnotes.makeFootnotesDiv(root) + if footnotesDiv is not None: + result = self.footnotes.findFootnotesPlaceholder(root) + if result: + child, parent, isText = result + ind = parent.getchildren().index(child) + if isText: + parent.remove(child) + parent.insert(ind, footnotesDiv) + else: + parent.insert(ind + 1, footnotesDiv) + child.tail = None + else: + root.append(footnotesDiv) + + +class FootnotePostprocessor(Postprocessor): + """ Replace placeholders with html entities. """ + def __init__(self, footnotes): + self.footnotes = footnotes + + def run(self, text): + text = text.replace( + FN_BACKLINK_TEXT, self.footnotes.getConfig("BACKLINK_TEXT") + ) + return text.replace(NBSP_PLACEHOLDER, " ") + + +def makeExtension(*args, **kwargs): + """ Return an instance of the FootnoteExtension """ + return FootnoteExtension(*args, **kwargs) diff --git a/source-builder/sb/markdown/extensions/headerid.py b/source-builder/sb/markdown/extensions/headerid.py new file mode 100644 index 0000000..2cb20b9 --- /dev/null +++ b/source-builder/sb/markdown/extensions/headerid.py @@ -0,0 +1,97 @@ +""" +HeaderID Extension for Python-Markdown +====================================== + +Auto-generate id attributes for HTML headers. + +See <https://pythonhosted.org/Markdown/extensions/header_id.html> +for documentation. + +Original code Copyright 2007-2011 [Waylan Limberg](http://achinghead.com/). + +All changes Copyright 2011-2014 The Python Markdown Project + +License: [BSD](http://www.opensource.org/licenses/bsd-license.php) + +""" + +from __future__ import absolute_import +from __future__ import unicode_literals +from . import Extension +from ..treeprocessors import Treeprocessor +from ..util import parseBoolValue +from .toc import slugify, unique, stashedHTML2text +import warnings + + +class HeaderIdTreeprocessor(Treeprocessor): + """ Assign IDs to headers. """ + + IDs = set() + + def run(self, doc): + start_level, force_id = self._get_meta() + slugify = self.config['slugify'] + sep = self.config['separator'] + for elem in doc: + if elem.tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']: + if force_id: + if "id" in elem.attrib: + id = elem.get('id') + else: + id = stashedHTML2text(''.join(elem.itertext()), self.md) + id = slugify(id, sep) + elem.set('id', unique(id, self.IDs)) + if start_level: + level = int(elem.tag[-1]) + start_level + if level > 6: + level = 6 + elem.tag = 'h%d' % level + + def _get_meta(self): + """ Return meta data suported by this ext as a tuple """ + level = int(self.config['level']) - 1 + force = parseBoolValue(self.config['forceid']) + if hasattr(self.md, 'Meta'): + if 'header_level' in self.md.Meta: + level = int(self.md.Meta['header_level'][0]) - 1 + if 'header_forceid' in self.md.Meta: + force = parseBoolValue(self.md.Meta['header_forceid'][0]) + return level, force + + +class HeaderIdExtension(Extension): + def __init__(self, *args, **kwargs): + # set defaults + self.config = { + 'level': ['1', 'Base level for headers.'], + 'forceid': ['True', 'Force all headers to have an id.'], + 'separator': ['-', 'Word separator.'], + 'slugify': [slugify, 'Callable to generate anchors'] + } + + super(HeaderIdExtension, self).__init__(*args, **kwargs) + + warnings.warn( + 'The HeaderId Extension is pending deprecation. Use the TOC Extension instead.', + PendingDeprecationWarning + ) + + def extendMarkdown(self, md, md_globals): + md.registerExtension(self) + self.processor = HeaderIdTreeprocessor() + self.processor.md = md + self.processor.config = self.getConfigs() + if 'attr_list' in md.treeprocessors.keys(): + # insert after attr_list treeprocessor + md.treeprocessors.add('headerid', self.processor, '>attr_list') + else: + # insert after 'prettify' treeprocessor. + md.treeprocessors.add('headerid', self.processor, '>prettify') + + def reset(self): + self.processor.IDs = set() + + +def makeExtension(*args, **kwargs): + return HeaderIdExtension(*args, **kwargs) diff --git a/source-builder/sb/markdown/extensions/meta.py b/source-builder/sb/markdown/extensions/meta.py new file mode 100644 index 0000000..711235e --- /dev/null +++ b/source-builder/sb/markdown/extensions/meta.py @@ -0,0 +1,78 @@ +""" +Meta Data Extension for Python-Markdown +======================================= + +This extension adds Meta Data handling to markdown. + +See <https://pythonhosted.org/Markdown/extensions/meta_data.html> +for documentation. + +Original code Copyright 2007-2008 [Waylan Limberg](http://achinghead.com). + +All changes Copyright 2008-2014 The Python Markdown Project + +License: [BSD](http://www.opensource.org/licenses/bsd-license.php) + +""" + +from __future__ import absolute_import +from __future__ import unicode_literals +from . import Extension +from ..preprocessors import Preprocessor +import re +import logging + +log = logging.getLogger('MARKDOWN') + +# Global Vars +META_RE = re.compile(r'^[ ]{0,3}(?P<key>[A-Za-z0-9_-]+):\s*(?P<value>.*)') +META_MORE_RE = re.compile(r'^[ ]{4,}(?P<value>.*)') +BEGIN_RE = re.compile(r'^-{3}(\s.*)?') +END_RE = re.compile(r'^(-{3}|\.{3})(\s.*)?') + + +class MetaExtension (Extension): + """ Meta-Data extension for Python-Markdown. """ + + def extendMarkdown(self, md, md_globals): + """ Add MetaPreprocessor to Markdown instance. """ + md.preprocessors.add("meta", + MetaPreprocessor(md), + ">normalize_whitespace") + + +class MetaPreprocessor(Preprocessor): + """ Get Meta-Data. """ + + def run(self, lines): + """ Parse Meta-Data and store in Markdown.Meta. """ + meta = {} + key = None + if lines and BEGIN_RE.match(lines[0]): + lines.pop(0) + while lines: + line = lines.pop(0) + m1 = META_RE.match(line) + if line.strip() == '' or END_RE.match(line): + break # blank line or end of YAML header - done + if m1: + key = m1.group('key').lower().strip() + value = m1.group('value').strip() + try: + meta[key].append(value) + except KeyError: + meta[key] = [value] + else: + m2 = META_MORE_RE.match(line) + if m2 and key: + # Add another line to existing key + meta[key].append(m2.group('value').strip()) + else: + lines.insert(0, line) + break # no meta data - done + self.markdown.Meta = meta + return lines + + +def makeExtension(*args, **kwargs): + return MetaExtension(*args, **kwargs) diff --git a/source-builder/sb/markdown/extensions/nl2br.py b/source-builder/sb/markdown/extensions/nl2br.py new file mode 100644 index 0000000..8acd60c --- /dev/null +++ b/source-builder/sb/markdown/extensions/nl2br.py @@ -0,0 +1,35 @@ +""" +NL2BR Extension +=============== + +A Python-Markdown extension to treat newlines as hard breaks; like +GitHub-flavored Markdown does. + +See <https://pythonhosted.org/Markdown/extensions/nl2br.html> +for documentation. + +Oringinal code Copyright 2011 [Brian Neal](http://deathofagremmie.com/) + +All changes Copyright 2011-2014 The Python Markdown Project + +License: [BSD](http://www.opensource.org/licenses/bsd-license.php) + +""" + +from __future__ import absolute_import +from __future__ import unicode_literals +from . import Extension +from ..inlinepatterns import SubstituteTagPattern + +BR_RE = r'\n' + + +class Nl2BrExtension(Extension): + + def extendMarkdown(self, md, md_globals): + br_tag = SubstituteTagPattern(BR_RE, 'br') + md.inlinePatterns.add('nl', br_tag, '_end') + + +def makeExtension(*args, **kwargs): + return Nl2BrExtension(*args, **kwargs) diff --git a/source-builder/sb/markdown/extensions/sane_lists.py b/source-builder/sb/markdown/extensions/sane_lists.py new file mode 100644 index 0000000..828ae7a --- /dev/null +++ b/source-builder/sb/markdown/extensions/sane_lists.py @@ -0,0 +1,55 @@ +""" +Sane List Extension for Python-Markdown +======================================= + +Modify the behavior of Lists in Python-Markdown to act in a sane manor. + +See <https://pythonhosted.org/Markdown/extensions/sane_lists.html> +for documentation. + +Original code Copyright 2011 [Waylan Limberg](http://achinghead.com) + +All changes Copyright 2011-2014 The Python Markdown Project + +License: [BSD](http://www.opensource.org/licenses/bsd-license.php) + +""" + +from __future__ import absolute_import +from __future__ import unicode_literals +from . import Extension +from ..blockprocessors import OListProcessor, UListProcessor +import re + + +class SaneOListProcessor(OListProcessor): + + SIBLING_TAGS = ['ol'] + + def __init__(self, parser): + super(SaneOListProcessor, self).__init__(parser) + self.CHILD_RE = re.compile(r'^[ ]{0,%d}((\d+\.))[ ]+(.*)' % + (self.tab_length - 1)) + + +class SaneUListProcessor(UListProcessor): + + SIBLING_TAGS = ['ul'] + + def __init__(self, parser): + super(SaneUListProcessor, self).__init__(parser) + self.CHILD_RE = re.compile(r'^[ ]{0,%d}(([*+-]))[ ]+(.*)' % + (self.tab_length - 1)) + + +class SaneListExtension(Extension): + """ Add sane lists to Markdown. """ + + def extendMarkdown(self, md, md_globals): + """ Override existing Processors. """ + md.parser.blockprocessors['olist'] = SaneOListProcessor(md.parser) + md.parser.blockprocessors['ulist'] = SaneUListProcessor(md.parser) + + +def makeExtension(*args, **kwargs): + return SaneListExtension(*args, **kwargs) diff --git a/source-builder/sb/markdown/extensions/smart_strong.py b/source-builder/sb/markdown/extensions/smart_strong.py new file mode 100644 index 0000000..58570bb --- /dev/null +++ b/source-builder/sb/markdown/extensions/smart_strong.py @@ -0,0 +1,41 @@ +''' +Smart_Strong Extension for Python-Markdown +========================================== + +This extention adds smarter handling of double underscores within words. + +See <https://pythonhosted.org/Markdown/extensions/smart_strong.html> +for documentation. + +Original code Copyright 2011 [Waylan Limberg](http://achinghead.com) + +All changes Copyright 2011-2014 The Python Markdown Project + +License: [BSD](http://www.opensource.org/licenses/bsd-license.php) + +''' + +from __future__ import absolute_import +from __future__ import unicode_literals +from . import Extension +from ..inlinepatterns import SimpleTagPattern + +SMART_STRONG_RE = r'(?<!\w)(_{2})(?!_)(.+?)(?<!_)\2(?!\w)' +STRONG_RE = r'(\*{2})(.+?)\2' + + +class SmartEmphasisExtension(Extension): + """ Add smart_emphasis extension to Markdown class.""" + + def extendMarkdown(self, md, md_globals): + """ Modify inline patterns. """ + md.inlinePatterns['strong'] = SimpleTagPattern(STRONG_RE, 'strong') + md.inlinePatterns.add( + 'strong2', + SimpleTagPattern(SMART_STRONG_RE, 'strong'), + '>emphasis2' + ) + + +def makeExtension(*args, **kwargs): + return SmartEmphasisExtension(*args, **kwargs) diff --git a/source-builder/sb/markdown/extensions/smarty.py b/source-builder/sb/markdown/extensions/smarty.py new file mode 100644 index 0000000..600d74c --- /dev/null +++ b/source-builder/sb/markdown/extensions/smarty.py @@ -0,0 +1,268 @@ +# -*- coding: utf-8 -*- +''' +Smarty extension for Python-Markdown +==================================== + +Adds conversion of ASCII dashes, quotes and ellipses to their HTML +entity equivalents. + +See <https://pythonhosted.org/Markdown/extensions/smarty.html> +for documentation. + +Author: 2013, Dmitry Shachnev <mitya57@gmail.com> + +All changes Copyright 2013-2014 The Python Markdown Project + +License: [BSD](http://www.opensource.org/licenses/bsd-license.php) + +SmartyPants license: + + Copyright (c) 2003 John Gruber <http://daringfireball.net/> + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + * Neither the name "SmartyPants" nor the names of its contributors + may be used to endorse or promote products derived from this + software without specific prior written permission. + + This software is provided by the copyright holders and contributors "as + is" and any express or implied warranties, including, but not limited + to, the implied warranties of merchantability and fitness for a + particular purpose are disclaimed. In no event shall the copyright + owner or contributors be liable for any direct, indirect, incidental, + special, exemplary, or consequential damages (including, but not + limited to, procurement of substitute goods or services; loss of use, + data, or profits; or business interruption) however caused and on any + theory of liability, whether in contract, strict liability, or tort + (including negligence or otherwise) arising in any way out of the use + of this software, even if advised of the possibility of such damage. + + +smartypants.py license: + + smartypants.py is a derivative work of SmartyPants. + Copyright (c) 2004, 2007 Chad Miller <http://web.chad.org/> + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + This software is provided by the copyright holders and contributors "as + is" and any express or implied warranties, including, but not limited + to, the implied warranties of merchantability and fitness for a + particular purpose are disclaimed. In no event shall the copyright + owner or contributors be liable for any direct, indirect, incidental, + special, exemplary, or consequential damages (including, but not + limited to, procurement of substitute goods or services; loss of use, + data, or profits; or business interruption) however caused and on any + theory of liability, whether in contract, strict liability, or tort + (including negligence or otherwise) arising in any way out of the use + of this software, even if advised of the possibility of such damage. + +''' + + +from __future__ import unicode_literals +from . import Extension +from ..inlinepatterns import HtmlPattern, HTML_RE +from ..odict import OrderedDict +from ..treeprocessors import InlineProcessor + + +# Constants for quote education. +punctClass = r"""[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]""" +endOfWordClass = r"[\s.,;:!?)]" +closeClass = "[^\ \t\r\n\[\{\(\-\u0002\u0003]" + +openingQuotesBase = ( + '(\s' # a whitespace char + '| ' # or a non-breaking space entity + '|--' # or dashes + '|–|—' # or unicode + '|&[mn]dash;' # or named dash entities + '|–|—' # or decimal entities + ')' +) + +substitutions = { + 'mdash': '—', + 'ndash': '–', + 'ellipsis': '…', + 'left-angle-quote': '«', + 'right-angle-quote': '»', + 'left-single-quote': '‘', + 'right-single-quote': '’', + 'left-double-quote': '“', + 'right-double-quote': '”', +} + + +# Special case if the very first character is a quote +# followed by punctuation at a non-word-break. Close the quotes by brute force: +singleQuoteStartRe = r"^'(?=%s\B)" % punctClass +doubleQuoteStartRe = r'^"(?=%s\B)' % punctClass + +# Special case for double sets of quotes, e.g.: +# <p>He said, "'Quoted' words in a larger quote."</p> +doubleQuoteSetsRe = r""""'(?=\w)""" +singleQuoteSetsRe = r"""'"(?=\w)""" + +# Special case for decade abbreviations (the '80s): +decadeAbbrRe = r"(?<!\w)'(?=\d{2}s)" + +# Get most opening double quotes: +openingDoubleQuotesRegex = r'%s"(?=\w)' % openingQuotesBase + +# Double closing quotes: +closingDoubleQuotesRegex = r'"(?=\s)' +closingDoubleQuotesRegex2 = '(?<=%s)"' % closeClass + +# Get most opening single quotes: +openingSingleQuotesRegex = r"%s'(?=\w)" % openingQuotesBase + +# Single closing quotes: +closingSingleQuotesRegex = r"(?<=%s)'(?!\s|s\b|\d)" % closeClass +closingSingleQuotesRegex2 = r"(?<=%s)'(\s|s\b)" % closeClass + +# All remaining quotes should be opening ones +remainingSingleQuotesRegex = "'" +remainingDoubleQuotesRegex = '"' + +HTML_STRICT_RE = HTML_RE + r'(?!\>)' + + +class SubstituteTextPattern(HtmlPattern): + def __init__(self, pattern, replace, markdown_instance): + """ Replaces matches with some text. """ + HtmlPattern.__init__(self, pattern) + self.replace = replace + self.markdown = markdown_instance + + def handleMatch(self, m): + result = '' + for part in self.replace: + if isinstance(part, int): + result += m.group(part) + else: + result += self.markdown.htmlStash.store(part, safe=True) + return result + + +class SmartyExtension(Extension): + def __init__(self, *args, **kwargs): + self.config = { + 'smart_quotes': [True, 'Educate quotes'], + 'smart_angled_quotes': [False, 'Educate angled quotes'], + 'smart_dashes': [True, 'Educate dashes'], + 'smart_ellipses': [True, 'Educate ellipses'], + 'substitutions': [{}, 'Overwrite default substitutions'], + } + super(SmartyExtension, self).__init__(*args, **kwargs) + self.substitutions = dict(substitutions) + self.substitutions.update(self.getConfig('substitutions', default={})) + + def _addPatterns(self, md, patterns, serie): + for ind, pattern in enumerate(patterns): + pattern += (md,) + pattern = SubstituteTextPattern(*pattern) + after = ('>smarty-%s-%d' % (serie, ind - 1) if ind else '_begin') + name = 'smarty-%s-%d' % (serie, ind) + self.inlinePatterns.add(name, pattern, after) + + def educateDashes(self, md): + emDashesPattern = SubstituteTextPattern( + r'(?<!-)---(?!-)', (self.substitutions['mdash'],), md + ) + enDashesPattern = SubstituteTextPattern( + r'(?<!-)--(?!-)', (self.substitutions['ndash'],), md + ) + self.inlinePatterns.add('smarty-em-dashes', emDashesPattern, '_begin') + self.inlinePatterns.add( + 'smarty-en-dashes', enDashesPattern, '>smarty-em-dashes' + ) + + def educateEllipses(self, md): + ellipsesPattern = SubstituteTextPattern( + r'(?<!\.)\.{3}(?!\.)', (self.substitutions['ellipsis'],), md + ) + self.inlinePatterns.add('smarty-ellipses', ellipsesPattern, '_begin') + + def educateAngledQuotes(self, md): + leftAngledQuotePattern = SubstituteTextPattern( + r'\<\<', (self.substitutions['left-angle-quote'],), md + ) + rightAngledQuotePattern = SubstituteTextPattern( + r'\>\>', (self.substitutions['right-angle-quote'],), md + ) + self.inlinePatterns.add( + 'smarty-left-angle-quotes', leftAngledQuotePattern, '_begin' + ) + self.inlinePatterns.add( + 'smarty-right-angle-quotes', + rightAngledQuotePattern, + '>smarty-left-angle-quotes' + ) + + def educateQuotes(self, md): + lsquo = self.substitutions['left-single-quote'] + rsquo = self.substitutions['right-single-quote'] + ldquo = self.substitutions['left-double-quote'] + rdquo = self.substitutions['right-double-quote'] + patterns = ( + (singleQuoteStartRe, (rsquo,)), + (doubleQuoteStartRe, (rdquo,)), + (doubleQuoteSetsRe, (ldquo + lsquo,)), + (singleQuoteSetsRe, (lsquo + ldquo,)), + (decadeAbbrRe, (rsquo,)), + (openingSingleQuotesRegex, (2, lsquo)), + (closingSingleQuotesRegex, (rsquo,)), + (closingSingleQuotesRegex2, (rsquo, 2)), + (remainingSingleQuotesRegex, (lsquo,)), + (openingDoubleQuotesRegex, (2, ldquo)), + (closingDoubleQuotesRegex, (rdquo,)), + (closingDoubleQuotesRegex2, (rdquo,)), + (remainingDoubleQuotesRegex, (ldquo,)) + ) + self._addPatterns(md, patterns, 'quotes') + + def extendMarkdown(self, md, md_globals): + configs = self.getConfigs() + self.inlinePatterns = OrderedDict() + if configs['smart_ellipses']: + self.educateEllipses(md) + if configs['smart_quotes']: + self.educateQuotes(md) + if configs['smart_angled_quotes']: + self.educateAngledQuotes(md) + # Override HTML_RE from inlinepatterns.py so that it does not + # process tags with duplicate closing quotes. + md.inlinePatterns["html"] = HtmlPattern(HTML_STRICT_RE, md) + if configs['smart_dashes']: + self.educateDashes(md) + inlineProcessor = InlineProcessor(md) + inlineProcessor.inlinePatterns = self.inlinePatterns + md.treeprocessors.add('smarty', inlineProcessor, '_end') + md.ESCAPED_CHARS.extend(['"', "'"]) + + +def makeExtension(*args, **kwargs): + return SmartyExtension(*args, **kwargs) diff --git a/source-builder/sb/markdown/extensions/tables.py b/source-builder/sb/markdown/extensions/tables.py new file mode 100644 index 0000000..ebe6ffa --- /dev/null +++ b/source-builder/sb/markdown/extensions/tables.py @@ -0,0 +1,196 @@ +""" +Tables Extension for Python-Markdown +==================================== + +Added parsing of tables to Python-Markdown. + +See <https://pythonhosted.org/Markdown/extensions/tables.html> +for documentation. + +Original code Copyright 2009 [Waylan Limberg](http://achinghead.com) + +All changes Copyright 2008-2014 The Python Markdown Project + +License: [BSD](http://www.opensource.org/licenses/bsd-license.php) + +""" + +from __future__ import absolute_import +from __future__ import unicode_literals +from . import Extension +from ..blockprocessors import BlockProcessor +from ..util import etree +import re + + +class TableProcessor(BlockProcessor): + """ Process Tables. """ + + RE_CODE_PIPES = re.compile(r'(?:(\\\\)|(\\`+)|(`+)|(\\\|)|(\|))') + RE_END_BORDER = re.compile(r'(?<!\\)(?:\\\\)*\|$') + + def __init__(self, parser): + self.border = False + self.separator = '' + super(TableProcessor, self).__init__(parser) + + def test(self, parent, block): + """ + Ensure first two rows (column header and separator row) are valid table rows. + + Keep border check and separator row do avoid repeating the work. + """ + is_table = False + header = [row.strip() for row in block.split('\n')[0:2]] + if len(header) == 2: + self.border = header[0].startswith('|') + row = self._split_row(header[0]) + is_table = len(row) > 1 + + if is_table: + row = self._split_row(header[1]) + is_table = len(row) > 1 and set(''.join(row)) <= set('|:- ') + if is_table: + self.separator = row + return is_table + + def run(self, parent, blocks): + """ Parse a table block and build table. """ + block = blocks.pop(0).split('\n') + header = block[0].strip() + rows = [] if len(block) < 3 else block[2:] + + # Get alignment of columns + align = [] + for c in self.separator: + c = c.strip() + if c.startswith(':') and c.endswith(':'): + align.append('center') + elif c.startswith(':'): + align.append('left') + elif c.endswith(':'): + align.append('right') + else: + align.append(None) + + # Build table + table = etree.SubElement(parent, 'table') + thead = etree.SubElement(table, 'thead') + self._build_row(header, thead, align) + tbody = etree.SubElement(table, 'tbody') + for row in rows: + self._build_row(row.strip(), tbody, align) + + def _build_row(self, row, parent, align): + """ Given a row of text, build table cells. """ + tr = etree.SubElement(parent, 'tr') + tag = 'td' + if parent.tag == 'thead': + tag = 'th' + cells = self._split_row(row) + # We use align here rather than cells to ensure every row + # contains the same number of columns. + for i, a in enumerate(align): + c = etree.SubElement(tr, tag) + try: + c.text = cells[i].strip() + except IndexError: # pragma: no cover + c.text = "" + if a: + c.set('align', a) + + def _split_row(self, row): + """ split a row of text into list of cells. """ + if self.border: + if row.startswith('|'): + row = row[1:] + row = self.RE_END_BORDER.sub('', row) + return self._split(row) + + def _split(self, row): + """ split a row of text with some code into a list of cells. """ + elements = [] + pipes = [] + tics = [] + tic_points = [] + tic_region = [] + good_pipes = [] + + # Parse row + # Throw out \\, and \| + for m in self.RE_CODE_PIPES.finditer(row): + # Store ` data (len, start_pos, end_pos) + if m.group(2): + # \`+ + # Store length of each tic group: subtract \ + tics.append(len(m.group(2)) - 1) + # Store start of group, end of group, and escape length + tic_points.append((m.start(2), m.end(2) - 1, 1)) + elif m.group(3): + # `+ + # Store length of each tic group + tics.append(len(m.group(3))) + # Store start of group, end of group, and escape length + tic_points.append((m.start(3), m.end(3) - 1, 0)) + # Store pipe location + elif m.group(5): + pipes.append(m.start(5)) + + # Pair up tics according to size if possible + # Subtract the escape length *only* from the opening. + # Walk through tic list and see if tic has a close. + # Store the tic region (start of region, end of region). + pos = 0 + tic_len = len(tics) + while pos < tic_len: + try: + tic_size = tics[pos] - tic_points[pos][2] + if tic_size == 0: + raise ValueError + index = tics[pos + 1:].index(tic_size) + 1 + tic_region.append((tic_points[pos][0], tic_points[pos + index][1])) + pos += index + 1 + except ValueError: + pos += 1 + + # Resolve pipes. Check if they are within a tic pair region. + # Walk through pipes comparing them to each region. + # - If pipe position is less that a region, it isn't in a region + # - If it is within a region, we don't want it, so throw it out + # - If we didn't throw it out, it must be a table pipe + for pipe in pipes: + throw_out = False + for region in tic_region: + if pipe < region[0]: + # Pipe is not in a region + break + elif region[0] <= pipe <= region[1]: + # Pipe is within a code region. Throw it out. + throw_out = True + break + if not throw_out: + good_pipes.append(pipe) + + # Split row according to table delimeters. + pos = 0 + for pipe in good_pipes: + elements.append(row[pos:pipe]) + pos = pipe + 1 + elements.append(row[pos:]) + return elements + + +class TableExtension(Extension): + """ Add tables to Markdown. """ + + def extendMarkdown(self, md, md_globals): + """ Add an instance of TableProcessor to BlockParser. """ + if '|' not in md.ESCAPED_CHARS: + md.ESCAPED_CHARS.append('|') + md.parser.blockprocessors.add('table', + TableProcessor(md.parser), + '<hashheader') + + +def makeExtension(*args, **kwargs): + return TableExtension(*args, **kwargs) diff --git a/source-builder/sb/markdown/extensions/toc.py b/source-builder/sb/markdown/extensions/toc.py new file mode 100644 index 0000000..56db33c --- /dev/null +++ b/source-builder/sb/markdown/extensions/toc.py @@ -0,0 +1,310 @@ +""" +Table of Contents Extension for Python-Markdown +=============================================== + +See <https://pythonhosted.org/Markdown/extensions/toc.html> +for documentation. + +Oringinal code Copyright 2008 [Jack Miller](http://codezen.org) + +All changes Copyright 2008-2014 The Python Markdown Project + +License: [BSD](http://www.opensource.org/licenses/bsd-license.php) + +""" + +from __future__ import absolute_import +from __future__ import unicode_literals +from . import Extension +from ..treeprocessors import Treeprocessor +from ..util import etree, parseBoolValue, AMP_SUBSTITUTE, HTML_PLACEHOLDER_RE, string_type +import re +import unicodedata + + +def slugify(value, separator): + """ Slugify a string, to make it URL friendly. """ + value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore') + value = re.sub('[^\w\s-]', '', value.decode('ascii')).strip().lower() + return re.sub('[%s\s]+' % separator, separator, value) + + +IDCOUNT_RE = re.compile(r'^(.*)_([0-9]+)$') + + +def unique(id, ids): + """ Ensure id is unique in set of ids. Append '_1', '_2'... if not """ + while id in ids or not id: + m = IDCOUNT_RE.match(id) + if m: + id = '%s_%d' % (m.group(1), int(m.group(2))+1) + else: + id = '%s_%d' % (id, 1) + ids.add(id) + return id + + +def stashedHTML2text(text, md): + """ Extract raw HTML from stash, reduce to plain text and swap with placeholder. """ + def _html_sub(m): + """ Substitute raw html with plain text. """ + try: + raw, safe = md.htmlStash.rawHtmlBlocks[int(m.group(1))] + except (IndexError, TypeError): # pragma: no cover + return m.group(0) + if md.safeMode and not safe: # pragma: no cover + return '' + # Strip out tags and entities - leaveing text + return re.sub(r'(<[^>]+>)|(&[\#a-zA-Z0-9]+;)', '', raw) + + return HTML_PLACEHOLDER_RE.sub(_html_sub, text) + + +def nest_toc_tokens(toc_list): + """Given an unsorted list with errors and skips, return a nested one. + [{'level': 1}, {'level': 2}] + => + [{'level': 1, 'children': [{'level': 2, 'children': []}]}] + + A wrong list is also converted: + [{'level': 2}, {'level': 1}] + => + [{'level': 2, 'children': []}, {'level': 1, 'children': []}] + """ + + ordered_list = [] + if len(toc_list): + # Initialize everything by processing the first entry + last = toc_list.pop(0) + last['children'] = [] + levels = [last['level']] + ordered_list.append(last) + parents = [] + + # Walk the rest nesting the entries properly + while toc_list: + t = toc_list.pop(0) + current_level = t['level'] + t['children'] = [] + + # Reduce depth if current level < last item's level + if current_level < levels[-1]: + # Pop last level since we know we are less than it + levels.pop() + + # Pop parents and levels we are less than or equal to + to_pop = 0 + for p in reversed(parents): + if current_level <= p['level']: + to_pop += 1 + else: # pragma: no cover + break + if to_pop: + levels = levels[:-to_pop] + parents = parents[:-to_pop] + + # Note current level as last + levels.append(current_level) + + # Level is the same, so append to + # the current parent (if available) + if current_level == levels[-1]: + (parents[-1]['children'] if parents + else ordered_list).append(t) + + # Current level is > last item's level, + # So make last item a parent and append current as child + else: + last['children'].append(t) + parents.append(last) + levels.append(current_level) + last = t + + return ordered_list + + +class TocTreeprocessor(Treeprocessor): + def __init__(self, md, config): + super(TocTreeprocessor, self).__init__(md) + + self.marker = config["marker"] + self.title = config["title"] + self.base_level = int(config["baselevel"]) - 1 + self.slugify = config["slugify"] + self.sep = config["separator"] + self.use_anchors = parseBoolValue(config["anchorlink"]) + self.use_permalinks = parseBoolValue(config["permalink"], False) + if self.use_permalinks is None: + self.use_permalinks = config["permalink"] + + self.header_rgx = re.compile("[Hh][123456]") + + def iterparent(self, root): + ''' Iterator wrapper to get parent and child all at once. ''' + for parent in root.iter(): + for child in parent: + yield parent, child + + def replace_marker(self, root, elem): + ''' Replace marker with elem. ''' + for (p, c) in self.iterparent(root): + text = ''.join(c.itertext()).strip() + if not text: + continue + + # To keep the output from screwing up the + # validation by putting a <div> inside of a <p> + # we actually replace the <p> in its entirety. + # We do not allow the marker inside a header as that + # would causes an enless loop of placing a new TOC + # inside previously generated TOC. + if c.text and c.text.strip() == self.marker and \ + not self.header_rgx.match(c.tag) and c.tag not in ['pre', 'code']: + for i in range(len(p)): + if p[i] == c: + p[i] = elem + break + + def set_level(self, elem): + ''' Adjust header level according to base level. ''' + level = int(elem.tag[-1]) + self.base_level + if level > 6: + level = 6 + elem.tag = 'h%d' % level + + def add_anchor(self, c, elem_id): # @ReservedAssignment + anchor = etree.Element("a") + anchor.text = c.text + anchor.attrib["href"] = "#" + elem_id + anchor.attrib["class"] = "toclink" + c.text = "" + for elem in c: + anchor.append(elem) + while c: + c.remove(c[0]) + c.append(anchor) + + def add_permalink(self, c, elem_id): + permalink = etree.Element("a") + permalink.text = ("%spara;" % AMP_SUBSTITUTE + if self.use_permalinks is True + else self.use_permalinks) + permalink.attrib["href"] = "#" + elem_id + permalink.attrib["class"] = "headerlink" + permalink.attrib["title"] = "Permanent link" + c.append(permalink) + + def build_toc_div(self, toc_list): + """ Return a string div given a toc list. """ + div = etree.Element("div") + div.attrib["class"] = "toc" + + # Add title to the div + if self.title: + header = etree.SubElement(div, "span") + header.attrib["class"] = "toctitle" + header.text = self.title + + def build_etree_ul(toc_list, parent): + ul = etree.SubElement(parent, "ul") + for item in toc_list: + # List item link, to be inserted into the toc div + li = etree.SubElement(ul, "li") + link = etree.SubElement(li, "a") + link.text = item.get('name', '') + link.attrib["href"] = '#' + item.get('id', '') + if item['children']: + build_etree_ul(item['children'], li) + return ul + + build_etree_ul(toc_list, div) + prettify = self.markdown.treeprocessors.get('prettify') + if prettify: + prettify.run(div) + return div + + def run(self, doc): + # Get a list of id attributes + used_ids = set() + for el in doc.iter(): + if "id" in el.attrib: + used_ids.add(el.attrib["id"]) + + toc_tokens = [] + for el in doc.iter(): + if isinstance(el.tag, string_type) and self.header_rgx.match(el.tag): + self.set_level(el) + text = ''.join(el.itertext()).strip() + + # Do not override pre-existing ids + if "id" not in el.attrib: + innertext = stashedHTML2text(text, self.markdown) + el.attrib["id"] = unique(self.slugify(innertext, self.sep), used_ids) + + toc_tokens.append({ + 'level': int(el.tag[-1]), + 'id': el.attrib["id"], + 'name': text + }) + + if self.use_anchors: + self.add_anchor(el, el.attrib["id"]) + if self.use_permalinks: + self.add_permalink(el, el.attrib["id"]) + + div = self.build_toc_div(nest_toc_tokens(toc_tokens)) + if self.marker: + self.replace_marker(doc, div) + + # serialize and attach to markdown instance. + toc = self.markdown.serializer(div) + for pp in self.markdown.postprocessors.values(): + toc = pp.run(toc) + self.markdown.toc = toc + + +class TocExtension(Extension): + + TreeProcessorClass = TocTreeprocessor + + def __init__(self, *args, **kwargs): + self.config = { + "marker": ['[TOC]', + 'Text to find and replace with Table of Contents - ' + 'Set to an empty string to disable. Defaults to "[TOC]"'], + "title": ["", + "Title to insert into TOC <div> - " + "Defaults to an empty string"], + "anchorlink": [False, + "True if header should be a self link - " + "Defaults to False"], + "permalink": [0, + "True or link text if a Sphinx-style permalink should " + "be added - Defaults to False"], + "baselevel": ['1', 'Base level for headers.'], + "slugify": [slugify, + "Function to generate anchors based on header text - " + "Defaults to the headerid ext's slugify function."], + 'separator': ['-', 'Word separator. Defaults to "-".'] + } + + super(TocExtension, self).__init__(*args, **kwargs) + + def extendMarkdown(self, md, md_globals): + md.registerExtension(self) + self.md = md + self.reset() + tocext = self.TreeProcessorClass(md, self.getConfigs()) + # Headerid ext is set to '>prettify'. With this set to '_end', + # it should always come after headerid ext (and honor ids assinged + # by the header id extension) if both are used. Same goes for + # attr_list extension. This must come last because we don't want + # to redefine ids after toc is created. But we do want toc prettified. + md.treeprocessors.add("toc", tocext, "_end") + + def reset(self): + self.md.toc = '' + + +def makeExtension(*args, **kwargs): + return TocExtension(*args, **kwargs) diff --git a/source-builder/sb/markdown/extensions/wikilinks.py b/source-builder/sb/markdown/extensions/wikilinks.py new file mode 100644 index 0000000..94e1b67 --- /dev/null +++ b/source-builder/sb/markdown/extensions/wikilinks.py @@ -0,0 +1,89 @@ +''' +WikiLinks Extension for Python-Markdown +====================================== + +Converts [[WikiLinks]] to relative links. + +See <https://pythonhosted.org/Markdown/extensions/wikilinks.html> +for documentation. + +Original code Copyright [Waylan Limberg](http://achinghead.com/). + +All changes Copyright The Python Markdown Project + +License: [BSD](http://www.opensource.org/licenses/bsd-license.php) + +''' + +from __future__ import absolute_import +from __future__ import unicode_literals +from . import Extension +from ..inlinepatterns import Pattern +from ..util import etree +import re + + +def build_url(label, base, end): + """ Build a url from the label, a base, and an end. """ + clean_label = re.sub(r'([ ]+_)|(_[ ]+)|([ ]+)', '_', label) + return '%s%s%s' % (base, clean_label, end) + + +class WikiLinkExtension(Extension): + + def __init__(self, *args, **kwargs): + self.config = { + 'base_url': ['/', 'String to append to beginning or URL.'], + 'end_url': ['/', 'String to append to end of URL.'], + 'html_class': ['wikilink', 'CSS hook. Leave blank for none.'], + 'build_url': [build_url, 'Callable formats URL from label.'], + } + + super(WikiLinkExtension, self).__init__(*args, **kwargs) + + def extendMarkdown(self, md, md_globals): + self.md = md + + # append to end of inline patterns + WIKILINK_RE = r'\[\[([\w0-9_ -]+)\]\]' + wikilinkPattern = WikiLinks(WIKILINK_RE, self.getConfigs()) + wikilinkPattern.md = md + md.inlinePatterns.add('wikilink', wikilinkPattern, "<not_strong") + + +class WikiLinks(Pattern): + def __init__(self, pattern, config): + super(WikiLinks, self).__init__(pattern) + self.config = config + + def handleMatch(self, m): + if m.group(2).strip(): + base_url, end_url, html_class = self._getMeta() + label = m.group(2).strip() + url = self.config['build_url'](label, base_url, end_url) + a = etree.Element('a') + a.text = label + a.set('href', url) + if html_class: + a.set('class', html_class) + else: + a = '' + return a + + def _getMeta(self): + """ Return meta data or config data. """ + base_url = self.config['base_url'] + end_url = self.config['end_url'] + html_class = self.config['html_class'] + if hasattr(self.md, 'Meta'): + if 'wiki_base_url' in self.md.Meta: + base_url = self.md.Meta['wiki_base_url'][0] + if 'wiki_end_url' in self.md.Meta: + end_url = self.md.Meta['wiki_end_url'][0] + if 'wiki_html_class' in self.md.Meta: + html_class = self.md.Meta['wiki_html_class'][0] + return base_url, end_url, html_class + + +def makeExtension(*args, **kwargs): + return WikiLinkExtension(*args, **kwargs) |