diff options
Diffstat (limited to 'source-builder/sb/markdown/postprocessors.py')
-rw-r--r-- | source-builder/sb/markdown/postprocessors.py | 111 |
1 files changed, 111 insertions, 0 deletions
diff --git a/source-builder/sb/markdown/postprocessors.py b/source-builder/sb/markdown/postprocessors.py new file mode 100644 index 0000000..8b311b2 --- /dev/null +++ b/source-builder/sb/markdown/postprocessors.py @@ -0,0 +1,111 @@ +""" +POST-PROCESSORS +============================================================================= + +Markdown also allows post-processors, which are similar to preprocessors in +that they need to implement a "run" method. However, they are run after core +processing. + +""" + +from __future__ import absolute_import +from __future__ import unicode_literals +from collections import OrderedDict +from . import util +from . import odict +import re + + +def build_postprocessors(md_instance, **kwargs): + """ Build the default postprocessors for Markdown. """ + postprocessors = odict.OrderedDict() + postprocessors["raw_html"] = RawHtmlPostprocessor(md_instance) + postprocessors["amp_substitute"] = AndSubstitutePostprocessor() + postprocessors["unescape"] = UnescapePostprocessor() + return postprocessors + + +class Postprocessor(util.Processor): + """ + Postprocessors are run after the ElementTree it converted back into text. + + Each Postprocessor implements a "run" method that takes a pointer to a + text string, modifies it as necessary and returns a text string. + + Postprocessors must extend markdown.Postprocessor. + + """ + + def run(self, text): + """ + Subclasses of Postprocessor should implement a `run` method, which + takes the html document as a single text string and returns a + (possibly modified) string. + + """ + pass # pragma: no cover + + +class RawHtmlPostprocessor(Postprocessor): + """ Restore raw html to the document. """ + + def run(self, text): + """ Iterate over html stash and restore "safe" html. """ + replacements = OrderedDict() + for i in range(self.markdown.htmlStash.html_counter): + html, safe = self.markdown.htmlStash.rawHtmlBlocks[i] + if self.markdown.safeMode and not safe: + if str(self.markdown.safeMode).lower() == 'escape': + html = self.escape(html) + elif str(self.markdown.safeMode).lower() == 'remove': + html = '' + else: + html = self.markdown.html_replacement_text + if (self.isblocklevel(html) and + (safe or not self.markdown.safeMode)): + replacements["<p>%s</p>" % + (self.markdown.htmlStash.get_placeholder(i))] = \ + html + "\n" + replacements[self.markdown.htmlStash.get_placeholder(i)] = html + + if replacements: + pattern = re.compile("|".join(re.escape(k) for k in replacements)) + text = pattern.sub(lambda m: replacements[m.group(0)], text) + + return text + + def escape(self, html): + """ Basic html escaping """ + html = html.replace('&', '&') + html = html.replace('<', '<') + html = html.replace('>', '>') + return html.replace('"', '"') + + def isblocklevel(self, html): + m = re.match(r'^\<\/?([^ >]+)', html) + if m: + if m.group(1)[0] in ('!', '?', '@', '%'): + # Comment, php etc... + return True + return util.isBlockLevel(m.group(1)) + return False + + +class AndSubstitutePostprocessor(Postprocessor): + """ Restore valid entities """ + + def run(self, text): + text = text.replace(util.AMP_SUBSTITUTE, "&") + return text + + +class UnescapePostprocessor(Postprocessor): + """ Restore escaped chars """ + + RE = re.compile('%s(\d+)%s' % (util.STX, util.ETX)) + + def unescape(self, m): + return util.int2str(int(m.group(1))) + + def run(self, text): + return self.RE.sub(self.unescape, text) |