1 files changed, 371 insertions, 0 deletions
diff --git a/source-builder/sb/markdown/treeprocessors.py b/source-builder/sb/markdown/treeprocessors.py
new file mode 100644
index 0000000..bb76572
--- /dev/null
+++ b/source-builder/sb/markdown/treeprocessors.py
@@ -0,0 +1,371 @@
+from __future__ import unicode_literals
+from __future__ import absolute_import
+from . import util
+from . import odict
+from . import inlinepatterns
+
+
+def build_treeprocessors(md_instance, **kwargs):
+    """ Build the default treeprocessors for Markdown. """
+    treeprocessors = odict.OrderedDict()
+    treeprocessors["inline"] = InlineProcessor(md_instance)
+    treeprocessors["prettify"] = PrettifyTreeprocessor(md_instance)
+    return treeprocessors
+
+
+def isString(s):
+    """ Check if it's string """
+    if not isinstance(s, util.AtomicString):
+        return isinstance(s, util.string_type)
+    return False
+
+
+class Treeprocessor(util.Processor):
+    """
+    Treeprocessors are run on the ElementTree object before serialization.
+
+    Each Treeprocessor implements a "run" method that takes a pointer to an
+    ElementTree, modifies it as necessary and returns an ElementTree
+    object.
+
+    Treeprocessors must extend markdown.Treeprocessor.
+
+    """
+    def run(self, root):
+        """
+        Subclasses of Treeprocessor should implement a `run` method, which
+        takes a root ElementTree. This method can return another ElementTree
+        object, and the existing root ElementTree will be replaced, or it can
+        modify the current tree and return None.
+        """
+        pass  # pragma: no cover
+
+
+class InlineProcessor(Treeprocessor):
+    """
+    A Treeprocessor that traverses a tree, applying inline patterns.
+    """
+
+    def __init__(self, md):
+        self.__placeholder_prefix = util.INLINE_PLACEHOLDER_PREFIX
+        self.__placeholder_suffix = util.ETX
+        self.__placeholder_length = 4 + len(self.__placeholder_prefix) \
+                                      + len(self.__placeholder_suffix)
+        self.__placeholder_re = util.INLINE_PLACEHOLDER_RE
+        self.markdown = md
+        self.inlinePatterns = md.inlinePatterns
+
+    def __makePlaceholder(self, type):
+        """ Generate a placeholder """
+        id = "%04d" % len(self.stashed_nodes)
+        hash = util.INLINE_PLACEHOLDER % id
+        return hash, id
+
+    def __findPlaceholder(self, data, index):
+        """
+        Extract id from data string, start from index
+
+        Keyword arguments:
+
+        * data: string
+        * index: index, from which we start search
+
+        Returns: placeholder id and string index, after the found placeholder.
+
+        """
+        m = self.__placeholder_re.search(data, index)
+        if m:
+            return m.group(1), m.end()
+        else:
+            return None, index + 1
+
+    def __stashNode(self, node, type):
+        """ Add node to stash """
+        placeholder, id = self.__makePlaceholder(type)
+        self.stashed_nodes[id] = node
+        return placeholder
+
+    def __handleInline(self, data, patternIndex=0):
+        """
+        Process string with inline patterns and replace it
+        with placeholders
+
+        Keyword arguments:
+
+        * data: A line of Markdown text
+        * patternIndex: The index of the inlinePattern to start with
+
+        Returns: String with placeholders.
+
+        """
+        if not isinstance(data, util.AtomicString):
+            startIndex = 0
+            while patternIndex < len(self.inlinePatterns):
+                data, matched, startIndex = self.__applyPattern(
+                    self.inlinePatterns.value_for_index(patternIndex),
+                    data, patternIndex, startIndex)
+                if not matched:
+                    patternIndex += 1
+        return data
+
+    def __processElementText(self, node, subnode, isText=True):
+        """
+        Process placeholders in Element.text or Element.tail
+        of Elements popped from self.stashed_nodes.
+
+        Keywords arguments:
+
+        * node: parent node
+        * subnode: processing node
+        * isText: bool variable, True - it's text, False - it's tail
+
+        Returns: None
+
+        """
+        if isText:
+            text = subnode.text
+            subnode.text = None
+        else:
+            text = subnode.tail
+            subnode.tail = None
+
+        childResult = self.__processPlaceholders(text, subnode, isText)
+
+        if not isText and node is not subnode:
+            pos = list(node).index(subnode) + 1
+        else:
+            pos = 0
+
+        childResult.reverse()
+        for newChild in childResult:
+            node.insert(pos, newChild)
+
+    def __processPlaceholders(self, data, parent, isText=True):
+        """
+        Process string with placeholders and generate ElementTree tree.
+
+        Keyword arguments:
+
+        * data: string with placeholders instead of ElementTree elements.
+        * parent: Element, which contains processing inline data
+
+        Returns: list with ElementTree elements with applied inline patterns.
+
+        """
+        def linkText(text):
+            if text:
+                if result:
+                    if result[-1].tail:
+                        result[-1].tail += text
+                    else:
+                        result[-1].tail = text
+                elif not isText:
+                    if parent.tail:
+                        parent.tail += text
+                    else:
+                        parent.tail = text
+                else:
+                    if parent.text:
+                        parent.text += text
+                    else:
+                        parent.text = text
+        result = []
+        strartIndex = 0
+        while data:
+            index = data.find(self.__placeholder_prefix, strartIndex)
+            if index != -1:
+                id, phEndIndex = self.__findPlaceholder(data, index)
+
+                if id in self.stashed_nodes:
+                    node = self.stashed_nodes.get(id)
+
+                    if index > 0:
+                        text = data[strartIndex:index]
+                        linkText(text)
+
+                    if not isString(node):  # it's Element
+                        for child in [node] + list(node):
+                            if child.tail:
+                                if child.tail.strip():
+                                    self.__processElementText(
+                                        node, child, False
+                                    )
+                            if child.text:
+                                if child.text.strip():
+                                    self.__processElementText(child, child)
+                    else:  # it's just a string
+                        linkText(node)
+                        strartIndex = phEndIndex
+                        continue
+
+                    strartIndex = phEndIndex
+                    result.append(node)
+
+                else:  # wrong placeholder
+                    end = index + len(self.__placeholder_prefix)
+                    linkText(data[strartIndex:end])
+                    strartIndex = end
+            else:
+                text = data[strartIndex:]
+                if isinstance(data, util.AtomicString):
+                    # We don't want to loose the AtomicString
+                    text = util.AtomicString(text)
+                linkText(text)
+                data = ""
+
+        return result
+
+    def __applyPattern(self, pattern, data, patternIndex, startIndex=0):
+        """
+        Check if the line fits the pattern, create the necessary
+        elements, add it to stashed_nodes.
+
+        Keyword arguments:
+
+        * data: the text to be processed
+        * pattern: the pattern to be checked
+        * patternIndex: index of current pattern
+        * startIndex: string index, from which we start searching
+
+        Returns: String with placeholders instead of ElementTree elements.
+
+        """
+        match = pattern.getCompiledRegExp().match(data[startIndex:])
+        leftData = data[:startIndex]
+
+        if not match:
+            return data, False, 0
+
+        node = pattern.handleMatch(match)
+
+        if node is None:
+            return data, True, len(leftData)+match.span(len(match.groups()))[0]
+
+        if not isString(node):
+            if not isinstance(node.text, util.AtomicString):
+                # We need to process current node too
+                for child in [node] + list(node):
+                    if not isString(node):
+                        if child.text:
+                            child.text = self.__handleInline(
+                                child.text, patternIndex + 1
+                            )
+                        if child.tail:
+                            child.tail = self.__handleInline(
+                                child.tail, patternIndex
+                            )
+
+        placeholder = self.__stashNode(node, pattern.type())
+
+        return "%s%s%s%s" % (leftData,
+                             match.group(1),
+                             placeholder, match.groups()[-1]), True, 0
+
+    def run(self, tree):
+        """Apply inline patterns to a parsed Markdown tree.
+
+        Iterate over ElementTree, find elements with inline tag, apply inline
+        patterns and append newly created Elements to tree.  If you don't
+        want to process your data with inline paterns, instead of normal
+        string, use subclass AtomicString:
+
+            node.text = markdown.AtomicString("This will not be processed.")
+
+        Arguments:
+
+        * tree: ElementTree object, representing Markdown tree.
+
+        Returns: ElementTree object with applied inline patterns.
+
+        """
+        self.stashed_nodes = {}
+
+        stack = [tree]
+
+        while stack:
+            currElement = stack.pop()
+            insertQueue = []
+            for child in currElement:
+                if child.text and not isinstance(
+                    child.text, util.AtomicString
+                ):
+                    text = child.text
+                    child.text = None
+                    lst = self.__processPlaceholders(
+                        self.__handleInline(text), child
+                    )
+                    stack += lst
+                    insertQueue.append((child, lst))
+                if child.tail:
+                    tail = self.__handleInline(child.tail)
+                    dumby = util.etree.Element('d')
+                    child.tail = None
+                    tailResult = self.__processPlaceholders(tail, dumby, False)
+                    if dumby.tail:
+                        child.tail = dumby.tail
+                    pos = list(currElement).index(child) + 1
+                    tailResult.reverse()
+                    for newChild in tailResult:
+                        currElement.insert(pos, newChild)
+                if len(child):
+                    stack.append(child)
+
+            for element, lst in insertQueue:
+                if self.markdown.enable_attributes:
+                    if element.text and isString(element.text):
+                        element.text = inlinepatterns.handleAttributes(
+                            element.text, element
+                        )
+                i = 0
+                for newChild in lst:
+                    if self.markdown.enable_attributes:
+                        # Processing attributes
+                        if newChild.tail and isString(newChild.tail):
+                            newChild.tail = inlinepatterns.handleAttributes(
+                                newChild.tail, element
+                            )
+                        if newChild.text and isString(newChild.text):
+                            newChild.text = inlinepatterns.handleAttributes(
+                                newChild.text, newChild
+                            )
+                    element.insert(i, newChild)
+                    i += 1
+        return tree
+
+
+class PrettifyTreeprocessor(Treeprocessor):
+    """ Add linebreaks to the html document. """
+
+    def _prettifyETree(self, elem):
+        """ Recursively add linebreaks to ElementTree children. """
+
+        i = "\n"
+        if util.isBlockLevel(elem.tag) and elem.tag not in ['code', 'pre']:
+            if (not elem.text or not elem.text.strip()) \
+                    and len(elem) and util.isBlockLevel(elem[0].tag):
+                elem.text = i
+            for e in elem:
+                if util.isBlockLevel(e.tag):
+                    self._prettifyETree(e)
+            if not elem.tail or not elem.tail.strip():
+                elem.tail = i
+        if not elem.tail or not elem.tail.strip():
+            elem.tail = i
+
+    def run(self, root):
+        """ Add linebreaks to ElementTree root object. """
+
+        self._prettifyETree(root)
+        # Do <br />'s seperately as they are often in the middle of
+        # inline content and missed by _prettifyETree.
+        brs = root.iter('br')
+        for br in brs:
+            if not br.tail or not br.tail.strip():
+                br.tail = '\n'
+            else:
+                br.tail = '\n%s' % br.tail
+        # Clean up extra empty lines at end of code blocks.
+        pres = root.iter('pre')
+        for pre in pres:
+            if len(pre) and pre[0].tag == 'code':
+                pre[0].text = util.AtomicString(pre[0].text.rstrip() + '\n')