summaryrefslogtreecommitdiffstats
path: root/source-builder/sb/markdown/treeprocessors.py
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--source-builder/sb/markdown/treeprocessors.py371
1 files changed, 371 insertions, 0 deletions
diff --git a/source-builder/sb/markdown/treeprocessors.py b/source-builder/sb/markdown/treeprocessors.py
new file mode 100644
index 0000000..bb76572
--- /dev/null
+++ b/source-builder/sb/markdown/treeprocessors.py
@@ -0,0 +1,371 @@
+from __future__ import unicode_literals
+from __future__ import absolute_import
+from . import util
+from . import odict
+from . import inlinepatterns
+
+
+def build_treeprocessors(md_instance, **kwargs):
+ """ Build the default treeprocessors for Markdown. """
+ treeprocessors = odict.OrderedDict()
+ treeprocessors["inline"] = InlineProcessor(md_instance)
+ treeprocessors["prettify"] = PrettifyTreeprocessor(md_instance)
+ return treeprocessors
+
+
+def isString(s):
+ """ Check if it's string """
+ if not isinstance(s, util.AtomicString):
+ return isinstance(s, util.string_type)
+ return False
+
+
+class Treeprocessor(util.Processor):
+ """
+ Treeprocessors are run on the ElementTree object before serialization.
+
+ Each Treeprocessor implements a "run" method that takes a pointer to an
+ ElementTree, modifies it as necessary and returns an ElementTree
+ object.
+
+ Treeprocessors must extend markdown.Treeprocessor.
+
+ """
+ def run(self, root):
+ """
+ Subclasses of Treeprocessor should implement a `run` method, which
+ takes a root ElementTree. This method can return another ElementTree
+ object, and the existing root ElementTree will be replaced, or it can
+ modify the current tree and return None.
+ """
+ pass # pragma: no cover
+
+
+class InlineProcessor(Treeprocessor):
+ """
+ A Treeprocessor that traverses a tree, applying inline patterns.
+ """
+
+ def __init__(self, md):
+ self.__placeholder_prefix = util.INLINE_PLACEHOLDER_PREFIX
+ self.__placeholder_suffix = util.ETX
+ self.__placeholder_length = 4 + len(self.__placeholder_prefix) \
+ + len(self.__placeholder_suffix)
+ self.__placeholder_re = util.INLINE_PLACEHOLDER_RE
+ self.markdown = md
+ self.inlinePatterns = md.inlinePatterns
+
+ def __makePlaceholder(self, type):
+ """ Generate a placeholder """
+ id = "%04d" % len(self.stashed_nodes)
+ hash = util.INLINE_PLACEHOLDER % id
+ return hash, id
+
+ def __findPlaceholder(self, data, index):
+ """
+ Extract id from data string, start from index
+
+ Keyword arguments:
+
+ * data: string
+ * index: index, from which we start search
+
+ Returns: placeholder id and string index, after the found placeholder.
+
+ """
+ m = self.__placeholder_re.search(data, index)
+ if m:
+ return m.group(1), m.end()
+ else:
+ return None, index + 1
+
+ def __stashNode(self, node, type):
+ """ Add node to stash """
+ placeholder, id = self.__makePlaceholder(type)
+ self.stashed_nodes[id] = node
+ return placeholder
+
+ def __handleInline(self, data, patternIndex=0):
+ """
+ Process string with inline patterns and replace it
+ with placeholders
+
+ Keyword arguments:
+
+ * data: A line of Markdown text
+ * patternIndex: The index of the inlinePattern to start with
+
+ Returns: String with placeholders.
+
+ """
+ if not isinstance(data, util.AtomicString):
+ startIndex = 0
+ while patternIndex < len(self.inlinePatterns):
+ data, matched, startIndex = self.__applyPattern(
+ self.inlinePatterns.value_for_index(patternIndex),
+ data, patternIndex, startIndex)
+ if not matched:
+ patternIndex += 1
+ return data
+
+ def __processElementText(self, node, subnode, isText=True):
+ """
+ Process placeholders in Element.text or Element.tail
+ of Elements popped from self.stashed_nodes.
+
+ Keywords arguments:
+
+ * node: parent node
+ * subnode: processing node
+ * isText: bool variable, True - it's text, False - it's tail
+
+ Returns: None
+
+ """
+ if isText:
+ text = subnode.text
+ subnode.text = None
+ else:
+ text = subnode.tail
+ subnode.tail = None
+
+ childResult = self.__processPlaceholders(text, subnode, isText)
+
+ if not isText and node is not subnode:
+ pos = list(node).index(subnode) + 1
+ else:
+ pos = 0
+
+ childResult.reverse()
+ for newChild in childResult:
+ node.insert(pos, newChild)
+
+ def __processPlaceholders(self, data, parent, isText=True):
+ """
+ Process string with placeholders and generate ElementTree tree.
+
+ Keyword arguments:
+
+ * data: string with placeholders instead of ElementTree elements.
+ * parent: Element, which contains processing inline data
+
+ Returns: list with ElementTree elements with applied inline patterns.
+
+ """
+ def linkText(text):
+ if text:
+ if result:
+ if result[-1].tail:
+ result[-1].tail += text
+ else:
+ result[-1].tail = text
+ elif not isText:
+ if parent.tail:
+ parent.tail += text
+ else:
+ parent.tail = text
+ else:
+ if parent.text:
+ parent.text += text
+ else:
+ parent.text = text
+ result = []
+ strartIndex = 0
+ while data:
+ index = data.find(self.__placeholder_prefix, strartIndex)
+ if index != -1:
+ id, phEndIndex = self.__findPlaceholder(data, index)
+
+ if id in self.stashed_nodes:
+ node = self.stashed_nodes.get(id)
+
+ if index > 0:
+ text = data[strartIndex:index]
+ linkText(text)
+
+ if not isString(node): # it's Element
+ for child in [node] + list(node):
+ if child.tail:
+ if child.tail.strip():
+ self.__processElementText(
+ node, child, False
+ )
+ if child.text:
+ if child.text.strip():
+ self.__processElementText(child, child)
+ else: # it's just a string
+ linkText(node)
+ strartIndex = phEndIndex
+ continue
+
+ strartIndex = phEndIndex
+ result.append(node)
+
+ else: # wrong placeholder
+ end = index + len(self.__placeholder_prefix)
+ linkText(data[strartIndex:end])
+ strartIndex = end
+ else:
+ text = data[strartIndex:]
+ if isinstance(data, util.AtomicString):
+ # We don't want to loose the AtomicString
+ text = util.AtomicString(text)
+ linkText(text)
+ data = ""
+
+ return result
+
+ def __applyPattern(self, pattern, data, patternIndex, startIndex=0):
+ """
+ Check if the line fits the pattern, create the necessary
+ elements, add it to stashed_nodes.
+
+ Keyword arguments:
+
+ * data: the text to be processed
+ * pattern: the pattern to be checked
+ * patternIndex: index of current pattern
+ * startIndex: string index, from which we start searching
+
+ Returns: String with placeholders instead of ElementTree elements.
+
+ """
+ match = pattern.getCompiledRegExp().match(data[startIndex:])
+ leftData = data[:startIndex]
+
+ if not match:
+ return data, False, 0
+
+ node = pattern.handleMatch(match)
+
+ if node is None:
+ return data, True, len(leftData)+match.span(len(match.groups()))[0]
+
+ if not isString(node):
+ if not isinstance(node.text, util.AtomicString):
+ # We need to process current node too
+ for child in [node] + list(node):
+ if not isString(node):
+ if child.text:
+ child.text = self.__handleInline(
+ child.text, patternIndex + 1
+ )
+ if child.tail:
+ child.tail = self.__handleInline(
+ child.tail, patternIndex
+ )
+
+ placeholder = self.__stashNode(node, pattern.type())
+
+ return "%s%s%s%s" % (leftData,
+ match.group(1),
+ placeholder, match.groups()[-1]), True, 0
+
+ def run(self, tree):
+ """Apply inline patterns to a parsed Markdown tree.
+
+ Iterate over ElementTree, find elements with inline tag, apply inline
+ patterns and append newly created Elements to tree. If you don't
+ want to process your data with inline paterns, instead of normal
+ string, use subclass AtomicString:
+
+ node.text = markdown.AtomicString("This will not be processed.")
+
+ Arguments:
+
+ * tree: ElementTree object, representing Markdown tree.
+
+ Returns: ElementTree object with applied inline patterns.
+
+ """
+ self.stashed_nodes = {}
+
+ stack = [tree]
+
+ while stack:
+ currElement = stack.pop()
+ insertQueue = []
+ for child in currElement:
+ if child.text and not isinstance(
+ child.text, util.AtomicString
+ ):
+ text = child.text
+ child.text = None
+ lst = self.__processPlaceholders(
+ self.__handleInline(text), child
+ )
+ stack += lst
+ insertQueue.append((child, lst))
+ if child.tail:
+ tail = self.__handleInline(child.tail)
+ dumby = util.etree.Element('d')
+ child.tail = None
+ tailResult = self.__processPlaceholders(tail, dumby, False)
+ if dumby.tail:
+ child.tail = dumby.tail
+ pos = list(currElement).index(child) + 1
+ tailResult.reverse()
+ for newChild in tailResult:
+ currElement.insert(pos, newChild)
+ if len(child):
+ stack.append(child)
+
+ for element, lst in insertQueue:
+ if self.markdown.enable_attributes:
+ if element.text and isString(element.text):
+ element.text = inlinepatterns.handleAttributes(
+ element.text, element
+ )
+ i = 0
+ for newChild in lst:
+ if self.markdown.enable_attributes:
+ # Processing attributes
+ if newChild.tail and isString(newChild.tail):
+ newChild.tail = inlinepatterns.handleAttributes(
+ newChild.tail, element
+ )
+ if newChild.text and isString(newChild.text):
+ newChild.text = inlinepatterns.handleAttributes(
+ newChild.text, newChild
+ )
+ element.insert(i, newChild)
+ i += 1
+ return tree
+
+
+class PrettifyTreeprocessor(Treeprocessor):
+ """ Add linebreaks to the html document. """
+
+ def _prettifyETree(self, elem):
+ """ Recursively add linebreaks to ElementTree children. """
+
+ i = "\n"
+ if util.isBlockLevel(elem.tag) and elem.tag not in ['code', 'pre']:
+ if (not elem.text or not elem.text.strip()) \
+ and len(elem) and util.isBlockLevel(elem[0].tag):
+ elem.text = i
+ for e in elem:
+ if util.isBlockLevel(e.tag):
+ self._prettifyETree(e)
+ if not elem.tail or not elem.tail.strip():
+ elem.tail = i
+ if not elem.tail or not elem.tail.strip():
+ elem.tail = i
+
+ def run(self, root):
+ """ Add linebreaks to ElementTree root object. """
+
+ self._prettifyETree(root)
+ # Do <br />'s seperately as they are often in the middle of
+ # inline content and missed by _prettifyETree.
+ brs = root.iter('br')
+ for br in brs:
+ if not br.tail or not br.tail.strip():
+ br.tail = '\n'
+ else:
+ br.tail = '\n%s' % br.tail
+ # Clean up extra empty lines at end of code blocks.
+ pres = root.iter('pre')
+ for pre in pres:
+ if len(pre) and pre[0].tag == 'code':
+ pre[0].text = util.AtomicString(pre[0].text.rstrip() + '\n')