summaryrefslogtreecommitdiffstats
path: root/source-builder/sb/markdown/serializers.py
diff options
context:
space:
mode:
Diffstat (limited to 'source-builder/sb/markdown/serializers.py')
-rw-r--r--source-builder/sb/markdown/serializers.py282
1 files changed, 282 insertions, 0 deletions
diff --git a/source-builder/sb/markdown/serializers.py b/source-builder/sb/markdown/serializers.py
new file mode 100644
index 0000000..1e8d9dd
--- /dev/null
+++ b/source-builder/sb/markdown/serializers.py
@@ -0,0 +1,282 @@
+# markdown/searializers.py
+#
+# Add x/html serialization to Elementree
+# Taken from ElementTree 1.3 preview with slight modifications
+#
+# Copyright (c) 1999-2007 by Fredrik Lundh. All rights reserved.
+#
+# fredrik@pythonware.com
+# http://www.pythonware.com
+#
+# --------------------------------------------------------------------
+# The ElementTree toolkit is
+#
+# Copyright (c) 1999-2007 by Fredrik Lundh
+#
+# By obtaining, using, and/or copying this software and/or its
+# associated documentation, you agree that you have read, understood,
+# and will comply with the following terms and conditions:
+#
+# Permission to use, copy, modify, and distribute this software and
+# its associated documentation for any purpose and without fee is
+# hereby granted, provided that the above copyright notice appears in
+# all copies, and that both that copyright notice and this permission
+# notice appear in supporting documentation, and that the name of
+# Secret Labs AB or the author not be used in advertising or publicity
+# pertaining to distribution of the software without specific, written
+# prior permission.
+#
+# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
+# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
+# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
+# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
+# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+# OF THIS SOFTWARE.
+# --------------------------------------------------------------------
+
+
+from __future__ import absolute_import
+from __future__ import unicode_literals
+from . import util
+ElementTree = util.etree.ElementTree
+QName = util.etree.QName
+if hasattr(util.etree, 'test_comment'): # pragma: no cover
+ Comment = util.etree.test_comment
+else: # pragma: no cover
+ Comment = util.etree.Comment
+PI = util.etree.PI
+ProcessingInstruction = util.etree.ProcessingInstruction
+
+__all__ = ['to_html_string', 'to_xhtml_string']
+
+HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr",
+ "img", "input", "isindex", "link", "meta" "param")
+
+try:
+ HTML_EMPTY = set(HTML_EMPTY)
+except NameError: # pragma: no cover
+ pass
+
+_namespace_map = {
+ # "well-known" namespace prefixes
+ "http://www.w3.org/XML/1998/namespace": "xml",
+ "http://www.w3.org/1999/xhtml": "html",
+ "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
+ "http://schemas.xmlsoap.org/wsdl/": "wsdl",
+ # xml schema
+ "http://www.w3.org/2001/XMLSchema": "xs",
+ "http://www.w3.org/2001/XMLSchema-instance": "xsi",
+ # dublic core
+ "http://purl.org/dc/elements/1.1/": "dc",
+}
+
+
+def _raise_serialization_error(text): # pragma: no cover
+ raise TypeError(
+ "cannot serialize %r (type %s)" % (text, type(text).__name__)
+ )
+
+
+def _encode(text, encoding):
+ try:
+ return text.encode(encoding, "xmlcharrefreplace")
+ except (TypeError, AttributeError): # pragma: no cover
+ _raise_serialization_error(text)
+
+
+def _escape_cdata(text):
+ # escape character data
+ try:
+ # it's worth avoiding do-nothing calls for strings that are
+ # shorter than 500 character, or so. assume that's, by far,
+ # the most common case in most applications.
+ if "&" in text:
+ text = text.replace("&", "&")
+ if "<" in text:
+ text = text.replace("<", "&lt;")
+ if ">" in text:
+ text = text.replace(">", "&gt;")
+ return text
+ except (TypeError, AttributeError): # pragma: no cover
+ _raise_serialization_error(text)
+
+
+def _escape_attrib(text):
+ # escape attribute value
+ try:
+ if "&" in text:
+ text = text.replace("&", "&amp;")
+ if "<" in text:
+ text = text.replace("<", "&lt;")
+ if ">" in text:
+ text = text.replace(">", "&gt;")
+ if "\"" in text:
+ text = text.replace("\"", "&quot;")
+ if "\n" in text:
+ text = text.replace("\n", "&#10;")
+ return text
+ except (TypeError, AttributeError): # pragma: no cover
+ _raise_serialization_error(text)
+
+
+def _escape_attrib_html(text):
+ # escape attribute value
+ try:
+ if "&" in text:
+ text = text.replace("&", "&amp;")
+ if "<" in text:
+ text = text.replace("<", "&lt;")
+ if ">" in text:
+ text = text.replace(">", "&gt;")
+ if "\"" in text:
+ text = text.replace("\"", "&quot;")
+ return text
+ except (TypeError, AttributeError): # pragma: no cover
+ _raise_serialization_error(text)
+
+
+def _serialize_html(write, elem, qnames, namespaces, format):
+ tag = elem.tag
+ text = elem.text
+ if tag is Comment:
+ write("<!--%s-->" % _escape_cdata(text))
+ elif tag is ProcessingInstruction:
+ write("<?%s?>" % _escape_cdata(text))
+ else:
+ tag = qnames[tag]
+ if tag is None:
+ if text:
+ write(_escape_cdata(text))
+ for e in elem:
+ _serialize_html(write, e, qnames, None, format)
+ else:
+ write("<" + tag)
+ items = elem.items()
+ if items or namespaces:
+ items = sorted(items) # lexical order
+ for k, v in items:
+ if isinstance(k, QName):
+ k = k.text
+ if isinstance(v, QName):
+ v = qnames[v.text]
+ else:
+ v = _escape_attrib_html(v)
+ if qnames[k] == v and format == 'html':
+ # handle boolean attributes
+ write(" %s" % v)
+ else:
+ write(" %s=\"%s\"" % (qnames[k], v))
+ if namespaces:
+ items = namespaces.items()
+ items.sort(key=lambda x: x[1]) # sort on prefix
+ for v, k in items:
+ if k:
+ k = ":" + k
+ write(" xmlns%s=\"%s\"" % (k, _escape_attrib(v)))
+ if format == "xhtml" and tag.lower() in HTML_EMPTY:
+ write(" />")
+ else:
+ write(">")
+ if text:
+ if tag.lower() in ["script", "style"]:
+ write(text)
+ else:
+ write(_escape_cdata(text))
+ for e in elem:
+ _serialize_html(write, e, qnames, None, format)
+ if tag.lower() not in HTML_EMPTY:
+ write("</" + tag + ">")
+ if elem.tail:
+ write(_escape_cdata(elem.tail))
+
+
+def _write_html(root,
+ encoding=None,
+ default_namespace=None,
+ format="html"):
+ assert root is not None
+ data = []
+ write = data.append
+ qnames, namespaces = _namespaces(root, default_namespace)
+ _serialize_html(write, root, qnames, namespaces, format)
+ if encoding is None:
+ return "".join(data)
+ else:
+ return _encode("".join(data))
+
+
+# --------------------------------------------------------------------
+# serialization support
+
+def _namespaces(elem, default_namespace=None):
+ # identify namespaces used in this tree
+
+ # maps qnames to *encoded* prefix:local names
+ qnames = {None: None}
+
+ # maps uri:s to prefixes
+ namespaces = {}
+ if default_namespace:
+ namespaces[default_namespace] = ""
+
+ def add_qname(qname):
+ # calculate serialized qname representation
+ try:
+ if qname[:1] == "{":
+ uri, tag = qname[1:].split("}", 1)
+ prefix = namespaces.get(uri)
+ if prefix is None:
+ prefix = _namespace_map.get(uri)
+ if prefix is None:
+ prefix = "ns%d" % len(namespaces)
+ if prefix != "xml":
+ namespaces[uri] = prefix
+ if prefix:
+ qnames[qname] = "%s:%s" % (prefix, tag)
+ else:
+ qnames[qname] = tag # default element
+ else:
+ if default_namespace:
+ raise ValueError(
+ "cannot use non-qualified names with "
+ "default_namespace option"
+ )
+ qnames[qname] = qname
+ except TypeError: # pragma: no cover
+ _raise_serialization_error(qname)
+
+ # populate qname and namespaces table
+ try:
+ iterate = elem.iter
+ except AttributeError:
+ iterate = elem.getiterator # cET compatibility
+ for elem in iterate():
+ tag = elem.tag
+ if isinstance(tag, QName) and tag.text not in qnames:
+ add_qname(tag.text)
+ elif isinstance(tag, util.string_type):
+ if tag not in qnames:
+ add_qname(tag)
+ elif tag is not None and tag is not Comment and tag is not PI:
+ _raise_serialization_error(tag)
+ for key, value in elem.items():
+ if isinstance(key, QName):
+ key = key.text
+ if key not in qnames:
+ add_qname(key)
+ if isinstance(value, QName) and value.text not in qnames:
+ add_qname(value.text)
+ text = elem.text
+ if isinstance(text, QName) and text.text not in qnames:
+ add_qname(text.text)
+ return qnames, namespaces
+
+
+def to_html_string(element):
+ return _write_html(ElementTree(element).getroot(), format="html")
+
+
+def to_xhtml_string(element):
+ return _write_html(ElementTree(element).getroot(), format="xhtml")