diff options
Diffstat (limited to 'source-builder/sb/markdown/serializers.py')
-rw-r--r-- | source-builder/sb/markdown/serializers.py | 282 |
1 files changed, 282 insertions, 0 deletions
diff --git a/source-builder/sb/markdown/serializers.py b/source-builder/sb/markdown/serializers.py new file mode 100644 index 0000000..1e8d9dd --- /dev/null +++ b/source-builder/sb/markdown/serializers.py @@ -0,0 +1,282 @@ +# markdown/searializers.py +# +# Add x/html serialization to Elementree +# Taken from ElementTree 1.3 preview with slight modifications +# +# Copyright (c) 1999-2007 by Fredrik Lundh. All rights reserved. +# +# fredrik@pythonware.com +# http://www.pythonware.com +# +# -------------------------------------------------------------------- +# The ElementTree toolkit is +# +# Copyright (c) 1999-2007 by Fredrik Lundh +# +# By obtaining, using, and/or copying this software and/or its +# associated documentation, you agree that you have read, understood, +# and will comply with the following terms and conditions: +# +# Permission to use, copy, modify, and distribute this software and +# its associated documentation for any purpose and without fee is +# hereby granted, provided that the above copyright notice appears in +# all copies, and that both that copyright notice and this permission +# notice appear in supporting documentation, and that the name of +# Secret Labs AB or the author not be used in advertising or publicity +# pertaining to distribution of the software without specific, written +# prior permission. +# +# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD +# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- +# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR +# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY +# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS +# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE +# OF THIS SOFTWARE. +# -------------------------------------------------------------------- + + +from __future__ import absolute_import +from __future__ import unicode_literals +from . import util +ElementTree = util.etree.ElementTree +QName = util.etree.QName +if hasattr(util.etree, 'test_comment'): # pragma: no cover + Comment = util.etree.test_comment +else: # pragma: no cover + Comment = util.etree.Comment +PI = util.etree.PI +ProcessingInstruction = util.etree.ProcessingInstruction + +__all__ = ['to_html_string', 'to_xhtml_string'] + +HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr", + "img", "input", "isindex", "link", "meta" "param") + +try: + HTML_EMPTY = set(HTML_EMPTY) +except NameError: # pragma: no cover + pass + +_namespace_map = { + # "well-known" namespace prefixes + "http://www.w3.org/XML/1998/namespace": "xml", + "http://www.w3.org/1999/xhtml": "html", + "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf", + "http://schemas.xmlsoap.org/wsdl/": "wsdl", + # xml schema + "http://www.w3.org/2001/XMLSchema": "xs", + "http://www.w3.org/2001/XMLSchema-instance": "xsi", + # dublic core + "http://purl.org/dc/elements/1.1/": "dc", +} + + +def _raise_serialization_error(text): # pragma: no cover + raise TypeError( + "cannot serialize %r (type %s)" % (text, type(text).__name__) + ) + + +def _encode(text, encoding): + try: + return text.encode(encoding, "xmlcharrefreplace") + except (TypeError, AttributeError): # pragma: no cover + _raise_serialization_error(text) + + +def _escape_cdata(text): + # escape character data + try: + # it's worth avoiding do-nothing calls for strings that are + # shorter than 500 character, or so. assume that's, by far, + # the most common case in most applications. + if "&" in text: + text = text.replace("&", "&") + if "<" in text: + text = text.replace("<", "<") + if ">" in text: + text = text.replace(">", ">") + return text + except (TypeError, AttributeError): # pragma: no cover + _raise_serialization_error(text) + + +def _escape_attrib(text): + # escape attribute value + try: + if "&" in text: + text = text.replace("&", "&") + if "<" in text: + text = text.replace("<", "<") + if ">" in text: + text = text.replace(">", ">") + if "\"" in text: + text = text.replace("\"", """) + if "\n" in text: + text = text.replace("\n", " ") + return text + except (TypeError, AttributeError): # pragma: no cover + _raise_serialization_error(text) + + +def _escape_attrib_html(text): + # escape attribute value + try: + if "&" in text: + text = text.replace("&", "&") + if "<" in text: + text = text.replace("<", "<") + if ">" in text: + text = text.replace(">", ">") + if "\"" in text: + text = text.replace("\"", """) + return text + except (TypeError, AttributeError): # pragma: no cover + _raise_serialization_error(text) + + +def _serialize_html(write, elem, qnames, namespaces, format): + tag = elem.tag + text = elem.text + if tag is Comment: + write("<!--%s-->" % _escape_cdata(text)) + elif tag is ProcessingInstruction: + write("<?%s?>" % _escape_cdata(text)) + else: + tag = qnames[tag] + if tag is None: + if text: + write(_escape_cdata(text)) + for e in elem: + _serialize_html(write, e, qnames, None, format) + else: + write("<" + tag) + items = elem.items() + if items or namespaces: + items = sorted(items) # lexical order + for k, v in items: + if isinstance(k, QName): + k = k.text + if isinstance(v, QName): + v = qnames[v.text] + else: + v = _escape_attrib_html(v) + if qnames[k] == v and format == 'html': + # handle boolean attributes + write(" %s" % v) + else: + write(" %s=\"%s\"" % (qnames[k], v)) + if namespaces: + items = namespaces.items() + items.sort(key=lambda x: x[1]) # sort on prefix + for v, k in items: + if k: + k = ":" + k + write(" xmlns%s=\"%s\"" % (k, _escape_attrib(v))) + if format == "xhtml" and tag.lower() in HTML_EMPTY: + write(" />") + else: + write(">") + if text: + if tag.lower() in ["script", "style"]: + write(text) + else: + write(_escape_cdata(text)) + for e in elem: + _serialize_html(write, e, qnames, None, format) + if tag.lower() not in HTML_EMPTY: + write("</" + tag + ">") + if elem.tail: + write(_escape_cdata(elem.tail)) + + +def _write_html(root, + encoding=None, + default_namespace=None, + format="html"): + assert root is not None + data = [] + write = data.append + qnames, namespaces = _namespaces(root, default_namespace) + _serialize_html(write, root, qnames, namespaces, format) + if encoding is None: + return "".join(data) + else: + return _encode("".join(data)) + + +# -------------------------------------------------------------------- +# serialization support + +def _namespaces(elem, default_namespace=None): + # identify namespaces used in this tree + + # maps qnames to *encoded* prefix:local names + qnames = {None: None} + + # maps uri:s to prefixes + namespaces = {} + if default_namespace: + namespaces[default_namespace] = "" + + def add_qname(qname): + # calculate serialized qname representation + try: + if qname[:1] == "{": + uri, tag = qname[1:].split("}", 1) + prefix = namespaces.get(uri) + if prefix is None: + prefix = _namespace_map.get(uri) + if prefix is None: + prefix = "ns%d" % len(namespaces) + if prefix != "xml": + namespaces[uri] = prefix + if prefix: + qnames[qname] = "%s:%s" % (prefix, tag) + else: + qnames[qname] = tag # default element + else: + if default_namespace: + raise ValueError( + "cannot use non-qualified names with " + "default_namespace option" + ) + qnames[qname] = qname + except TypeError: # pragma: no cover + _raise_serialization_error(qname) + + # populate qname and namespaces table + try: + iterate = elem.iter + except AttributeError: + iterate = elem.getiterator # cET compatibility + for elem in iterate(): + tag = elem.tag + if isinstance(tag, QName) and tag.text not in qnames: + add_qname(tag.text) + elif isinstance(tag, util.string_type): + if tag not in qnames: + add_qname(tag) + elif tag is not None and tag is not Comment and tag is not PI: + _raise_serialization_error(tag) + for key, value in elem.items(): + if isinstance(key, QName): + key = key.text + if key not in qnames: + add_qname(key) + if isinstance(value, QName) and value.text not in qnames: + add_qname(value.text) + text = elem.text + if isinstance(text, QName) and text.text not in qnames: + add_qname(text.text) + return qnames, namespaces + + +def to_html_string(element): + return _write_html(ElementTree(element).getroot(), format="html") + + +def to_xhtml_string(element): + return _write_html(ElementTree(element).getroot(), format="xhtml") |