summaryrefslogtreecommitdiffstats
path: root/common/sphinxcontrib/bibtex/cache.py
diff options
context:
space:
mode:
Diffstat (limited to 'common/sphinxcontrib/bibtex/cache.py')
-rw-r--r--common/sphinxcontrib/bibtex/cache.py406
1 files changed, 406 insertions, 0 deletions
diff --git a/common/sphinxcontrib/bibtex/cache.py b/common/sphinxcontrib/bibtex/cache.py
new file mode 100644
index 0000000..aa9064f
--- /dev/null
+++ b/common/sphinxcontrib/bibtex/cache.py
@@ -0,0 +1,406 @@
+# -*- coding: utf-8 -*-
+"""
+ Cached Information
+ ~~~~~~~~~~~~~~~~~~
+
+ Classes and methods to maintain any information that is stored
+ outside the doctree.
+
+ .. autoclass:: Cache
+ :members:
+
+ .. autoclass:: BibfileCache
+ :members:
+
+ .. autoclass:: BibliographyCache
+ :members:
+"""
+
+import six
+try: # pragma: no cover
+ from collections import OrderedDict
+except ImportError: # pragma: no cover
+ from ordereddict import OrderedDict
+import ast
+import collections
+import copy
+from oset import oset
+import re
+
+
+def _raise_invalid_node(node):
+ """Helper method to raise an exception when an invalid node is
+ visited.
+ """
+ raise ValueError("invalid node %s in filter expression" % node)
+
+
+class _FilterVisitor(ast.NodeVisitor):
+
+ """Visit the abstract syntax tree of a parsed filter expression."""
+
+ entry = None
+ """The bibliographic entry to which the filter must be applied."""
+
+ cited_docnames = False
+ """The documents where the entry is cited (empty if not cited)."""
+
+ def __init__(self, entry, docname, cited_docnames):
+ self.entry = entry
+ self.docname = docname
+ self.cited_docnames = cited_docnames
+
+ def visit_Module(self, node):
+ if len(node.body) != 1:
+ raise ValueError(
+ "filter expression cannot contain multiple expressions")
+ return self.visit(node.body[0])
+
+ def visit_Expr(self, node):
+ return self.visit(node.value)
+
+ def visit_BoolOp(self, node):
+ outcomes = (self.visit(value) for value in node.values)
+ if isinstance(node.op, ast.And):
+ return all(outcomes)
+ elif isinstance(node.op, ast.Or):
+ return any(outcomes)
+ else: # pragma: no cover
+ # there are no other boolean operators
+ # so this code should never execute
+ assert False, "unexpected boolean operator %s" % node.op
+
+ def visit_UnaryOp(self, node):
+ if isinstance(node.op, ast.Not):
+ return not self.visit(node.operand)
+ else:
+ _raise_invalid_node(node)
+
+ def visit_BinOp(self, node):
+ left = self.visit(node.left)
+ op = node.op
+ right = self.visit(node.right)
+ if isinstance(op, ast.Mod):
+ # modulo operator is used for regular expression matching
+ if not isinstance(left, six.string_types):
+ raise ValueError(
+ "expected a string on left side of %s" % node.op)
+ if not isinstance(right, six.string_types):
+ raise ValueError(
+ "expected a string on right side of %s" % node.op)
+ return re.search(right, left, re.IGNORECASE)
+ elif isinstance(op, ast.BitOr):
+ return left | right
+ elif isinstance(op, ast.BitAnd):
+ return left & right
+ else:
+ _raise_invalid_node(node)
+
+ def visit_Compare(self, node):
+ # keep it simple: binary comparators only
+ if len(node.ops) != 1:
+ raise ValueError("syntax for multiple comparators not supported")
+ left = self.visit(node.left)
+ op = node.ops[0]
+ right = self.visit(node.comparators[0])
+ if isinstance(op, ast.Eq):
+ return left == right
+ elif isinstance(op, ast.NotEq):
+ return left != right
+ elif isinstance(op, ast.Lt):
+ return left < right
+ elif isinstance(op, ast.LtE):
+ return left <= right
+ elif isinstance(op, ast.Gt):
+ return left > right
+ elif isinstance(op, ast.GtE):
+ return left >= right
+ elif isinstance(op, ast.In):
+ return left in right
+ elif isinstance(op, ast.NotIn):
+ return left not in right
+ else:
+ # not used currently: ast.Is | ast.IsNot
+ _raise_invalid_node(op)
+
+ def visit_Name(self, node):
+ """Calculate the value of the given identifier."""
+ id_ = node.id
+ if id_ == 'type':
+ return self.entry.type.lower()
+ elif id_ == 'key':
+ return self.entry.key.lower()
+ elif id_ == 'cited':
+ return bool(self.cited_docnames)
+ elif id_ == 'docname':
+ return self.docname
+ elif id_ == 'docnames':
+ return self.cited_docnames
+ elif id_ == 'True':
+ return True
+ elif id_ == 'False':
+ return False
+ elif id_ == 'author' or id_ == 'editor':
+ if id_ in self.entry.persons:
+ return u' and '.join(
+ six.text_type(person) # XXX needs fix in pybtex?
+ for person in self.entry.persons[id_])
+ else:
+ return u''
+ else:
+ return self.entry.fields.get(id_, "")
+
+ def visit_Set(self, node):
+ return frozenset(self.visit(elt) for elt in node.elts)
+
+ def visit_Str(self, node):
+ return node.s
+
+ # NameConstant is Python 3.4 only so do not insist on coverage
+ def visit_NameConstant(self, node): # pragma: no cover
+ return node.value
+
+ def generic_visit(self, node):
+ _raise_invalid_node(node)
+
+
+class Cache:
+
+ """Global bibtex extension information cache. Stored in
+ ``app.env.bibtex_cache``, so must be picklable.
+ """
+
+ bibfiles = None
+ """A :class:`dict` mapping .bib file names (relative to the top
+ source folder) to :class:`BibfileCache` instances.
+ """
+
+ _bibliographies = None
+ """Each bibliography directive is assigned an id of the form
+ bibtex-bibliography-xxx. This :class:`dict` maps each docname
+ to another :class:`dict` which maps each id
+ to information about the bibliography directive,
+ :class:`BibliographyCache`. We need to store this extra
+ information separately because it cannot be stored in the
+ :class:`~sphinxcontrib.bibtex.nodes.bibliography` nodes
+ themselves.
+ """
+
+ _cited = None
+ """A :class:`dict` mapping each docname to a :class:`set` of
+ citation keys.
+ """
+
+ _enum_count = None
+ """A :class:`dict` mapping each docname to an :class:`int`
+ representing the current bibliography enumeration counter.
+ """
+
+ def __init__(self):
+
+ self.bibfiles = {}
+ self._bibliographies = collections.defaultdict(dict)
+ self._cited = collections.defaultdict(oset)
+ self._enum_count = {}
+
+ def purge(self, docname):
+ """Remove all information related to *docname*.
+
+ :param docname: The document name.
+ :type docname: :class:`str`
+ """
+ self._bibliographies.pop(docname, None)
+ self._cited.pop(docname, None)
+ self._enum_count.pop(docname, None)
+
+ def inc_enum_count(self, docname):
+ """Increment enumeration list counter for document *docname*."""
+ self._enum_count[docname] += 1
+
+ def set_enum_count(self, docname, value):
+ """Set enumeration list counter for document *docname* to *value*."""
+ self._enum_count[docname] = value
+
+ def get_enum_count(self, docname):
+ """Get enumeration list counter for document *docname*."""
+ return self._enum_count[docname]
+
+ def add_cited(self, key, docname):
+ """Add the given *key* to the set of cited keys for
+ *docname*.
+
+ :param key: The citation key.
+ :type key: :class:`str`
+ :param docname: The document name.
+ :type docname: :class:`str`
+ """
+ self._cited[docname].add(key)
+
+ def get_cited_docnames(self, key):
+ """Return the *docnames* from which the given *key* is cited.
+
+ :param key: The citation key.
+ :type key: :class:`str`
+ """
+ return frozenset([
+ docname for docname, keys in six.iteritems(self._cited)
+ if key in keys])
+
+ def get_label_from_key(self, key):
+ """Return label for the given key."""
+ for bibcache in self.get_all_bibliography_caches():
+ if key in bibcache.labels:
+ return bibcache.labels[key]
+ else:
+ raise KeyError("%s not found" % key)
+
+ def get_all_cited_keys(self):
+ """Yield all citation keys, sorted first by document
+ (alphabetical), then by citation order in the document.
+ """
+ for docname in sorted(self._cited):
+ for key in self._cited[docname]:
+ yield key
+
+ def set_bibliography_cache(self, docname, id_, bibcache):
+ """Register *bibcache* (:class:`BibliographyCache`)
+ with id *id_* for document *docname*.
+ """
+ assert id_ not in self._bibliographies[docname]
+ self._bibliographies[docname][id_] = bibcache
+
+ def get_bibliography_cache(self, docname, id_):
+ """Return :class:`BibliographyCache` with id *id_* in
+ document *docname*.
+ """
+ return self._bibliographies[docname][id_]
+
+ def get_all_bibliography_caches(self):
+ """Return all bibliography caches."""
+ for bibcaches in six.itervalues(self._bibliographies):
+ for bibcache in six.itervalues(bibcaches):
+ yield bibcache
+
+ def _get_bibliography_entries(self, docname, id_, warn):
+ """Return filtered bibliography entries, sorted by occurence
+ in the bib file.
+ """
+ # get the information of this bibliography node
+ bibcache = self.get_bibliography_cache(docname=docname, id_=id_)
+ # generate entries
+ for bibfile in bibcache.bibfiles:
+ data = self.bibfiles[bibfile].data
+ for entry in six.itervalues(data.entries):
+ # beware: the prefix is not stored in the data
+ # to allow reusing the data for multiple bibliographies
+ cited_docnames = self.get_cited_docnames(
+ bibcache.keyprefix + entry.key)
+ visitor = _FilterVisitor(
+ entry=entry,
+ docname=docname,
+ cited_docnames=cited_docnames)
+ try:
+ success = visitor.visit(bibcache.filter_)
+ except ValueError as err:
+ warn("syntax error in :filter: expression; %s" % err)
+ # recover by falling back to the default
+ success = bool(cited_docnames)
+ if success:
+ # entries are modified in an unpickable way
+ # when formatting, so fetch a deep copy
+ # and return this copy with prefixed key
+ # we do not deep copy entry.collection because that
+ # consumes enormous amounts of memory
+ entry.collection = None
+ entry2 = copy.deepcopy(entry)
+ entry2.key = bibcache.keyprefix + entry.key
+ entry2.collection = data
+ entry.collection = data
+ yield entry2
+
+ def get_bibliography_entries(self, docname, id_, warn):
+ """Return filtered bibliography entries, sorted by citation order."""
+ # get entries, ordered by bib file occurrence
+ entries = OrderedDict(
+ (entry.key, entry) for entry in
+ self._get_bibliography_entries(
+ docname=docname, id_=id_, warn=warn))
+ # order entries according to which were cited first
+ # first, we add all keys that were cited
+ # then, we add all remaining keys
+ sorted_entries = []
+ for key in self.get_all_cited_keys():
+ try:
+ entry = entries.pop(key)
+ except KeyError:
+ pass
+ else:
+ sorted_entries.append(entry)
+ sorted_entries += six.itervalues(entries)
+ return sorted_entries
+
+
+class BibfileCache(collections.namedtuple('BibfileCache', 'mtime data')):
+
+ """Contains information about a parsed .bib file.
+
+ .. attribute:: mtime
+
+ A :class:`float` representing the modification time of the .bib
+ file when it was last parsed.
+
+ .. attribute:: data
+
+ A :class:`pybtex.database.BibliographyData` containing the
+ parsed .bib file.
+
+ """
+
+
+class BibliographyCache(collections.namedtuple(
+ 'BibliographyCache',
+ """bibfiles style encoding
+list_ enumtype start labels labelprefix
+filter_ curly_bracket_strip keyprefix
+""")):
+
+ """Contains information about a bibliography directive.
+
+ .. attribute:: bibfiles
+
+ A :class:`list` of :class:`str`\\ s containing the .bib file
+ names (relative to the top source folder) that contain the
+ references.
+
+ .. attribute:: style
+
+ The bibtex style.
+
+ .. attribute:: list_
+
+ The list type.
+
+ .. attribute:: enumtype
+
+ The sequence type (only used for enumerated lists).
+
+ .. attribute:: start
+
+ The first ordinal of the sequence (only used for enumerated lists).
+
+ .. attribute:: labels
+
+ Maps citation keys to their final labels.
+
+ .. attribute:: labelprefix
+
+ This bibliography's string prefix for pybtex generated labels.
+
+ .. attribute:: keyprefix
+
+ This bibliography's string prefix for citation keys.
+
+ .. attribute:: filter_
+
+ An :class:`ast.AST` node, containing the parsed filter expression.
+ """