summaryrefslogtreecommitdiffstats
path: root/common/sphinxcontrib/bibtex/cache.py
blob: aa9064f001b0b1b22511095d3020202de6176ffe (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
# -*- coding: utf-8 -*-
"""
    Cached Information
    ~~~~~~~~~~~~~~~~~~

    Classes and methods to maintain any information that is stored
    outside the doctree.

    .. autoclass:: Cache
        :members:

    .. autoclass:: BibfileCache
        :members:

    .. autoclass:: BibliographyCache
        :members:
"""

import six
try:                 # pragma: no cover
    from collections import OrderedDict
except ImportError:  # pragma: no cover
    from ordereddict import OrderedDict
import ast
import collections
import copy
from oset import oset
import re


def _raise_invalid_node(node):
    """Signal that *node* is not allowed in a filter expression.

    :param node: The offending AST node (or operator node); it is
        interpolated into the error message with ``%s``.
    :raises ValueError: Always.
    """
    message = "invalid node %s in filter expression" % node
    raise ValueError(message)


class _FilterVisitor(ast.NodeVisitor):

    """Visit the abstract syntax tree of a parsed filter expression.

    Every ``visit_*`` method returns the value of the sub-expression it
    is given. Node types without a dedicated method end up in
    :meth:`generic_visit`, which rejects them with :exc:`ValueError`.
    """

    entry = None
    """The bibliographic entry to which the filter must be applied."""

    cited_docnames = False
    """The documents where the entry is cited (empty if not cited)."""

    def __init__(self, entry, docname, cited_docnames):
        """Store the evaluation context.

        :param entry: The bibliographic entry being filtered.
        :param docname: Value of the ``docname`` identifier.
        :param cited_docnames: Value of the ``docnames`` identifier;
            its truthiness is the value of ``cited``.
        """
        self.entry = entry
        self.docname = docname
        self.cited_docnames = cited_docnames

    def visit_Module(self, node):
        """Evaluate a module consisting of exactly one expression.

        :raises ValueError: If the module body does not contain exactly
            one statement.
        """
        if len(node.body) != 1:
            raise ValueError(
                "filter expression cannot contain multiple expressions")
        return self.visit(node.body[0])

    def visit_Expr(self, node):
        """Evaluate an expression statement by evaluating its value."""
        return self.visit(node.value)

    def visit_BoolOp(self, node):
        """Evaluate ``and`` / ``or``."""
        # a generator, so all()/any() stop visiting operands as soon as
        # the outcome is known
        outcomes = (self.visit(value) for value in node.values)
        if isinstance(node.op, ast.And):
            return all(outcomes)
        elif isinstance(node.op, ast.Or):
            return any(outcomes)
        else:  # pragma: no cover
            # there are no other boolean operators
            # so this code should never execute
            assert False, "unexpected boolean operator %s" % node.op

    def visit_UnaryOp(self, node):
        """Evaluate ``not``; every other unary operator is rejected."""
        if isinstance(node.op, ast.Not):
            return not self.visit(node.operand)
        else:
            _raise_invalid_node(node)

    def visit_BinOp(self, node):
        """Evaluate ``%`` (regular expression search), ``|`` and ``&``.

        :raises ValueError: If an operand of ``%`` is not a string, if
            the right operand of ``%`` is not a valid regular
            expression, or if the operator is not supported.
        """
        left = self.visit(node.left)
        op = node.op
        right = self.visit(node.right)
        if isinstance(op, ast.Mod):
            # modulo operator is used for regular expression matching
            if not isinstance(left, six.string_types):
                raise ValueError(
                    "expected a string on left side of %s" % node.op)
            if not isinstance(right, six.string_types):
                raise ValueError(
                    "expected a string on right side of %s" % node.op)
            try:
                return re.search(right, left, re.IGNORECASE)
            except re.error as err:
                # re.error is not a ValueError; convert it so that a bad
                # pattern is reported like any other invalid filter
                raise ValueError(
                    "invalid regular expression %r: %s" % (right, err))
        elif isinstance(op, ast.BitOr):
            return left | right
        elif isinstance(op, ast.BitAnd):
            return left & right
        else:
            _raise_invalid_node(node)

    def visit_Compare(self, node):
        """Evaluate a single binary comparison.

        :raises ValueError: For chained comparisons (``a < b < c``) and
            for comparison operators that are not supported.
        """
        # keep it simple: binary comparators only
        if len(node.ops) != 1:
            raise ValueError("syntax for multiple comparators not supported")
        left = self.visit(node.left)
        op = node.ops[0]
        right = self.visit(node.comparators[0])
        if isinstance(op, ast.Eq):
            return left == right
        elif isinstance(op, ast.NotEq):
            return left != right
        elif isinstance(op, ast.Lt):
            return left < right
        elif isinstance(op, ast.LtE):
            return left <= right
        elif isinstance(op, ast.Gt):
            return left > right
        elif isinstance(op, ast.GtE):
            return left >= right
        elif isinstance(op, ast.In):
            return left in right
        elif isinstance(op, ast.NotIn):
            return left not in right
        else:
            # not used currently: ast.Is | ast.IsNot
            _raise_invalid_node(op)

    def visit_Name(self, node):
        """Calculate the value of the given identifier.

        ``type`` and ``key`` give the lower-cased entry type and key,
        ``cited``, ``docname`` and ``docnames`` expose the citation
        context, ``author`` and ``editor`` give the persons joined by
        ``" and "``, and any other identifier is looked up in the entry
        fields (empty string if absent).
        """
        id_ = node.id
        if id_ == 'type':
            return self.entry.type.lower()
        elif id_ == 'key':
            return self.entry.key.lower()
        elif id_ == 'cited':
            return bool(self.cited_docnames)
        elif id_ == 'docname':
            return self.docname
        elif id_ == 'docnames':
            return self.cited_docnames
        elif id_ == 'True':
            return True
        elif id_ == 'False':
            return False
        elif id_ == 'author' or id_ == 'editor':
            if id_ in self.entry.persons:
                return u' and '.join(
                    six.text_type(person)  # XXX needs fix in pybtex?
                    for person in self.entry.persons[id_])
            else:
                return u''
        else:
            return self.entry.fields.get(id_, "")

    def visit_Set(self, node):
        """Evaluate a set display to a :class:`frozenset`."""
        return frozenset(self.visit(elt) for elt in node.elts)

    def visit_Constant(self, node):
        """Evaluate a literal on Python 3.8+, where literals are parsed
        into ``ast.Constant`` nodes.

        Only the literals that the legacy ``visit_Str`` and
        ``visit_NameConstant`` hooks accepted are allowed: strings,
        ``True``, ``False`` and ``None``. Handling the node here avoids
        relying on the deprecated compatibility dispatch of
        :class:`ast.NodeVisitor`.

        :raises ValueError: For any other literal (numbers, bytes, ...).
        """
        value = node.value
        if value is None or isinstance(value, (bool, str)):
            return value
        _raise_invalid_node(node)

    # only reached on interpreters that still produce ast.Str nodes
    def visit_Str(self, node):
        """Evaluate a string literal (legacy ``ast.Str`` node)."""
        return node.s

    # NameConstant is Python 3.4 only so do not insist on coverage
    def visit_NameConstant(self, node):  # pragma: no cover
        """Evaluate ``True`` / ``False`` / ``None`` (legacy node)."""
        return node.value

    def generic_visit(self, node):
        """Reject every node type that has no dedicated visitor."""
        _raise_invalid_node(node)


class Cache:

    """Global bibtex extension information cache. Stored in
    ``app.env.bibtex_cache``, so must be picklable.
    """

    bibfiles = None
    """A :class:`dict` mapping .bib file names (relative to the top
    source folder) to :class:`BibfileCache` instances.
    """

    _bibliographies = None
    """Each bibliography directive is assigned an id of the form
    bibtex-bibliography-xxx. This :class:`dict` maps each docname
    to another :class:`dict` which maps each id
    to information about the bibliography directive,
    :class:`BibliographyCache`. We need to store this extra
    information separately because it cannot be stored in the
    :class:`~sphinxcontrib.bibtex.nodes.bibliography` nodes
    themselves.
    """

    _cited = None
    """A :class:`dict` mapping each docname to an ordered set
    (``oset``) of citation keys, in order of first citation.
    """

    _enum_count = None
    """A :class:`dict` mapping each docname to an :class:`int`
    representing the current bibliography enumeration counter.
    """

    def __init__(self):
        """Create an empty cache."""
        self.bibfiles = {}
        self._bibliographies = collections.defaultdict(dict)
        self._cited = collections.defaultdict(oset)
        self._enum_count = {}

    def purge(self, docname):
        """Remove all information related to *docname*.

        Unknown document names are ignored.

        :param docname: The document name.
        :type docname: :class:`str`
        """
        self._bibliographies.pop(docname, None)
        self._cited.pop(docname, None)
        self._enum_count.pop(docname, None)

    def inc_enum_count(self, docname):
        """Increment enumeration list counter for document *docname*.

        :raises KeyError: If no counter has been set for *docname*.
        """
        self._enum_count[docname] += 1

    def set_enum_count(self, docname, value):
        """Set enumeration list counter for document *docname* to *value*."""
        self._enum_count[docname] = value

    def get_enum_count(self, docname):
        """Get enumeration list counter for document *docname*.

        :raises KeyError: If no counter has been set for *docname*.
        """
        return self._enum_count[docname]

    def add_cited(self, key, docname):
        """Add the given *key* to the set of cited keys for
        *docname*.

        :param key: The citation key.
        :type key: :class:`str`
        :param docname: The document name.
        :type docname: :class:`str`
        """
        self._cited[docname].add(key)

    def get_cited_docnames(self, key):
        """Return the *docnames* from which the given *key* is cited.

        :param key: The citation key.
        :type key: :class:`str`
        :return: A :class:`frozenset` of document names (empty if the
            key is not cited anywhere).
        """
        return frozenset([
            docname for docname, keys in six.iteritems(self._cited)
            if key in keys])

    def get_label_from_key(self, key):
        """Return label for the given key.

        :raises KeyError: If no registered bibliography has a label
            for *key*.
        """
        for bibcache in self.get_all_bibliography_caches():
            if key in bibcache.labels:
                return bibcache.labels[key]
        raise KeyError("%s not found" % key)

    def get_all_cited_keys(self):
        """Yield all citation keys, sorted first by document
        (alphabetical), then by citation order in the document.
        """
        for docname in sorted(self._cited):
            for key in self._cited[docname]:
                yield key

    def set_bibliography_cache(self, docname, id_, bibcache):
        """Register *bibcache* (:class:`BibliographyCache`)
        with id *id_* for document *docname*.
        """
        assert id_ not in self._bibliographies[docname]
        self._bibliographies[docname][id_] = bibcache

    def get_bibliography_cache(self, docname, id_):
        """Return :class:`BibliographyCache` with id *id_* in
        document *docname*.

        :raises KeyError: If no such bibliography is registered.
        """
        return self._bibliographies[docname][id_]

    def get_all_bibliography_caches(self):
        """Yield all bibliography caches of all documents."""
        for bibcaches in six.itervalues(self._bibliographies):
            for bibcache in six.itervalues(bibcaches):
                yield bibcache

    def _get_bibliography_entries(self, docname, id_, warn):
        """Yield filtered bibliography entries, sorted by occurrence
        in the bib file.

        :param docname: The document containing the bibliography.
        :param id_: The id of the bibliography directive.
        :param warn: Callable taking a message string; called when the
            filter expression cannot be evaluated, in which case the
            entry is included only if it is cited.
        """
        # get the information of this bibliography node
        bibcache = self.get_bibliography_cache(docname=docname, id_=id_)
        # generate entries
        for bibfile in bibcache.bibfiles:
            data = self.bibfiles[bibfile].data
            for entry in six.itervalues(data.entries):
                # beware: the prefix is not stored in the data
                # to allow reusing the data for multiple bibliographies
                cited_docnames = self.get_cited_docnames(
                    bibcache.keyprefix + entry.key)
                visitor = _FilterVisitor(
                    entry=entry,
                    docname=docname,
                    cited_docnames=cited_docnames)
                try:
                    success = visitor.visit(bibcache.filter_)
                except ValueError as err:
                    warn("syntax error in :filter: expression; %s" % err)
                    # recover by falling back to the default
                    success = bool(cited_docnames)
                if success:
                    # entries are modified in an unpicklable way
                    # when formatting, so fetch a deep copy
                    # and return this copy with prefixed key
                    # we do not deep copy entry.collection because that
                    # consumes enormous amounts of memory
                    entry.collection = None
                    try:
                        entry2 = copy.deepcopy(entry)
                    finally:
                        # always re-attach the shared, cached entry, even
                        # if the copy fails, so the cache is not corrupted
                        entry.collection = data
                    entry2.key = bibcache.keyprefix + entry.key
                    entry2.collection = data
                    yield entry2

    def get_bibliography_entries(self, docname, id_, warn):
        """Return filtered bibliography entries, sorted by citation order.

        Cited entries come first, in the order given by
        :meth:`get_all_cited_keys`; the remaining entries follow in
        bib file order.

        :return: A :class:`list` of entries.
        """
        # get entries, ordered by bib file occurrence
        entries = OrderedDict(
            (entry.key, entry) for entry in
            self._get_bibliography_entries(
                docname=docname, id_=id_, warn=warn))
        # order entries according to which were cited first
        # first, we add all keys that were cited
        # then, we add all remaining keys
        sorted_entries = []
        for key in self.get_all_cited_keys():
            try:
                entry = entries.pop(key)
            except KeyError:
                pass
            else:
                sorted_entries.append(entry)
        sorted_entries += six.itervalues(entries)
        return sorted_entries


class BibfileCache(collections.namedtuple(
        'BibfileCache', ['mtime', 'data'])):

    """Record describing one parsed .bib file.

    .. attribute:: mtime

        A :class:`float`: the modification time the .bib file had
        when it was last parsed.

    .. attribute:: data

        The parsed contents of the .bib file, as a
        :class:`pybtex.database.BibliographyData`.

    """


class BibliographyCache(collections.namedtuple(
        'BibliographyCache',
        [
            'bibfiles', 'style', 'encoding',
            'list_', 'enumtype', 'start', 'labels', 'labelprefix',
            'filter_', 'curly_bracket_strip', 'keyprefix',
        ])):

    """Record describing one bibliography directive.

    .. attribute:: bibfiles

        A :class:`list` of :class:`str`\\ s containing the .bib file
        names (relative to the top source folder) that contain the
        references.

    .. attribute:: style

        The bibtex style.

    .. attribute:: encoding

        Encoding setting recorded for this bibliography (presumably
        the encoding of the .bib files; to be confirmed against the
        directive code).

    .. attribute:: list_

        The list type.

    .. attribute:: enumtype

        The sequence type (only used for enumerated lists).

    .. attribute:: start

        The first ordinal of the sequence (only used for enumerated lists).

    .. attribute:: labels

        Maps citation keys to their final labels.

    .. attribute:: labelprefix

        This bibliography's string prefix for pybtex generated labels.

    .. attribute:: filter_

        An :class:`ast.AST` node, containing the parsed filter expression.

    .. attribute:: curly_bracket_strip

        Setting recorded for this bibliography; judging by its name it
        presumably controls stripping of curly brackets (to be
        confirmed against the directive code).

    .. attribute:: keyprefix

        This bibliography's string prefix for citation keys.
    """