Source code for dxr.filters

"""Base classes and convenience functions for writing filters"""

from functools import wraps
from funcy import identity

from dxr.utils import is_in


# Result domains a filter can declare through its ``domain`` attribute:
# LINE results point at specific lines of files, FILE results at whole files.
LINE = 'line'
FILE = 'file'


class Filter(object):
    """Base class describing how one query term is searched for and highlit

    Filter classes correspond roughly to the entries of the Filters dropdown
    menu. They tell DXR how to query the data that
    :meth:`~dxr.plugins.FileToIndex.needles` and
    :meth:`~dxr.plugins.FileToIndex.needles_by_line` stored in elasticsearch.
    One instance is made for every query term whose :attr:`name` matches, and
    that instance lives through both the querying and the highlighting phases.

    Subclassing this is optional: it spares many filters some boilerplate and
    doubles as the documentation of the filter API.

    :ivar name: The string prefix that activates this filter when used in a
        query term. With a name of "path", the term "path:foo" would activate
        the filter. Several filters may be registered under one name, in which
        case they are ORed together. It is good practice for a language plugin
        to query a language-specific needle (like "js-function") while
        registering under the generic name ("function"); this is what makes
        language-specific queries possible.
    :ivar domain: LINE or FILE. LINE filters return results pointing at
        specific lines of files; FILE filters return results pointing at files
        as a whole. Default: LINE.
    :ivar description: Text shown for this filter in the Filters menu:
        unicode, or Markup if you want to wrap examples in ``<code>`` tags.
        Among filters sharing a name, the first description encountered wins.
        An empty description hides the filter from the menu, which should
        probably be reserved for internal use by the TextFilter.
    :ivar union_only: Whether this filter is always ORed with others of the
        same name. Useful where an intersection would always be empty, as
        with extensions.
    :ivar is_reference: Whether this filter takes part in the "ref:"
        aggregate filter
    :ivar is_identifier: Whether this filter takes part in the "id:"
        aggregate filter

    """
    union_only = False
    is_identifier = False
    is_reference = False
    description = u''
    domain = LINE

    def __init__(self, term, enabled_plugins):
        """Remember the term and the enabled plugins for later phases.

        Subclasses that need to parse the term's arg any further can do so
        here and keep the result on the instance.

        :arg term: a query term as constructed by a
            :class:`~dxr.query.QueryVisitor`
        :arg enabled_plugins: an iterable of the enabled
            :class:`~dxr.plugins.Plugin` instances, for filters that build
            upon the filters provided by plugins

        Raise :class:`~dxr.exceptions.BadTerm` to complain to the user, for
        example about a term that cannot be parsed.

        """
        self._enabled_plugins = enabled_plugins
        self._term = term

    def filter(self):
        """Return the ES filter clause that narrows the found set of lines
        (or of files and folders, if :attr:`domain` is FILE).

        Return None to quietly apply no filtration at all, as would suit
        ``path:*``. To apply no filtration but tell the user about it, raise
        :class:`~dxr.exceptions.BadTerm`.

        This might one day return a list of filter clauses, for the benefit
        of things like the RegexFilter, which want a bunch of match_phrases
        plus a script.

        The base implementation always raises NotImplementedError; subclasses
        must override it.

        """
        raise NotImplementedError

    def highlight_path(self, result):
        """Return an unsorted iterable of the extents to highlight in the
        ``path`` field of a search result.

        :arg result: A mapping representing properties from a search result,
            whether a file or a line. All of the result's data is available,
            so extents stored under one needle can inform the highlighting of
            a field.

        The base implementation highlights nothing.

        """
        return []

    def highlight_content(self, result):
        """Return an unsorted iterable of the extents to highlight in the
        ``content`` field of a search result.

        :arg result: A mapping representing properties from a search result,
            whether a file or a line. All of the result's data is available,
            so the extents from, say, a 'c-function' needle can inform the
            highlighting of the 'content' field.

        The base implementation highlights nothing.

        """
        return []
# A filter can eventually grow a "kind" attr that says "structural" or
# "text" or whatever, and we can vary the highlight color or whatever based
# on that to make identifiers easy to pick out visually.


def negatable(filter_method):
    """Decorate a ``Filter.filter()`` method so that its result is wrapped in
    an ES "not" clause iff the term is negated.

    A falsy result (such as None, meaning "no filtration") is passed through
    untouched, negated term or not.

    """
    @wraps(filter_method)
    def negate_if_asked(self):
        clause = filter_method(self)
        if clause and self._term['not']:
            return {'not': clause}
        return clause
    return negate_if_asked


class NameFilterBase(Filter):
    """An exact-match filter for things that expose one value to compare
    against

    The needle value is assumed to be object-shaped, with a 'name'
    subproperty holding the symbol name and a 'name.lower' one holding it
    folded to lowercase. Highlighting relies on the 'start' and 'end'
    subproperties, which hold column bounds.

    The needle name is derived from the ``name`` cls attribute.

    :ivar lang: A language identifier that keeps structural needles apart
        from those of other languages and leaves room for an eventual "lang:"
        metafilter. It also shows up as language badges on the Filters menu.
        A common file extension of your language is a good choice, being
        short and familiar.

    """
    def __init__(self, term, enabled_plugins):
        """Store the term and plugins, and work out the needle name from
        ``lang`` and ``name`` (with dashes in ``name`` turned into
        underscores).
        """
        super(NameFilterBase, self).__init__(term, enabled_plugins)
        self._needle = '{lang}_{name}'.format(
            lang=self.lang,
            name=self.name.replace('-', '_'))

    def _term_filter(self, field):
        """Return a term filter clause matching the term's arg
        case-sensitively against the given subfield of my needle.
        """
        key = '{needle}.{field}'.format(needle=self._needle, field=field)
        return {'term': {key: self._term['arg']}}

    def _positive_filter(self):
        """Build the non-negated filter; kept separate so subclasses can
        reuse it.
        """
        if not self._term['case_sensitive']:
            # A term filter goes through no query analysis phase, so the
            # case-insensitive route needs a match query instead: an
            # analyzer pass followed by a term filter.
            lowered_key = '{needle}.name.lower'.format(needle=self._needle)
            return {'query': {'match': {lowered_key: self._term['arg']}}}
        return self._term_filter('name')

    @negatable
    def filter(self):
        """Find things by their "name" properties, case-sensitively or not.

        The term's "qualified" property is ignored.

        """
        return self._positive_filter()

    def _should_be_highlit(self, entity):
        """Return whether an entity deserves highlighting in the search
        results, judging by its "name" property.

        :arg entity: A map, the value of a needle from a found line

        """
        if self._term['case_sensitive']:
            normalize = identity
        else:
            normalize = unicode.lower
        return normalize(entity['name']) == normalize(self._term['arg'])

    def highlight_content(self, result):
        """Highlight every structural entity whose name matches the term.

        Negated terms highlight nothing.

        """
        if self._term['not']:
            return []
        candidates = result.get(self._needle, ())
        return ((candidate['start'], candidate['end'])
                for candidate in candidates
                if self._should_be_highlit(candidate))


class QualifiedNameFilterBase(NameFilterBase):
    """An exact-match filter for symbols that have both names and qualnames

    The needle value is assumed to be object-shaped, with a 'name'
    subproperty (the symbol name), a 'name.lower' folded to lowercase, and
    'qualname' and 'qualname.lower' doing the same for the fully-qualified
    name. Highlighting relies on the 'start' and 'end' subproperties, which
    hold column bounds.

    """
    @negatable
    def filter(self):
        """Find functions by their name or qualname.

        "+" searches consult qualnames only, while non-"+" searches consult
        both names and qualnames. Every comparison against a qualname is
        case-sensitive, because somebody being that specific probably wants
        exactly that.

        """
        if not self._term['qualified']:
            by_name = super(QualifiedNameFilterBase, self)._positive_filter()
            by_qualname = self._term_filter('qualname')
            return {'or': [by_name, by_qualname]}
        return self._term_filter('qualname')

    def _should_be_highlit(self, entity):
        """Return whether a structural entity deserves highlighting,
        judging by names and qualnames.

        A regular search compares short names as well as qualnames; a
        qualified search compares qualnames only.

        """
        if not self._term['qualified']:
            name_matches = super(QualifiedNameFilterBase,
                                 self)._should_be_highlit(entity)
            if name_matches:
                return name_matches
        return is_in(self._term['arg'], entity['qualname'])