# Source code for dxr.plugins

"""Registration and enumeration of DXR plugins"""

from functools import partial
from inspect import isclass, isfunction

from ordereddict import OrderedDict
from pkg_resources import iter_entry_points

from dxr.filters import Filter, LINE
from dxr.indexers import TreeToIndex


class AdHocTreeToIndex(TreeToIndex):
    """Stand-in TreeToIndex used when a plugin supplies none of its own

    Its only job is to hand per-file indexing off to an optional
    ``file_to_index_class`` given at construction time.

    """

    def __init__(self, *args, **kwargs):
        """Peel off ``file_to_index_class`` and pass the rest to the base.

        :arg file_to_index_class: Optional keyword-only callable used to
            build per-file indexers; defaults to None, meaning "index
            nothing".

        """
        # Remove our private keyword first so the base class never sees it.
        file_to_index_class = kwargs.pop('file_to_index_class', None)
        self._file_to_index_class = file_to_index_class
        super(AdHocTreeToIndex, self).__init__(*args, **kwargs)

    def file_to_index(self, path, contents):
        """Return a per-file indexer for ``path``, or None if we have no
        class to build one from.

        """
        make_indexer = self._file_to_index_class
        if not make_indexer:
            return None
        # plugin_name and tree are presumably set up by TreeToIndex --
        # confirm against dxr.indexers.
        return make_indexer(path, contents, self.plugin_name, self.tree)


class Plugin(object):
    """Top-level entrypoint for DXR plugins

    A Plugin is an indexer, skimmer, filter set, and other miscellany meant to
    be used together; it is the deployer-visible unit of pluggability. In other
    words, there is no way to subdivide a plugin via configuration; there would
    be no sense running a plugin's filters if the indexer that was supposed to
    extract the requisite data never ran.

    If the deployer should be able to independently enable parts of your
    plugin, consider exposing those as separate plugins.

    Note that Plugins may be instantiated multiple times; don't assume
    otherwise.

    Equality and hashing are based on the ``name`` attribute, which is not set
    by the constructor: :func:`all_plugins()` and :func:`core_plugin()` assign
    it after construction.

    """
    def __init__(self,
                 filters=None,
                 folder_to_index=None,
                 tree_to_index=None,
                 file_to_skim=None,
                 mappings=None,
                 analyzers=None,
                 direct_searchers=None,
                 refs=None,
                 badge_colors=None,
                 config_schema=None):
        """
        :arg filters: A list of filter classes
        :arg folder_to_index: A :class:`FolderToIndex` subclass
        :arg tree_to_index: A :class:`TreeToIndex` subclass
        :arg file_to_skim: A :class:`FileToSkim` subclass
        :arg mappings: Additional Elasticsearch mapping definitions for all the
            plugin's elasticsearch-destined data. A dict with keys for each
            doctype and values reflecting the structure described at
            http://www.elastic.co/guide/en/elasticsearch/reference/current/indices-put-mapping.html.
            Since a FILE-domain query will
            be promoted to a LINE query if any other query term triggers a
            line-based query, it's important to keep field names and semantics
            the same between lines and files. In other words, a LINE mapping
            should generally be a superset of a FILE mapping.
        :arg analyzers: Analyzer, tokenizer, and token and char filter
            definitions for the elasticsearch mappings. A dict with keys
            "analyzer", "tokenizer", etc., following the structure outlined at
            http://www.elastic.co/guide/en/elasticsearch/reference/current/analysis.html.
        :arg direct_searchers: Functions that provide direct search
            capability. Each must take a single query term of type 'text',
            return an elasticsearch filter clause to run against LINEs, and
            have a ``direct_search_priority`` attribute. Direct searchers are
            tried in order of increasing priority. Return None from a direct
            searcher to skip it.

            .. note::

                A more general approach may replace direct search in the
                future.

        :arg refs: An iterable of :class:`~dxr.lines.Ref` subclasses
            supported by this plugin. This is used at request time, to turn
            abbreviated ES index data back into HTML.
        :arg badge_colors: Mapping of Filter.lang -> color for menu badges.
        :arg config_schema: A validation schema for this plugin's
            configuration. See https://pypi.python.org/pypi/schema/ for docs.

        ``mappings`` and ``analyzers`` are recursively merged into other
        plugins' mappings and analyzers using the algorithm described at
        :func:`~dxr.utils.deep_update()`. This is mostly intended so you can
        add additional kinds of indexing to fields defined in the core plugin
        using multifields. Don't go too crazy monkeypatching the world.

        """
        # Every omitted (or otherwise falsy) collection argument becomes a
        # fresh empty container, so instances never share mutable state.
        self.filters = filters or []
        self.direct_searchers = direct_searchers or []
        # Refs are stored keyed by each class's ``id`` attribute.
        self.refs = dict((ref_class.id, ref_class)
                         for ref_class in (refs or []))
        # Someday, these might become lists of indexers or skimmers, and then
        # we can parallelize even better. OTOH, there are probably a LOT of
        # files in any time-consuming tree, so we already have a perfectly
        # effective and easier way to parallelize.
        self.folder_to_index = folder_to_index
        self.tree_to_index = tree_to_index
        self.file_to_skim = file_to_skim
        self.mappings = mappings or {}
        self.analyzers = analyzers or {}
        self.badge_colors = badge_colors or {}
        self.config_schema = config_schema or {}

    @classmethod
    def from_namespace(cls, namespace):
        """Construct a Plugin whose attrs are populated by naming conventions.

        :arg namespace: A namespace (a dict, typically a module's
            ``__dict__``) from which to pick components

        **Filters** are taken to be any class whose name ends in "Filter" and
        doesn't start with "_".

        **Refs** are taken to be any class whose name ends in "Ref" and
        doesn't start with "_".

        The **tree indexer** is assumed to be called "TreeToIndex". If there isn't
        one, one will be constructed which does nothing but delegate to the
        class called ``FileToIndex`` (if there is one) when ``file_to_index()``
        is called on it.

        The **folder indexer** is assumed to be called "FolderToIndex" and the
        **file skimmer** "FileToSkim".

        **Mappings** are pulled from ``mappings`` attribute, **analyzers**
        from ``analyzers``, and **badge colors** from ``badge_colors``.

        **Direct searchers** are any functions carrying a
        ``direct_search_priority`` attribute.

        If these rules don't suit you, you can always instantiate a Plugin
        yourself.

        """
        # Grab a tree indexer by name, or make one up:
        tree_to_index = namespace.get('TreeToIndex')
        if not tree_to_index:
            tree_to_index = partial(
                    AdHocTreeToIndex,
                    file_to_index_class=namespace.get('FileToIndex'))

        return cls(filters=filters_from_namespace(namespace),
                   folder_to_index=namespace.get('FolderToIndex'),
                   tree_to_index=tree_to_index,
                   file_to_skim=namespace.get('FileToSkim'),
                   mappings=namespace.get('mappings'),
                   analyzers=namespace.get('analyzers'),
                   badge_colors=namespace.get('badge_colors'),
                   direct_searchers=direct_searchers_from_namespace(namespace),
                   refs=refs_from_namespace(namespace))

    def __eq__(self, other):
        """Consider instances of the same plugin equal.

        Plugins compare by ``name``. Anything that isn't a Plugin yields
        ``NotImplemented``, letting Python fall back to its default
        comparison instead of blowing up with an AttributeError (as when
        comparing against None).

        """
        if not isinstance(other, Plugin):
            return NotImplemented
        return self.name == other.name

    def __ne__(self, other):
        """Mirror :meth:`__eq__`; Python 2 doesn't derive ``!=`` from ``==``."""
        if not isinstance(other, Plugin):
            return NotImplemented
        return self.name != other.name

    def __hash__(self):
        """Let us put plugins in sets and test for membership."""
        return hash(self.name)

    def __getstate__(self):
        """When pickling, omit the direct searchers.

        We don't use them during the multiprocess indexing phase, so we might
        as well allow ourselves to create direct searchers using function
        factories, whose products are unpickleable.

        """
        # Shallow-copy so blanking the searchers doesn't touch the live object.
        state = self.__dict__.copy()
        state['direct_searchers'] = []
        return state

    def __repr__(self):
        """Show the plugin's name if it has been given one yet."""
        if hasattr(self, 'name'):
            return '<Plugin %s>' % self.name
        return super(Plugin, self).__repr__()


def filters_from_namespace(namespace):
    """Return the filters which conform to our suggested naming convention:
    ending with "Filter" and not starting with "_".

    The ``Filter`` base class itself is never returned, even though its name
    matches the convention.

    :arg namespace: The namespace (a dict of name -> object) in which to look
        for filters

    """
    # items() rather than iteritems() so this works with both Python 2 and
    # Python 3 dicts.
    return [value for name, value in namespace.items()
            if isclass(value) and
            not name.startswith('_') and
            name.endswith('Filter') and
            value is not Filter]


def direct_searchers_from_namespace(namespace):
    """Return a list of the direct search functions defined in a namespace.

    A direct search function is a plain function that has a
    ``direct_search_priority`` attribute.

    :arg namespace: The namespace (a dict of name -> object) to search

    """
    # values() rather than itervalues() so this works with both Python 2 and
    # Python 3 dicts. Check isfunction() first: it is cheap and keeps us from
    # poking attribute hooks on arbitrary objects.
    return [value for value in namespace.values()
            if isfunction(value) and
            hasattr(value, 'direct_search_priority')]


def refs_from_namespace(namespace):
    """Return a list of :class:`~dxr.lines.Ref` subclasses (or workalikes)
    defined in a namespace, identified by conforming to our naming convention.

    Our convention is to end with "Ref" and not start with "_". The ``Ref``
    base class itself is never returned.

    :arg namespace: The namespace (a dict of name -> object) to search

    """
    # Imported here rather than at module level.
    from dxr.lines import Ref

    # TODO: Consider switching to an isinstance() test so plugin authors have
    # more naming flexibility.
    # items() rather than iteritems() so this works with both Python 2 and
    # Python 3 dicts.
    return [value for name, value in namespace.items()
            if isclass(value) and
            not name.startswith('_') and
            name.endswith('Ref') and
            value is not Ref]


def direct_search(priority, domain=LINE):
    """Decorator factory that flags a function as a direct search provider.

    The decorated function is returned unchanged apart from gaining two
    attributes, ``direct_search_priority`` and ``domain``.

    :arg priority: A priority to attach to the function. Direct searchers are
        called in order of increasing priority.
    :arg domain: LINE if this searcher searches for individual lines, FILE if
        it searches for entire files

    """
    def mark(func):
        # Tag the function in place; no wrapper is created.
        func.domain = domain
        func.direct_search_priority = priority
        return func
    return mark


_plugin_cache = None
def all_plugins():
    """Return a dict of plugin name -> Plugin for all plugins, including core.

    Plugins are registered via the ``dxr.plugins`` setuptools entry point,
    which may point to either a module (in which case a Plugin will be
    constructed based on the contents of the module namespace) or a Plugin
    object (which will be returned directly). The entry point name is what the
    user types into the config file under ``enabled_plugins``.

    The core plugin, which provides many of DXR's cross-language, built-in
    features, is always the first plugin when iterating over the returned
    dict. This lets other plugins override bits of its elasticsearch mappings
    and analyzers when we're building up the schema.

    The result is computed once and cached at module level; the cached dict
    itself is returned, so copy it before modifying. If loading any entry
    point raises, the exception propagates and nothing is cached, so a later
    call retries from scratch rather than seeing a half-populated dict.

    """
    global _plugin_cache

    def name_and_plugin(entry_point):
        """Return the name of an entry point and the Plugin it points to."""
        loaded = entry_point.load()
        if isinstance(loaded, Plugin):
            plugin = loaded
        else:
            # Anything else is treated as a module-like namespace.
            plugin = Plugin.from_namespace(loaded.__dict__)
        plugin.name = entry_point.name
        return entry_point.name, plugin

    if _plugin_cache is None:
        # Iterating over entrypoints could be kind of expensive, with the FS
        # reads and all.
        #
        # Build into a local and publish only once everything has loaded, so
        # a failing entry point can't leave a partial cache behind.
        plugins = OrderedDict([('core', core_plugin())])
        plugins.update(name_and_plugin(point) for point in
                       iter_entry_points('dxr.plugins'))
        _plugin_cache = plugins

    return _plugin_cache


def all_plugins_but_core():
    """Do like :func:`all_plugins()`, but leave the core plugin out.

    A copy is returned, so the mapping cached by :func:`all_plugins()` is
    left untouched.

    """
    others = all_plugins().copy()
    others.pop('core')
    return others


_core_plugin = None
def core_plugin():
    """Return the core plugin, building and caching it on first use."""
    # The import lives inside the function in order to dodge a circular
    # import.
    global _core_plugin
    import dxr.plugins.core

    if _core_plugin is not None:
        return _core_plugin

    plugin = Plugin.from_namespace(dxr.plugins.core.__dict__)
    plugin.name = 'core'
    _core_plugin = plugin
    return plugin


def plugins_named(names):
    """Return an iterable of the Plugins having the given names, in the order
    the names are given.

    The core plugin is included only if ``'core'`` is among ``names``. The
    result is lazy: an unknown name raises KeyError when that item is
    reached during iteration, not when this function is called.

    :arg names: An iterable of plugin names

    """
    registry = all_plugins()
    return (registry[plugin_name] for plugin_name in names)