Source code for dxr.plugins

"""Registration and enumeration of DXR plugins"""

from functools import partial
from inspect import isclass, isfunction

from ordereddict import OrderedDict
from pkg_resources import iter_entry_points

from dxr.filters import Filter, LINE
from dxr.indexers import TreeToIndex


class AdHocTreeToIndex(TreeToIndex):
    """Stand-in TreeToIndex for plugins that do not supply one themselves.

    It accepts one extra keyword argument, ``file_to_index_class``, and uses
    it to build the per-file indexer on request. Every other argument is
    handed to :class:`TreeToIndex` untouched.

    """

    def __init__(self, *args, **kwargs):
        # Peel off our private keyword first so the base class never sees it.
        file_to_index_class = kwargs.pop('file_to_index_class', None)
        self._file_to_index_class = file_to_index_class
        super(AdHocTreeToIndex, self).__init__(*args, **kwargs)

    def file_to_index(self, path, contents):
        """Return an instance of the configured file indexer, or None.

        :arg path: Passed through as the first constructor argument
        :arg contents: Passed through as the second constructor argument

        ``self.plugin_name`` and ``self.tree`` are presumably set by the
        TreeToIndex constructor -- confirm against :mod:`dxr.indexers`.

        """
        factory = self._file_to_index_class
        if not factory:
            # No FileToIndex class was provided: nothing to delegate to.
            return None
        return factory(path, contents, self.plugin_name, self.tree)


class Plugin(object):
    """Top-level entrypoint for DXR plugins

    A Plugin is an indexer, skimmer, filter set, and other miscellany meant to
    be used together; it is the deployer-visible unit of pluggability. In other
    words, there is no way to subdivide a plugin via configuration; there would
    be no sense running a plugin's filters if the indexer that was supposed to
    extract the requisite data never ran.

    If the deployer should be able to independently enable parts of your
    plugin, consider exposing those as separate plugins.

    Note that Plugins may be instantiated multiple times; don't assume
    otherwise.

    Equality, hashing, and ``repr()`` are based on a ``name`` attribute, which
    the constructor does not set; it has to be assigned on the instance
    afterwards. Comparing or hashing a Plugin that has no ``name`` yet raises
    AttributeError.

    """

    def __init__(self,
                 filters=None,
                 folder_to_index=None,
                 tree_to_index=None,
                 file_to_skim=None,
                 mappings=None,
                 analyzers=None,
                 direct_searchers=None,
                 refs=None,
                 badge_colors=None,
                 config_schema=None):
        """
        :arg filters: A list of filter classes
        :arg folder_to_index: A :class:`FolderToIndex` subclass
        :arg tree_to_index: A :class:`TreeToIndex` subclass
        :arg file_to_skim: A :class:`FileToSkim` subclass
        :arg mappings: Additional Elasticsearch mapping definitions for all the
            plugin's elasticsearch-destined data. A dict with keys for each
            doctype and values reflecting the structure described at
            http://www.elastic.co/guide/en/elasticsearch/reference/current/indices-put-mapping.html.
            Since a FILE-domain query will be promoted to a LINE query if any
            other query term triggers a line-based query, it's important to
            keep field names and semantics the same between lines and files.
            In other words, a LINE mapping should generally be a superset of a
            FILE mapping.
        :arg analyzers: Analyzer, tokenizer, and token and char filter
            definitions for the elasticsearch mappings. A dict with keys
            "analyzer", "tokenizer", etc., following the structure outlined at
            http://www.elastic.co/guide/en/elasticsearch/reference/current/analysis.html.
        :arg direct_searchers: Functions that provide direct search
            capability. Each must take a single query term of type 'text',
            return an elasticsearch filter clause to run against LINEs, and
            have a ``direct_search_priority`` attribute. Filters are tried in
            order of increasing priority. Return None from a direct searcher
            to skip it.

            .. note::

                A more general approach may replace direct search in the
                future.

        :arg refs: An iterable of :class:`~dxr.lines.Ref` subclasses supported
            by this plugin. This is used at request time, to turn abbreviated
            ES index data back into HTML.
        :arg badge_colors: Mapping of Filter.lang -> color for menu badges.
        :arg config_schema: A validation schema for this plugin's
            configuration. See https://pypi.python.org/pypi/schema/ for docs.

        ``mappings`` and ``analyzers`` are recursively merged into other
        plugins' mappings and analyzers using the algorithm described at
        :func:`~dxr.utils.deep_update()`. This is mostly intended so you can
        add additional kinds of indexing to fields defined in the core plugin
        using multifields. Don't go too crazy monkeypatching the world.

        """
        self.filters = filters or []
        self.direct_searchers = direct_searchers or []
        # Index the ref classes by their ``id`` attribute for quick lookup.
        self.refs = dict((ref_class.id, ref_class)
                         for ref_class in (refs or []))
        # Someday, these might become lists of indexers or skimmers, and then
        # we can parallelize even better. OTOH, there are probably a LOT of
        # files in any time-consuming tree, so we already have a perfectly
        # effective and easier way to parallelize.
        self.folder_to_index = folder_to_index
        self.tree_to_index = tree_to_index
        self.file_to_skim = file_to_skim
        self.mappings = mappings or {}
        self.analyzers = analyzers or {}
        self.badge_colors = badge_colors or {}
        self.config_schema = config_schema or {}

    @classmethod
    def from_namespace(cls, namespace):
        """Construct a Plugin whose attrs are populated by naming conventions.

        :arg namespace: A namespace from which to pick components

        **Filters** are taken to be any class whose name ends in "Filter" and
        doesn't start with "_".

        **Refs** are taken to be any class whose name ends in "Ref" and
        doesn't start with "_".

        The **tree indexer** is assumed to be called "TreeToIndex". If there
        isn't one, one will be constructed which does nothing but delegate to
        the class called ``FileToIndex`` (if there is one) when
        ``file_to_index()`` is called on it.

        The **file skimmer** is assumed to be called "FileToSkim".

        **Mappings** are pulled from ``mappings`` attribute and **analyzers**
        from ``analyzers``.

        If these rules don't suit you, you can always instantiate a Plugin
        yourself.

        """
        # Grab a tree indexer by name, or make one up:
        tree_to_index = namespace.get('TreeToIndex')
        if not tree_to_index:
            tree_to_index = partial(
                AdHocTreeToIndex,
                file_to_index_class=namespace.get('FileToIndex'))

        return cls(filters=filters_from_namespace(namespace),
                   folder_to_index=namespace.get('FolderToIndex'),
                   tree_to_index=tree_to_index,
                   file_to_skim=namespace.get('FileToSkim'),
                   mappings=namespace.get('mappings'),
                   analyzers=namespace.get('analyzers'),
                   badge_colors=namespace.get('badge_colors'),
                   direct_searchers=direct_searchers_from_namespace(namespace),
                   refs=refs_from_namespace(namespace))

    def __eq__(self, other):
        """Consider instances of the same plugin equal."""
        # Defer to the other operand instead of blowing up with an
        # AttributeError when compared against None, strings, etc.
        if not isinstance(other, Plugin):
            return NotImplemented
        return self.name == other.name

    def __ne__(self, other):
        """Inverse of :meth:`__eq__` (Python 2 does not derive it for us)."""
        if not isinstance(other, Plugin):
            return NotImplemented
        return self.name != other.name

    def __hash__(self):
        """Let us put plugins in sets and test for membership."""
        return hash(self.name)

    def __getstate__(self):
        """When pickling, omit the direct searchers.

        We don't use them during the multiprocess indexing phase, so we might
        as well allow ourselves to create direct searchers using function
        factories, whose products are unpickleable.

        """
        # Shallow copy, so the live instance keeps its direct searchers.
        copy = self.__dict__.copy()
        copy['direct_searchers'] = []
        return copy

    def __repr__(self):
        # ``name`` is attached after construction, so it may be absent.
        return (('<Plugin %s>' % self.name) if hasattr(self, 'name')
                else super(Plugin, self).__repr__())
def filters_from_namespace(namespace):
    """Return the filters which conform to our suggested naming convention:
    ending with "Filter" and not starting with "_".

    :arg namespace: The namespace (a dict of name -> object) in which to look
        for filters

    """
    # items() rather than iteritems(): same result, but not Python-2-only.
    return [v for k, v in namespace.items() if
            isclass(v) and
            not k.startswith('_') and
            k.endswith('Filter') and
            v is not Filter]


def direct_searchers_from_namespace(namespace):
    """Return a list of the direct search functions defined in a namespace.

    A direct search function is one that has a ``direct_search_priority``
    attribute.

    :arg namespace: The namespace (a dict of name -> object) to search

    """
    # values() rather than itervalues(): same result, but not Python-2-only.
    return [v for v in namespace.values()
            if hasattr(v, 'direct_search_priority') and isfunction(v)]


def refs_from_namespace(namespace):
    """Return a list of :class:`~dxr.lines.Ref` subclasses (or workalikes)
    defined in a namespace, identified by conforming to our naming convention.

    Our convention is to end with "Ref" and not start with "_".

    :arg namespace: The namespace (a dict of name -> object) to search

    """
    # Imported here rather than at module level, as in the original code.
    from dxr.lines import Ref

    # TODO: Consider switching to an isinstance() test so plugin authors have
    # more naming flexibility.
    return [v for k, v in namespace.items() if
            isclass(v) and
            not k.startswith('_') and
            k.endswith('Ref') and
            v is not Ref]


def direct_search(priority, domain=LINE):
    """Mark a function as being a direct search provider.

    :arg priority: A priority to attach to the function. Direct searchers are
        called in order of increasing priority.
    :arg domain: LINE if this searcher searches for individual lines, FILE if
        it searches for entire files

    """
    def decorator(searcher):
        # Tag the function in place and hand the same object back.
        searcher.direct_search_priority = priority
        searcher.domain = domain
        return searcher
    return decorator


_plugin_cache = None


def all_plugins():
    """Return a dict of plugin name -> Plugin for all plugins, including core.

    Plugins are registered via the ``dxr.plugins`` setuptools entry point,
    which may point to either a module (in which case a Plugin will be
    constructed based on the contents of the module namespace) or a Plugin
    object (which will be returned directly). The entry point name is what the
    user types into the config file under ``enabled_plugins``.

    The core plugin, which provides many of DXR's cross-language, built-in
    features, is always the first plugin when iterating over the returned
    dict. This lets other plugins override bits of its elasticsearch mappings
    and analyzers when we're building up the schema.

    The result is cached in a module-level variable, and the same dict object
    is returned on every call.

    """
    global _plugin_cache

    def name_and_plugin(entry_point):
        """Return the name of an entry point and the Plugin it points to."""
        loaded = entry_point.load()
        plugin = (loaded if isinstance(loaded, Plugin)
                  else Plugin.from_namespace(loaded.__dict__))
        plugin.name = entry_point.name
        return entry_point.name, plugin

    if _plugin_cache is None:
        # Iterating over entrypoints could be kind of expensive, with the FS
        # reads and all.
        #
        # Build the mapping in a local and publish it only once it is
        # complete. Otherwise, a plugin that fails to load would leave a
        # partially filled cache behind, and later calls would silently
        # return it.
        plugins = OrderedDict([('core', core_plugin())])
        plugins.update(name_and_plugin(point) for point in
                       iter_entry_points('dxr.plugins'))
        _plugin_cache = plugins
    return _plugin_cache


def all_plugins_but_core():
    """Do like :func:`all_plugins()`, but don't return the core plugin."""
    # Work on a copy so the shared cache keeps its 'core' entry.
    ret = all_plugins().copy()
    del ret['core']
    return ret


_core_plugin = None


def core_plugin():
    """Return the core plugin, constructing and caching it on first use."""
    # This is a function in order to dodge a circular import.
    global _core_plugin
    import dxr.plugins.core

    if _core_plugin is None:
        _core_plugin = Plugin.from_namespace(dxr.plugins.core.__dict__)
        _core_plugin.name = 'core'

    return _core_plugin


def plugins_named(names):
    """Return an iterable of the Plugins having the given names.

    The core plugin is included only if ``'core'`` is among ``names``.

    The result is a lazy generator: an unknown name raises KeyError when the
    generator reaches it, not when this function is called.

    :arg names: An iterable of plugin names

    """
    plugins = all_plugins()
    return (plugins[name] for name in names)