cstrap
11/9/2015 - 11:37 AM

Search features with attachment support

Search features with attachment support

# -*- coding: utf-8 -*-
"""
Search features for an elasticsearch / haystack / elasticstack

:creationdate: 05/11/15 15:05
:moduleauthor: François GUÉRIN <fguerin@ville-tourcoing.fr>
:modulename: intrautils.search

"""
import base64
import json
import logging
import mimetypes

from django import forms
from django.conf import settings
from django.db.models.fields.files import FieldFile as dj_File
from django.templatetags.static import static
from django.utils.translation import ugettext_lazy as _
from elasticstack.backends import ConfigurableElasticBackend, ConfigurableElasticSearchEngine
from elasticstack.fields import FacetField
from elasticstack.forms import SearchForm
from filer.models import File as fi_File
from form_utils.forms import BetterForm
from haystack import DEFAULT_ALIAS
from haystack.constants import DJANGO_CT, DJANGO_ID
from haystack.fields import SearchField
from haystack.forms import model_choices

from utils import widgets as u_widgets
from utils.forms import CollapsibleFieldsetFormMixin

# Informational author tag for this module.
__author__ = 'fguerin'
# Module-level logger, registered under the 'intrautils.search' name.
logger = logging.getLogger('intrautils.search')

# Mapping used by `build_schema` for any field whose `field_type` has no entry in
# the per-type table (unless settings.ELASTICSEARCH_DEFAULT_FIELD_MAPPINGS overrides it).
DEFAULT_FIELD_MAPPING = {
    'type': 'string',
    'analyzer': 'snowball',
}

# Per `field_type` elasticsearch mappings, used by `build_schema` unless
# settings.ELASTICSEARCH_FIELD_MAPPINGS is defined.
FIELD_MAPPINGS = {
    # Text fields with a dedicated analyzer
    'edge_ngram': {
        'type': 'string',
        'analyzer': 'edgengram_analyzer',
    },
    'ngram': {
        'type': 'string',
        'analyzer': 'ngram_analyzer',
    },

    # Both date-like field types share the elasticsearch `date` type
    'date': {'type': 'date'},
    'datetime': {'type': 'date'},

    'location': {'type': 'geo_point'},
    'boolean': {'type': 'boolean'},

    # Numeric types: `integer` is mapped on `long` as well
    'float': {'type': 'float'},
    'long': {'type': 'long'},
    'integer': {'type': 'long'},

    # File contents, see AttachmentField
    'attachment': {'type': 'attachment'},
}


class ExtendedElasticsearchBackend(ConfigurableElasticBackend):
    """
    Elasticsearch backend whose schema builder supports the `attachment` field type.
    """

    def build_schema(self, fields):
        """
        Build the elasticsearch mapping for the given index fields.

        Merge of the haystack and elasticstack elasticsearch backend `build_schema` methods.
        It provides an additional feature: custom field mappings, read from the
        `ELASTICSEARCH_FIELD_MAPPINGS` / `ELASTICSEARCH_DEFAULT_FIELD_MAPPINGS` settings, with the
        module-level `FIELD_MAPPINGS` / `DEFAULT_FIELD_MAPPING` dicts as fallback.

        :param fields: mapping whose values are field instances exposing `field_type`, `boost`,
                       `document`, `indexed` and `index_fieldname`
        :return: `(content_field_name, mapping)` tuple; `content_field_name` is an empty string
                 when no field has `document` set to True
        """
        type_mappings = getattr(settings, 'ELASTICSEARCH_FIELD_MAPPINGS', FIELD_MAPPINGS)
        fallback_mapping = getattr(settings, 'ELASTICSEARCH_DEFAULT_FIELD_MAPPINGS', DEFAULT_FIELD_MAPPING)

        # The content-type and id fields are always stored verbatim.
        schema = {
            DJANGO_CT: {'type': 'string', 'index': 'not_analyzed', 'include_in_all': False},
            DJANGO_ID: {'type': 'string', 'index': 'not_analyzed', 'include_in_all': False},
        }
        document_field = ''

        for field in fields.values():
            # Work on a copy: the mapping tables are shared between fields.
            es_mapping = type_mappings.get(field.field_type, fallback_mapping).copy()

            if field.boost != 1.0:
                es_mapping['boost'] = field.boost

            if field.document is True:
                document_field = field.index_fieldname

            # Do this last to override `text` fields: indexed string fields that are neither
            # facets nor (edge) n-grams get the field's own analyzer, or the backend default one.
            indexed_string = es_mapping['type'] == 'string' and field.indexed
            if indexed_string and not hasattr(field, 'facet_for'):
                if field.field_type not in ('ngram', 'edge_ngram'):
                    es_mapping['analyzer'] = getattr(field, 'analyzer', self.DEFAULT_ANALYZER)

            schema[field.index_fieldname] = es_mapping

        return document_field, schema


class ExtendedElasticSearchEngine(ConfigurableElasticSearchEngine):
    """
    Search engine that uses :class:`ExtendedElasticsearchBackend` as its backend.
    """
    backend = ExtendedElasticsearchBackend


class AttachmentField(SearchField):
    """
    Search field that prepares a file as an elasticsearch `attachment` document.

    The prepared value is a dict holding the base64-encoded file content (`_content`) along with
    the `_language`, `_content_type`, `_name`, `_title` and `_content_length` keys.
    Supported sources are django `FieldFile` and django-filer `File` instances.
    """
    field_type = 'attachment'
    # Defaults for the optional constructor keyword arguments of the same name.
    content_type_field = None
    author_field = 'author'

    def __init__(self, **kwargs):
        """
        Pop the attachment-specific options and forward the remaining ones to `SearchField`.

        :param content_type_field: optional, stored on the instance (defaults to None)
        :param author_field: optional, stored on the instance (defaults to 'author')
        :param kwargs: regular `SearchField` keyword arguments
        """
        self.content_type_field = kwargs.pop('content_type_field', self.content_type_field)
        self.author_field = kwargs.pop('author_field', self.author_field)

        super(AttachmentField, self).__init__(**kwargs)

    def convert(self, value):
        """
        Return the value unchanged.

        :param value: value to convert
        :return: the same value
        """
        return value

    @staticmethod
    def _get_file_data(field):
        """
        Build the attachment dict for a file.

        :param field: django-filer `File` instance, or any other file-like value
                      (`prepare` only passes django `FieldFile` instances here)
        :return: dict with the `_language`, `_content`, `_content_type`, `_name`, `_title`
                 and `_content_length` keys
        """
        if isinstance(field, fi_File):
            field_file = field.file
            name = field.label
            # guess_type() returns a `(type, encoding)` tuple: only the MIME type is wanted here.
            content_type = mimetypes.guess_type(name)[0]
        else:  # isinstance(field, dj_File):
            field_file = field
            name = None
            content_type = None

        try:
            content_length = len(field_file)
        except TypeError:
            # The wrapper has no usable length: measure the wrapped file instead.
            content_length = len(field_file.file)

        try:
            content = base64.b64encode(field_file.read())
        except AttributeError:
            # No `read()` available: encode the value itself.
            content = base64.b64encode(field_file)

        return {
            '_language': 'fr',
            '_content': content,
            '_content_type': content_type,
            '_name': name,
            '_title': name,
            '_content_length': content_length,
        }

    def prepare(self, obj):
        """
        Read the file from the object and return its attachment dict.

        :param obj: object being indexed; the file is read from its `model_attr` attribute
                    when `model_attr` is set, otherwise `obj` itself is used as the file
        :return: attachment dict, see `_get_file_data`
        :raise NotImplementedError: when the value is neither a django `FieldFile`
                                    nor a django-filer `File`
        """
        if self.model_attr:
            field = getattr(obj, self.model_attr)
        else:
            field = obj

        if not isinstance(field, (dj_File, fi_File)):
            raise NotImplementedError('AttachmentField does not implement file reading for %s file'
                                      % field.__class__.__name__)
        output = self._get_file_data(field)

        if settings.DEBUG:
            # Shallow copy with a truncated payload: avoids duplicating the whole encoded file
            # just to log its first characters.
            preview = dict(output, _content=output['_content'][:50] + '...')
            logger.debug(u'AttachmentField::prepare() output = %s', json.dumps(preview, indent=2))

        return output


class FacetedAttachmentField(FacetField, AttachmentField):
    """
    Attachment field combined with the elasticstack `FacetField` base class.
    """
    pass


def application_model_choices(app_name, using=DEFAULT_ALIAS):
    """
    Return the haystack model choices restricted to one application.

    :param app_name: text that must be contained in the first item of a choice to keep it
    :param using: haystack connection alias handed to `model_choices`
    :return: list of the matching choices, in the order given by `model_choices`
    """
    return [entry for entry in model_choices(using) if app_name in entry[0]]


class HaystackSearchForm(CollapsibleFieldsetFormMixin, BetterForm, SearchForm):
    """
    haystack search form for main `searching` feature

    Subclasses must either set the `search_app` attribute or override `get_search_app()`.
    """

    class Media:
        js = (static('bootstrap-collapsible-fieldset/bootstrap-collapsible-fieldset.js'),)
        css = {'all': (static('bootstrap-collapsible-fieldset/bootstrap-collapsible-fieldset.css'),)}

    class Meta:
        collapsed = True
        fieldsets = (('main', {'legend': _('search'), 'fields': ('q', 'models')}),)

    # Application name, or list / tuple of names, matched against the `django_ct` of the results.
    search_app = None

    models = forms.MultipleChoiceField(choices=application_model_choices('intrapubs'),
                                       required=False,
                                       label=_('Search in'),
                                       widget=u_widgets.ColumnCheckboxSelectMultiple(columns=3))

    def get_search_app(self):
        """
        Return the application(s) the search is restricted to.

        :return: the `search_app` attribute
        :raise NotImplementedError: when `search_app` is not set
        """
        if self.search_app:
            return self.search_app
        raise NotImplementedError('%s must provide a search_app attribute or override get_search_app() method.'
                                  % self.__class__.__name__)

    def get_models(self):
        """
        Return the model classes selected in the `models` field.

        :return: list of model classes, in the order of the cleaned `models` values;
                 an empty list when the form is not valid
        """
        if not self.is_valid():
            return []

        # Resolved here because the module does not import a model loader at file level.
        try:
            from django.apps import apps
            get_model = apps.get_model
        except ImportError:  # django < 1.7 has no application registry
            from django.db.models import get_model

        return [get_model(*model.split('.')) for model in self.cleaned_data['models']]

    def search(self):
        """
        Run the search, restricted to the selected models and to the search application(s).

        :return: the filtered search query set, or an empty list when the base search
                 gives no result
        :raise NotImplementedError: when no search application is provided
        """
        search_app = self.get_search_app()
        search_query_set = super(HaystackSearchForm, self).search()
        if settings.DEBUG:
            logger.debug(u'HaystackSearchForm::search() len(search_query_set) = %d '
                         u'(before models filtering)', len(search_query_set))
        if not search_query_set:
            return []
        search_query_set = search_query_set.models(*self.get_models())
        if isinstance(search_app, basestring):
            search_query_set = search_query_set.filter(django_ct__contains=search_app)
        elif isinstance(search_app, (tuple, list)):
            from haystack.query import SQ

            # OR the applications together first, then AND the result with the current query:
            # chaining `filter_or()` would OR each application with the text query itself and
            # widen the results instead of narrowing them.
            app_filter = None
            for app in search_app:
                clause = SQ(django_ct__contains=app)
                app_filter = clause if app_filter is None else app_filter | clause
            search_query_set = search_query_set.filter(app_filter)
        if settings.DEBUG:
            logger.debug(u'HaystackSearchForm::search() len(search_query_set) = %d '
                         u'(after models filtering)', len(search_query_set))
        return search_query_set

    def no_query_found(self):
        """
        Return an empty list (no result).
        """
        return []