wonderbeyond
3/17/2017 - 2:36 AM

Build fuzzy material for quick in-memory search

Build fuzzy material for quick in-memory search

import re
import six

# Token delimiters used by build_fuzzy_material: any whitespace, comma,
# semicolon, slash, colon, and round/square brackets.
SPLITER = re.compile(r'[\s\,\;\/\:\(\)\[\]]')
# Characters removed outright (hyphen and dot) before splitting, so a token
# such as 'worl.d' is treated as 'world'.
IGNORES = re.compile(r'[\-\.]')


def build_fuzzy_material(inputs):
    """Build fuzzy material for quick in-memory search.

    Normalizes ``inputs`` into one space-separated string of unique,
    lowercased, alphabetically sorted tokens.  The text is lowercased,
    characters matched by ``IGNORES`` are removed, the result is split on
    ``SPLITER``, tokens shorter than two characters are discarded, and the
    remaining tokens are de-duplicated and sorted.

    :param inputs: a string, or an iterable of strings.  Falsy values
        (``None``, ``''``, an empty list) yield ``''``.
    :return: the normalized token string.

    >>> build_fuzzy_material('Hello world, worl.d, words, Hello')
    'hello words world'
    >>> build_fuzzy_material(['Ab','b.C','cd', 'BC'])
    'ab bc cd'
    >>> build_fuzzy_material('')
    ''
    >>> build_fuzzy_material(None)
    ''
    """
    if not inputs:
        return ''

    raw_text = inputs if isinstance(inputs, six.string_types) else ' '.join(inputs)
    # Lowercase AFTER choosing between the string and iterable forms.  A
    # trailing ``.lower()`` on the conditional expression would bind to the
    # ``else`` arm only and leave plain-string input un-normalized.
    raw_text = IGNORES.sub('', raw_text.lower())
    # Single-character fragments (and empty strings produced by adjacent
    # delimiters) are not useful search material, so they are dropped.
    tokens = (p.strip() for p in SPLITER.split(raw_text))
    unique_tokens = set(t for t in tokens if len(t) > 1)
    return ' '.join(sorted(unique_tokens))


if __name__ == '__main__':
    # Executed as a script: run the doctests embedded in this module.
    from doctest import testmod
    testmod()