bpeterso2000
4/16/2014 - 11:14 PM

Efficiently extracts column indices from a list.

Efficiently extracts column indices from a list.

"""
Extracts specified columns from a list by taking a list of column indices and converting them into the minimum number of Python
slice objects ahead of time, before iterating through the list.  It then uses those slices to extract ranges of columns.
"""

from itertools import chain


def prepare_slices(indices):
    """
    Converts a list of Python indices into an optimized list of slice objects.

    Neighbouring indices that share a constant, non-zero stride are merged
    greedily (left to right) into one slice; an index that cannot be merged
    with its successor becomes a one-item slice.  Applying the returned
    slices in order to a sequence that contains every requested index yields
    the same items, in the same order, as indexing one position at a time.

    ``indices`` may be any iterable of integers and is never modified.
    Ascending, descending, repeated and negative indices are all supported.
    An empty ``indices`` produces an empty list (no columns selected).

    >>> x = [2, 4, 13, 16, 19, 23, 24, 25]
    >>> prepare_slices(x)
    [slice(2, 5, 2), slice(13, 20, 3), slice(23, 26, 1)]
    >>> x
    [2, 4, 13, 16, 19, 23, 24, 25]
    >>> prepare_slices([7])
    [slice(7, 8, 1)]
    >>> prepare_slices([1, 2, 3, 7])
    [slice(1, 4, 1), slice(7, 8, 1)]
    >>> prepare_slices([5, 3, 1])
    [slice(5, 0, -2)]
    >>> prepare_slices([])
    []

    """
    # Work on a private copy so the caller's list survives the call and any
    # iterable (tuple, range, generator) is accepted.
    indices = list(indices)
    slices = []
    count = len(indices)
    pos = 0
    while pos < count:
        start = last = indices[pos]
        pos += 1
        # Negative indices count from the end of the sequence, so a single
        # slice can never span from a negative index to a non-negative one.
        negative = start < 0
        step = 1  # a lone index is emitted as a forward run of length one
        if pos < count and (indices[pos] < 0) == negative:
            stride = indices[pos] - start
            # A zero stride (repeated index) cannot be expressed as a slice
            # step, so the repeat starts a run of its own.
            if stride:
                step = stride
                while (pos < count
                       and indices[pos] - last == step
                       and (indices[pos] < 0) == negative):
                    last = indices[pos]
                    pos += 1
        # The stop bound is exclusive: one past the last index in the
        # direction of travel.  When that bound would be 0 (going up from -1)
        # or -1 (going down from 0) it would be read relative to the opposite
        # end of the sequence, so the slice is left open-ended instead.
        if step > 0:
            stop = last + 1
            open_ended = last == -1
        else:
            stop = last - 1
            open_ended = last == 0
        slices.append(slice(start, None if open_ended else stop, step))
    return slices


def slice_columns(seq, slices):
    """
    Collects the items picked out of a sequence by each slice, in order.

    Every entry of ``slices`` is applied to ``seq`` and the resulting pieces
    are concatenated into one flat list.

    >>> letters_a_to_z = [chr(97 + i) for i in range(26)]
    >>> slices_ = [slice(2, 5, 2), slice(13, 20, 3), slice(23, 26, 1)]
    >>> slice_columns(letters_a_to_z, slices_)
    ['c', 'e', 'n', 'q', 't', 'x', 'y', 'z']

    """
    picked = []
    for piece in slices:
        # Each slice yields a sub-sequence; append its items one by one.
        picked.extend(seq[piece])
    return picked


def slice_rows(rows, indices):
    """
    Extracts columns from a list of rows

    The column indices are converted to slice objects once, on the first
    iteration, and the same slices are then applied to every row.

    ``rows`` is an iterable of sliceable sequences and ``indices`` is an
    iterable of column indices.  This is a generator: for each row it yields
    a new list holding the selected columns.

    """
    # Hand prepare_slices a private copy so the caller's index list is left
    # untouched no matter how the slices are built.
    slices = prepare_slices(list(indices))
    for row in rows:
        yield slice_columns(row, slices)


# When the file is executed directly as a script, run the doctest examples
# embedded in this module's docstrings.
if __name__ == '__main__':
    import doctest
    doctest.testmod()