bpeterso2000
2/14/2015 - 5:37 PM

Simple UNIX-style cut parser

Simple UNIX-style cut parser

import itertools
concat = itertools.chain.from_iterable


def parse(string):
    """
    Parse a UNIX cut-style field list into slice objects.

    The list is comma-separated.  Each item is a 1-based position ``N`` or
    an inclusive range ``N-M``, ``N-`` (N through the end) or ``-M`` (start
    through M).  One zero-based, end-exclusive ``slice`` is yielded per
    valid item, in the order given.

    Invalid items are skipped silently: empty items (e.g. from a trailing
    comma), non-numeric endpoints, items with more than two endpoints, and
    items starting at position 0 (positions are 1-based).

    >>> list(parse('2,4-6'))
    [slice(1, 2, None), slice(3, 6, None)]
    >>> list(parse('1,,x,3-'))
    [slice(0, 1, None), slice(2, None, None)]
    """
    for tok in string.split(','):
        parts = tok.split('-')
        if len(parts) > 2:
            # 'a-b-c' is not a cut range; slice(*parts) would otherwise
            # treat the third number as a step or raise TypeError.
            continue
        try:
            endpts = [int(part) if part else None for part in parts]
        except ValueError:
            continue  # non-numeric endpoint
        if len(endpts) == 1:
            if endpts[0] is None:
                continue  # empty item, e.g. '1,,3' or a trailing comma
            # A lone position N selects exactly the N-th element.
            endpts.append(endpts[0])
        start, stop = endpts
        if start is not None:
            if start < 1:
                # Position 0 would become index -1 and wrap to the end.
                continue
            start -= 1  # 1-based inclusive start -> 0-based index
        yield slice(start, stop)


def sliced(string, seq):
    """
    UNIX-style cut for a single sequence (row).

    ``string`` is a cut-style field list that is handed to ``parse``; every
    slice it yields is applied to ``seq`` and the resulting pieces are
    chained together in the order listed.  The result is a lazy iterator
    over the selected items.

    >>> seq = list(range(1,20))
    >>> tuple(sliced('-5,10,11-15,18-', seq))
    (1, 2, 3, 4, 5, 10, 11, 12, 13, 14, 15, 18, 19)
    """
    return concat(seq[i] for i in parse(string))


def cut(string, seq, rowtype=tuple):
    """
    UNIX-style cut for a 2d sequence (table).

    Applies the cut-style field list ``string`` to every row of ``seq`` and
    returns ``map(rowtype, ...)`` over the cut rows, so each row is
    converted with ``rowtype`` (``tuple`` by default).

    >>> list(cut('1,3-', [(1, 2, 3, 4), (5, 6, 7, 8)]))
    [(1, 3, 4), (5, 7, 8)]
    """
    # parse() returns a one-shot generator; materialize it so that every
    # row (not just the first) is cut with the full set of slices.
    slices = tuple(parse(string))

    def pick(row):
        # Binding the row as an argument keeps each lazy chain tied to its
        # own row even if rowtype does not consume it immediately.
        return concat(row[slice_] for slice_ in slices)

    return map(rowtype, map(pick, seq))