lwzm
1/9/2017 - 3:36 AM

TextLineReader

TextLineReader

#!/usr/bin/env python3

import sys
import pickle
import pathlib


class TextLineReader(object):
    """Random access to the lines of a text file through a cached index.

    The index is a pickled ``(line_offsets, name_offsets)`` pair stored in a
    sidecar file next to the data file:

    * ``line_offsets`` is a list holding the ``tell()`` position of the start
      of every line, in file order;
    * ``name_offsets`` maps the first whitespace-delimited token of a line to
      the position of that line (when a token repeats, the last line wins).

    The index is rebuilt when it is missing or not newer than the data file.

    Use ``reader[3]`` to fetch a line by number (0-based, negative indexes
    count from the end) and ``reader["token"]`` to fetch a line by its first
    token. Both return the line including its trailing newline, or ``None``
    when the key is not in the index.

    The reader keeps the data file open; call :meth:`close` or use it as a
    context manager to release the handle.
    """

    # Appended to the data file's full name to form the index file's name.
    INDEX_SUFFIX = ".text_line_reader_index"

    def __init__(self, fn):
        """Open *fn* for indexed reads, building the index if needed.

        Args:
            fn: Path (``str`` or path-like) of the text file to read.
        """
        self.path = pathlib.Path(fn)
        # Append to the whole name instead of replacing the extension, so
        # that e.g. "data.txt" and "data.csv" do not share one index file.
        self.index_path = self.path.with_name(self.path.name + self.INDEX_SUFFIX)

        if self._index_is_stale():
            self.build_index()

        # SECURITY NOTE: unpickling executes whatever the index file encodes.
        # Only use this class in directories where the sidecar index cannot
        # be written by an untrusted party.
        with self.index_path.open("rb") as f:
            self._index_lines, self._index_names = pickle.load(f)

        # Opened last so a failure while loading the index leaks no handle.
        self._file = self.path.open()

    def _index_is_stale(self):
        """Return True when the index is missing or not newer than the data."""
        if not self.index_path.exists():
            return True
        # "<=" rather than "<": with equal timestamps we cannot tell whether
        # the data changed after the index was written, and an unnecessary
        # rebuild is cheaper than silently serving wrong offsets.
        return self.index_path.stat().st_mtime <= self.path.stat().st_mtime

    def build_index(self) -> None:
        """Scan the data file and write a fresh index to ``index_path``.

        Progress is reported on stderr.
        """
        print("building...", end=" ", flush=True, file=sys.stderr)
        index_lines = []
        index_names = {}
        with self.path.open() as f:
            # readline() is used instead of iterating the file because
            # tell() is disabled while a text file is being iterated.
            while True:
                p = f.tell()
                line = f.readline()
                if not line:
                    break
                index_lines.append(p)
                fields = line.split(None, 1)
                # Blank / whitespace-only lines have no first token: they
                # stay addressable by number but get no name entry.
                if fields:
                    index_names[fields[0]] = p

        with self.index_path.open("wb") as f:
            pickle.dump((index_lines, index_names), f)
        print("done.", file=sys.stderr)

    def __getitem__(self, key):
        """Return the line selected by *key*, or ``None`` if it is unknown.

        Args:
            key: An ``int`` line number, or the first token of the wanted
                line for any other key type.

        Returns:
            The full line text (trailing newline included), or ``None`` when
            the line number is out of range or the token is not indexed.
        """
        idx = self._index_lines if isinstance(key, int) else self._index_names
        try:
            position = idx[key]
        except (IndexError, KeyError):
            return None
        self._file.seek(position)
        return self._file.readline()

    def close(self) -> None:
        """Close the underlying data file; safe to call more than once."""
        self._file.close()

    def __enter__(self):
        """Return the reader itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the data file when leaving the ``with`` block."""
        self.close()


def main():
    """Smoke-test TextLineReader against the fixture ``<this file>.test``.

    Prints the lookups for line numbers 1 and 42 and for the name "com",
    one result per line.
    """
    reader = TextLineReader(__file__ + ".test")
    for key in (1, 42, "com"):
        print(reader[key])
    #reader = TextLineReader("source")