TextLineReader
#!/usr/bin/env python3
import sys
import pickle
import pathlib
class TextLineReader:
    """Random access to single lines of a text file.

    Lines can be fetched by 0-based line number (``reader[3]``) or by the
    first whitespace-delimited word of the line (``reader["name"]``).

    The start offset of every line is stored in a pickled index file next
    to the data file.  The index is rebuilt when it is missing or older
    than the data file.  When several lines start with the same word, the
    last one wins.  Blank and whitespace-only lines are reachable by line
    number only.

    The reader keeps the data file open; call :meth:`close` or use the
    instance as a context manager to release it.
    """

    def __init__(self, fn):
        """Open *fn*, building or refreshing its on-disk index if needed.

        Args:
            fn: Path of the text file to read (``str`` or path-like).

        Raises:
            OSError: If the data file or the index file cannot be accessed.
        """
        self.path = pathlib.Path(fn)
        # Append the marker to the complete file name rather than replacing
        # the suffix: otherwise "data.txt" and "data.csv" would share (and
        # silently reuse) one index file.
        self.index_path = self.path.with_name(
            self.path.name + ".text_line_reader_index")
        if self._index_is_stale():
            self.build_index()
        # NOTE(security): pickle.load can execute arbitrary code.  The index
        # is written by build_index(), but it lives beside the data file, so
        # do not use this class on directories writable by untrusted parties.
        with self.index_path.open("rb") as f:
            self._index_lines, self._index_names = pickle.load(f)
        # Opened last so that a failure while loading the index does not
        # leave a dangling file handle behind.
        self._file = self.path.open()

    def _index_is_stale(self):
        """Return True if the index file is missing or older than the data."""
        try:
            index_mtime = self.index_path.stat().st_mtime
        except FileNotFoundError:
            return True
        return index_mtime < self.path.stat().st_mtime

    def build_index(self):
        """Scan the data file and write the pickled index to ``index_path``.

        The index is a ``(line_offsets, name_offsets)`` pair: a list with the
        start position of every line, and a dict mapping each line's first
        word to the start position of the last line beginning with it.
        Progress is reported on stderr.  The in-memory index of an already
        constructed reader is not refreshed by this call.
        """
        print("building...", end=" ", flush=True, file=sys.stderr)
        index_lines, index_names = [], {}
        with self.path.open() as f:
            # tell() must be taken before each readline(), and text-mode
            # tell() is disabled while iterating with "for line in f", so
            # the explicit loop is required here.
            while True:
                p = f.tell()
                line = f.readline()
                if not line:
                    break
                index_lines.append(p)
                words = line.split(None, 1)
                # Blank / whitespace-only lines have no first word; they stay
                # addressable by line number but get no name entry.
                if words:
                    index_names[words[0]] = p
        with self.index_path.open("wb") as f:
            pickle.dump((index_lines, index_names), f)
        print("done.", file=sys.stderr)

    def __getitem__(self, key):
        """Return the line selected by *key*, or ``None`` if there is none.

        Args:
            key: An ``int`` selects by 0-based line number (negative values
                count from the end, as with list indexing); any other key
                is looked up as the first word of a line.

        Returns:
            The full line as read by ``readline()`` (including its trailing
            newline when present), or ``None`` for an unknown key or an
            out-of-range line number.
        """
        idx = self._index_lines if isinstance(key, int) else self._index_names
        try:
            offset = idx[key]
        except (IndexError, KeyError):
            return None
        self._file.seek(offset)
        return self._file.readline()

    def close(self):
        """Close the underlying data file; safe to call more than once."""
        f = getattr(self, "_file", None)
        if f is not None:
            f.close()

    def __enter__(self):
        """Return the reader itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the data file when leaving the ``with`` block."""
        self.close()
def main():
    """Demo: read the ``<this script>.test`` file and print three lookups.

    Prints the result of two line-number lookups (1 and 42) and one
    first-word lookup ("com"), one per line.
    """
    demo_reader = TextLineReader(__file__ + ".test")
    for lookup in (1, 42, "com"):
        print(demo_reader[lookup])
#reader = TextLineReader("source")