Usage of defaultdict from the collections module in Python
# coding:utf-8
import sys
import re
# Matches each run of word characters; every match is one indexed word.
WORE_RE = re.compile(r'\w+')

# Maps each word to the list of (line, column) positions where it occurs.
index = {}

# The file to index is given as the first command-line argument.
with open(sys.argv[1], encoding="utf-8") as source:
    for line_no, text in enumerate(source, 1):
        for hit in WORE_RE.finditer(text):
            # Lines are counted from 1 by enumerate; columns are shifted
            # by one so they are reported 1-based as well.
            position = (line_no, hit.start() + 1)
            occurrences = index.setdefault(hit.group(), [])
            occurrences.append(position)

# Print the words ordered by their upper-cased form, each followed by
# its list of positions.
entries = sorted(index.items(), key=lambda entry: entry[0].upper())
for word, occurrences in entries:
    print(word, occurrences)