CatTail
9/17/2015 - 3:30 AM

Parse URLs and group them by path

import sys
import urlparse
import operator

# Parse the URLs listed in a file (one per line), count how many times each
# URL path occurs, and print the paths ordered from least to most frequent.

# Indexing sys.argv[1] directly raises IndexError when no argument is given,
# so check the argument count first; an empty argument is rejected as well.
if len(sys.argv) < 2 or not sys.argv[1]:
    print("Usage group.py <file path>")
    sys.exit(1)

filename = sys.argv[1]

# Maps URL path -> number of input lines whose URL has that path.
group = {}

# The with-block closes the file even if reading or parsing fails part-way.
with open(filename, "r") as url_file:
    for url in url_file:
        url = urlparse.urlparse(url)
        # strip() removes the trailing newline that each input line carries.
        path = url.path.strip()
        # Blank lines and URLs with an empty path are not counted.
        if path:
            group[path] = group.get(path, 0) + 1

# Convert to a list of (path, count) pairs sorted by ascending count.
group = sorted(group.items(), key=operator.itemgetter(1))

for item in group:
    # Right-align the count in a 10-character column, followed by the path.
    print("%10s %s" % (item[1], item[0]))