kvasilov48
10/24/2015 - 1:45 PM

nginx log parser

nginx log parser

#!/usr/bin/env python

"""
nginx log format
log_format  main  '$server_name | $host | $remote_addr | $bytes_sent | $time_local | $request_method | $request_uri | $request_time | $status | $http_referer | $http_x_forwarded_for | $http_user_agent | $gzip_ratio | $connection';
"""

import os
import sys
import time
import math
import GeoIP

# Wall-clock reference taken when the module is loaded; main() subtracts it
# from the current time to report how long the run took.
start_time = time.time()

def _sum_bytes_by_ip(log_file):
    """Return a dict mapping each client address to its total bytes sent.

    Lines are split on ' | '; field 2 is taken as the client address and
    field 3 as the byte count. Lines that do not fit are skipped.
    """
    totals = {}
    # Iterating the file object reads it line by line, so large logs are
    # never loaded into memory at once; `with` closes it even on error.
    with open(log_file) as f:
        for line in f:
            fields = line.split(' | ')
            if len(fields) < 4:
                # Too few fields to contain both address and byte count.
                continue
            addr = fields[2]
            # Length-only filter: 7 ("1.1.1.1") to 15 ("255.255.255.255")
            # characters. This is not a full IPv4 validation.
            if not 6 < len(addr) <= 15:
                continue
            try:
                sent = int(fields[3])
            except ValueError:
                # Non-numeric byte count: skip the line instead of aborting.
                continue
            totals[addr] = totals.get(addr, 0) + sent
    return totals


def main(log_file):
    """Print the traffic sent to each country, largest first.

    Reads the access log at `log_file`, sums bytes sent per client address,
    resolves each address to a country name with GeoIP, and prints one line
    per country followed by the elapsed time since the module was loaded.

    Args:
        log_file: Path to an nginx access log whose fields are separated
            by ' | '.
    """
    ips = _sum_bytes_by_ip(log_file)

    gi = GeoIP.new(GeoIP.GEOIP_MEMORY_CACHE)

    traffic_by_country = {}
    for addr, sent in ips.items():
        # NOTE(review): the legacy GeoIP binding is expected to return None
        # for addresses missing from its database; group those under an
        # explicit label instead of printing the literal "None".
        country = gi.country_name_by_addr(addr) or 'Unknown'
        traffic_by_country[country] = traffic_by_country.get(country, 0) + sent

    ranked = sorted(traffic_by_country, key=traffic_by_country.get, reverse=True)
    for country in ranked:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('%s -- bytes sent: %s' % (country, convertSize(traffic_by_country[country])))

    print('%s sec' % (time.time() - start_time))

def convertSize(size):
    """Format a byte count as a human-readable string in 1024-based units.

    Args:
        size: Number of bytes (int or float).

    Returns:
        A string such as '512.0 B' or '1.5 KB'. Returns '0B' when the size
        is zero, negative, or rounds to zero at two decimal places.
    """
    size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
    if size <= 0:
        # A logarithm is undefined here; report an empty amount instead of
        # raising.
        return '0B'

    value = float(size)
    i = 0
    # Repeated division avoids the floating-point error math.log(size, 1024)
    # can show at exact powers of 1024. The bound keeps oversized values on
    # the largest unit instead of indexing past the end of the table.
    while value >= 1024 and i < len(size_name) - 1:
        value /= 1024.0
        i += 1

    s = round(value, 2)
    if s > 0:
        return '%s %s' % (s, size_name[i])
    return '0B'


if __name__ == "__main__":
    # Script entry point: needs at least one argument, the access log path.
    cli_args = sys.argv
    if not len(cli_args) >= 2:
        sys.exit('Usage: %s access.log' % cli_args[0])

    log_path = cli_args[1]
    if os.path.exists(log_path):
        main(log_path)
    else:
        # Stop with a readable message rather than a traceback from open().
        sys.exit('ERROR: log file %s was not found!' % log_path)