Shoora
6/30/2019 - 12:16 AM

PHP script to report on and/or extract sections from Apache and Icecast log files by date range

PHP script to report on and/or extract sections from Apache and Icecast log files by date range

#!/usr/bin/env php
<?php
/**
 * Extract a section of an apache or icecast log file between two dates
 * Assumes that log lines are in chronological order
 * Start and end dates can be in any format that strtotime can handle
 * Reads from stdin, outputs to stdout, stats to stderr
 * @author Marcus Bointon <marcus@synchromedia.co.uk>
 * @link https://gist.github.com/3749394
 * Example usage. Report the range of dates in a log file
 * <code>
 * /usr/local/bin/logdates -f icecast < /var/log/icecast2/access.log > /dev/null
 * Found 9,172 valid lines
 * First date: 2012-09-18 19:01:50
 * Last date: 2012-09-19 11:41:49
 * </code>
 * or extract a date range from a log file into another file:
 * <code>
 * /usr/local/bin/logdates -f icecast -s '2012-09-18 20:00:00' -e '2012-09-18 22:00:00' < /var/log/icecast2/access.log > t.log 
 * Found 1,875 valid lines
 * First date within limits: 2012-09-18 20:00:01
 * Last date within limits: 2012-09-18 21:59:55
 * </code>
 */

//Log dates are assumed to be in UTC (which they usually are), so pin the
//timezone up front, before any date string is parsed
date_default_timezone_set('UTC');

//Short options only: -f format, -s start date, -e end date
//(getopt is a bit limited in PHP 5.2)
$opts = getopt('f:s:e:');
if (!$opts) {
	//Nothing usable on the command line: print usage to stderr and give up
	fwrite(STDERR, "Usage: {$argv[0]} -f (common|combined|icecast) [-s 'start date'] [-e 'end date'] < infile > outfile\nAll dates are handled as UTC, date strings can be anything accepted by strtotime.\n");
	exit(1);
}

/*
 * Supported log formats, keyed by the name accepted by the -f option.
 * Each entry holds:
 *  'pattern' - a PCRE pattern body (no delimiters; it is wrapped in '/' when used,
 *              so it must not contain an unescaped '/'),
 *  'fields'  - the names of the pattern's capture groups, in order.
 *
 * The first field is captured as any run of non-whitespace so that IPv6 addresses
 * and resolved host names match as well as dotted IPv4 addresses. The key stays
 * 'ip' for backward compatibility.
 * In the Apache formats the bytes field also accepts '-', which Apache's CLF
 * byte count (%b) writes instead of 0 when no body was sent (e.g. 304 responses).
 */
$formats = array(
	'common' => array(
		'pattern' => '^(\S+) ([^ ]*) ([^ ]*) \[([^\]]+)\] (".*") (\d+) (\d+|-)',
		'fields' => array(
			'ip',
			'ident',
			'user',
			'timestamp',
			'request',
			'status',
			'bytes'
		)
	),
	'combined' => array(
		'pattern' => '^(\S+) ([^ ]*) ([^ ]*) \[([^\]]+)\] (".*") (\d+) (\d+|-) (".*") (".*")',
		'fields' => array(
			'ip',
			'ident',
			'user',
			'timestamp',
			'request',
			'status',
			'bytes',
			'referer',
			'user-agent'
		)
	),
	'icecast' => array(
		//Same as combined plus a trailing connection duration in seconds
		'pattern' => '^(\S+) ([^ ]*) ([^ ]*) \[([^\]]+)\] (".*") (\d+) (\d+) (".*") (".*") (\d+)',
		'fields' => array(
			'ip',
			'ident',
			'user',
			'timestamp',
			'request',
			'status',
			'bytes',
			'referer',
			'user-agent',
			'secondsconnected'
		)
	)
);

/*
 * Resolve the command line options into:
 *  $format    - key into $formats, defaulting to 'combined',
 *  $startdate - inclusive lower limit as a Unix timestamp,
 *  $enddate   - inclusive upper limit as a Unix timestamp.
 * All diagnostics go to stderr, because stdout carries the extracted log lines.
 */
$format = 'combined';
if (array_key_exists('f', $opts)) {
	//A repeated option arrives from getopt as an array, which can't name a format
	if (is_string($opts['f']) and array_key_exists($opts['f'], $formats)) {
		$format = $opts['f'];
	} else {
		//Keep the historical fallback, but no longer silently
		fwrite(STDERR, "Unknown log format given, using '$format'\n");
	}
}
if (array_key_exists('s', $opts)) {
	$startdate = is_string($opts['s']) ? strtotime($opts['s']) : false;
	if (false === $startdate) {
		fwrite(STDERR, "Invalid start date given\n");
		exit(1);
	}
} else {
	//No lower limit: use a timestamp earlier than anything a log can contain
	$startdate = -PHP_INT_MAX;
}
if (array_key_exists('e', $opts)) {
	$enddate = is_string($opts['e']) ? strtotime($opts['e']) : false;
	if (false === $enddate) {
		fwrite(STDERR, "Invalid end date given\n");
		exit(1);
	}
} else {
	//No upper limit given: stop at the current time
	$enddate = time();
}

/*
 * Copy every log line whose timestamp lies within [$startdate, $enddate] from
 * stdin to stdout, recording the earliest and latest timestamps copied.
 * Lines that don't match the selected format, or whose timestamp can't be
 * parsed, are dropped. Input is assumed to be in chronological order, so
 * reading stops at the first line past the end limit.
 */
$firstdate = 0;
$lastdate = 0;
$linecount = 0;
//Loop-invariant: build the delimited pattern and field list once
$pattern = '/'.$formats[$format]['pattern'].'/';
$fieldnames = $formats[$format]['fields'];
//No length limit on fgets: a fixed buffer would split long lines (long URLs or
//user agents), emitting a truncated first part and discarding the rest
while (($line = fgets(STDIN)) !== false) {
	$matches = array();
	if (!preg_match($pattern, $line, $matches)) {
		//Not a line in the expected format
		continue;
	}
	//Drop the whole-match entry so the captures line up with the field names
	array_shift($matches);
	$fields = array_combine($fieldnames, $matches);
	$ts = strtotime($fields['timestamp']);
	if (false === $ts) {
		//Unparseable timestamp, can't be placed in the range
		continue;
	}
	if ($ts < $startdate) {
		//Not in range yet, skip to next
		continue;
	}
	if ($ts > $enddate) {
		//Passed our cut-off date, so stop
		break;
	}
	//Both limits are inclusive, so every line that gets this far is counted
	//in the reported first/last dates as well as in the output
	if (0 === $linecount) {
		$firstdate = $ts;
	}
	if (0 === $linecount or $ts > $lastdate) {
		$lastdate = $ts;
	}
	$linecount++;
	fwrite(STDOUT, $line);
}

//Summary goes to stderr so that it never mixes with the extracted lines on stdout
$summary = "Found ".number_format($linecount)." valid lines\n";
fwrite(STDERR, $summary);

//Without any matching lines there are no dates to report
if ($linecount <= 0) {
	exit(0);
}

//The wording reflects whether the user supplied the corresponding limit
$firstlabel = isset($opts['s']) ? "First date within limits: " : "First date: ";
$lastlabel = isset($opts['e']) ? "Last date within limits: " : "Last date: ";
$stampformat = 'Y-m-d H:i:s';

fwrite(STDERR, $firstlabel.gmdate($stampformat, $firstdate)."\n");
fwrite(STDERR, $lastlabel.gmdate($stampformat, $lastdate)."\n");
exit(0);