PHP script to report and/or extract sections from apache and icecast log files by date ranges
#!/usr/bin/env php
<?php
/**
* Extract a section of an apache or icecast log file between two dates
* Assumes that log lines are in chronological order
* Start and end dates can be in any format that strtotime can handle
* Reads from stdin, outputs to stdout, stats to stderr
* @author Marcus Bointon <marcus@synchromedia.co.uk>
* @link https://gist.github.com/3749394
* Example usage. Report the range of dates in a log file
* <code>
* /usr/local/bin/logdates -f icecast < /var/log/icecast2/access.log > /dev/null
* Found 9,172 valid lines
* First date: 2012-09-18 19:01:50
* Last date: 2012-09-19 11:41:49
* </code>
* or extract a date range from a log file into another file:
* <code>
* /usr/local/bin/logdates -f icecast -s '2012-09-18 20:00:00' -e '2012-09-18 22:00:00' < /var/log/icecast2/access.log > t.log
* Found 1,875 valid lines
* First date within limits: 2012-09-18 20:00:01
* Last date within limits: 2012-09-18 21:59:55
* </code>
*/
//Read the command line switches; getopt is a bit limited in PHP 5.2, so only short options are used
$opts = getopt('f:s:e:');
if (!$opts) {
    //No recognised switches at all: explain how to call the script on stderr and give up
    $usage = "Usage: {$argv[0]} -f (common|combined|icecast) [-s 'start date'] [-e 'end date'] < infile > outfile\n";
    $usage .= "All dates are handled as UTC, date strings can be anything accepted by strtotime.\n";
    fwrite(STDERR, $usage);
    exit(1);
}
//Log dates are assumed to be in UTC (which they usually are), so do all date handling in UTC
date_default_timezone_set('UTC');
//All supported formats start with the same Common Log Format prefix:
//  host ident user [timestamp] "request" status bytes
//The host is matched loosely so that IPv6 addresses and resolved hostnames are accepted as well
//as dotted IPv4 addresses, and bytes may be '-' because Apache's %b logs a dash instead of 0
//when a response has no body (e.g. a 304).
$clfpattern = '^([^ ]+) ([^ ]*) ([^ ]*) \[([^\]]+)\] (".*") (\d+) (\d+|-)';
//Each format has a regex (without delimiters) and the names of its capture groups, in order.
//The number of field names must always equal the number of capture groups in the pattern.
$formats = array(
    'common' => array(
        'pattern' => $clfpattern,
        'fields' => array(
            'ip',
            'ident',
            'user',
            'timestamp',
            'request',
            'status',
            'bytes'
        )
    ),
    //Common format plus quoted referer and user agent
    'combined' => array(
        'pattern' => $clfpattern.' (".*") (".*")',
        'fields' => array(
            'ip',
            'ident',
            'user',
            'timestamp',
            'request',
            'status',
            'bytes',
            'referer',
            'user-agent'
        )
    ),
    //Combined format plus a trailing number of seconds connected
    'icecast' => array(
        'pattern' => $clfpattern.' (".*") (".*") (\d+)',
        'fields' => array(
            'ip',
            'ident',
            'user',
            'timestamp',
            'request',
            'status',
            'bytes',
            'referer',
            'user-agent',
            'secondsconnected'
        )
    )
);
//Parse lines as 'combined' unless -f names one of the known formats
$format = 'combined';
if (array_key_exists('f', $opts)) {
    if (array_key_exists($opts['f'], $formats)) {
        $format = $opts['f'];
    } else {
        //A mistyped format name used to be ignored silently; keep the fallback but say so.
        //This goes to stderr so that it cannot end up in the extracted output.
        fwrite(STDERR, "Unknown format '{$opts['f']}', using '{$format}' instead\n");
    }
}
//Work out the range of timestamps to extract; both ends are optional.
//Problems are reported on stderr because stdout is normally redirected into the output file.

//Without -s there is no lower limit. This is deliberately the lowest usable timestamp rather
//than a date string: the old default of '1970-00-00' is not a real calendar date.
$startdate = -PHP_INT_MAX;
if (array_key_exists('s', $opts)) {
    $startdate = strtotime($opts['s']);
    if (false === $startdate) {
        fwrite(STDERR, "Invalid start date given\n");
        exit(1);
    }
}
//Without -e, extract everything up to the moment the script is run
$enddate = strtotime('now');
if (array_key_exists('e', $opts)) {
    $enddate = strtotime($opts['e']);
    if (false === $enddate) {
        fwrite(STDERR, "Invalid end date given\n");
        exit(1);
    }
}
//Copy every recognised log line whose timestamp lies between $startdate and $enddate
//(both inclusive) from stdin to stdout, keeping count and remembering the first and last
//timestamps written. Lines are assumed to be in chronological order, so reading stops at
//the first line that is later than $enddate.
$firstdate = 0;
$lastdate = 0;
$linecount = 0;
//Neither of these changes from line to line, so build them once
$linepattern = '/'.$formats[$format]['pattern'].'/';
$fieldnames = $formats[$format]['fields'];
//Read whole lines: a length limit would cut long lines in two, writing the first part
//without its newline and losing the rest. Testing the result of fgets (rather than feof)
//also avoids processing a bogus extra read at the end of the input.
while (($line = fgets(STDIN)) !== false) {
    $matches = array();
    if (!preg_match($linepattern, $line, $matches)) {
        //Not a log line in the selected format
        continue;
    }
    //Drop the whole-match entry so the captures line up with the field names
    array_shift($matches);
    $fields = array_combine($fieldnames, $matches);
    $ts = strtotime($fields['timestamp']);
    if (false === $ts) {
        //Timestamp could not be parsed, so the line cannot be placed in the range
        continue;
    }
    if ($ts < $startdate) {
        //Not in range yet, skip to next
        continue;
    }
    if ($ts > $enddate) {
        //Passed our cut-off date, so stop
        break;
    }
    //Everything from here on is written out, so lines exactly on either limit are
    //included in the first/last dates too
    if (0 == $linecount) {
        $firstdate = $ts;
    }
    if ($ts > $lastdate) {
        $lastdate = $ts;
    }
    $linecount++;
    fwrite(STDOUT, $line);
}
//Print the summary on stderr so that it never mixes with the extracted lines on stdout
fwrite(STDERR, "Found ".number_format($linecount)." valid lines\n");
if ($linecount > 0) {
    //When a limit was given on the command line, say that the date shown is the
    //first/last one inside that limit rather than the first/last one in the input
    $firstlabel = isset($opts['s']) ? "First date within limits: " : "First date: ";
    $lastlabel = isset($opts['e']) ? "Last date within limits: " : "Last date: ";
    fwrite(STDERR, $firstlabel.gmdate('Y-m-d H:i:s', $firstdate)."\n");
    fwrite(STDERR, $lastlabel.gmdate('Y-m-d H:i:s', $lastdate)."\n");
}
exit(0);