djekl
9/25/2012 - 5:00 PM

Bash script to parse Apache log for a count of RSS subscribers and email it to you

Bash script to parse Apache log for a count of RSS subscribers and email it to you

#!/bin/bash

# --- Required variables ---

# Request path of the feed. It is matched as a fixed string preceded by a
# space, i.e. as it appears after the HTTP method in the access log.
RSS_URI="/rss"
# Recipient of the emailed report.
MAIL_TO="your@email.com"
# Access log that gets scanned.
LOG_FILE="/var/log/httpd/access_log"
# date(1) format that reproduces the day part of the log's timestamps.
LOG_DATE_FORMAT="%d/%b/%Y"

# --- Optional customization ---

MAIL_SUBJECT="RSS feed subscribers"

# Date expression for yesterday
DATE="-1 day"

# Locale for printf number formatting (e.g. "10000" => "10,000")
# NOTE(review): an LC_ALL or LC_NUMERIC already present in the environment
# takes precedence over LANG -- confirm grouping shows up when run from cron.
LANG=en_US

# Date format for display in emails (ISO 8601, YYYY-MM-DD)
HUMAN_FDATE=$(date -d "$DATE" '+%F')

# --- The actual log parsing ---

# Apache writes English month abbreviations in its default timestamp, so the
# search strings are generated in the C locale; otherwise a non-English
# LC_TIME/LC_ALL would make %b yield a month name that never matches the log.
LOG_FDATE=$(LC_ALL=C date -d "$DATE" "+${LOG_DATE_FORMAT}")
DAY_BEFORE_FDATE=$(LC_ALL=C date -d "$DATE -1 day" "+${LOG_DATE_FORMAT}")

# Each helper below reads the globals LOG_FILE, RSS_URI and LOG_FDATE (plus
# DAY_BEFORE_FDATE where noted) and prints a single integer on stdout.
# grep -E / grep -F are used instead of egrep / fgrep, which recent GNU grep
# reports as obsolescent on stderr.

# Sum the leading number of every stdin line. "s + 0" forces a numeric
# result, so empty input prints 0 rather than an empty string.
sum_first_field() {
  awk '{ s += $1 } END { print s + 0 }'
}

# Unique IPs requesting RSS, except those reporting "subscribers":
count_direct_subscribers() {
  grep -F -- "$LOG_FDATE" "$LOG_FILE" \
    | grep -F -- " $RSS_URI" \
    | grep -E -v '[0-9]+ subscribers' \
    | cut -d' ' -f 1 \
    | sort -u \
    | wc -l
}

# Google Reader subscribers and other user-agents reporting "subscribers" and
# using the "feed-id" parameter for uniqueness. Scans LOG_FDATE and
# DAY_BEFORE_FDATE (presumably so a feed-id that was not fetched yesterday is
# still counted -- TODO confirm). The stable sort groups lines by feed-id in
# log order; tac then puts the last logged line of each feed-id first, and
# uniq -f2 (compare from the third field, "feed-id=N") keeps only that one.
count_feedid_subscribers() {
  grep -E -- "($LOG_FDATE|$DAY_BEFORE_FDATE)" "$LOG_FILE" \
    | grep -F -- " $RSS_URI" \
    | grep -E -o '[0-9]+ subscribers; feed-id=[0-9]+' \
    | sort -t= -k2 -s \
    | tac \
    | uniq -f2 \
    | sum_first_field
}

# Other user-agents reporting "subscribers", for which we'll use the entire
# user-agent string for uniqueness: the trailing quoted string of each line is
# extracted, the lines are reverse-sorted on the text after the first "(", and
# awk keeps the first line seen for each leading word of the user-agent.
count_other_subscribers() {
  grep -F -- "$LOG_FDATE" "$LOG_FILE" \
    | grep -F -- " $RSS_URI" \
    | grep -F -v 'subscribers; feed-id=' \
    | grep -E '[0-9]+ subscribers' \
    | grep -E -o '"[^"]+"$' \
    | sort -t'(' -k2 -sr \
    | awk '!x[$1]++' \
    | grep -E -o '[0-9]+ subscribers' \
    | sum_first_field
}

IPSUBS=$(count_direct_subscribers)
GRSUBS=$(count_feedid_subscribers)
OTHERSUBS=$(count_other_subscribers)

# Print the plain-text report on stdout.
# Arguments:
#   $1 - date label shown in the heading
#   $2 - subscribers reported with a feed-id (Google Reader)
#   $3 - subscribers reported by other aggregators
#   $4 - direct subscribers (unique client addresses)
# Empty or missing counts are treated as 0, so a pipeline that produced no
# output cannot break the total. The %' flag requests locale thousands
# grouping.
build_report() {
  local fdate=$1
  local gr=$(( ${2:-0} ))
  local other=$(( ${3:-0} ))
  local direct=$(( ${4:-0} ))

  # The date is passed as an argument, never as part of the format string.
  printf 'Feed stats for %s:\n\n' "$fdate"
  printf "%'8d Google Reader subscribers\n" "$gr"
  printf "%'8d subscribers from other aggregators\n" "$other"
  printf "%'8d direct subscribers\n" "$direct"
  echo   "--------"
  printf "%'8d total subscribers\n" "$(( gr + other + direct ))"
}

REPORT=$(build_report "$HUMAN_FDATE" "$GRSUBS" "$OTHERSUBS" "$IPSUBS")

printf '%s\n' "$REPORT"
echo ""
echo "Also emailed to $MAIL_TO."

# MAIL_TO is deliberately left unquoted so that a space-separated list of
# recipients is passed to mail as separate arguments.
# shellcheck disable=SC2086
printf '%s \n' "$REPORT" | mail -s "[$HUMAN_FDATE] $MAIL_SUBJECT" $MAIL_TO