Parse IIS Log
open System
open System.Net
open System.IO
type LogLine =
{ Timestamp: DateTime
Method: string
Path: string
Query: string
HttpVersion: string
UserAgent: string
Referer: string
HttpStatus: HttpStatusCode
HttpSubStatus: int
SystemStatus: int
ByteSent: int
ByteReceived: int
TimeProcessed: TimeSpan
}
module LogLine =
let Query log = log.Query
let UserAgent log = log.UserAgent
let Timestamp log = log.Timestamp
let Path log = log.Path
let TimeProcessed log = log.TimeProcessed
let requestTime log = log.Timestamp - log.TimeProcessed
type AggregatedStats =
{ Count: int
AverageTime: float
}
module AggregatedStats =
let Count stat = stat.Count
let TimeConsumed stat = float(stat.Count) * stat.AverageTime
module W3CLogFields =
[<Literal>]
let Date = "date"
[<Literal>]
let Time = "time"
[<Literal>]
let Method = "cs-method"
[<Literal>]
let Path = "cs-uri-stem"
[<Literal>]
let Query = "cs-uri-query"
[<Literal>]
let Version = "cs-version"
[<Literal>]
let UserAgent = "cs(User-Agent)"
[<Literal>]
let Referer = "cs(Referer)"
[<Literal>]
let Status = "sc-status"
[<Literal>]
let Substatus = "sc-substatus"
[<Literal>]
let SystemStatus = "sc-win32-status"
[<Literal>]
let ByteSent = "sc-bytes"
[<Literal>]
let ByteReceived = "cs-bytes"
[<Literal>]
let TimeTaken = "time-taken"
let all =
[ Date
Time
Method
Path
Query
Version
UserAgent
Referer
Status
Substatus
SystemStatus
ByteSent
ByteReceived
TimeTaken ]
type LogFieldIndexer = Map<string,int>
let logField :LogFieldIndexer -> string[] -> string -> string = fun index s key -> s.[index.[key]]
let toLogLine :LogFieldIndexer -> string -> LogLine = fun index s ->
let dashBlank line = if line = "-" then String.Empty else line
let ss = s.Split(' ')
let field = logField index ss
{ Timestamp = DateTime.Parse(field W3CLogFields.Date + "T" + field W3CLogFields.Time)
Method = field W3CLogFields.Method
Path = field W3CLogFields.Path
Query = dashBlank <| field W3CLogFields.Query
HttpVersion = field W3CLogFields.Version
UserAgent = field W3CLogFields.UserAgent
Referer = dashBlank <| field W3CLogFields.Referer
HttpStatus = Enum.Parse(typeof<HttpStatusCode>, field W3CLogFields.Status) :?> HttpStatusCode
HttpSubStatus = Int32.Parse(field W3CLogFields.Substatus)
SystemStatus = Int32.Parse(field W3CLogFields.SystemStatus)
ByteSent = Int32.Parse(field W3CLogFields.ByteSent)
ByteReceived = Int32.Parse(field W3CLogFields.ByteReceived)
TimeProcessed = field W3CLogFields.TimeTaken |> Double.Parse |> TimeSpan.FromMilliseconds
}
let isComment :string -> bool = fun s -> s.Length = 0 || s.[0] = '#'
let loadLogFile path =
let (comments, data) = path |> File.ReadAllLines |> Array.partition isComment
let fieldDef = comments |> Seq.find (fun s -> s.StartsWith("#Fields:"))
let fields = fieldDef.Split(':').[1].Trim().Split(' ')
let indexer =
W3CLogFields.all
|> Seq.map (fun s -> s, fields |> Array.findIndex ((=)s))
|> Map.ofSeq
data |> Seq.map (toLogLine indexer)
let loadAllLogs :string * string -> LogLine seq = Directory.EnumerateFiles >> Seq.collect loadLogFile
let findTop<'any,'sortkey when 'sortkey: comparison>
:int -> ('any -> 'sortkey) -> 'any seq -> 'any seq =
fun N getter -> Seq.sortByDescending getter >> Seq.truncate N
let findTop10<'any,'sortkey when 'sortkey: comparison>
:('any -> 'sortkey) -> 'any seq -> 'any seq =
findTop 10
let trValue :('oldv -> 'newv) -> 'key * 'oldv -> 'key * 'newv =
fun f (k,v) -> (k, f(v))
let aggregateStats :LogLine[] -> AggregatedStats = fun logs ->
{ Count = logs.Length
AverageTime = logs |> Seq.averageBy (fun ll -> ll.TimeProcessed.TotalSeconds)
}
let spatialFrequency :LogLine[] -> (int * int)[] = fun logs ->
let secDiff(t1: DateTime, t2: DateTime) = (t2 - t1).TotalSeconds
let bin2 x = x / 2
match logs with
| [||]
| [|_|] -> [| Int32.MaxValue, 0 |]
| logs ->
logs |> Seq.map LogLine.requestTime
|> Seq.sort
|> Seq.pairwise
|> Seq.map (int << ceil << secDiff)
|> Seq.groupBy bin2
|> Seq.map (trValue Seq.length)
|> Seq.sortBy fst
|> Seq.toArray
//-------- MAIN -------
let path = Environment.GetCommandLineArgs().[2]
let logs = loadAllLogs (path, "*.log") |> Seq.toArray
printfn "%d lines has been loaded." logs.Length
let pathLogs = logs |> Seq.groupBy LogLine.Path
|> Seq.map (trValue Seq.toArray)
|> Seq.toArray
printfn "%d paths have been logged." pathLogs.Length
let startDate = logs |> Seq.map LogLine.requestTime |> Seq.min
let endDate = logs |> Seq.map LogLine.Timestamp |> Seq.max
let samplingTime = endDate - startDate
let toFrequency x = x / samplingTime.TotalMinutes
printfn "Data range: %d minutes started from %A" samplingTime.Minutes startDate
let simpleLogs = pathLogs |> Array.map (trValue aggregateStats)
printfn "Top 10 calls"
printfn " %9s %7s %7s %s" "RPM" "Average" "% calls" "path"
simpleLogs |> findTop10 (snd >> AggregatedStats.Count)
|> Seq.iter (fun (uri,stats) ->
printfn " %9.3f %7.3f %7.2f %s"
(toFrequency <| float stats.Count)
stats.AverageTime
(100. * float stats.Count / float logs.Length)
uri)
printfn ""
printfn "Top 10 most time consumed"
printfn " %13s %s" "Time consumed" "path"
simpleLogs |> findTop10 (snd >> AggregatedStats.TimeConsumed)
|> Seq.iter (fun (uri,stats) -> printfn " %13.3f %s" (AggregatedStats.TimeConsumed stats) uri)
printfn ""
type ConsecutiveStats =
{ Diff: int
Frequency: int
ParamVariation: int
TotalCalls: int
}
module ConsecutiveStats =
let frequency stat = stat.Frequency
let variationPercent stat =
if stat.TotalCalls = 1 then
0.
else
float(stat.ParamVariation - 1) / float(stat.TotalCalls - 1)
let cachability stat = 1. - (stat |> variationPercent)
let collectStats :LogLine[] -> ConsecutiveStats = fun ll ->
let maxFreq = ll |> spatialFrequency |> Array.maxBy snd
{ Diff = fst maxFreq
Frequency = snd maxFreq
ParamVariation = ll |> Seq.map LogLine.Query |> Seq.distinct |> Seq.length
TotalCalls = ll.Length
}
printfn "Top consecutive calls"
printfn " %4s %9s %16s %s" "Diff" "Frequency" "Cacheability (%)" "path"
pathLogs |> Seq.map (trValue collectStats)
|> findTop 20 (snd >> ConsecutiveStats.frequency)
|> Seq.iter (fun (uri,stats) -> printfn " %4d %9d %16.2f %s" stats.Diff stats.Frequency (100. * ConsecutiveStats.cachability stats) uri)
let agents =
logs |> Seq.map LogLine.UserAgent
|> Seq.groupBy id
|> Seq.map (trValue Seq.length)
|> findTop10 snd
|> Seq.toArray
if agents.Length > 1 then
printfn "Top 10 agents"
printfn " %6s %s" "Agents" "Count"
agents |> Seq.iter (fun (r, count) -> printfn " %6d %s" count r)
else
printfn "No agent detected"
0