ruxo
7/2/2017 - 4:48 PM

Parse IIS Log

Parse IIS Log

open System
open System.Net
open System.IO

type LogLine =
    { Timestamp: DateTime
      Method: string
      Path: string
      Query: string
      HttpVersion: string
      UserAgent: string
      Referer: string
      HttpStatus: HttpStatusCode
      HttpSubStatus: int
      SystemStatus: int
      ByteSent: int
      ByteReceived: int
      TimeProcessed: TimeSpan
    }
module LogLine =
    let Query log = log.Query
    let UserAgent log = log.UserAgent
    let Timestamp log = log.Timestamp
    let Path log = log.Path
    let TimeProcessed log = log.TimeProcessed
    let requestTime log = log.Timestamp - log.TimeProcessed

type AggregatedStats =
    { Count: int
      AverageTime: float
    }

module AggregatedStats =
    let Count stat = stat.Count
    let TimeConsumed stat = float(stat.Count) * stat.AverageTime

module W3CLogFields =
    [<Literal>]
    let Date = "date"
    [<Literal>]
    let Time = "time"
    [<Literal>]
    let Method = "cs-method"
    [<Literal>]
    let Path = "cs-uri-stem"
    [<Literal>]
    let Query = "cs-uri-query"
    [<Literal>]
    let Version = "cs-version"
    [<Literal>]
    let UserAgent = "cs(User-Agent)"
    [<Literal>]
    let Referer = "cs(Referer)"
    [<Literal>]
    let Status = "sc-status"
    [<Literal>]
    let Substatus = "sc-substatus"
    [<Literal>]
    let SystemStatus = "sc-win32-status"
    [<Literal>]
    let ByteSent = "sc-bytes"
    [<Literal>]
    let ByteReceived = "cs-bytes"
    [<Literal>]
    let TimeTaken = "time-taken"

    let all = 
        [ Date
          Time
          Method
          Path
          Query
          Version
          UserAgent
          Referer
          Status
          Substatus
          SystemStatus
          ByteSent
          ByteReceived
          TimeTaken     ]

type LogFieldIndexer = Map<string,int>
let logField :LogFieldIndexer -> string[] -> string -> string = fun index s key -> s.[index.[key]]

let toLogLine :LogFieldIndexer -> string -> LogLine = fun index s ->
    let dashBlank line = if line = "-" then String.Empty else line
    let ss = s.Split(' ')
    let field = logField index ss
    { Timestamp = DateTime.Parse(field W3CLogFields.Date + "T" + field W3CLogFields.Time)
      Method = field W3CLogFields.Method
      Path = field W3CLogFields.Path
      Query = dashBlank <| field W3CLogFields.Query
      HttpVersion = field W3CLogFields.Version
      UserAgent = field W3CLogFields.UserAgent
      Referer = dashBlank <| field W3CLogFields.Referer
      HttpStatus = Enum.Parse(typeof<HttpStatusCode>, field W3CLogFields.Status) :?> HttpStatusCode
      HttpSubStatus = Int32.Parse(field W3CLogFields.Substatus)
      SystemStatus = Int32.Parse(field W3CLogFields.SystemStatus)
      ByteSent = Int32.Parse(field W3CLogFields.ByteSent)
      ByteReceived = Int32.Parse(field W3CLogFields.ByteReceived)
      TimeProcessed = field W3CLogFields.TimeTaken |> Double.Parse |> TimeSpan.FromMilliseconds
    }

let isComment :string -> bool = fun s -> s.Length = 0 || s.[0] = '#'

let loadLogFile path =
    let (comments, data) = path |> File.ReadAllLines |> Array.partition isComment
    let fieldDef = comments |> Seq.find (fun s -> s.StartsWith("#Fields:"))
    let fields = fieldDef.Split(':').[1].Trim().Split(' ')
    let indexer =
        W3CLogFields.all
            |> Seq.map (fun s -> s, fields |> Array.findIndex ((=)s))
            |> Map.ofSeq
    data |> Seq.map (toLogLine indexer)

let loadAllLogs :string * string -> LogLine seq = Directory.EnumerateFiles >> Seq.collect loadLogFile

let findTop<'any,'sortkey when 'sortkey: comparison>
    :int -> ('any -> 'sortkey) -> 'any seq -> 'any seq = 
    fun N getter -> Seq.sortByDescending getter >> Seq.truncate N

let findTop10<'any,'sortkey when 'sortkey: comparison>
    :('any -> 'sortkey) -> 'any seq -> 'any seq =
    findTop 10

let trValue :('oldv -> 'newv) -> 'key * 'oldv -> 'key * 'newv =
    fun f (k,v) -> (k, f(v))

let aggregateStats :LogLine[] -> AggregatedStats = fun logs ->
    { Count = logs.Length
      AverageTime = logs |> Seq.averageBy (fun ll -> ll.TimeProcessed.TotalSeconds)
    }

let spatialFrequency :LogLine[] -> (int * int)[] = fun logs ->
    let secDiff(t1: DateTime, t2: DateTime) = (t2 - t1).TotalSeconds
    let bin2 x = x / 2

    match logs with
    | [||]
    | [|_|] -> [| Int32.MaxValue, 0 |]
    | logs ->
        logs |> Seq.map LogLine.requestTime
             |> Seq.sort
             |> Seq.pairwise
             |> Seq.map (int << ceil << secDiff)
             |> Seq.groupBy bin2
             |> Seq.map (trValue Seq.length)
             |> Seq.sortBy fst
             |> Seq.toArray

//-------- MAIN -------
let path = Environment.GetCommandLineArgs().[2]

let logs = loadAllLogs (path, "*.log") |> Seq.toArray

printfn "%d lines has been loaded." logs.Length

let pathLogs = logs |> Seq.groupBy LogLine.Path 
                    |> Seq.map (trValue Seq.toArray)
                    |> Seq.toArray

printfn "%d paths have been logged." pathLogs.Length

let startDate = logs |> Seq.map LogLine.requestTime |> Seq.min
let endDate = logs |> Seq.map LogLine.Timestamp |> Seq.max
let samplingTime = endDate - startDate
let toFrequency x = x / samplingTime.TotalMinutes

printfn "Data range: %d minutes started from %A" samplingTime.Minutes startDate

let simpleLogs = pathLogs |> Array.map (trValue aggregateStats)

printfn "Top 10 calls"
printfn "  %9s %7s %7s %s" "RPM" "Average" "% calls" "path"
simpleLogs |> findTop10 (snd >> AggregatedStats.Count)
           |> Seq.iter (fun (uri,stats) ->
                            printfn "  %9.3f %7.3f %7.2f %s"
                                (toFrequency <| float stats.Count)
                                stats.AverageTime
                                (100. * float stats.Count / float logs.Length)
                                uri)
printfn ""

printfn "Top 10 most time consumed"
printfn "  %13s %s" "Time consumed" "path"
simpleLogs |> findTop10 (snd >> AggregatedStats.TimeConsumed)
           |> Seq.iter (fun (uri,stats) -> printfn "  %13.3f %s" (AggregatedStats.TimeConsumed stats) uri)
printfn ""

type ConsecutiveStats =
    { Diff: int
      Frequency: int
      ParamVariation: int
      TotalCalls: int
    }
module ConsecutiveStats =
    let frequency stat = stat.Frequency
    let variationPercent stat =
        if stat.TotalCalls = 1 then
            0.
        else
            float(stat.ParamVariation - 1) / float(stat.TotalCalls - 1)
    let cachability stat = 1. - (stat |> variationPercent)

let collectStats :LogLine[] -> ConsecutiveStats = fun ll ->
    let maxFreq = ll |> spatialFrequency |> Array.maxBy snd
    { Diff = fst maxFreq
      Frequency = snd maxFreq
      ParamVariation = ll |> Seq.map LogLine.Query |> Seq.distinct |> Seq.length
      TotalCalls = ll.Length
    }

printfn "Top consecutive calls"
printfn "  %4s %9s %16s %s" "Diff" "Frequency" "Cacheability (%)" "path"
pathLogs |> Seq.map (trValue collectStats)
         |> findTop 20 (snd >> ConsecutiveStats.frequency)
         |> Seq.iter (fun (uri,stats) -> printfn "  %4d %9d %16.2f %s" stats.Diff stats.Frequency (100. * ConsecutiveStats.cachability stats) uri)

let agents =
    logs |> Seq.map LogLine.UserAgent
         |> Seq.groupBy id
         |> Seq.map (trValue Seq.length)
         |> findTop10 snd
         |> Seq.toArray

if agents.Length > 1 then
    printfn "Top 10 agents"
    printfn "  %6s %s" "Agents" "Count"
    agents |> Seq.iter (fun (r, count) -> printfn "  %6d %s" count r)
else
    printfn "No agent detected"

0