ruxo
7/19/2015 - 5:25 AM

F# Command line parser, inspired by NDesk options lib.

F# Command line parser, inspired by NDesk options lib.

// paket dependencies:
// source https://nuget.org/api/v2
// nuget unquote

#r "packages/Unquote/lib/net45/Unquote.dll"
#load "optionparser.fs"

open System
open Swensen.Unquote
open RZ.OptionParser
open ConfigParser
open CommandLineParser

let tr3to2 (a,b,_) = a,b
let nop0 _ = ()
let nop _ _ = ()

module ``Test ArgType`` =
    let rules =
        [ OptionType.NoValue, ShortForm (NBString "h"), nop
          OptionType.NoValue, LongForm (NBString "help"), nop
          OptionType.Optional, ShortForm (NBString "d"), nop
          OptionType.Required, LongForm (NBString "H"), nop
        ]
    let tr (result :MatchedRule<unit> option) :(OptionType * NBString) option =
        result |> Option.map tr3to2

    test <@ tr(_matchSingleRule rules ArgMatcher.isShortForm (NBString "h")) = (Some (OptionType.NoValue, NBString "h")) @>
    test <@ tr(_matchSingleRule rules ArgMatcher.isShortForm (NBString "H")) = None @>
    test <@ tr(_matchSingleRule rules ArgMatcher.isLongForm (NBString "H")) = (Some (OptionType.Required, NBString "H")) @>

    module ``Test Break Options`` =
        test <@ (breakOptions rules (ShortFormOption (NBString "hd", ""))) = [ CompletedOption (NBString "h", nop0)
                                                                               CompletedOption (NBString "d", nop0)] @>
        let test_rules = [ OptionType.Optional, ShortForm (NBString "v"), nop 
                           OptionType.Required, ShortForm (NBString "p"), nop ]
        test <@ (breakOptions test_rules (ShortFormOption (NBString "p", "9000"))) = [ CompletedOption (NBString "p", nop0) ] @>
        test <@ (breakOptions test_rules (NonOption (NBString "someone.dll"))) = [ Constant (NBString "someone.dll") ] @>
        test <@ (breakOptions test_rules (ShortFormOption (NBString "v", ""))) = [ CompletedOption (NBString "v", nop0) ] @>

module ``Test argument passing`` =
    let v = ref 0
    let p = ref 0
    let test_rules = [ OptionType.Optional, ShortForm (NBString "v"), fun a x -> (v := if x = null then 0 else Int32.Parse x) ; a 
                       OptionType.Required, ShortForm (NBString "p"), fun a x -> (p := if x = null then 0 else Int32.Parse x) ; a ]
    let result = breakOptions test_rules (ShortFormOption (NBString "vp", "9000"))

    let executeArgToken = 
        function
        | Constant _ -> ()
        | CompletedOption (_, fn) -> fn()
        | RequiredOption (_, fn) -> fn () "111"

    result |> List.iter executeArgToken

    if not <| (!v = 0 && !p = 9000) then printfn "passing `vp` and param failed: v=%d p=%d" !v !p

    raises <@ breakOptions test_rules (ShortFormOption (NBString "pv", "9000")) @>

module ``Test OptionParser argument recognition`` =
    test <@ recognize (NBString.from "-p=9000") = ShortFormOption ((NBString.from "p"), "9000") @>
    test <@ recognize (NBString "someone.dll") = NonOption (NBString "someone.dll") @>
    test <@ recognize (NBString "-v") = ShortFormOption ((NBString "v"), "") @>
    test <@ recognize (NBString "--v") = ShortFormOption ((NBString "v"), "") @>
    test <@ recognize (NBString "-abcd") = ShortFormOption (NBString "abcd", "") @>

module ``Test pattern conversion`` =
    test <@ filterPattern (["v"; "verbose"], nop) |> List.map fst = [NBString "v"; NBString "verbose"] @>
    test <@ filterPattern (["v"; "   "], nop) |> List.map fst = [NBString "v"] @>
    test <@ filterPattern (["v"; ""; "verbose"], nop) |> List.map fst = [NBString "v"; NBString "verbose"] @>

    test <@ (tr3to2 <| recognizePattern (NBString "v" , nop)) = (OptionType.NoValue, NBString "v") @>
    test <@ (tr3to2 <| recognizePattern (NBString "p:", nop)) = (OptionType.Required, NBString "p") @>

module ``Test rule validation :`` =
    test <@ (validateMatchedRule [ OptionType.NoValue, (NBString "p"), nop
                                   OptionType.Optional, (NBString "port"), nop]
            |> Option.get
            |> Seq.map tr3to2
            |> Seq.toList) = [ OptionType.Optional, (NBString "p")
                               OptionType.Optional, (NBString "port") ] @>

module ``Test Final Rules`` =
    let patterns = [
              ["v"; "verbose"], nop
              ["p:"], nop
            ]
    let normResult<'ctx> :(OptionType * ArgMatcher * Handler<'ctx>) seq -> (OptionType * ArgMatcher) list = Seq.map tr3to2 >> Seq.toList
    test <@ normResult (makeFinalRules patterns) = [ OptionType.NoValue, ShortForm (NBString "v")
                                                     OptionType.NoValue, LongForm (NBString "verbose")
                                                     OptionType.Required, ShortForm(NBString "p") ] @>

module ``Test final parser: `` =
    type Context = { verbose :int; port :int }
    let patterns = [
              ["v"; "verbose"], fun ctx _ -> { ctx with verbose=ctx.verbose + 1 }
              ["p="], fun ctx v -> {ctx with port=if v = null then -1 else Int32.Parse v }
            ]
    let p = parser patterns { verbose=0; port=0 }
    test <@ p ["-p=9000";"someone.dll";"-v";"--v"] = ({ verbose=2; port=9000 }, ["someone.dll"]) @>
    test <@ p ["-vp"; "9000"] = ({ verbose=1; port= -1 }, ["9000"]) @>

    module ``Optional option without parameter should be passed with null: `` =
        test <@ p ["-p"] = ({ verbose=0; port= -1 }, []) @>

    module ``Mandatory option without parameter should causes an error: `` =
        let patterns = [
                  ["v"; "verbose"], nop
                  ["p:"], nop
                ]
        let p = parser patterns ()

        raises <@ p ["-p"] @>
        
module ``Another case :`` =
    type Context = { port :int; bin_dir :string }
    
    let patterns = [
        ["p"; "port="], fun ctx v -> { ctx with port=Int32.Parse v }
        ["bin="], fun ctx v -> { ctx with bin_dir=v }
    ]
    
    let p = parser patterns { port=9000; bin_dir=null }
    
    test <@ p ["--port=1234"; "bin=555"] = ({ port=1234; bin_dir=null }, ["bin=555"]) @>

module ``Test mandatory option :`` =
    type Context = { verbose :int; port :int }
    let patterns = [
              ["v"; "verbose"], fun ctx _ -> { ctx with verbose=ctx.verbose + 1 }
              ["p:"], fun ctx v -> {ctx with port=if v = null then -1 else Int32.Parse v }
            ]
    let p = parser patterns { verbose=0; port=0 }
    test <@ p ["-p=9000";"someone.dll";"-v";"--v"] = ({ verbose=2; port=9000 }, ["someone.dll"]) @>
    test <@ p ["-vp"; "9000"] = ({ verbose=1; port=9000 }, []) @>

module ``'--' breaks options and text :`` =
    type Context = { v :int; p :int }

    let patterns = [
                ["v"], fun ctx _ -> { ctx with v=ctx.v + 1 }
                ["p"], fun ctx _ -> { ctx with p=ctx.p + 1 }
            ]
    let psr = parser patterns { v=0; p=0 }

    test <@ psr ["-v"; "--"; "-p"] = ({ v=1; p=0 }, ["-p"]) @>
module RZ.OptionParser

open System
open System.Collections.Generic

type Handler<'ctx> = 'ctx -> string -> 'ctx

let fst3 (x,_,_) = x
let snd3 (_,x,_) = x
let thd  (_,_,x) = x

/// <summary>
/// New Non-blank string
/// </summary>
type NBString = NBString of string
[<CompilationRepresentation(CompilationRepresentationFlags.ModuleSuffix)>]
module NBString =
    let from s =
        if String.IsNullOrWhiteSpace s then invalidArg "s" "String cannot be blank or empty"
        NBString s
    let get (NBString s) = s

type ArgMatcher =
  | ShortForm of NBString
  | LongForm of NBString

[<CompilationRepresentation(CompilationRepresentationFlags.ModuleSuffix)>]
module ArgMatcher =
    let fromString ((NBString s) as nbs) = if s.Length = 1 then ShortForm nbs else LongForm nbs

    let isLongForm = function
        | ShortForm _ -> false
        | LongForm _ -> true

    let isShortForm = function
        | ShortForm _ -> true
        | LongForm _ -> false

    let canMatch (NBString s) = function
      | ShortForm (NBString p) -> p.[0] = s.[0]
      | LongForm (NBString p) -> p = s

    let toString = function
        | ShortForm s -> s
        | LongForm s -> s

type OptionType =
  | NoValue = 0
  | Required = 1
  | Optional = 2

type RuleMatcher<'ctx> = OptionType * ArgMatcher * Handler<'ctx>
type MatchedRule<'ctx> = OptionType * NBString * Handler<'ctx>
type Pattern<'ctx> = string list * Handler<'ctx>

module ConfigParser =
    [<Literal>]
    let OptionalDelimiter = '='
    [<Literal>]
    let RequiredDelimiter = ':'

    let strExcludeLast (s :string) = s.Substring(0, s.Length-1)

    let recognizePattern<'ctx> (NBString pattern, handler :Handler<'ctx>) :MatchedRule<'ctx> =
        match pattern.TrimEnd().[pattern.Length-1] with
        | OptionalDelimiter -> OptionType.Optional, (NBString.from <| strExcludeLast pattern), handler
        | RequiredDelimiter -> OptionType.Required, (NBString.from <| strExcludeLast pattern), handler
        | _                 -> OptionType.NoValue, (NBString pattern), handler

    let filterPattern<'ctx> (patterns :string list, handler :Handler<'ctx>) :(NBString * Handler<'ctx>) list =
        patterns
        |> Seq.filter (not << String.IsNullOrWhiteSpace)
        |> Seq.map (fun s -> NBString s, handler)
        |> Seq.toList

    let validateMatchedRule<'ctx> (candidates :MatchedRule<'ctx> seq) :MatchedRule<'ctx> seq option =
        let option_variety =
            candidates
            |> Seq.map fst3
            |> Seq.filter (fun opt -> opt <> OptionType.NoValue)
            |> Seq.distinct
            |> Seq.toList
        match option_variety with
        | [] -> Some candidates
        | [opt_type] -> Some (candidates |> Seq.map (fun (_,txt,handler) -> opt_type,txt,handler))
        | _ -> printfn "Invalid option: %s" <| String.Join(", ", candidates
                                                                 |> Seq.map snd3
                                                                 |> Seq.map NBString.get)
               None

    let makeFinalRules<'ctx> :Pattern<'ctx> seq -> RuleMatcher<'ctx> seq =
        Seq.map filterPattern
        >> Seq.map (Seq.map recognizePattern)
        >> Seq.choose validateMatchedRule
        >> Seq.collect id
        >> Seq.map (fun (opt_type, text, handler) -> opt_type, ArgMatcher.fromString text, handler)


module CommandLineParser =
    open System.Text.RegularExpressions

    let private argument_re = Regex(@"(?<type>/|--?)(?<option>[^:=]+)([:=](?<param>.*))?", RegexOptions.Compiled)

    type BoundHandler<'ctx> = 'ctx -> 'ctx

    let cast<'T> (obj: obj) =
        match obj with
        | :? 'T as x -> Some x
        | _ -> None 

    type ArgType =
    | LongFormOption of NBString * string   // last string is either the rest of string after equal sign or empty.
    | ShortFormOption of NBString * string
    | NonOption of NBString
    
    [<CustomEquality; NoComparison>]
    type ArgToken<'ctx> =
    | Constant of NBString
    | CompletedOption of NBString * BoundHandler<'ctx>
    | RequiredOption of NBString * Handler<'ctx>
    with
        override x.Equals something =
            match (cast<ArgToken<'ctx>> something), x with
            | Some (Constant (NBString os)), (Constant (NBString xs)) -> os = xs
            | Some (CompletedOption (NBString os, _)), (CompletedOption (NBString xs, _)) -> os = xs
            | Some (RequiredOption (NBString os, _)), (RequiredOption (NBString xs, _)) -> os = xs
            | _ -> false

        override x.GetHashCode() =
            match x with
            | Constant (NBString s) -> ("Constant_" + s).GetHashCode()
            | CompletedOption (NBString s, _) -> ("CompletedOption_" + s).GetHashCode()
            | RequiredOption (NBString s, _) -> ("RequiredOption_" + s).GetHashCode()

    let (|IsStringEmpty|) = String.IsNullOrWhiteSpace

    let recognize ((NBString s) as ns) :ArgType =
        let m = argument_re.Match s
        if m.Success
            then let option_text = m.Groups.["option"].Value
                 let opt = (NBString.from option_text, m.Groups.["param"].Value)
                 let singlechar_option = option_text.Length = 1
                 (if singlechar_option || m.Groups.["type"].Value = "-" then ShortFormOption else LongFormOption) opt
            else NonOption ns
            
    let _matchSingleRule<'ctx> (rules :RuleMatcher<'ctx> seq) (matcher_filter: ArgMatcher -> bool)  (available_options :NBString) :MatchedRule<'ctx> option =
        match rules
            |> Seq.filter (snd3 >> matcher_filter)
            |> Seq.filter (snd3 >> ArgMatcher.canMatch available_options)
            |> Seq.tryHead with
        | None -> None
        | Some (opt_type, am, handler) -> Some (opt_type, (ArgMatcher.toString am), handler)

    let rec breakShort<'ctx> rules (short_rules :MatchedRule<'ctx> list) = function
        | "" -> short_rules, ""
        | leftover -> 
            match _matchSingleRule rules ArgMatcher.isShortForm (NBString leftover) with
            | None -> failwithf "Invalid option: %s" leftover
            | Some ((_, NBString rule, _) as matched_rule) ->
                let next_leftover = leftover.Substring (rule.Length)
                breakShort rules (matched_rule::short_rules) next_leftover
    /// <summary>
    /// Recognize arg with the specific rules and returns all matched rules.  Matched rules can be
    /// one or more short-form options and/or long-form option.
    /// </summary>
    /// <param name="rules"></param>
    /// <param name="arg"></param>
    let breakOptions<'ctx> (rules :RuleMatcher<'ctx> list) (arg :ArgType) :ArgToken<'ctx> list =
        match arg with
        | NonOption param -> [Constant param]
        | LongFormOption (opt, param) ->
            match _matchSingleRule rules ArgMatcher.isLongForm opt with
            | None -> failwithf "Invalid option: %s" (NBString.get opt)
            | Some (opt_type,txt,handler) ->
                match opt_type, param with
                | OptionType.NoValue, _
                | OptionType.Optional, IsStringEmpty true -> [ CompletedOption (txt, fun ctx -> handler ctx null) ]
                | OptionType.Optional, IsStringEmpty false
                | OptionType.Required, IsStringEmpty false -> [ CompletedOption (txt, fun ctx -> handler ctx param) ]
                | OptionType.Required, IsStringEmpty true -> [ RequiredOption (txt, handler) ]
                | _ -> failwithf "Unhandled option type %A" opt_type
        | ShortFormOption (NBString opt, param) ->
            let short_rules = breakShort rules [] opt |> fst

            let processToken (token_list :ArgToken<'ctx> list, param) (opt_type, txt, handler) =
                match opt_type, param with
                | OptionType.NoValue, _ -> (CompletedOption (txt, fun ctx -> handler ctx null))::token_list, param
                | OptionType.Optional, IsStringEmpty true -> (CompletedOption (txt, fun ctx -> handler ctx null))::token_list, null
                | OptionType.Optional, IsStringEmpty false
                | OptionType.Required, IsStringEmpty false -> (CompletedOption (txt, fun ctx -> handler ctx param))::token_list, null
                | OptionType.Required, IsStringEmpty true -> (RequiredOption (txt, handler))::token_list, null
                | _ -> failwithf "Unhandled option type %A" opt_type

            // if option has "Required", it must be the last in the rule list.
            let checkRuleValidity (rules :MatchedRule<'ctx> list) =
                let req_count = rules |> Seq.map fst3 |> Seq.filter ((=) OptionType.Required) |> Seq.length
                if req_count > 1 then failwith "Too many required options!"
                if req_count = 1 && (List.head rules |> fst3) <> OptionType.Required
                    then failwith "Required option must be the last in the short form argument list!"
                rules
                     
            short_rules
            |> checkRuleValidity
            |> Seq.scan processToken ([], param)
            |> Seq.last
            |> fst

    let parseOptions final_rules = recognize >> breakOptions final_rules

    let splitDoubleDash (arguments :NBString list) :(NBString list * NBString list) =
        let array = arguments |> Seq.toArray
        match array |> Array.tryFindIndex (NBString.get >> (=) "--") with
        | None -> arguments, []
        | Some idx -> ((Array.take idx array) |> Array.toList, (Array.sub array (idx+1) (array.Length - idx - 1)) |> Array.toList)

module Parser =
    open ConfigParser
    open CommandLineParser

    [<NoComparison>]
    type ParserState<'ctx> =
    | ParseOption
    | RequireParam of ArgToken<'ctx>
    type ParserData<'ctx> = ParserState<'ctx> * NBString list

    let processSingleOption (last_state, ctx, non_options) token =
        match last_state, token with
        | ParseOption, Constant nbs ->
            ParseOption, ctx, nbs::non_options
        | RequireParam (RequiredOption (_, handler)), Constant (NBString s) ->
            ParseOption, (handler ctx s), non_options
        | RequireParam _, Constant (NBString s) ->
            failwith "Invalid parsing state"
        | _, CompletedOption (_, handler) ->
            last_state, (handler ctx), non_options
        | ParseOption, RequiredOption (keyword, handler) ->
            RequireParam token, ctx, non_options
        | _, RequiredOption _ ->
            failwith "Unexpected parsing flow!? Found two required options."

    let parser<'ctx> (patterns :Pattern<'ctx> seq) :'ctx -> string seq -> 'ctx * string list =
        let final_rules = makeFinalRules patterns |> Seq.toList

        let processOptions ((last_state, ctx, non_options) as last) nbs =
            let token_list =
                match last_state with
                | ParseOption -> parseOptions final_rules nbs
                | RequireParam _ -> [ Constant nbs ]
            token_list
            |> Seq.scan processSingleOption last
            |> Seq.last

        fun init_ctx args ->
            let option_args, texts = 
                args
                |> Seq.filter (not << String.IsNullOrWhiteSpace)
                |> Seq.map NBString
                |> Seq.toList
                |> splitDoubleDash
                
            let (final_state, result_ctx, non_options) =
                option_args
                |> Seq.scan processOptions (ParseOption, init_ctx, [])
                |> Seq.last
            match final_state with
            | ParseOption -> result_ctx, texts
                                         |> Seq.map (NBString.get)
                                         |> Seq.append (non_options |> Seq.map (NBString.get))
                                         |> Seq.toList
            | RequireParam (RequiredOption (NBString opt_txt, _)) -> failwithf "Option '%s' needs argument" opt_txt
            | RequireParam _ -> failwith "Unexpected parsing state!"

let parser<'T> = Parser.parser<'T>