joewiz
9/12/2016 - 2:53 PM

Convert CSV to XML, with XQuery

Convert CSV to XML, with XQuery

xquery version "3.1";

(: XQuery adaptation of https://github.com/digital-preservation/csv-tools/blob/master/csv-to-xml_v3.xsl.
   See also the thread on basex-talk https://mailman.uni-konstanz.de/pipermail/basex-talk/2016-September/011272.html.
:)

(:~
 : Splits a single CSV record into its cells.
 :
 : Cells are separated by commas. A cell wrapped in double quotes may contain
 : commas, and a doubled quote ("") inside such a cell stands for one literal
 : quote character. Surrounding quotes are removed and doubled quotes are
 : unescaped; unquoted cells are returned unchanged.
 :
 : @param $row one CSV record, without its line terminator
 : @return the cell values in document order; empty cells are returned as
 :         zero-length strings so that positions stay aligned with the header
 :)
declare function local:get-cells($row as xs:string) as xs:string* {
    (: XPath regular expressions have no lookahead, so a comma is appended to
       make every cell, including the last one, end with a delimiter :)
    let $terminated := $row || ","
    let $analysis := fn:analyze-string($terminated, '(("[^"]*")+|[^,]*),')
    (: iterate over whole matches rather than captured groups, so the result
       does not depend on how a processor represents a zero-length capture :)
    for $match in $analysis/fn:match
    let $matched := string($match)
    (: every match ends with exactly one delimiter comma; drop it :)
    let $cell := substring($matched, 1, string-length($matched) - 1)
    return
        if (matches($cell, '^".*"$', 's')) then
            (: strip the enclosing quotes, then collapse escaped quotes;
               this also turns an empty quoted cell ("") into '' :)
            replace(substring($cell, 2, string-length($cell) - 2), '""', '"')
        else
            $cell
};

(: Sample input: the first record is the header, the remaining records are data :)
let $csv := 'Author,Title,ISBN,Binding,Year Published
Jeannette Walls,The Glass Castle,074324754X,Paperback,2006
James Surowiecki,The Wisdom of Crowds,9780385503860,Paperback,2005
Lawrence Lessig,The Future of Ideas,9780375505782,Paperback,2002
"Larry Bossidy, Ram Charan, Charles Burck",Execution,9780609610572,Hardcover,2002
Kurt Vonnegut,Slaughterhouse-Five,9780791059258,Paperback,1999'
(: accept both LF and CRLF line endings, and skip blank lines (e.g. a trailing
   newline) so they do not turn into spurious rows :)
let $lines := tokenize($csv, '\r?\n')[normalize-space(.) ne '']
let $header-row := fn:head($lines)
let $body-rows := fn:tail($lines)
(: whitespace in a header is replaced with "_" so it can be used in an element
   name; the "!" mapping also yields no headers when the input has no lines :)
let $headers := $header-row ! local:get-cells(.) ! replace(., '\s+', '_')
return
    element csv {
        for $row in $body-rows
        let $cells := local:get-cells($row)
        return
            element row {
                for $cell at $count in $cells
                (: fall back to a positional name when the row has more cells
                   than the header, or the header cell is empty; an empty
                   name would make the element constructor fail :)
                let $name := ($headers[$count][. ne ''], 'column_' || $count)[1]
                return element {$name} {$cell}
            }
    }