julianjensen
9/20/2017 - 10:53 AM

xlsx-csv-to-json

xlsx-csv-to-json

#!/usr/bin/env node
/** ****************************************************************************************************
 * File: XLS/XLSX/CSV to JSON array converter
 * @author julian on 12/8/16
 * @version 1.0.0
 *******************************************************************************************************/
'use strict';

const
    csv = require( 'csv-parse' ),
    // csv = _csv.parse(),
    xlsx_to_json = require( 'xlsx2json' ),
    fs = require( 'fs' ),
    _ = require( 'lodash' ),
    chalk = require( 'chalk' ),
    error = chalk.bold.red,
    path = require( 'path' ),
    cli = require( 'command-line-args' ),
    cliHelp = require( 'command-line-usage' ),
    optionDefs = [
        {
            header: chalk.bold.cyan( 'Import Utility version 0.0.1' ),
            content: 'Convert input from XLSX/CSV to import-ready JSON format'
        },
        {
            header: chalk.bold.cyan( 'Synopsis' ),
            content: [
                `$ ${path.basename( process.argv[ 1 ] )} some-data.xlsx >data.json`,
                `$ ${path.basename( process.argv[ 1 ] )} --file=some-data.xlsx -o data.json`
            ]
        },
        {
            header: chalk.bold.cyan( 'Options' ),
            optionList: [
                {
                    name: 'help',
                    alias: 'h',
                    description: 'Display this usage guide',
                    type: Boolean
                },
                {
                    name: 'verbose',
                    alias: 'v',
                    description: 'Make the conversion verbose',
                    type: Boolean
                },
                {
                    name: 'separator',
                    description: 'Default field separator.',
                    defaultValue: ',',
                    type: String
                },
                {
                    name: 'sheet',
                    description: `Sheet to read from, defaults to ${chalk.blue.italic.bold( '0' )}`,
                    defaultValue: 0,
                    type: Number
                },
                {
                    name: 'headers',
                    description: `Header rows to skip, defaults to ${chalk.blue.italic.bold( '1' )}`,
                    defaultValue: 1,
                    type: Number
                },
                {
                    name: 'mapping',
                    alias: 'm',
                    multiple: true,
                    description: `Maps a column name to a JSON field name. Format is ${chalk.blue.italic.bold( 'ColumnName=JsonName' )}`,
                    type: v => {
                        let m;

                        if ( typeof v !== 'string' ) return new Error( 'Mapping must be a string' );

                        m = v.match( /^\s*([-0-9_\w]+)\s*=\s*([-0-9_\w]+)\s*$/ );

                        if ( !m ) return new Error( 'Mapping must be in the format: <string>=<string>' );

                        return [ m[ 1 ], m[ 2 ] ];
                    }
                },
                {
                    name: 'exclude',
                    alias: 'x',
                    multiple: true,
                    description: `Specifies a column to exclude.`,
                    type: String
                },
                {
                    name: 'file',
                    alias: 'f',
                    description: 'The input file to process.',
                    multiple: true,
                    defaultOption: true,
                    typeLabel: '[underline]{file} ...',
                    type: String
                },
                {
                    name: 'save',
                    alias: 'a',
                    description: `Save the file to the same place and name except for a ${chalk.italic( 'JSON' )} file extension`,
                    type: Boolean
                },
                {
                    name: 'output',
                    alias: 'o',
                    description: `The output file. If omitted, defaults to ${chalk.blue.italic.bold( 'stdout' )}`,
                    typeLabel: '[underline]{output} ...',
                    type: String
                }
            ]
        }
    ],
    args = cli( optionDefs[ 2 ].optionList ),
    usage = msg => { console.error( error( `\nError:\n\t${msg}\n` ) ); console.log( cliHelp( optionDefs ) ); process.exit( 1 ); };

let exclude = [], mapping = {};

if ( args.mapping )
    args.mapping.forEach( ( [ from, to ] ) => mapping[ from ] = to );

if ( args.exclude )
    exclude = exclude.concat( args.exclude.map( s => s.replace( /\s/g, '' ).toLowerCase() ) );

if ( !args.file || !args.file.length )
    usage( "You must specify at least one input file. It can be either XLS/XLSX or CSV" );

Promise.all( args.file.map( do_one_file ) ).then( () => process.exit() );

function do_one_file( fileName )
{
    const reader = fileName.toLowerCase().endsWith( 'csv' )
        ? read_csv( fileName )
        : xlsx_to_json( fileName, {
            sheet: args.hasOwnProperty( 'sheet' ) ? args.sheet : 0,
            keysRow: args.hasOwnProperty( 'headers' ) ? args.headers : 1
        } ).then( data => data.map( fix_entry ) );

    return reader
        .then( toSave => {
            let fi = path.parse( fileName );

            if ( args.verbose ) console.log( `Writing ${toSave.length} entries...` );

            if ( args.output )
                fs.writeFileSync( args.output, toSave );
            else if ( args.save )
                fs.writeFileSync( path.join( fi.dir, fi.name + '.json' ), toSave );
            else
                process.stdout.write( JSON.stringify( toSave, null, 4 ) );
        } )
        .catch( err => {
            console.error( err );
            process.exit( 1 );
        } );
}

function read_csv( fileName )
{
    return new Promise( ( resolve, reject ) => {
        fs.createReadStream( fileName ).pipe( csv( {}, ( err, recs ) => {
            if ( err ) reject( err );

            let keys = recs[ 0 ];
            resolve( recs.slice( 1 ).map( raw => {
                const no = {};

                keys.forEach( ( key, i ) => {
                    key = map_exclude( key );
                    if ( key ) no[ key ] = raw[ i ];
                } );

                return fix_entry( no );
            } ) );
        } ) );
    } );
}

function fix_entry( row )
{
    const
        str = s => typeof s === 'string',
        no = {};

    Object.keys( row ).forEach( key => {
        let v = row[ key ],
            nk = map_exclude( key[ 0 ].toLowerCase() + key.substr( 1 ) );

        if ( !nk ) return;

        if ( str( v ) && /^\d+$/.test( v ) )
            no[ nk ] = Number( v );
        else if ( str( v ) || v )
            no[ nk ] = v;
    } );

    return no;
}

function map_exclude( key )
{
    let k = key.toLowerCase();

    if ( exclude.indexOf( k ) !== -1 ) return void 0;

    return mapping[ k ] ? mapping[ k ] : key;
}