ericjarvies
2/10/2017 - 5:17 PM

Split & Merge JSON and NDJSON

Split & Merge JSON and NDJSON

// SPLITTING	node script-v7.js split "records_NDJSON.json" pid "json_files/"
// MERGING	node script-v7.js merge "json_files/" "records_merged.json" pid

var fs = require("fs");
const action = process.argv[2];

/**
 * Parses one input line into zero or more records.
 *
 * Accepts both supported per-line layouts: plain NDJSON (`{...}`) and the
 * bracketed style (`[{...}],`, trailing comma optional). The whole line is
 * handed to JSON.parse instead of being picked apart with a regex, so array
 * values inside a record no longer confuse format detection or truncate the
 * record.
 *
 * @param {string} line - A single line of the input file.
 * @returns {Array} The records found on the line; empty for a blank line.
 * @throws {SyntaxError} When the line is not valid JSON.
 */
function parseRecordLine(line) {
  let text = line.trim();
  if (text === "") {
    return [];
  }
  // The bracketed layout separates lines with a trailing comma.
  if (text.endsWith(",")) {
    text = text.slice(0, -1);
  }
  const parsed = JSON.parse(text);
  return Array.isArray(parsed) ? parsed : [parsed];
}

/**
 * Groups every record of the input text by the value of `key`.
 *
 * @param {string} text - Full contents of the input file.
 * @param {string} key - Property whose value names the output file.
 * @returns {Map<string, string>} Map from key value (as a string) to the
 *   NDJSON content for that key, in first-seen order, one record per line.
 * @throws {Error} When a line cannot be parsed; the message carries the
 *   1-based line number.
 */
function groupRecordsByKey(text, key) {
  const grouped = new Map();
  const lines = String(text).split("\n");
  for (let i = 0; i < lines.length; i++) {
    try {
      for (const record of parseRecordLine(lines[i])) {
        const id = String(record[key]);
        const previous = grouped.get(id) || "";
        grouped.set(id, `${previous}${JSON.stringify(record)}\n`);
      }
    } catch (err) {
      throw new Error(`Invalid record on line ${i + 1}: ${err.message}`);
    }
  }
  return grouped;
}

/**
 * Renders one stored record line in the requested merge format.
 *
 * @param {string} item - One JSON record as text (no trailing newline).
 * @param {number} format - 1: bracketed JSON, 2: NDJSON, 3: ESJSON.
 * @param {string} [index] - Property used as `_id` for format 3.
 * @returns {string} Text to append to the merged output; "" for an unknown
 *   format.
 */
function formatMergedRecord(item, format, index) {
  switch (format) {
    case 1:   // minified JSON
      return `[${item}],\n`;
    case 2:   // NDJSON
      return `${item}\n`;
    case 3: { // ESJSON
      // Serialize the action line with JSON.stringify so ids containing
      // quotes or backslashes still yield valid JSON.
      const id = String(JSON.parse(item)[index]);
      return `${JSON.stringify({ index: { _id: id } })}\n${item}\n`;
    }
    default:
      return "";
  }
}

/**
 * Splits an NDJSON / bracketed-JSON file into one file per key value.
 *
 * Records are grouped in memory first and then appended synchronously, which
 * keeps records for the same key in input order and surfaces write errors
 * instead of silently dropping them.
 *
 * @param {string} filePath - Input file to read.
 * @param {string} key - Property whose value names each output file.
 * @param {string} outputDir - Directory receiving `<value>.json` files.
 */
function runSplit(filePath, key, outputDir) {
  if (!filePath || !key || !outputDir) {
    console.log("Usage: split <input file> <key> <output dir>");
    return;
  }
  fs.readFile(filePath, "utf8", function (err, text) {
    if (err) {
      return console.error(err);
    }
    let grouped;
    try {
      grouped = groupRecordsByKey(text, key);
    } catch (parseErr) {
      return console.error(parseErr.message);
    }
    for (const [id, content] of grouped) {
      try {
        fs.appendFileSync(outputDir + "/" + id + ".json", content);
      } catch (writeErr) {
        // Report and carry on so one bad path does not lose the other files.
        console.error(writeErr);
      }
    }
  });
}

/**
 * Merges every `.json` file of a directory into one output file, in the
 * format chosen interactively through getFormat.
 *
 * @param {string} inputDir - Directory holding the per-key files.
 * @param {string} outputPath - File to write the merged result to.
 * @param {string} [index] - Property used as `_id`; required for ESJSON.
 */
function runMerge(inputDir, outputPath, index) {
  if (!inputDir || !outputPath) {
    console.log("Usage: merge <input dir> <output file> [index]");
    return;
  }
  // get the desired output format from the user
  getFormat(function (format) {
    const choice = Number(format);
    if (choice === 3 && index === undefined) {
      console.log("You forgot to declare an index (e.g.- pid) at EOL, run script again.");
      process.exit();
    }
    const chunks = [];
    for (const name of fs.readdirSync(inputDir)) {
      if (!name.endsWith(".json")) {
        continue;
      }
      const text = fs.readFileSync(inputDir + "/" + name, "utf8");
      // Split once per file and tolerate CRLF line endings.
      for (const line of text.split(/\r?\n/)) {
        if (line.trim() === "") {
          continue;
        }
        chunks.push(formatMergedRecord(line, choice, index));
      }
    }
    fs.writeFileSync(outputPath, chunks.join(""));
    // stdin may still be open from the prompt, so end the process explicitly.
    process.exit();
  });
}

if (action === "split") {
  runSplit(process.argv[3], process.argv[4], process.argv[5]);
}
else if (action === "merge") {
  runMerge(process.argv[3], process.argv[4], process.argv[5]);
}
else {
    console.log("Please provide a correct action");
}

/**
 * Prompts on stdout for the merge output format and reads the answer from
 * stdin, re-prompting (by calling itself again) until the answer is 1, 2 or 3.
 * The recursion simulates synchronous access to stdin/stdout.
 *
 * @param {function(string): void} callback - Called once with the raw text
 *   that was read (including any trailing newline) when it is a valid choice.
 */
function getFormat(callback){
  process.stdout.write(
    "Select output format: 1:minified JSON, 2: NDJSON, 3:ESJSON: "
  );
  process.stdin.setEncoding('utf8');
  process.stdin.once('data', function(val){
    // check validity of input
    // A chained comparison such as `0 < n < 3` is always true in JavaScript
    // (the first comparison yields a boolean), so match the three offered
    // choices explicitly. This also rejects empty input, which Number()
    // would otherwise coerce to 0.
    if (/^[1-3]$/.test(String(val).trim())){
      // Stop reading so the open stdin handle no longer keeps the process alive.
      process.stdin.pause();
      callback(val);
    }
    else {
      // if input is invalid, ask again
      getFormat(callback);
    }
  }).resume();
}
{"pid":"002-418-762","year_tax":2006,"value_land":908000,"value_imprv":380000,"value_total":1288000,"value_levy":7372.44}
{"pid":"002-418-762","year_tax":2007,"value_land":1225000,"value_imprv":380000,"value_total":1605000,"value_levy":7539.21}
{"pid":"002-418-762","year_tax":2008,"value_land":1590000,"value_imprv":375000,"value_total":1965000,"value_levy":7790.25}
{"pid":"002-418-762","year_tax":2009,"value_land":1590000,"value_imprv":375000,"value_total":1965000,"value_levy":8713.44}
{"pid":"002-418-762","year_tax":2010,"value_land":1673000,"value_imprv":439000,"value_total":2112000,"value_levy":9685.32}
{"pid":"002-418-762","year_tax":2011,"value_land":2045000,"value_imprv":477000,"value_total":2522000,"value_levy":10489.62}
{"pid":"002-418-762","year_tax":2012,"value_land":2735000,"value_imprv":509000,"value_total":3244000,"value_levy":11971.04}
{"pid":"002-418-762","year_tax":2013,"value_land":2823000,"value_imprv":493000,"value_total":3316000,"value_levy":12695.23}
{"pid":"002-418-762","year_tax":2014,"value_land":2716000,"value_imprv":477000,"value_total":3193000,"value_levy":13124.14}
{"pid":"002-418-762","year_tax":2015,"value_land":3155000,"value_imprv":480000,"value_total":3635000,"value_levy":14129}
{"pid":"002-429-993","year_tax":2006,"value_land":902000,"value_imprv":69500,"value_total":971500,"value_levy":5440.89}
{"pid":"002-429-993","year_tax":2007,"value_land":1095000,"value_imprv":69500,"value_total":1164500,"value_levy":5590.11}
{"pid":"002-429-993","year_tax":2008,"value_land":1220000,"value_imprv":71000,"value_total":1291000,"value_levy":5655.38}
{"pid":"002-429-993","year_tax":2009,"value_land":1220000,"value_imprv":71000,"value_total":1291000,"value_levy":6087.34}
{"pid":"002-429-993","year_tax":2010,"value_land":1344000,"value_imprv":72300,"value_total":1416300,"value_levy":6517.63}
{"pid":"002-429-993","year_tax":2011,"value_land":1613000,"value_imprv":83900,"value_total":1696900,"value_levy":7145.63}
{"pid":"002-429-993","year_tax":2012,"value_land":2206000,"value_imprv":58200,"value_total":2264200,"value_levy":8266.12}
{"pid":"002-429-993","year_tax":2013,"value_land":2200000,"value_imprv":47000,"value_total":2247000,"value_levy":8900.26}
{"pid":"002-429-993","year_tax":2014,"value_land":2231000,"value_imprv":44500,"value_total":2275500,"value_levy":9446.48}
{"pid":"002-429-993","year_tax":2015,"value_land":2464000,"value_imprv":65700,"value_total":2529700,"value_levy":10138.37}
{"pid":"002-435-373","year_tax":2006,"value_land":829000,"value_imprv":472000,"value_total":1301000,"value_levy":7557.19}
{"pid":"002-435-373","year_tax":2007,"value_land":1120000,"value_imprv":472000,"value_total":1592000,"value_levy":7493.19}
{"pid":"002-435-373","year_tax":2008,"value_land":1398000,"value_imprv":493000,"value_total":1891000,"value_levy":7644.62}
{"pid":"002-435-373","year_tax":2009,"value_land":1398000,"value_imprv":493000,"value_total":1891000,"value_levy":8386.26}
{"pid":"002-435-373","year_tax":2010,"value_land":1455000,"value_imprv":572000,"value_total":2027000,"value_levy":9257.2}
{"pid":"002-435-373","year_tax":2011,"value_land":1886000,"value_imprv":622000,"value_total":2508000,"value_levy":10154.92}
{"pid":"002-435-373","year_tax":2012,"value_land":2573000,"value_imprv":672000,"value_total":3245000,"value_levy":11742.53}
{"pid":"002-435-373","year_tax":2013,"value_land":2599000,"value_imprv":654000,"value_total":3253000,"value_levy":12498.69}
{"pid":"002-435-373","year_tax":2014,"value_land":2383000,"value_imprv":637000,"value_total":3020000,"value_levy":12736.11}
{"pid":"002-435-373","year_tax":2015,"value_land":2729000,"value_imprv":644000,"value_total":3373000,"value_levy":13103.73}
{"pid":"002-439-433","year_tax":2006,"value_land":1261000,"value_imprv":568000,"value_total":1829000,"value_levy":9975.21}
{"pid":"002-439-433","year_tax":2007,"value_land":1613000,"value_imprv":686000,"value_total":2299000,"value_levy":10491.28}
{"pid":"002-439-433","year_tax":2008,"value_land":1934000,"value_imprv":684000,"value_total":2618000,"value_levy":10584.75}
{"pid":"002-439-433","year_tax":2009,"value_land":1934000,"value_imprv":684000,"value_total":2618000,"value_levy":11418.81}
{"pid":"002-439-433","year_tax":2010,"value_land":1988000,"value_imprv":778000,"value_total":2766000,"value_levy":12401.6}
{"pid":"002-439-433","year_tax":2011,"value_land":2600000,"value_imprv":878000,"value_total":3478000,"value_levy":13732.3}
{"pid":"002-439-433","year_tax":2012,"value_land":3591000,"value_imprv":676000,"value_total":4267000,"value_levy":14841.07}
{"pid":"002-439-433","year_tax":2013,"value_land":3543000,"value_imprv":660000,"value_total":4203000,"value_levy":15923.23}
{"pid":"002-439-433","year_tax":2014,"value_land":3504000,"value_imprv":653000,"value_total":4157000,"value_levy":16589.67}
{"pid":"002-439-433","year_tax":2015,"value_land":3901000,"value_imprv":468000,"value_total":4369000,"value_levy":16640.94}
[{"pid":"002-418-762","year_tax":2006,"value_land":908000,"value_imprv":380000,"value_total":1288000,"value_levy":7372.44}],
[{"pid":"002-418-762","year_tax":2007,"value_land":1225000,"value_imprv":380000,"value_total":1605000,"value_levy":7539.21}],
[{"pid":"002-418-762","year_tax":2008,"value_land":1590000,"value_imprv":375000,"value_total":1965000,"value_levy":7790.25}],
[{"pid":"002-418-762","year_tax":2009,"value_land":1590000,"value_imprv":375000,"value_total":1965000,"value_levy":8713.44}],
[{"pid":"002-418-769","year_tax":2006,"value_land":908000,"value_imprv":380000,"value_total":1288000,"value_levy":7372.44}],
[{"pid":"002-418-769","year_tax":2007,"value_land":1225000,"value_imprv":380000,"value_total":1605000,"value_levy":7539.21}],
[{"pid":"002-418-769","year_tax":2008,"value_land":1590000,"value_imprv":375000,"value_total":1965000,"value_levy":7790.25}],
[{"pid":"002-418-900","year_tax":2009,"value_land":1590000,"value_imprv":375000,"value_total":1965000,"value_levy":8713.44}],
[{"pid":"002-418-900","year_tax":2006,"value_land":908000,"value_imprv":380000,"value_total":1288000,"value_levy":7372.44}],
[{"pid":"002-418-512","year_tax":2007,"value_land":1225000,"value_imprv":380000,"value_total":1605000,"value_levy":7539.21}],
[{"pid":"002-418-512","year_tax":2008,"value_land":1590000,"value_imprv":375000,"value_total":1965000,"value_levy":7790.25}],
[{"pid":"002-418-132","year_tax":2009,"value_land":1590000,"value_imprv":375000,"value_total":1965000,"value_levy":8713.44}],
[{"pid":"002-418-132","year_tax":2006,"value_land":908000,"value_imprv":380000,"value_total":1288000,"value_levy":7372.44}],
[{"pid":"002-418-777","year_tax":2007,"value_land":1225000,"value_imprv":380000,"value_total":1605000,"value_levy":7539.21}],
[{"pid":"002-418-777","year_tax":2008,"value_land":1590000,"value_imprv":375000,"value_total":1965000,"value_levy":7790.25}],
[{"pid":"002-418-666","year_tax":2009,"value_land":1590000,"value_imprv":375000,"value_total":1965000,"value_levy":8713.44}]