indexzero
6/10/2010 - 1:57 AM

gistfile1.txt

var html = "<div><div>foo</div></div>";

/**
 * Classify one token produced by splitting markup on its tags.
 *
 * @param {string} token - A non-empty piece of the split markup.
 * @returns {string} One of:
 *   "start"   - opening tag, e.g. "<div>"
 *   "end"     - closing tag, e.g. "</div>"
 *   "unit"    - self-closing tag, e.g. "<br/>"
 *   "content" - anything that is not wrapped in "<...>"
 */
function classifyToken(token) {
  var last = token.length - 1;

  // Text may legitimately contain a stray "<" or "/>" (e.g. "a/>b"), so only
  // tokens that are actually wrapped in angle brackets are treated as tags.
  if (token.charAt(0) !== "<" || token.charAt(last) !== ">") {
    return "content";
  }
  if (token.charAt(1) === "/") {
    return "end";
  }
  if (token.charAt(last - 1) === "/") {
    return "unit";
  }
  return "start";
}

/**
 * Convert a markup string into its nested-array text form: every element
 * becomes `["name", ...children]` and every text run becomes a quoted string.
 * Attributes are not parsed; they stay part of the name string.
 *
 * @param {string} source - Markup to convert; null/undefined yields "".
 * @returns {string} The serialized nested arrays. Sibling items are separated
 *   by commas, and all strings are escaped with JSON.stringify.
 */
function htmlToJup(source) {
  if (source === null || source === undefined) {
    return "";
  }

  // The capture group makes split() keep the tags in the result, interleaved
  // with the text between them (empty strings appear between adjacent tags).
  var tokens = String(source).split(/(<[^<>]*>)/);
  var out = "";

  for (var index = 0; index < tokens.length; index++) {
    var token = tokens[index];
    if (!token) {
      continue;
    }

    var tagType = classifyToken(token);
    if (tagType === "end") {
      out += "]";
      continue;
    }

    // Every item except the very first one follows either its parent's name
    // or a previous sibling, so it always needs a separating comma.
    if (out !== "") {
      out += ",";
    }

    switch (tagType) {
      case "unit":
        // Drop "<" and "/>" and trim, so "<br />" yields "br" rather than "br ".
        out += "[" + JSON.stringify(token.slice(1, -2).trim()) + "]";
        break;
      case "start":
        // Left open on purpose; the matching end tag appends the "]".
        out += "[" + JSON.stringify(token.slice(1, -1).trim());
        break;
      default:
        out += JSON.stringify(token);
        break;
    }
  }

  return out;
}

var jup = htmlToJup(html);

console.log(jup);