//Functions to encode/decode text for use in HTML 4.01.
//Encodes the markup-significant characters (& < > " ') in a string for use in HTML 4.01.
//  str               - the text to encode
//  keepValidEntities - if truthy, ampersands that start a valid character reference
//                      (decimal/hex numeric, or one of the HTML 4.01 named entities listed
//                      below) are left untouched; otherwise every ampersand is encoded
//Returns the encoded string.
function textToHTML(str, keepValidEntities){
	"use strict";
	var validEntityNames, rxp;
	if(keepValidEntities){
		//see http://www.w3.org/TR/html401/sgml/entities.html
		validEntityNames = ""+
			//markup-significant and internationalization characters
			"quot|amp|lt|gt|OElig|oelig|Scaron|scaron|Yuml|circ|tilde|ensp|emsp|thinsp|zwnj|zwj|lrm|"+
			"rlm|ndash|mdash|lsquo|rsquo|sbquo|ldquo|rdquo|bdquo|dagger|Dagger|permil|lsaquo|rsaquo|euro|"+
			//ISO 8859-1 characters
			"nbsp|iexcl|cent|pound|curren|yen|brvbar|sect|uml|copy|ordf|laquo|not|shy|reg|macr|deg|"+
			"plusmn|sup2|sup3|acute|micro|para|middot|cedil|sup1|ordm|raquo|frac14|frac12|frac34|iquest|"+
			"Agrave|Aacute|Acirc|Atilde|Auml|Aring|AElig|Ccedil|Egrave|Eacute|Ecirc|Euml|Igrave|Iacute|"+
			"Icirc|Iuml|ETH|Ntilde|Ograve|Oacute|Ocirc|Otilde|Ouml|times|Oslash|Ugrave|Uacute|Ucirc|Uuml|"+
			"Yacute|THORN|szlig|agrave|aacute|acirc|atilde|auml|aring|aelig|ccedil|egrave|eacute|ecirc|euml|"+
			"igrave|iacute|icirc|iuml|eth|ntilde|ograve|oacute|ocirc|otilde|ouml|divide|oslash|ugrave|"+
			"uacute|ucirc|uuml|yacute|thorn|yuml|"+
			//symbols, mathematical symbols, and Greek letters
			"fnof|Alpha|Beta|Gamma|Delta|Epsilon|Zeta|Eta|Theta|Iota|Kappa|Lambda|Mu|Nu|Xi|Omicron|Pi|"+
			"Rho|Sigma|Tau|Upsilon|Phi|Chi|Psi|Omega|alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|"+
			"kappa|lambda|mu|nu|xi|omicron|pi|rho|sigmaf|sigma|tau|upsilon|phi|chi|psi|omega|thetasym|"+
			"upsih|piv|bull|hellip|prime|Prime|oline|frasl|weierp|image|real|trade|alefsym|larr|uarr|rarr|"+
			"darr|harr|crarr|lArr|uArr|rArr|dArr|hArr|forall|part|exist|empty|nabla|isin|notin|ni|prod|"+
			"sum|minus|lowast|radic|prop|infin|ang|and|or|cap|cup|int|there4|sim|cong|asymp|ne|equiv|"+
			"le|ge|sub|sup|nsub|sube|supe|oplus|otimes|perp|sdot|lceil|rceil|lfloor|rfloor|lang|rang|"+
			"loz|spades|clubs|hearts|diams";
		//match an ampersand that is NOT followed by "#digits;", "#xHEX;" or "name;"
		rxp = new RegExp("&(?!(?:#([0-9]+|[xX][a-fA-F0-9]+)|"+validEntityNames+");)", "g");
		str = str.replace(rxp, "&amp;"); //encode ampersands that are not part of a valid character entity reference
	}
	else{
		str = str.replace(/&/g, "&amp;"); //encode all ampersands
	}
	//encode the other markup-significant characters; this must run after the ampersand pass
	//so the ampersands introduced here are not encoded a second time.
	//the apostrophe uses a numeric reference because HTML 4.01 does not define "&apos;"
	return str.replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;").replace(/'/g, "&#39;");
}
//Decodes all HTML character entity references in the string (not just the reserved characters).
//The decoding is delegated to the HTML parser by assigning to innerHTML of a detached <div>,
//so a DOM `document` must be available.
//  str - text that may contain character references such as "&amp;" or "&#169;"
//Returns the decoded text, or "" when parsing yields no child node (e.g. for an empty string).
function HTMLToText(str){
	"use strict";
	var tmp, node;
	tmp = document.createElement("div");
	//escape angle brackets first so the input is parsed as text only and can never create
	//elements (which could run script via event-handler attributes)
	tmp.innerHTML = str.replace(/</g, "&lt;").replace(/>/g, "&gt;");
	node = tmp.firstChild;
	//an empty input leaves the <div> without children; avoid dereferencing null
	return node ? node.nodeValue : "";
}
//Escapes text so it can be placed inside a quoted JavaScript string literal in embedded
//or inline code.
//See http://code.google.com/p/doctype/wiki/ArticleXSSInJavaScript
//usage example:
// var html = "<script>console.log(\"" + textToJavaScriptString(userInput) + "\");</script>";
//  str - the raw text
//Returns the text with the characters listed in the table below replaced by escape sequences.
function textToJavaScriptString(str){
	"use strict";
	//every character that gets rewritten, mapped to the escape sequence that replaces it
	var escapeFor = {
		//the escape character itself
		"\\": "\\\\",
		//quotes, so the value cannot break out of the string literal
		"'": "\\u0027",
		"\"": "\\u0022",
		//tab, line feed, carriage return, next line
		"\u0009": "\\t",
		"\u000A": "\\n",
		"\u000D": "\\r",
		"\u0085": "\\u0085",
		//line separator, paragraph separator
		"\u2028": "\\u2028",
		"\u2029": "\\u2029",
		//angle brackets, so the value cannot close the surrounding tag
		"<": "\\u003C",
		">": "\\u003E",
		//escaped as defense-in-depth
		"&": "\\u0026",
		"=": "\\u003D"
	};
	//one pass is enough: no escape sequence above contains a character from the table
	//other than the backslash, and output is never re-scanned
	return str.replace(/[\\'"\u0009\u000A\u000D\u0085\u2028\u2029<>&=]/g, function(ch){
		return escapeFor[ch];
	});
}