andy-h
3/25/2015 - 1:07 PM

Form to escape/unescape HTML

Form to escape/unescape HTML

// Defer all button wiring until the page (and therefore the form) has loaded.
window.addEventListener("load", init);
/**
 * Hooks the four form buttons up to their click handlers.
 * Encode and Decode share convert(); the two Clear buttons share clear().
 */
function init()
{
	// [button id, click handler] pairs, in the order they are wired up.
	var wiring = [
		["encode", convert],
		["decode", convert],
		["clearText", clear],
		["clearHTML", clear]
	];

	// Look every button up first, then attach the listeners.
	var buttons = wiring.map(function(pair){
		return document.getElementById(pair[0]);
	});
	buttons.forEach(function(button, index){
		button.addEventListener("click", wiring[index][1], false);
	});
}
/**
 * Click handler shared by both Clear buttons: empties the matching textarea
 * and moves focus into it.
 * @param {Event} evt - click event; the id of evt.target picks the textarea
 *   ("clearText" clears "text", anything else clears "html").
 */
function clear(evt){
	var fieldId;
	if(evt.target.id == "clearText"){
		fieldId = "text";
	}
	else{
		fieldId = "html";
	}

	var field = document.getElementById(fieldId);
	field.value = "";
	field.focus();
}
/**
 * Click handler shared by the Encode and Decode buttons.
 * Encode escapes the "text" box into the "html" box; any other button
 * unescapes the "html" box into the "text" box. The box that received the
 * result is then selected.
 * @param {Event} [evt] - click event; falls back to window.event when absent.
 */
function convert(evt)
{
	var e = evt || window.event;
	var button = e.target || e.srcElement;
	var htmlBox = document.getElementById("html");
	var textBox = document.getElementById("text");

	var encoding = button.id == "encode";
	var destination = encoding ? htmlBox : textBox;

	destination.value = encoding ? toHTML(textBox.value) : toText(htmlBox.value);
	destination.select();
}
/**
 * Formats a Unicode code point as JavaScript "\uXXXX" escape text
 * (a literal backslash, "u" and four lowercase hex digits).
 *
 * Code points above U+FFFF are written as a surrogate pair (two escapes),
 * because a \u escape only ever reads four hex digits.
 *
 * @param {number|string} num - code point as a number or a decimal string.
 * @returns {string} the escape text, e.g. decToUni(160) gives "\u00a0".
 */
function decToUni(num)
{
	// Formats a single UTF-16 code unit, left-padded to four hex digits.
	function escapeUnit(unit)
	{
		var hex = unit.toString(16);
		while(hex.length < 4){ hex = "0"+hex; }
		return "\\u"+hex;
	}

	var code = 1*num; // coerce numeric strings to a number
	if(code > 0xFFFF && code <= 0x10FFFF)
	{
		// Split the supplementary code point into its high/low surrogates.
		var offset = code - 0x10000;
		return escapeUnit(0xD800 + (offset >> 10)) + escapeUnit(0xDC00 + (offset & 0x3FF));
	}
	return escapeUnit(code);
}
/**
 * Decodes HTML character references in a string back into plain text.
 *
 * Handles hexadecimal references (&#x41;), decimal references (&#65;) and the
 * HTML 4 named entities listed below (names are case-sensitive). Every
 * reference is decoded exactly once, so "&amp;lt;" becomes "&lt;", not "<".
 * Unknown names, &#0; and values above U+10FFFF are left untouched.
 *
 * The decoding is done with a replace callback; the input is never assembled
 * into source code and eval()'d, so it cannot be executed and characters that
 * are illegal inside a string literal (such as a carriage return) pass
 * through unchanged.
 *
 * @param {string} str - HTML-escaped text.
 * @returns {string} the text with character references replaced.
 */
function toText(str)
{
	var i;

	//HTML entities - see http://www.w3.org/TR/html4/sgml/entities.html
	//Maps entity name -> code point.
	var codes = { lt: 60, gt: 62, quot: 34, amp: 38 };
	var entities = [
		//ISO 8859-1 characters (160-255), listed in code point order
		"nbsp","iexcl","cent","pound","curren","yen","brvbar","sect","uml","copy","ordf","laquo","not","shy","reg","macr",
		"deg","plusmn","sup2","sup3","acute","micro","para","middot","cedil","sup1","ordm","raquo","frac14","frac12","frac34","iquest",
		"Agrave","Aacute","Acirc","Atilde","Auml","Aring","AElig","Ccedil","Egrave","Eacute","Ecirc","Euml","Igrave","Iacute","Icirc","Iuml",
		"ETH","Ntilde","Ograve","Oacute","Ocirc","Otilde","Ouml","times","Oslash","Ugrave","Uacute","Ucirc","Uuml","Yacute","THORN","szlig",
		"agrave","aacute","acirc","atilde","auml","aring","aelig","ccedil","egrave","eacute","ecirc","euml","igrave","iacute","icirc","iuml",
		"eth","ntilde","ograve","oacute","ocirc","otilde","ouml","divide","oslash","ugrave","uacute","ucirc","uuml","yacute","thorn","yuml"
		];
	for(i=0; i<entities.length; i++)
	{
		codes[entities[i]] = 160+i;
	}
	entities = [
		//symbols, mathematical symbols, and Greek letters (code point, name pairs)
		402,"fnof",
		913,"Alpha",914,"Beta",915,"Gamma",916,"Delta",917,"Epsilon",918,"Zeta",919,"Eta",920,"Theta",921,"Iota",922,"Kappa",923,"Lambda",924,"Mu",925,"Nu",926,"Xi",927,"Omicron",928,"Pi",929,"Rho",931,"Sigma",932,"Tau",933,"Upsilon",934,"Phi",935,"Chi",936,"Psi",937,"Omega",
		945,"alpha",946,"beta",947,"gamma",948,"delta",949,"epsilon",950,"zeta",951,"eta",952,"theta",953,"iota",954,"kappa",955,"lambda",956,"mu",957,"nu",958,"xi",959,"omicron",960,"pi",961,"rho",962,"sigmaf",963,"sigma",964,"tau",965,"upsilon",966,"phi",967,"chi",968,"psi",969,"omega",
		977,"thetasym",978,"upsih",982,"piv",
		8226,"bull",8230,"hellip",8242,"prime",8243,"Prime",8254,"oline",8260,"frasl",8472,"weierp",8465,"image",8476,"real",8482,"trade",8501,"alefsym",
		8592,"larr",8593,"uarr",8594,"rarr",8595,"darr",8596,"harr",8629,"crarr",8656,"lArr",8657,"uArr",8658,"rArr",8659,"dArr",8660,"hArr",
		8704,"forall",8706,"part",8707,"exist",8709,"empty",8711,"nabla",8712,"isin",8713,"notin",8715,"ni",8719,"prod",8721,"sum",8722,"minus",8727,"lowast",8730,"radic",8733,"prop",8734,"infin",8736,"ang",8743,"and",8744,"or",8745,"cap",8746,"cup",8747,"int",8756,"there4",8764,"sim",
		8773,"cong",8776,"asymp",8800,"ne",8801,"equiv",8804,"le",8805,"ge",8834,"sub",8835,"sup",8836,"nsub",8838,"sube",8839,"supe",8853,"oplus",8855,"otimes",8869,"perp",8901,"sdot",8968,"lceil",8969,"rceil",8970,"lfloor",8971,"rfloor",9001,"lang",9002,"rang",9674,"loz",
		9824,"spades",9827,"clubs",9829,"hearts",9830,"diams",
		//markup-significant and internationalization characters
		338,"OElig",339,"oelig",352,"Scaron",353,"scaron",376,"Yuml",710,"circ",732,"tilde",
		8194,"ensp",8195,"emsp",8201,"thinsp",8204,"zwnj",8205,"zwj",8206,"lrm",8207,"rlm",8211,"ndash",8212,"mdash",8216,"lsquo",8217,"rsquo",8218,"sbquo",8220,"ldquo",8221,"rdquo",8222,"bdquo",8224,"dagger",8225,"Dagger",8240,"permil",8249,"lsaquo",8250,"rsaquo",8364,"euro"
		];
	for(i=0; i<entities.length; i=i+2)
	{
		codes[entities[i+1]] = entities[i];
	}

	//Turns a code point into a string; values above U+FFFF need a surrogate pair.
	function fromCodePoint(code)
	{
		if(code > 0xFFFF)
		{
			code -= 0x10000;
			return String.fromCharCode(0xD800 + (code >> 10), 0xDC00 + (code & 0x3FF));
		}
		return String.fromCharCode(code);
	}

	//One pass over every "&...;" candidate: hex reference, decimal reference or name.
	return str.replace(/&(#x[0-9a-f]+|#[0-9]+|[a-z][a-z0-9]*);/gi, function(match, ref){
		var code;
		if(ref.charAt(0) == "#")
		{
			code = /^#x/i.test(ref) ? parseInt(ref.slice(2), 16) : parseInt(ref.slice(1), 10);
			//zero and values beyond the Unicode range are not characters - keep the text as typed
			return (code >= 1 && code <= 0x10FFFF) ? fromCodePoint(code) : match;
		}
		//own-property check so names such as "constructor" are not mistaken for entities
		return Object.prototype.hasOwnProperty.call(codes, ref) ? fromCodePoint(codes[ref]) : match;
	});
}
/**
 * Escapes the five markup-significant characters (& < > " ') so the text can
 * be placed in HTML.
 * @param {string} str - plain text.
 * @returns {string} the text with those characters replaced by character references.
 */
function toHTML(str)
{
	var replacements = {
		"&": "&amp;",
		"<": "&lt;",
		">": "&gt;",
		'"': "&quot;",
		"'": "&#39;"
	};
	return str.replace(/[&<>"']/g, function(ch){
		return replacements[ch];
	});
}
<!DOCTYPE html>

<html lang="en">
<head>
	
	<meta charset="utf-8">
	
	<title>Encoder</title>
	
	<!-- The script finds the controls below by id ("text", "html", "encode",
	     "decode", "clearText", "clearHTML"); keep the ids in sync with it. -->
	<script type="text/javascript" src="encoder.js"></script>
	
</head>
<body>
	
	<!-- Plain-text side: Encode writes the escaped form of this box into the "html" box. -->
	<label for="text">Text</label><br>
	<textarea id="text" style="width:30em; height:7.5em;"></textarea>
	<div style="display:inline-block;">
	<input type="button" id="encode" value="Encode"><br>
	<input type="button" id="clearText" value="Clear">
	</div><br>
	<br>
	<!-- Escaped side: Decode writes the unescaped form of this box into the "text" box. -->
	<label for="html">Encoded HTML</label><br>
	<textarea id="html" style="width:30em; height:7.5em;"></textarea>
	<div style="display:inline-block;">
	<input type="button" id="decode" value="Decode"><br>
	<input type="button" id="clearHTML" value="Clear">
	</div><br>
	
</body>
</html>