rimmer333
2/27/2015 - 3:01 PM

Lightweight client side HTML whitelisting in JavaScript. By no means this is a strong security measure, I use it to simply clean up the (ugl

Lightweight client-side HTML whitelisting in JavaScript. This is by no means a strong security measure; I use it simply to clean up the (ugly) markup produced by many WYSIWYG editors out there.

/**
 * Removes every element and attribute that is not whitelisted from an HTML string.
 *
 * The markup is parsed into a detached document created with
 * document.implementation.createHTMLDocument(), cleaned there, and serialized
 * back through innerHTML.
 *
 * A disallowed element is unwrapped, not deleted: its child nodes (text
 * included) take its place in the parent.
 *
 * @param {string} content - HTML fragment to clean.
 * @param {Object} [whiteList] - Keys are allowed tag names (compared against
 *     Element.localName). Each value is an object whose keys are allowed
 *     attribute local names and whose values are patterns handed to
 *     String.prototype.search(); the attribute is kept only when its value
 *     matches. An empty object allows the tag but none of its attributes.
 *     When omitted, no tag is allowed.
 * @returns {string} The cleaned HTML.
 */
var whiteListHTML = function(content, whiteList) {
	var hasOwn = Object.prototype.hasOwnProperty;
	// A missing whitelist means "nothing is allowed" instead of a TypeError.
	var rules = whiteList || {};

	// Own-property lookup only: inherited members such as "constructor",
	// "toString" or "__proto__" must never count as whitelist entries.
	var lookup = function(table, key) {
		return hasOwn.call(table, key) ? table[key] : undefined;
	};

	var sanitizeInPlace = function(DOMElement, keepTop) {
		var allowed, pattern, parent, item, i;

		// Clean descendants first. `children` is live, so walk it backwards:
		// unwrapping a child only affects positions at or after its own index.
		for (i = DOMElement.children.length - 1; i >= 0; i--) {
			sanitizeInPlace(DOMElement.children[i], false);
		}

		// The top element is always kept, but with no attribute allowed.
		allowed = keepTop ? {} : lookup(rules, DOMElement.localName);
		if (!allowed) {
			// Unwrap: move the (already cleaned) child nodes in front of the
			// element, in order, then drop the empty element itself.
			parent = DOMElement.parentNode;
			while (DOMElement.firstChild) {
				parent.insertBefore(DOMElement.firstChild, DOMElement);
			}
			parent.removeChild(DOMElement);
			return;
		}

		for (i = DOMElement.attributes.length - 1; i >= 0; i--) {
			item = DOMElement.attributes[i];
			pattern = lookup(allowed, item.localName);
			if (!(pattern && item.value.search(pattern) > -1)) {
				// Remove by node, not by name: removeAttribute() expects the
				// qualified name and would miss prefixed attributes such as
				// xlink:href when given only the local name.
				DOMElement.removeAttributeNode(item);
			}
		}
	};

	var sandbox = document.implementation.createHTMLDocument();
	sandbox.body.innerHTML = content;

	sanitizeInPlace(sandbox.body, true);

	return sandbox.body.innerHTML;
};

// Example call. The second argument is the whitelist itself:
//   - keys are the allowed tag names;
//   - each value lists the allowed attributes of that tag, keyed by attribute
//     name, with a regexp the attribute value is validated against;
//   - an empty object allows the tag but none of its attributes.
content = whiteListHTML(content, {
	b: {},
	i: {},
	p: {},
	br: {},
	ul: {},
	ol: {},
	li: {},
	hr: {},
	a: {
		// Only links whose href starts with http://, https:// or // are kept
		// (hence href="javascript:..." will be thrown away).
		href: /^(https?\:)?\/\//
	}
});