avinasha
7/29/2011 - 6:40 AM

A Sanitize gem transformer that sanitizes any CSS in an HTML document.

A Sanitize gem transformer that sanitizes any CSS in an HTML document.

# Sanitize transformer that vets CSS found in <style> elements and in
# `style` attributes.
#
# env - Hash supplied by the caller; this block reads env[:node],
#       env[:node_name] and env[:is_whitelisted].
#
# Returns nil when the node is already whitelisted, is not an element, or
# carries no CSS. When the CSS fails good_css? the node is unlinked and nil
# is returned. Otherwise returns a Hash with the node listed under
# :node_whitelist.
#
# NOTE(review): whitelisting presumably exempts the whole node -- its other
# attributes included -- from later sanitizing; confirm that this is wanted
# for ordinary elements that merely carry a style attribute.
check_css = lambda do |env|
  node      = env[:node]
  node_name = env[:node_name]

  # Don't continue if this node is already whitelisted or is not an element.
  return if env[:is_whitelisted] || !node.element?

  is_style_element = node_name == 'style'
  return unless is_style_element || node['style']

  # A <style> element holds its CSS as content; any other element holds it
  # in the style attribute.
  css = is_style_element ? node.content : node['style']

  unless good_css?(css)
    node.unlink
    return
  end

  { :node_whitelist => [node] }
end


# Decides whether a piece of CSS is free of constructs commonly used to
# smuggle script into a page.
#
# text - String of CSS (a style attribute value or <style> content).
#
# Returns true when nothing suspicious is found, false otherwise.
def good_css?(text)
  # Scan a binary copy so every check has byte semantics and input that is
  # not valid in its declared encoding is rejected instead of raising.
  css = text.b

  return false if css =~ /(\w\/\/)/    # a// comment immediately following a letter
  return false if css =~ /(\w\/\/*\*)/ # a/* (or a//*) comment immediately following a letter
  return false if css =~ /(\/\*\/)/    # /*/ --> hack attempt, IMO

  # Strip /* ... */ comments, then join the lines. `.` does not cross
  # newlines here, so a comment spanning several lines is NOT removed and
  # its text is scanned by the patterns below like any other CSS.
  no_comments = css.gsub(/(\/\*.*?\*\/)/, '').delete("\n")

  # High bytes (0x80-0xff) -- suspect. This replaces a raw-byte character
  # class, which Ruby 1.9+ refuses to parse in a UTF-8 source file.
  return false unless no_comments.ascii_only?

  evil = [
    /(\bdata:\b|eval|cookie|\bwindow\b|\bparent\b|\bthis\b)/i, # suspicious javascript-type words
    # risky properties and at-rules, script schemes, '<', backslash escapes
    /behaviou?r|expression|moz-binding|@import|@charset|(java|vb)?script|[\<]|\\\w/i,
    /[\<>]/, # html tags
    /[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/, # control bytes (tab, LF and CR are allowed) -- suspect
    /&\#/, # numeric character references
  ]
  evil.none? { |regex| no_comments =~ regex }
end