andy-h
3/22/2017 - 6:54 PM

Strip HTML tags from selected cells

Strip HTML tags from selected cells

'requires reference to Microsoft VBScript Regular Expressions 5.5

Sub stripHTML()
    
    Dim cell As Object
    
    For Each cell In Selection
        If Not (IsEmpty(cell) Or IsNumeric(cell) Or cell.HasFormula) Then
            On Error Resume Next
            cell.Value = doStripHTML(cell.Formula)
            If Err > 0 Then
                cell.Value = doStripHTML(cell.Value)
            End If
            On Error GoTo 0
        End If
    Next
    
End Sub

Private Function doStripHTML(strHTML) As String
'Strips the HTML tags from strHTML
    
    Dim rxp
    Set rxp = New RegExp
    
    rxp.IgnoreCase = True
    rxp.Global = True
    rxp.Pattern = "</?[a-z]+([^<]|\n)*?>|<!--(.|\n)*?-->"
    
    'Replace all HTML tag matches with the empty string
    strHTML = rxp.Replace(strHTML, "")
    
    strHTML = Replace(strHTML, "&lt;", "<")
    strHTML = Replace(strHTML, "&gt;", ">")
    strHTML = Replace(strHTML, "&quot;", """")
    strHTML = Replace(strHTML, "&amp;", "&")
    
    rxp.Pattern = "(?:[\s\xA0\n]|&nbsp;)+"
    strHTML = rxp.Replace(strHTML, " ")
    strHTML = Trim(strHTML)
    
    If Left(strHTML, 1) = "=" Then strHTML = "'" & strHTML
    
    doStripHTML = strHTML
    
End Function