Shoora
10/10/2018 - 11:00 PM

Source listing for the blog post http://haacked.com/archive/2004/10/25/usingregularexpressionstomatchhtml.aspx

using System;
using System.Text.RegularExpressions;
using System.Reflection;
namespace RegexLibraryBuilder
{
    /// <summary>
    /// Console utility that describes a small set of regular expressions
    /// (an HTML tag matcher and a repeated-word matcher) and passes them to
    /// <see cref="Regex.CompileToAssembly(RegexCompilationInfo[], AssemblyName)"/>
    /// so they are emitted as precompiled types in the
    /// "Haack.RegularExpressions" assembly.
    /// </summary>
    class RegexBuilderMain
    {
        // Namespace given to every generated regex type; the same text is
        // also used as the name of the output assembly.
        const string LibraryName = "Haack.RegularExpressions";

        // The three accepted spellings of an attribute value. Every
        // alternative captures into the same named group, attVal.
        const string AttributeValuePattern =
            @"(?:"
            + @"""(?<attVal>[^""]*)"""      // "double quoted" value
            + @"|'(?<attVal>[^']*)'"        // 'single quoted' value
            + @"|(?<attVal>[^'"">\s]+)"     // bare value: no quotes, '>' or white space
            + @")";

        // One attribute: leading white space, a name captured as attName,
        // then an optional "= value" part.
        const string AttributePattern =
            @"(\s+(?<attName>\w+)"
            + @"(\s*=\s*" + AttributeValuePattern + @")?"
            + @")";

        // A whole tag:
        //   endTag      - the "/" of a closing tag, when present
        //   tagname     - the element name
        //   contents    - one or more attributes, or just optional white space
        //   completeTag - the "/" of a self-closing tag, when present
        const string HtmlTagPattern =
            @"<(?<endTag>/)?"
            + @"(?<tagname>\w+)"
            + @"(" + AttributePattern + @"+\s*|\s*)"
            + @"(?<completeTag>/)?>";

        // A word followed by white space and the very same word again.
        const string DoubleWordPattern = @"\b(\w+)\s+\1\b";

        /// <summary>
        /// Application entry point: builds the compilation descriptors for
        /// both regular expressions and compiles them into version 1.0.0.0
        /// of the regex library assembly.
        /// </summary>
        /// <param name="args">Command-line arguments; not read.</param>
        [STAThread]
        static void Main(string[] args)
        {
            // HTML tags are matched without regard to case.
            RegexCompilationInfo htmlTag = new RegexCompilationInfo(
                HtmlTagPattern,
                RegexOptions.IgnoreCase,
                "HtmlTagRegex",
                LibraryName,
                true);

            RegexCompilationInfo doubleWord = new RegexCompilationInfo(
                DoubleWordPattern,
                RegexOptions.None,
                "DoubleWordRegex",
                LibraryName,
                true);

            AssemblyName target = new AssemblyName();
            target.Version = new Version("1.0.0.0");
            target.Name = LibraryName;

            Regex.CompileToAssembly(
                new RegexCompilationInfo[] { htmlTag, doubleWord },
                target);
        }
    }
}