C#清除HTML标签方法
阅读原文 | 时间:2023年07月08日 | 阅读:1

删除字符串中HTML标签代码

/// <summary>
/// Removes HTML markup from a string and decodes a small, fixed set of
/// character entities, returning the remaining text.
/// </summary>
/// <param name="HTML">The HTML text to clean. May be null or empty.</param>
/// <returns>
/// The input with script blocks, tags and comments removed, the listed
/// entities decoded, and any remaining '&lt;', '&gt;' and CRLF sequences stripped.
/// Returns an empty string when <paramref name="HTML"/> is null or empty.
/// </returns>
public static string ClearHTMLTags1(string HTML)
{
    if (string.IsNullOrEmpty(HTML))
    {
        return string.Empty;
    }

    // Each row is { pattern, replacement }. Rows are applied in order, and the
    // order matters: the named/numbered entities are decoded before the generic
    // "&#NNN;" rule drops whatever numeric entities remain, and "-->" is turned
    // into a line break so the following "<!--.*\n" rule can delete the comment.
    string[,] rules =
    {
        // Whole script elements, including bodies that span several lines
        // ('.' does not match '\n', so [\s\S] is used instead).
        { @"<script[^>]*?>[\s\S]*?</script>", "" },
        // Opening/closing tags with optional quoted or bare attribute values.
        { @"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>", "" },
        // A line break followed by further whitespace.
        { @"([\r\n])[\s]+", "" },
        { @"&(quot|#34);", "\"" },
        { @"&(amp|#38);", "&" },
        { @"&(lt|#60);", "<" },
        { @"&(gt|#62);", ">" },
        { @"&(nbsp|#160);", " " },
        { @"&(iexcl|#161);", "\u00A1" },
        { @"&(cent|#162);", "\u00A2" },
        { @"&(pound|#163);", "\u00A3" },
        { @"&(copy|#169);", "\u00A9" },
        // Any other numeric entity is dropped.
        { @"&#(\d+);", "" },
        { @"-->", "\r\n" },
        { @"<!--.*\n", "" },
    };

    const RegexOptions options = RegexOptions.Multiline | RegexOptions.IgnoreCase;

    string s = HTML;
    for (int i = 0; i < rules.GetLength(0); i++)
    {
        // The static overload reuses cached Regex instances across calls.
        s = Regex.Replace(s, rules[i, 0], rules[i, 1], options);
    }

    // string.Replace returns a new string; the result must be assigned.
    // This strips angle brackets left over from tags the tag pattern could not
    // match (and from decoded &lt;/&gt;), plus any remaining CRLF pairs.
    s = s.Replace("<", "").Replace(">", "").Replace("\r\n", "");
    return s;
}

手机扫一扫

移动阅读更方便

阿里云服务器
腾讯云服务器
七牛云服务器

你可能感兴趣的文章