删除字符串中HTML标签代码
/// <summary>
/// Removes HTML markup from a string and decodes a small, fixed set of
/// character entities, returning plain text.
/// </summary>
/// <remarks>
/// The rules are applied in table order: script blocks, tags, line breaks
/// followed by whitespace, the named/numeric entities listed below, any
/// remaining numeric entity (dropped), and comment delimiters. After the
/// rule table has run, every remaining '&lt;', '&gt;' and CRLF pair is
/// removed, so angle brackets produced by decoding <c>&amp;lt;</c> /
/// <c>&amp;gt;</c> never reach the caller.
/// </remarks>
/// <param name="HTML">The HTML fragment to clean. May be null or empty.</param>
/// <returns>
/// The cleaned text, or an empty string when <paramref name="HTML"/> is
/// null or empty.
/// </returns>
public static string ClearHTMLTags1(string HTML)
{
    // Nothing to strip; also avoids the ArgumentNullException Regex.Replace
    // would throw for a null input.
    if (string.IsNullOrEmpty(HTML))
    {
        return string.Empty;
    }

    // Multiline has no effect on these patterns (none uses ^ or $); it is
    // kept so the options match what the method has always passed.
    const RegexOptions options = RegexOptions.Multiline | RegexOptions.IgnoreCase;

    // Each row is { pattern, replacement }. Keeping the pair on one row
    // makes it impossible for the two lists to drift out of alignment.
    string[,] rules =
    {
        // (?s) lets '.' match newlines so multi-line script bodies are removed too.
        { @"(?s)<script[^>]*?>.*?</script>", "" },
        // Any opening/closing tag, with optional namespace prefix and attributes.
        { @"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>", "" },
        // A line break together with the whitespace that follows it.
        { @"([\r\n])[\s]+", "" },
        { @"&(quot|#34);", "\"" },
        { @"&(amp|#38);", "&" },
        { @"&(lt|#60);", "<" },
        { @"&(gt|#62);", ">" },
        { @"&(nbsp|#160);", " " },
        { @"&(iexcl|#161);", "\u00A1" },
        { @"&(cent|#162);", "\u00A2" },
        { @"&(pound|#163);", "\u00A3" },
        { @"&(copy|#169);", "\u00A9" },
        // Any other numeric entity is dropped rather than decoded.
        { @"&#(\d+);", "" },
        { @"-->", "\r\n" },
        { @"<!--.*\n", "" }
    };

    string s = HTML;
    for (int i = 0; i < rules.GetLength(0); i++)
    {
        s = Regex.Replace(s, rules[i, 0], rules[i, 1], options);
    }

    // string.Replace returns a new string; the result must be assigned back
    // or the call has no effect.
    s = s.Replace("<", "");
    s = s.Replace(">", "");
    s = s.Replace("\r\n", "");
    return s;
}
手机扫一扫
移动阅读更方便
你可能感兴趣的文章