日期:2014-05-17 浏览次数:20537 次
// Patterns used by RemoveHTML, built once and reused across calls.
// The "script" literal is kept split in two pieces as in the original code
// (presumably so a literal closing script tag never appears if this source is
// embedded in a page - TODO confirm before merging the pieces).
private static readonly Regex s_removeHtmlScriptRegex = new Regex(
    "<scr" + "ipt\\b[^>]*>[\\s\\S]*?</sc" + "ript\\s*>",
    RegexOptions.IgnoreCase | RegexOptions.Compiled);

private static readonly Regex s_removeHtmlStyleRegex = new Regex(
    "<style\\b[^>]*>[\\s\\S]*?</style\\s*>",
    RegexOptions.IgnoreCase | RegexOptions.Compiled);

// Any remaining tag: '<', at least one character (newlines included), up to the nearest '>'.
private static readonly Regex s_removeHtmlTagRegex = new Regex(
    "<[\\s\\S]+?>",
    RegexOptions.Compiled);

/// <summary>
/// Removes HTML markup from a string: script blocks and style (CSS) blocks are
/// dropped together with their contents, every remaining tag is removed, and the
/// entities <c>&amp;lt;</c>, <c>&amp;gt;</c> and <c>&amp;nbsp;</c> are decoded.
/// </summary>
/// <param name="inStr">Text that may contain HTML markup. May be null or empty.</param>
/// <returns>
/// The text with markup removed and leading/trailing whitespace trimmed;
/// an empty string when <paramref name="inStr"/> is null or empty.
/// </returns>
public static string RemoveHTML(string inStr)
{
    if (string.IsNullOrEmpty(inStr))
    {
        return string.Empty;
    }

    // Script and style blocks go first so that their bodies are removed too,
    // not just the surrounding tags.
    string strOutput = s_removeHtmlScriptRegex.Replace(inStr, "");
    strOutput = s_removeHtmlStyleRegex.Replace(strOutput, "");
    strOutput = s_removeHtmlTagRegex.Replace(strOutput, "");

    // Decode entities only after tag stripping, so that an escaped "&lt;b&gt;"
    // in the text is shown literally instead of being treated as a tag.
    strOutput = strOutput.Replace("&lt;", "<");
    strOutput = strOutput.Replace("&gt;", ">");
    // Handle both the entity and an already-decoded non-breaking space character.
    strOutput = strOutput.Replace("&nbsp;", " ");
    strOutput = strOutput.Replace('\u00A0', ' ');

    return strOutput.Trim();
}
------解决方案--------------------
public static string StripHTML(string strHtml)
{
string[] aryReg ={
@"<script[^>]*?>.*?</script>",
@"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>",
@"([\r\n])[\s]+",
@"&(quot|#34);",
@"&(amp|#38);",
@"&(lt|#60);",
@"&(gt|#62);",
@"&(nbsp|#160);",
@"&(iexcl|#161);",
@"&(cent|#162);",
@"&(pound|#163);",
@"&(copy|#169);",
@"&#(\d+);",
@"-->",
@"<!--.*\n"
};
string[] aryRep = {
"",
"",
"",
"\"",
"&",
"<",
">",
" "