日期:2014-05-17 浏览次数:20404 次
/// <summary>
/// Removes HTML markup from a string. Whole script and style elements
/// (tags and their content) are dropped first, then every remaining tag.
/// </summary>
/// <param name="inStr">The text to clean; may be null.</param>
/// <returns>
/// The text with markup removed and leading/trailing whitespace trimmed;
/// an empty string when <paramref name="inStr"/> is null or empty.
/// </returns>
public static string RemoveHTML(string inStr)
{
    if (string.IsNullOrEmpty(inStr))
    {
        return string.Empty;
    }

    // Flags must be combined with '|'. The previous '&' combination evaluated
    // to RegexOptions.None, so upper-case <SCRIPT>/<STYLE> blocks were missed.
    // The static Regex.Replace overloads reuse cached patterns, which avoids
    // constructing new Regex objects on every call.
    const RegexOptions options = RegexOptions.IgnoreCase;

    // Drop script and style elements together with their content. The opening
    // tag uses [^>]* (not [^>.]*) so attributes containing a dot, such as
    // src="a.js", do not prevent the match.
    string strOutput = Regex.Replace(inStr, "<script[^>]*>[\\s\\S]*?</script>", string.Empty, options);
    strOutput = Regex.Replace(strOutput, "<style[^>]*>[\\s\\S]*?</style>", string.Empty, options);

    // Strip every remaining tag: '<', at least one character (newlines
    // included), up to the nearest '>'. A second "<[^>]+>" pass is not needed:
    // anything it could match would already have been consumed here.
    strOutput = Regex.Replace(strOutput, "<[\\s\\S]+?>", string.Empty, options);

    // Character entities (for example "&lt;" or "&nbsp;") are intentionally
    // left as they are; this method only removes markup.
    return strOutput.Trim();
}
------解决方案--------------------
public static string StripHTML(string strHtml) { string[] aryReg ={ @"<script[^>]*?>.*?</script>", @"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>", @"([\r\n])[\s]+", @"&(quot|#34);", @"&(amp|#38);", @"&(lt|#60);", @"&(gt|#62);", @"&(nbsp|#160);", @"&(iexcl|#161);", @"&(cent|#162);", @"&(pound|#163);", @"&(copy|#169);", @"&#(\d+);", @"-->", @"<!--.*\n" }; string[] aryRep = { "", "", "", "\"", "&", "<", ">", " "