日期:2014-05-18 浏览次数:21019 次
// Extracts the Chinese characters from an HTML fragment: every run of
// characters in the range U+4E00..U+9FA5 is collected and concatenated,
// which skips the tags and any punctuation outside that range.
string sourcestr = "<span style=\"border:1px solid red\">江,苏<font color=\"red\" size=\"5\">省、南</font>京市</span> ";

// One or more consecutive characters in U+4E00..U+9FA5.
Regex regx = new Regex(@"[\u4e00-\u9fa5]+");
string emp = "";
MatchCollection mcurl = regx.Matches(sourcestr);
foreach (Match sk in mcurl)
{
    // Groups[0] is the whole match.
    emp += sk.Groups[0].Value;
}
// For the sample above emp is now "江苏省南京市".
/// <summary>
/// Strips HTML markup from <paramref name="oldstring"/>, decodes a fixed set of
/// character entities, removes any angle brackets that remain, and returns the
/// HTML-encoded, trimmed result.
/// </summary>
/// <param name="oldstring">The HTML text to clean. May be null or empty.</param>
/// <returns>
/// The cleaned and HTML-encoded text, or an empty string when
/// <paramref name="oldstring"/> is null or empty.
/// </returns>
public static string ClearMarket(string oldstring)
{
    if (string.IsNullOrEmpty(oldstring))
    {
        return string.Empty;
    }

    // { pattern, replacement } pairs, applied strictly in this order.
    // NOTE: "&amp;" is decoded before "&lt;"/"&gt;", so doubly-escaped input
    // such as "&amp;lt;" is decoded twice.
    string[,] rules =
    {
        { @"<(.[^>]*)>", "" },          // tags
        { @"([\r\n])[\s]+", "" },       // a line break followed by whitespace
        { @"-->", "" },                 // leftover comment terminator
        { @"<!--.*", "" },              // comment opener through end of line
        { @"&(quot|#34);", "\"" },
        { @"&(amp|#38);", "&" },
        { @"&(lt|#60);", "<" },
        { @"&(gt|#62);", ">" },
        { @"&(nbsp|#160);", " " },
        { @"&(iexcl|#161);", "\u00A1" },
        { @"&(cent|#162);", "\u00A2" },
        { @"&(pound|#163);", "\u00A3" },
        { @"&(copy|#169);", "\u00A9" },
        { @"&#(\d+);", "" }             // any other numeric entity is dropped
    };

    string str = oldstring;
    for (int i = 0; i < rules.GetLength(0); i++)
    {
        str = Regex.Replace(str, rules[i, 0], rules[i, 1], RegexOptions.IgnoreCase);
    }

    // string.Replace returns a new string; the result must be assigned or the
    // call has no effect. This also removes brackets produced by decoding
    // "&lt;" / "&gt;" above.
    str = str.Replace("<", "").Replace(">", "").Replace("\r\n", "");

    // WebUtility.HtmlEncode needs no active HTTP request, unlike
    // HttpContext.Current.Server, which is null outside of one.
    return System.Net.WebUtility.HtmlEncode(str).Trim();
}
------解决方案--------------------
// Delete every "<...>" run whose interior contains neither '<' nor '>'.
str = Regex.Replace(str, "<[^<>]+?>", string.Empty);
------解决方案--------------------
下面是一段 VB 的代码:
' Removes every HTML tag from strng and returns the remaining text.
'   strng   - the text to strip.
'   returns - strng with each "<...>" run deleted.
Function replHTML(strng)
    Dim reg
    Set reg = New RegExp
    ' "<", then one or more characters other than ">", then ">".
    ' The pattern must not be padded with spaces or exclude spaces inside the
    ' tag; otherwise tags with attributes (e.g. <font color="red">) and tags
    ' not surrounded by spaces are never matched.
    reg.Pattern = "<[^>]+>"
    ' Replace every match, not only the first one.
    reg.Global = True
    ' Tags are deleted outright rather than replaced with a space.
    replHTML = reg.Replace(strng, "")
    Set reg = Nothing
End Function
稍作修改就可以用,思路是去掉所有匹配 <[^>]+> 的内容,也就是 HTML 标签。