日期:2014-05-18 浏览次数:20889 次
<ul class="e2"> <li> <a href='#' class='preview'><img src='#'/></a> <a href="#" class="title">标题1</a> <span class="info"> <small>日期:</small>2012-04-04 18:35:11 <small>点击:</small>129 <small>好评:</small>5 </span> <p class="intro"> 内容1</p> </li><li> <a href='#' class='preview'><img src='#'/></a> <b>[<a href='#'>类别</a>]</b> <a href="#" class="title">标题2</a> <span class="info"> <small>日期:</small>2012-04-04 12:56:34 <small>点击:</small>119 <small>好评:</small>4 </span> <p class="intro"> 内容2</p> </li> <li> <a href='#' class='preview'><img src='#'/></a> <a href="#" class="title">标题3</a> <span class="info"> <small>日期:</small>2012-04-04 18:35:11 <small>点击:</small>129 <small>好评:</small>5 </span> <p class="intro"> 内容3</p> </li><li> <a href='#' class='preview'><img src='#'/></a> <b>[<a href='#'>类别2</a>]</b> <a href="#" class="title">标题4</a> <span class="info"> <small>日期:</small>2012-04-04 12:56:34 <small>点击:</small>119 <small>好评:</small>4 </span> <p class="intro"> 内容4</p> </li> </ul>
// Reads the sample markup from ~/test.txt and, for every title link found,
// writes the optional category, the href and the title text to the response.
string s = File.ReadAllText(Server.MapPath("~/test.txt"));

// (?is): case-insensitive matching, and '.' also matches line breaks.
// The optional non-capturing group picks up "<b>[<a href='#'>category</a>]</b>"
// when it sits directly before the title link; the "类别" group is empty otherwise.
// The look-behind / look-ahead only require that some <ul class="e2"> occurs
// earlier and some </ul> occurs later in the input.
MatchCollection matches = Regex.Matches(s, @"(?is)(?<=<ul class=""e2"">.*?)(?:<b>\[<a href='#'>(?<类别>.*?)</a>\]</b>)?\s*<a href=""(?<href>.+?)"" class=""title"">(?<title>.+?)</a>(?=.*?</ul>)");

foreach (Match match in matches)
{
    // The captured text comes from the file, so it is HTML-encoded before being
    // echoed into the page; otherwise markup inside a capture would be rendered.
    Response.Write(Server.HtmlEncode(match.Groups["类别"].Value) + "<br/>");
    Response.Write(Server.HtmlEncode(match.Groups["href"].Value) + "<br/>");

    // Two well-formed line breaks separate consecutive entries.
    Response.Write(Server.HtmlEncode(match.Groups["title"].Value) + "<br/><br/>");
}
------解决方案--------------------
// Reads c:\csdn.txt and prints, for each <li> entry matched by the pattern,
// its optional category ("type"), its title and its intro text to the console.
void Main()
{
    // (?is): case-insensitive matching, and '.' also matches line breaks.
    // Unnamed groups are numbered before named ones, so \2 and \3 refer to the
    // optional quote captured in front of "title" and "intro" respectively.
    const string listItemPattern = @"(?is)<li>.*?(\[<a[^>]*?>(?<type>[^<>]+)</a>\].*?)?<a[^>]*?class=(['""]?)title\2>(?<title>[^<>]+)</a>.*?<p[^>]*?class=(['""]?)intro\3>(?<content>[^<>]+)</p>.*?</li>";

    string markup = File.ReadAllText("c:\\csdn.txt");
    MatchCollection entries = Regex.Matches(markup, listItemPattern);

    foreach (Match entry in entries)
    {
        GroupCollection parts = entry.Groups;

        // "type" is an empty string for entries without a "[category]" prefix.
        string type = parts["type"].Value;
        string title = parts["title"].Value;
        string content = parts["content"].Value;

        Console.WriteLine("类别:{0}\t标题:{1}\t内容:{2}", type, title, content);
    }

    /*
    Output for the sample markup:
    类别:	标题:标题1	内容: 内容1
    类别:类别	标题:标题2	内容: 内容2
    类别:	标题:标题3	内容: 内容3
    类别:类别2	标题:标题4	内容: 内容4
    */
}
------解决方案--------------------