日期:2014-05-17 浏览次数:20523 次
/// <summary>
/// Downloads the news page, isolates its news-list section, and returns every
/// substring of that section that matches <paramref name="rule2"/>.
/// </summary>
/// <remarks>
/// NOTE(review): the page address and the section pattern are hard-coded in the
/// body, so <paramref name="url"/> and <paramref name="rule1"/> are accepted but
/// not used. This is kept as-is because callers may rely on it; confirm whether
/// the parameters were meant to replace the hard-coded values.
/// </remarks>
/// <param name="url">Not used by the current implementation.</param>
/// <param name="rule1">Not used by the current implementation.</param>
/// <param name="rule2">
/// Regular-expression pattern applied to the extracted section. It is also echoed
/// to the response inside a textarea (without HTML encoding).
/// </param>
/// <returns>
/// An <see cref="ArrayList"/> of the matched strings in match order; empty when
/// the section or the pattern does not match.
/// </returns>
private ArrayList GrabUrl(string url, String rule1, string rule2)
{
    // Matches from <ul id="list1" class="news_list"> up to the start of <ul id="list2.
    string pat = @"\w*<ul\sid=""list1""\sclass=""news_list"">[\s\S]*<ul\sid=""list2\w*";

    // Debug echo of the link pattern that is about to be applied.
    Response.Write("<textarea rows='3' cols='100'>" + rule2 + "</textarea></br>");

    string pageText = null;
    try
    {
        // Nested using statements release the client, stream and reader even when
        // the download or the read throws part-way through.
        using (WebClient wc = new WebClient())
        using (Stream s = wc.OpenRead("http://www.ithome.com"))
        using (StreamReader sr = new StreamReader(s, System.Text.Encoding.GetEncoding("GB2312")))
        {
            pageText = sr.ReadToEnd();
        }
    }
    catch (Exception ex)
    {
        // Best effort: on failure the error message takes the place of the page
        // text, and the matching below runs against it instead.
        pageText = ex.Message;
    }

    // Locate the block of the page that holds the links to the wanted news items.
    Regex reg = new Regex(pat, RegexOptions.IgnoreCase);
    Match news = reg.Match(pageText);

    // Filter the news links out of that block.
    Regex regex = new Regex(rule2);
    ArrayList listNew = new ArrayList();
    for (Match match = regex.Match(news.Value); match.Success; match = match.NextMatch())
    {
        listNew.Add(match.Value);
    }
    return listNew;
}