日期:2014-05-17 浏览次数:20588 次
// Crawls the paged listing at a.aspx?page=N and saves every page as a static
// "N.htm" file, rewriting the pager links so they point at the saved files.

// Total number of pages. Starts at 1 and is filled in from the pager links
// found on the first page, which extends the loop below.
int pageCount = 1;

// Matches pager links such as "a.aspx?page=3" and captures the page number.
// Built once, outside the loop. Uses \d+ (not \d*) so that a link without a
// number can never produce an empty capture.
Regex reg = new Regex(@"a\.aspx\?page=(\d+)", RegexOptions.Compiled | RegexOptions.IgnoreCase);

for (int i = 1; i <= pageCount; i++)
{
    string url = "http://www.abc.com/a.aspx?page=" + i;

    // Download the page. The response, its stream and the reader are all
    // disposed so the underlying connection is released on every iteration.
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
    string html;
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (Stream stream = response.GetResponseStream())
    using (StreamReader reader = new StreamReader(stream))
    {
        html = reader.ReadToEnd();
    }

    // On the first page, work out how many pages exist from the pager links.
    if (i == 1)
    {
        // Take the largest page number rather than the last link, because the
        // last pager link is not necessarily the highest page (e.g. "next").
        foreach (Match match in reg.Matches(html))
        {
            // Parse into a temporary: int.TryParse writes 0 to its out
            // argument on failure, which would otherwise wipe pageCount.
            int linkedPage;
            if (int.TryParse(match.Groups[1].Value, out linkedPage) && linkedPage > pageCount)
            {
                pageCount = linkedPage;
            }
        }
    }

    // Rewrite pager links of the form "a.aspx?page=N" to "N.htm".
    html = reg.Replace(html, "$1.htm");

    // Save the rewritten HTML as "<i>.htm" at the path resolved by Server.MapPath.
    using (StreamWriter sw = new StreamWriter(HttpContext.Current.Server.MapPath(i + ".htm")))
    {
        sw.Write(html);
    }
}
------解决方案--------------------