日期:2014-05-17  浏览次数:21023 次

如何抓取查询结果中的列表信息
本帖最后由 chenxi0516 于 2012-11-28 11:05:38 编辑。

如何抓取类似 http://search.anccnet.com/searchResult.aspx?keyword=%u9171%u6cb9 这样的商品列表信息,并保存到数据库中?

请给个思路,有参考程序就更好了,谢谢。没写过类似的程序,无从下手。

在线等。。。。
------最佳解决方案--------------------
		DataTable dt = new DataTable();
// Result table: six text columns, registered in one call and in a fixed order.
dt.Columns.AddRange(new[]
{
    new DataColumn("商品条码", typeof(string)),
    new DataColumn("名称", typeof(string)),
    new DataColumn("规格", typeof(string)),
    new DataColumn("描述", typeof(string)),
    new DataColumn("商标", typeof(string)),
    new DataColumn("发布厂家", typeof(string)),
});

// The same gb2312 encoding is used to URL-encode the keyword, to encode the
// POST body, and to decode the response (via OverrideEncoding).
Encoding encoding = Encoding.GetEncoding("gb2312");
string keyword = HttpUtility.UrlEncode("麻油", encoding);
string url = @"http://search.anccnet.com/searchResult.aspx?keyword=" + keyword;
HtmlWeb htmlWeb = new HtmlWeb();
htmlWeb.OverrideEncoding = encoding;
// The PreRequest hook receives the outgoing request (x) and writes the POST
// body into it; returning true lets the request proceed.
htmlWeb.PreRequest = x =>
{
    // NOTE(review): this payload is sent as-is, without a "name=" form key and
    // without URL-encoding, unlike the paging request further down — confirm
    // the server is meant to receive it in this form.
    byte[] bytes = encoding.GetBytes("/wEPDwUKMTA5MTc1OTY3Mg9kFgICAQ9kFgYCAw8PFgIeBFRleHQFYjxmb250IGNvbG9yPSdibGFjayc+57O757uf5Lit56ym5ZCI5p2h5Lu2PC9mb250PiDphbHmsrk8Zm9udCBjb2xvcj0nYmxhY2snPiDnmoTllYblk4HmnInvvJo8L2ZvbnQ+ZGQCBw8PFgQeC1JlY29yZGNvdW50AowRHg5DdXN0b21JbmZvVGV4dAWTAeaAu+iusOW9leaVsO+8mjxmb250IGNvbG9yPSJibHVlIj48Yj4yMTg4PC9iPjwvZm9udD4g5oC76aG15pWw77yaPGZvbnQgY29sb3I9ImJsdWUiPjxiPjE0NjwvYj48L2ZvbnQ+IOW9k+WJjemhte+8mjxmb250IGNvbG9yPSJyZWQiPjxiPjE8L2I+PC9mb250PmRkAgkPFgIeB1Zpc2libGVoZGRegsQ/9sftuonRL+jiHPQitwQcKg==");
    x.ContentLength = bytes.Length;
    x.ContentType = "application/x-www-form-urlencoded";
    using (Stream stream = x.GetRequestStream())
    {
        stream.Write(bytes, 0, bytes.Length);
    }
    return true;
};
HtmlDocument htmlDoc = htmlWeb.Load(url, "POST");
// The total page count is read from the second <font> element of the pager table.
// SelectSingleNode yields null when the XPath matches nothing (e.g. the page
// layout changed or the search returned no pager), so fail with a clear message
// instead of a bare NullReferenceException.
HtmlNode pageCountNode = htmlDoc.DocumentNode.SelectSingleNode("//table[@id='myPager']/tr[1]/td[1]/table/tr/td/font[2]");
if (pageCountNode == null)
{
    throw new System.InvalidOperationException("Pager element 'myPager' was not found in the response from " + url);
}
int pageCount = int.Parse(pageCountNode.InnerText);
for (int pageIndex = 1; pageIndex <= pageCount; pageIndex++)
{
htmlWeb.PreRequest = x =>
{
byte[] bytes = encoding.GetBytes("__VIEWSTATE=%2FwEPDwUKMTA5MTc1OTY3Mg9kFgICAQ9kFgYCAw8PFgIeBFRleHQFYjxmb250IGNvbG9yPSdibGFjayc%2B57O757uf5Lit56ym5ZCI5p2h5Lu2PC9mb250PiDphbHmsrk8Zm9udCBjb2xvcj0nYmxhY2snPiDnmoTllYblk4HmnInvvJo8L2ZvbnQ%2BZGQCBw8PFgYeEEN1cnJlbnRQYWdlSW5kZXgCAh4OQ3VzdG9tSW5mb1RleHQFkwHmgLvorrDlvZXmlbDvvJo8Zm9udCBjb2xvcj0iYmx1ZSI%2BPGI%2BMjE4ODwvYj48L2ZvbnQ%2BIOaAu%2BmhteaVsO%2B8mjxmb250IGNvbG9yPSJibHVlIj48Yj4xNDY8L2I%2BPC9mb250PiDlvZPliY3pobXvvJo8Zm9udCBjb2xvcj0icmVkIj48Yj4yPC9iPjwvZm9udD4eC1JlY29yZGNvdW50AowRZGQCCQ8WAh4HVmlzaWJsZWhkZLozrKPv3rWTss7F9rFI0qlAJrVU&__EVENTTARGET=myPager&__EVENTARGUMENT=" + pageIndex + "&keyword=");
x.ContentLength = bytes.Length;
x.ContentType = "application/x-www-form-urlencoded";
using (Stream stream = x.GetRequestStream())
stream.Write(bytes, 0, bytes.Length);
return true;
};
htmlDoc = htmlWeb.Load(url, "POST");
HtmlNodeCollection dls = htmlDoc.DocumentNode.SelectNodes(@"//dl[@class='p-supplier' or @class='p-info']");
for (int i = 0; i < dls.Count; i = i + 2)
{
DataRow row = dt.NewRow();
row["商标"] = dls[i].SelectSingleNode(@"dd[1]").InnerText.Trim();
row["发布厂家"] = dls[i].SelectSingleNode(@"dd[2]/a").InnerText.Trim();