日期:2014-05-19  浏览次数:20488 次

******高手帮忙看看:抓取网页内容时抓取不到,是不是被限制抓取了呢?******
//抓取网页
                public   string   GetPageContent()
                {
                        string   ResponseText   =   String.Empty;
                        WebRequest   request   =   WebRequest.Create(this.url);
                        request.Headers.Keys.ToString();
                        System.Net.WebResponse   response   =   request.GetResponse();

                        //返回信息
                        Stream   resStream   =   response.GetResponseStream();
                        StreamReader   sr   =   new   StreamReader(resStream,   MyEncoding);
                        ResponseText   =   sr.ReadToEnd();
                        resStream.Close();
                        sr.Close();

                        return   ResponseText;
                }

我用这个方法抓取 www.soso.com 的内容,它的歌曲、图片都可以抓取,但是网页却抓取不到,可见不是这个函数的问题,那是哪里的问题呢?是不是他们检查了请求头里的来源信息?发现来源不对就重定向到首页了?

你们遇到过这样的问题吗?怎么解决?先谢谢各位啦

------解决方案--------------------
/// <summary>
/// Page load handler: fetches the remote page text via GetPageContent and
/// writes it to the current response.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
protected void Page_Load(object sender, EventArgs e)
{
    string pageText = this.GetPageContent();
    Response.Write(pageText);
}
// Address that GetPageContent requests (a www.soso.com query URL).
// NOTE(review): the literal ends with a space after "lr=" — confirm whether that is intentional.
private string url = "http://www.soso.com/q?pid=s.w.res.input&gid=&cin=&sc=web&bs=dsfs&ch=w.uf&w=web&lr= ";
/// <summary>
/// Requests the address stored in <c>this.url</c>, sending the same User-Agent
/// as the incoming <c>Request</c>, and returns the whole response body as text.
/// </summary>
/// <returns>The complete response body decoded with <c>System.Text.Encoding.Default</c>.</returns>
/// <exception cref="System.Net.WebException">
/// May be thrown by <c>GetResponse</c> when the request fails.
/// </exception>
public string GetPageContent()
{
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(this.url);

    // Copy the caller's User-Agent onto the outgoing request; presumably the
    // target site refuses requests that do not look like a browser.
    request.UserAgent = Request.UserAgent;

    // The using blocks release the response (and its underlying connection),
    // the stream and the reader even when reading throws; the original closed
    // only the stream and reader, and only on success.
    // NOTE(review): Encoding.Default is the machine's default code page — confirm
    // it matches the charset the site actually serves.
    using (WebResponse response = request.GetResponse())
    using (Stream resStream = response.GetResponseStream())
    using (StreamReader sr = new StreamReader(resStream, System.Text.Encoding.Default))
    {
        return sr.ReadToEnd();
    }
}