怎么设置HttpWebRequest才能正确获取"http://alexa.chinaz.com/Alexa_More.asp?Domain=csdn.net"这个网页?
在浏览器中输入该网址可以正常显示,
但下面的代码返回的却是网站的默认页面。
我觉得应该是服务器端做了某种限制。
请问怎样设置才能让HttpWebRequest模拟浏览器发送请求?
------------------------------------------------
// Requests the page with browser-like headers and reads the entire response body
// into `content`. `webProxyURL` is expected to be defined by the surrounding code.
HttpWebResponse webrep;
StreamReader strrd;
string content;
HttpWebRequest webR = (HttpWebRequest)WebRequest.Create(" http://alexa.chinaz.com/Alexa_More.asp?Domain=csdn.net");
webR.AllowAutoRedirect = false;
// The container starts out empty, so this request is sent without any cookies.
// NOTE(review): the replies in this thread attribute the wrong page to missing
// cookies; a prior request sharing this container may be needed -- confirm.
CookieContainer cc = new CookieContainer();
webR.CookieContainer = cc;
webR.UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 2.0.50727)";
webR.Accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, application/x-silverlight, */*";
webR.Method = "GET";
if (webProxyURL != null)
{
    // Route the request through the configured web proxy (bypassing it for local addresses).
    WebProxy proxyObject = new WebProxy(webProxyURL, true);
    webR.Proxy = proxyObject;
}
webR.KeepAlive = true;
// Dispose the response and the reader deterministically so the underlying
// connection and stream are released even if reading throws.
using (webrep = (HttpWebResponse)webR.GetResponse())
using (strrd = new StreamReader(webrep.GetResponseStream(), Encoding.Default))
{
    // Read the full body of the fetched page.
    content = strrd.ReadToEnd();
}
------解决方案--------------------
你清空Cookie之后,在浏览器里面直接打开你的地址也是一样的,得到的也是114msn的那个。
------解决方案--------------------
的确是cookies的原因:
// First request: hit the page without a query string and keep the cookie container it returns.
CookieContainer CC = Hello.Get.GetCookies("http://alexa.chinaz.com/Alexa_More.asp", null);
// Second request: fetch the target URL with that same cookie container attached.
string html = Hello.Get.Html("http://alexa.chinaz.com/Alexa_More.asp?domain=csdn.net",null,CC, "GET");
这样(先请求一次页面获取Cookie,再带着该Cookie请求目标地址)就能得到正确的HTML。
代码如下:
/// <summary>
/// Sends a GET request to <paramref name="link"/> with a fresh cookie container
/// attached, closes the response without reading its body, and returns the container.
/// </summary>
/// <param name="link">Address to request.</param>
/// <param name="encoding">
/// Encoding name; replaced by "gb2312" when null. The value is not read again
/// inside this method.
/// </param>
/// <returns>The <see cref="CookieContainer"/> that was attached to the request.</returns>
public static CookieContainer GetCookies(string link, string encoding)
{
    // Same defaulting as before; nothing below decodes a body, so the value goes unused.
    encoding = encoding ?? "gb2312";

    CookieContainer jar = new CookieContainer();

    HttpWebRequest req = (HttpWebRequest)WebRequest.Create(link);
    req.CookieContainer = jar;
    req.Method = "GET";
    // userAgent and contentType are defined elsewhere in the enclosing class.
    req.UserAgent = userAgent;
    req.ContentType = contentType;

    // Only the request/response round trip matters here; the body is discarded.
    using (HttpWebResponse resp = (HttpWebResponse)req.GetResponse())
    {
    }

    return jar;
}
public static string Html(string strLink, string encoding,CookieContainer CC, string method)
{
if (encoding == null)
{
encoding = "gb2312";
}
if (method == null)
{
method = "POST";
}
try
{
string html;
HttpWebRequest request = (HttpWebRequest)WebRequest.Create(strLink);
request.UserAgent = userAgent;
request.CookieContainer = CC;
HttpWebResponse response = (HttpWebResponse)request.GetResponse();
CC.Add(response.Cookies);
using (Stream sm = response.GetResponseStream())
{
using (StreamReader sr = new StreamReader(sm, Encoding.GetEncoding(encoding)))
{
html = sr.ReadToEnd();
sr.Close();