日期:2014-05-18  浏览次数:20809 次

.net中怎样抓取其他网页的内容
就是我想获得别人网页上的数据,比如说一篇新闻报道。我在百度上查了好久,查出来的都不对,希望高手赐教。最好有示例。

------解决方案--------------------
使用 HttpWebRequest 请求目标网页,
得到源代码后再进行分析即可。

------解决方案--------------------
C# code
// Fetches _destUrl over HTTP and stores the response text (or an error message) in _resultStr.
// The request/response are declared up front so the finally block can release the response.
            HttpWebRequest _request = null;
            HttpWebResponse _response = null;

            try
            {
                // Build the web request for the target URL.
                _request = (HttpWebRequest)WebRequest.Create(_destUrl);

                // Send the request and obtain the response object.
                _response = (HttpWebResponse)_request.GetResponse();

                // Convert the response into the result string.
                _resultStr = ConvertResponseToString(_response);
            }
            catch (WebException ex)
            {
                // ex.Response is null when no HTTP response was received at all
                // (e.g. DNS failure, connection refused, timeout), so it must be
                // null-checked before its status code is inspected.
                _response = ex.Response as HttpWebResponse;

                if (_response != null && _response.StatusCode == HttpStatusCode.InternalServerError)
                {
                    // HTTP 500: deliberately left unhandled here, as in the original snippet;
                    // _resultStr is not modified on this path.
                }
                else
                {
                    // Any other failure (including "no response at all"):
                    // report the exception message as the result.
                    _resultStr = ex.Message;
                }
            }
            finally
            {
                // Always release the connection held by the response, if there is one.
                if (_response != null) _response.Close();
            }

------解决方案--------------------
C# code

/// <summary>
        /// Downloads the page at the given URL and returns its source as text.
        /// </summary>
        /// <remarks>
        /// The response body is always decoded as gb2312. Every line is trimmed and the
        /// lines are concatenated without separators; afterwards each "&lt;br /&gt;" is
        /// replaced with CRLF. When the server answers with an HTTP error status, the body
        /// of that error response is returned.
        /// </remarks>
        /// <param name="PageURL">URL of the page to fetch.</param>
        /// <returns>
        /// The processed page source, or the exception message when the request cannot be
        /// created, no response is received, or reading the response fails.
        /// </returns>
        public String GetPageCode(string PageURL)
        {
            string Charset = "gb2312";
            try
            {
                // Connect to the target page.
                HttpWebRequest wreq = (HttpWebRequest)WebRequest.Create(PageURL);
                // Original author's note: sends an X_FORWARDED_FOR header, which can fake the
                // client IP for servers that read the source IP from it; it does not affect logs.
                wreq.Headers.Add("X_FORWARDED_FOR", "101.0.0.11");

                wreq.Method = "Get";
                wreq.KeepAlive = true;
                wreq.ContentType = "application/x-www-form-urlencoded";
                wreq.AllowAutoRedirect = true;
                wreq.Accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*";
                wreq.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)";

                wreq.CookieContainer = new CookieContainer();

                HttpWebResponse wresp;
                try
                {
                    wresp = (HttpWebResponse)wreq.GetResponse();
                }
                catch (WebException ex)
                {
                    // An HTTP error status still carries a response whose body can be read.
                    wresp = ex.Response as HttpWebResponse;
                    if (wresp == null)
                    {
                        // No response at all (DNS failure, timeout, ...): rethrow so the outer
                        // handler returns the WebException message instead of failing later
                        // with an uninformative NullReferenceException.
                        throw;
                    }
                }

                // Read the body as a stream using the fixed charset; the using statements
                // release the connection, stream and reader even if reading throws.
                using (wresp)
                using (Stream s = wresp.GetResponseStream())
                using (StreamReader objReader = new StreamReader(s, System.Text.Encoding.GetEncoding(Charset)))
                {
                    // StringBuilder avoids re-copying the whole page on every appended line.
                    System.Text.StringBuilder html = new System.Text.StringBuilder();

                    string strLine;
                    while ((strLine = objReader.ReadLine()) != null)
                    {
                        html.Append(strLine.Trim());
                    }

                    return html.ToString().Replace("<br />", "\r\n");
                }
            }
            catch (Exception n) // On any error, return the error message as the result.
            {
                return n.Message;
            }
        }

------解决方案--------------------
莫非小弟面试的工作是做爬虫!

------解决方案--------------------
探讨

qxyywy