日期:2014-05-17  浏览次数:20672 次

远程抓取动态网页内容
我本来不是做 .NET 的,老师偏要用 .NET 来做,要求获取远程某个网页的源码内容,以下是我从网上找来的代码
C# code

using System;
using System.Collections.Generic;
using System.Text;
using System.Net;
using System.IO;

namespace txl
{
    /// <summary>
    /// Console tool that downloads the HTML source of a web page and saves it
    /// to a local file.
    /// </summary>
    class Program
    {
        // Defaults used when no command-line arguments are supplied.
        private const string DefaultUrl = "http://www.163.com";
        private const string DefaultOutputPath = "e:\\output.html";

        /// <summary>
        /// Fetches a page over HTTP and writes its source to disk.
        /// </summary>
        /// <param name="args">
        /// Optional: <c>args[0]</c> is the URL to download and <c>args[1]</c> is the
        /// output file path. Missing arguments fall back to the built-in defaults.
        /// </param>
        /// <remarks>
        /// Only the markup returned by the server is saved; content that the page
        /// builds later with JavaScript/AJAX is not part of the HTTP response.
        /// Network and file errors are not caught here and terminate the program.
        /// </remarks>
        static void Main(string[] args)
        {
            string url = (args != null && args.Length > 0) ? args[0] : DefaultUrl;
            string outputPath = (args != null && args.Length > 1) ? args[1] : DefaultOutputPath;

            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
            // Servers may send gzip/deflate-compressed bodies; without this the
            // compressed bytes would be decoded as text and come out as garbage.
            request.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;

            string html;
            Encoding pageEncoding;
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream body = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(body, ResolveEncoding(response)))
            {
                // ReadToEnd keeps the original line breaks and avoids the quadratic
                // cost of concatenating one line at a time.
                html = reader.ReadToEnd();
                // The reader may have switched encoding after seeing a byte order mark.
                pageEncoding = reader.CurrentEncoding;
            }

            // Save with the encoding the page was decoded with so that any charset
            // declared inside the HTML still matches the bytes on disk.
            using (StreamWriter writer = new StreamWriter(outputPath, false, pageEncoding))
            {
                writer.Write(html);
            }
        }

        /// <summary>
        /// Picks the text encoding for a response from the charset declared in its
        /// Content-Type header, falling back to UTF-8.
        /// </summary>
        /// <param name="response">The HTTP response whose body is about to be decoded.</param>
        /// <returns>The declared encoding, or UTF-8 when none is declared or it is unknown.</returns>
        private static Encoding ResolveEncoding(HttpWebResponse response)
        {
            // CharacterSet can report a default of "ISO-8859-1" for text/* responses
            // that declare no charset at all, so only trust it when the header
            // actually contains a charset parameter.
            string contentType = response.ContentType;
            if (string.IsNullOrEmpty(contentType)
                || contentType.IndexOf("charset", StringComparison.OrdinalIgnoreCase) < 0)
            {
                return Encoding.UTF8;
            }

            string charset = response.CharacterSet;
            if (string.IsNullOrEmpty(charset))
            {
                return Encoding.UTF8;
            }

            try
            {
                return Encoding.GetEncoding(charset.Trim(' ', '"', '\''));
            }
            catch (ArgumentException)
            {
                // Unknown or unsupported charset name on this runtime.
                return Encoding.UTF8;
            }
        }
    }
}


不知道正不正确,我对 .NET 完全不熟,只是把代码 copy 过来改了改。不管我用 gb2312 还是 utf-8,拿到的全是乱码,而且动态生成的内容完全拿不到。请高人指点。

------解决方案--------------------

通过 AJAX 动态加载的数据用这种方式是拿不到的:HttpWebRequest 只能取回服务器直接输出的 HTML 源码,不会执行页面里的 JavaScript。

C# code

/// <summary>
        /// Downloads the full source of a web page and returns it as a string.
        /// </summary>
        /// <param name="Url">Absolute HTTP(S) address of the page to fetch.</param>
        /// <returns>
        /// The response body decoded as text, or the literal string "错误" when the
        /// request or the read fails for any reason. This method does not throw.
        /// </returns>
        /// <remarks>
        /// Only the markup sent by the server is returned; content that the page
        /// loads afterwards via JavaScript/AJAX is not included.
        /// </remarks>
        public static string _GetHtml(string Url)
        {
            try
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);
                // Transparently inflate gzip/deflate bodies; otherwise compressed
                // bytes would be decoded as text and come out as garbage.
                request.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;

                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                using (Stream body = response.GetResponseStream())
                {
                    Encoding encoding = Encoding.UTF8;

                    // CharacterSet can report a default of "ISO-8859-1" for text/*
                    // responses with no declared charset, so only trust it when the
                    // Content-Type header actually carries a charset parameter.
                    string contentType = response.ContentType;
                    string charset = response.CharacterSet;
                    if (!string.IsNullOrEmpty(contentType)
                        && contentType.IndexOf("charset", StringComparison.OrdinalIgnoreCase) >= 0
                        && !string.IsNullOrEmpty(charset))
                    {
                        try
                        {
                            encoding = Encoding.GetEncoding(charset.Trim(' ', '"', '\''));
                        }
                        catch (ArgumentException)
                        {
                            // Unknown or unsupported charset name: keep the UTF-8 default.
                        }
                    }

                    using (StreamReader reader = new StreamReader(body, encoding))
                    {
                        // One call instead of appending 256-char chunks to a string,
                        // which was quadratic in the page size.
                        return reader.ReadToEnd();
                    }
                }
            }
            catch (Exception)
            {
                // Deliberately broad: existing callers rely on getting the sentinel
                // string back instead of an exception for every kind of failure.
                return "错误";
            }
        }