日期:2014-05-20  浏览次数:20937 次

请教:如何用C#获取网页显示的内容? (不是获取html代码!)
我想用C#获取某个网页上显示的内容(注:不是获取html代码)



我已经用vbs实现了这样的功能,源码如下:

' Writes the visible text of every <table> on the page to Kaijiang_xj.txt.
' Run with Windows Script Host (wscript/cscript), which provides WScript.Sleep.
Const ForWriting = 2
Const READYSTATE_COMPLETE = 4

Set objFSO = CreateObject("Scripting.FileSystemObject")
' Third argument True: create the file if it does not exist yet.
Set objTextFile = objFSO.OpenTextFile("Kaijiang_xj.txt", ForWriting, True)
Set objIE = CreateObject("InternetExplorer.Application")
' Uncomment the next line to watch the browser window while debugging.
'objIE.visible = True
objIE.Navigate("http://www.xjflcp.com/ssc/")

' Wait until the page has finished loading. Sleep between polls instead of
' spinning in an empty loop, which would keep one CPU core fully busy.
Do While objIE.Busy Or objIE.ReadyState <> READYSTATE_COMPLETE
    WScript.Sleep 100
Loop

Set objDoc = objIE.Document
Set aTables = objDoc.getElementsByTagName("Table")
' One output line (block) per table, containing its rendered text.
For Each objTable In aTables
    objTextFile.WriteLine objTable.Innertext
Next

objTextFile.Close
objIE.Quit

另存为 *.vbs 文件,执行即可。



请问,怎么用C#实现这样的功能?

------解决方案--------------------
C# code
// NOTE(review): the three statements below presumably belong inside a method of a
// Windows Forms class (the WebBrowser control needs an STA thread with a message loop).
WebBrowser web = new WebBrowser();
// Attach the handler before starting navigation so the completion event cannot be missed.
web.DocumentCompleted += new WebBrowserDocumentCompletedEventHandler(web_DocumentCompleted);
web.Navigate("http://www.xjflcp.com/ssc/");

/// <summary>
/// Appends the rendered text of every table in the loaded page to Kaijiang_xj.txt,
/// one table per block, each followed by a line break.
/// </summary>
/// <param name="sender">The <see cref="WebBrowser"/> that raised the event.</param>
/// <param name="e">Event data for the completed document.</param>
void web_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
{
    WebBrowser web = (WebBrowser)sender;

    // DocumentCompleted is raised once per frame; only act when the whole page is ready,
    // otherwise the same tables could be written several times.
    if (web.ReadyState != WebBrowserReadyState.Complete || web.Document == null)
    {
        return;
    }

    HtmlElementCollection tables = web.Document.GetElementsByTagName("Table");
    foreach (HtmlElement table in tables)
    {
        // Terminate each table's text with a line break so consecutive tables
        // do not run together in the output file.
        File.AppendAllText("Kaijiang_xj.txt", table.InnerText + System.Environment.NewLine);
    }
}

------解决方案--------------------
C# code

// Step 1: download the page's HTML.
StringBuilder sb = new StringBuilder();
WebRequest myReq = WebRequest.Create("网页URL");
// The using statements release the response, stream and reader even when
// reading throws part-way through.
using (WebResponse myRes = myReq.GetResponse())
using (Stream resStream = myRes.GetResponseStream())
using (StreamReader sr = new StreamReader(resStream, Encoding.Default))
{
    // Encoding.Default is platform-dependent; NOTE(review): confirm it matches the page's charset.
    string r;
    while ((r = sr.ReadLine()) != null)
    {
        // Lines are concatenated without separators, so the pattern below
        // sees the markup as one single line.
        sb.Append(r);
    }
}

// Step 2: extract the wanted fragment with a regular expression.
string regexStr = @"正则表达式";
Match mc = Regex.Match(sb.ToString(), regexStr, RegexOptions.IgnoreCase);
// Groups[1].Value is an empty string when the pattern does not match.
this.Label1.Text = mc.Groups[1].Value;

------解决方案--------------------
C# code
// Download the raw response bytes, then decode them as UTF-8.
byte[] buffer;
// WebClient is IDisposable; dispose it as soon as the download is done.
using (WebClient web = new WebClient())
{
    buffer = web.DownloadData("http://www.xjflcp.com/ssc/");
}
string html = System.Text.Encoding.UTF8.GetString(buffer);

------解决方案--------------------
C# code
 /// <summary>
        /// Downloads the source markup of a web page.
        /// </summary>
        /// <param name="url">Address of the page to fetch.</param>
        /// <returns>
        /// The response body decoded with <see cref="System.Text.Encoding.Default"/>,
        /// or the literal string "no values" if any step of the request fails.
        /// </returns>
        public string GetPageContent(string url)
        {
            try
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

                // The using statements dispose the reader, stream and response exactly once,
                // in reverse order of creation, whether or not reading succeeds.
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                using (Stream stream = response.GetResponseStream())
                using (StreamReader reader = new StreamReader(stream, System.Text.Encoding.Default))
                {
                    return reader.ReadToEnd();
                }
            }
            catch (Exception)
            {
                // Deliberate best-effort: callers receive a sentinel instead of an exception.
                return "no values";
            }
        }