请教C#如何提取网页正文。急!急!急!
C#如何提取网页正文?例如要提取某个网页的新闻标题和新闻正文:标题我已经会提取了,关键是正文该怎么提取?
各位高手来帮帮忙!~
------解决方案--------------------
下面的方法用于下载网页源码(返回的是整页HTML文本,正文需要再从中提取):
/// <summary>
/// Downloads the resource at <paramref name="a_strUrl"/> with an HTTP GET
/// request and returns its body as text.
/// </summary>
/// <param name="a_strUrl">Absolute URL of the page to download.</param>
/// <returns>
/// The response body with every line terminated by "\r\n". If sending the
/// request or reading the response throws, the exception is not propagated;
/// the string "错误: " followed by the exception message is returned instead.
/// </returns>
/// <remarks>
/// The body is decoded with <see cref="Encoding.Default"/>, i.e. the platform's
/// default encoding, regardless of any charset the server declares.
/// NOTE(review): pages served in a different encoding will come back garbled —
/// confirm this is acceptable for the target sites.
/// URL parsing and request creation happen outside the try block, so an
/// invalid <paramref name="a_strUrl"/> still throws to the caller.
/// </remarks>
public static string GetHttpSourceValue(string a_strUrl)
{
    HttpWebRequest myReq = (HttpWebRequest)WebRequest.Create(new System.Uri(a_strUrl));

    // The verb and header values must not carry trailing whitespace: a method
    // name containing a space is rejected by HttpWebRequest.Method.
    myReq.Method = "GET";
    myReq.Accept = "*/*";
    myReq.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)";

    string strResult;
    try
    {
        // using blocks release the connection, stream and reader even when
        // reading fails part-way through.
        using (HttpWebResponse httpResp = (HttpWebResponse)myReq.GetResponse())
        using (Stream myStream = httpResp.GetResponseStream())
        using (StreamReader sr = new StreamReader(myStream, Encoding.Default))
        {
            StringBuilder strBuilder = new StringBuilder();

            // Loop on ReadLine() == null rather than Peek(): Peek() can report
            // -1 on a network stream before the whole response has arrived,
            // which would silently truncate the page.
            string line;
            while ((line = sr.ReadLine()) != null)
            {
                strBuilder.Append(line).Append("\r\n");
            }

            strResult = strBuilder.ToString();
        }
    }
    catch (Exception exp)
    {
        // Best-effort contract kept from the original: report the failure in
        // the returned text instead of throwing.
        strResult = "错误: " + exp.Message;
    }

    return strResult;
}