C# 抓取网页时,有时得到乱码,有时正常,求教高手
// Download the page at webaddr and read the entire response body into pagedata_1.
WebRequest pageRequest = WebRequest.Create(webaddr);
Stream bodyStream = pageRequest.GetResponse().GetResponseStream();
// The body is always decoded as gb2312 here, regardless of the charset the server reports.
System.Text.Encoding pageEncoding = System.Text.Encoding.GetEncoding("gb2312");
StreamReader bodyReader = new StreamReader(bodyStream, pageEncoding);
pagedata_1 = bodyReader.ReadToEnd();
上面是代码。pagedata_1 有时读到的是乱码,有时可以正常读到数据。试了很多种编码,问题一直没有解决。请教高手:怎样才能稳定可靠地读到网页中的数据?万分感谢!
------解决方案--------------------
/// <summary>
/// Requests <paramref name="Url"/> (without following redirects) and extracts the
/// charset name declared in the response's Content-Type header.
/// </summary>
/// <param name="Url">Absolute HTTP/HTTPS address of the page to probe.</param>
/// <returns>
/// The declared charset name (for example "utf-8" or "gb2312") with any surrounding
/// quotes and trailing ';' removed, or an empty string when the URL is empty or not
/// HTTP, the request fails, or the header declares no charset.
/// </returns>
public static string GetEncoding(string Url)
{
    if (string.IsNullOrEmpty(Url))
    {
        return "";
    }

    try
    {
        // WebRequest.Create returns a non-HTTP request type for schemes such as file://;
        // treat that as "no charset available" instead of relying on a failed cast.
        HttpWebRequest request = WebRequest.Create(Url) as HttpWebRequest;
        if (request == null)
        {
            return "";
        }
        request.AllowAutoRedirect = false;

        // Read the headers while the response is still open, and let the using block
        // release the connection even if something below throws.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        {
            string contentType = response.Headers.Get("Content-Type");
            if (string.IsNullOrEmpty(contentType))
            {
                return "";
            }

            // Accepts "charset=", "char-set=" and ':' as separator (as before), but stops
            // at whitespace, ';' or a quote so that values such as charset="utf-8"; yield
            // a name that Encoding.GetEncoding can consume.
            Match declared = Regex.Match(
                contentType,
                @"(?is)char\-?set\s*[:=]\s*[""']?(?<name>[^\s;""']+)");
            return declared.Success ? declared.Groups["name"].Value : "";
        }
    }
    catch (System.Exception)
    {
        // Deliberate best-effort: callers expect "" (not an exception) when the charset
        // cannot be determined, e.g. on network errors or malformed URLs.
        return "";
    }
}
------解决方案--------------------using (HttpWebResponse response = (HttpWebResponse)webrequest.GetResponse())
{
if (response.ContentEncoding.ToLower().Contains("gzip"))
{
using (GZipStream stream = new GZipStream(response.GetResponseStream(), CompressionMode.Decompress))
{
using (StreamReader reader = new StreamReader(stream, Encoding.UTF8))
{
model.html = reader.ReadToEnd();
}
&