日期:2014-05-17 浏览次数:20452 次
/// <summary>
/// Downloads the page at <paramref name="url"/> and decodes it to text using the
/// character set that the page declares for itself.
/// </summary>
/// <remarks>
/// The downloaded bytes are first decoded as ASCII solely to search for a
/// <c>charset=...</c> declaration (encoding names are plain ASCII, so this is
/// independent of the machine's default code page). The bytes are then decoded
/// again with the declared encoding. UTF-8 is used when no declaration is found
/// or when the declared name is not an encoding this runtime knows.
/// </remarks>
/// <param name="url">
/// Address of the page. When it does not start with <c>http://</c> or
/// <c>https://</c> (case-insensitive), <c>http://</c> is prepended.
/// </param>
/// <returns>
/// The decoded page text, or <c>null</c> when <paramref name="url"/> is null or
/// blank, or when the download fails for any reason.
/// </returns>
public static string AutoHtml(string url)
{
    // Failure is reported as null everywhere else in this method, so a missing
    // address is treated the same way instead of raising NullReferenceException.
    if (url == null)
    {
        return null;
    }

    url = url.Trim();
    if (url.Length == 0)
    {
        return null;
    }

    // Only prepend a scheme when none is present. The previous substring test
    // turned "https://host" into "http://https://host".
    bool hasHttpScheme =
        url.StartsWith("http://", System.StringComparison.OrdinalIgnoreCase) ||
        url.StartsWith("https://", System.StringComparison.OrdinalIgnoreCase);
    if (!hasHttpScheme)
    {
        url = "http://" + url;
    }

    byte[] pageBytes = null;
    using (WebClient client = new WebClient())
    {
        // NOTE(review): this offers the current process credentials to whatever
        // host is requested. Kept for compatibility; confirm it is intended when
        // url can come from untrusted input.
        client.Credentials = CredentialCache.DefaultCredentials;
        try
        {
            pageBytes = client.DownloadData(url);
        }
        catch
        {
            // Deliberate best-effort: any download problem yields null.
            return null;
        }
    }

    // Look for the page's own charset declaration. The capture group accepts
    // only characters that can appear in an encoding name, so both
    // charset="utf-8" and content="text/html; charset=gb2312" forms match and
    // nothing beyond the name is captured.
    string sniffText = Encoding.ASCII.GetString(pageBytes);
    Match charsetMatch = Regex.Match(
        sniffText,
        @"charset\s*=\s*['""]?\s*([A-Za-z0-9_.:\-]+)",
        RegexOptions.IgnoreCase);

    string charsetName = charsetMatch.Success ? charsetMatch.Groups[1].Value : null;
    if (string.IsNullOrEmpty(charsetName))
    {
        charsetName = "utf-8";
    }

    Encoding pageEncoding;
    try
    {
        pageEncoding = Encoding.GetEncoding(charsetName);
    }
    catch (System.ArgumentException)
    {
        // Unknown or unsupported encoding name: fall back to the default.
        pageEncoding = Encoding.UTF8;
    }

    // Decode the raw bytes again, this time with the page's real encoding.
    return pageEncoding.GetString(pageBytes);
}