// Date: 2014-05-17 | Views: 20532
/// <summary>
/// Downloads the page at <paramref name="url"/> and returns its content as text, decoded
/// with the character set the page declares in a <c>&lt;meta&gt;</c> tag. Falls back to
/// UTF-8 when no charset is declared or the declared name is not a known encoding.
/// </summary>
/// <param name="url">
/// Address of the page. When it does not start with "http://" or "https://"
/// (case-insensitive), "http://" is prepended.
/// </param>
/// <returns>
/// The decoded page text, or <c>null</c> when <paramref name="url"/> is null/blank or the
/// download fails for any reason.
/// </returns>
public static string AutoHtml(string url)
{
    // Failure is reported as null throughout this method, so blank input follows suit
    // instead of surfacing a NullReferenceException.
    if (string.IsNullOrWhiteSpace(url))
    {
        return null;
    }

    url = url.Trim();

    // Check the scheme at the START of the string only: an "http://" buried in a query
    // string must not count, and "https://" must not get a second scheme prepended.
    bool hasHttpScheme =
        url.StartsWith("http://", System.StringComparison.OrdinalIgnoreCase) ||
        url.StartsWith("https://", System.StringComparison.OrdinalIgnoreCase);
    if (!hasHttpScheme)
    {
        url = "http://" + url;
    }

    byte[] myDataBuffer;
    using (WebClient myWebClient = new WebClient())
    {
        myWebClient.Credentials = CredentialCache.DefaultCredentials;
        try
        {
            myDataBuffer = myWebClient.DownloadData(url);
        }
        catch
        {
            // Deliberate best-effort: any download problem (bad URI, DNS, HTTP error, ...)
            // is reported to the caller as null.
            return null;
        }
    }

    // Charset names are plain ASCII, so an ASCII decode is enough to locate the
    // declaration regardless of the page's real encoding.
    string sniffText = Encoding.ASCII.GetString(myDataBuffer);

    // Matches both <meta charset="gb2312"> and
    // <meta http-equiv="Content-Type" content="text/html; charset=gb2312">,
    // with or without quotes and with or without a self-closing "/>".
    Match charSetMatch = Regex.Match(
        sniffText,
        @"<meta[^>]*?charset\s*=\s*['""]?\s*([A-Za-z0-9_.:\-]+)",
        RegexOptions.IgnoreCase);

    Encoding pageEncoding = Encoding.UTF8;
    if (charSetMatch.Success)
    {
        try
        {
            pageEncoding = Encoding.GetEncoding(charSetMatch.Groups[1].Value);
        }
        catch (System.ArgumentException)
        {
            // Unknown or platform-unsupported charset name: keep the UTF-8 default.
            pageEncoding = Encoding.UTF8;
        }
    }

    return pageEncoding.GetString(myDataBuffer);
}