日期:2014-05-18 浏览次数:21127 次
/// <summary>
/// Downloads the resource at <paramref name="Url"/> and returns its entire body
/// decoded as UTF-8 text.
/// </summary>
/// <param name="Url">
/// Address of the page to fetch. The request is cast to <see cref="HttpWebRequest"/>,
/// so an address that does not produce an HTTP request ends up in the failure path.
/// </param>
/// <returns>
/// The complete response body, or the sentinel string "错误" when any exception is
/// raised while creating the request, fetching the response, or reading the body.
/// </returns>
public static string _GetHtml(string Url)
{
    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);

        // Dispose the response, its stream and the reader deterministically so the
        // underlying connection is released even when reading fails part-way.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream body = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(body, Encoding.UTF8))
        {
            // ReadToEnd replaces the manual 256-char read loop and the repeated
            // string concatenation it required.
            return reader.ReadToEnd();
        }
    }
    catch (Exception)
    {
        // Deliberate best-effort contract kept for existing callers: every failure
        // (bad URL, network error, read error) is reported as this sentinel text.
        return "错误";
    }
}

/// <summary>
/// Fetches a fixed page and prints every run of characters in the range
/// U+4E00..U+9FA5 found in it, one run per line, then waits for Enter.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    string htmlStr = _GetHtml("http://topic.csdn.net/u/20120225/22/b5912ce0-ed81-4932-8bb3-a456708d69d4.html");

    // NOTE: the failure sentinel returned by _GetHtml consists of characters inside
    // this range, so a failed download prints the sentinel as if it were a match.
    Regex re = new Regex(@"[\u4e00-\u9fa5]+", RegexOptions.None);

    foreach (Match ma in re.Matches(htmlStr))
    {
        Console.WriteLine(ma.Value);
    }

    // Keep the console window open until the user presses Enter.
    Console.ReadLine();
}
------解决方案--------------------