日期:2014-05-18 浏览次数:21319 次
 /// <summary>
        /// Downloads the resource at <paramref name="Url"/> over HTTP and returns
        /// the whole response body as text.
        /// </summary>
        /// <param name="Url">
        /// Address of the page to fetch. It must resolve to an HTTP(S) request,
        /// because the created request is cast to <see cref="HttpWebRequest"/>.
        /// </param>
        /// <returns>
        /// The response body decoded as UTF-8, or the sentinel string "错误"
        /// when any exception is raised while creating the request, fetching
        /// the response, or reading the body.
        /// </returns>
        public static string _GetHtml(string Url)
        {
            try
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);

                // Dispose the response, its stream and the reader deterministically,
                // so the underlying connection is released even when reading fails.
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                using (Stream body = response.GetResponseStream())
                using (StreamReader reader = new StreamReader(body, Encoding.UTF8))
                {
                    // The body is always decoded as UTF-8; the charset declared by
                    // the server (if any) is not consulted.
                    // ReadToEnd replaces the chunked read + string concatenation,
                    // which re-copied the accumulated text on every iteration.
                    return reader.ReadToEnd();
                }
            }
            catch (Exception)
            {
                // Deliberate best-effort contract: callers get a sentinel value
                // instead of an exception.
                return "错误";
            }
        }
        /// <summary>
        /// Demo entry point: fetches a fixed page with <c>_GetHtml</c>, prints every
        /// run of consecutive characters in the range U+4E00..U+9FA5 on its own
        /// line, then waits for a line of console input before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            string pageSource = _GetHtml("http://topic.csdn.net/u/20120225/22/b5912ce0-ed81-4932-8bb3-a456708d69d4.html");

            // One match per maximal run of characters inside the CJK range.
            MatchCollection chineseRuns = Regex.Matches(pageSource, @"[\u4e00-\u9fa5]+", RegexOptions.None);
            for (int i = 0; i < chineseRuns.Count; i++)
            {
                Console.WriteLine(chineseRuns[i].Value);
            }

            // Keep the console window open until the user presses Enter.
            Console.ReadLine();
        }
------解决方案--------------------