如何获取HTML字段中,所有URL地址(过滤掉其它所有)
如何获取HTML字段中,所有URL地址(过滤掉其它所有)
------解决方案--------------------用正则表达式
------解决方案--------------------TextBox2.Text = "";
         // Download the page whose address was typed into TextBox1,
         // then list every distinct http:// URL found in it in TextBox2.
         string web_url = this.TextBox1.Text; // e.g. "http://blog.csdn.net/21aspnet/"
         string all_code;
         HttpWebRequest all_codeRequest = (HttpWebRequest)WebRequest.Create(web_url);
         // using blocks release the response and the reader even when reading throws.
         using (WebResponse all_codeResponse = all_codeRequest.GetResponse())
         using (StreamReader the_Reader = new StreamReader(all_codeResponse.GetResponseStream()))
         {
             all_code = the_Reader.ReadToEnd();
         }

         // Absolute http:// URL: dotted host name followed by an optional path/query part.
         string p = @"http://([\w-]+\.)+[\w-]+(/[\w- ./?%&=]*)?";
         Regex re = new Regex(p, RegexOptions.IgnoreCase);
         MatchCollection mc = re.Matches(all_code);

         // URLs already written to TextBox2; consulted to skip repeats.
         ArrayList my_list = new ArrayList();
         foreach (Match m in mc)
         {
             string name = m.Value;
             if (my_list.Contains(name))
             {
                 continue; // already listed
             }
             // Record the URL so later occurrences are filtered out.
             my_list.Add(name);
             TextBox2.Text += name + "\n";
         }
------解决方案--------------------MatchCollection matchs = Regex.Matches(HTML, @"<a>(?<url>[^<]*)</a>", RegexOptions.IgnoreCase);
------解决方案--------------------foreach (Match m in matchs) 
 { 
  str += m.Groups["url"].Value;
 }
------解决方案--------------------<a href=[^<]*>