日期:2014-05-17  浏览次数:20744 次

html解析得不到结果
有人做过 HTML 解析吗?求大神帮助!下面的代码解析不到任何匹配的节点,这是什么原因呢?
java代码:
import java.io.File;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.List;

import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.AndFilter;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.util.NodeList;

public class parser1 {
  public static void main(String[] args) {   
        File file = new File("E:\\STOCK\\html\\2.html");
        try {   
            Parser parser = new Parser(file.getAbsolutePath());   
            parser.setEncoding("UTF-8");  
            NodeFilter filter= new AndFilter(new TagNameFilter("div"),
             new HasAttributeFilter("class", "search_feed"));
            NodeList nodeList = parser.extractAllNodesThatMatch(filter);  
            System.out.println("flter3大小="+nodeList.size());

     } catch (Throwable e) {   
            e.printStackTrace();   
        }   
          
   }   
}

2.html内容如下:
<script>STK && STK.pageletM && STK.pageletM.view({"pid":"pl_weibo_feedlist","js":["apps\/search\/js\/pl\/weibo\/feedList.js?version=201312061633"],"css":["appstyle\/searchV45\/css\/pl\/pl_feed.css?version=201312061633","appstyle\/searchV45\/css\/pl\/pl_page.css?version=201312061633","appstyle\/searchV45\/css\/pl\/pl_shortlink.css?version=201312061633","appstyle\/searchV45\/css\/pl\/pl_prizeper.css?version=201312061633"],"html":" <div class=\"search_feed\">\n  <div class=\"feed_lists W_linka W_texta\" node-type=\"feed_list\">\n <a node-type=\"feed_list_newBar\" href=\"javascript:void(0);\" style=\"display:none\" class=\"notes\" suda-data=\"key=tblog_search_v4.1&value=weibo_new\">\u6709 20 \u6761\u65b0\u5fae\u535a\uff0c\u70b9\u51fb\u67e5\u770b<\/a>\n   \n<dl class=\"feed_list\" mid=\"3652848963153960\" action-type=\"feed_list_item\" isforward=\"1\">\n <dt class=\"face\">\n  <a href=\"http:\/\/weibo.com\/ethangaostudio\" title=\"\u9ad8\u627f\u661f\" target=\"_blank\" suda-data=\"key=tblog_search_v4.1&value=weibo_feed_1:2004370851\">\n <img src=\"http:\/\/tp4.sinaimg.cn\/2004370851\/50\/5659234133\/1\" alt=\"\u9ad8\u627f\u661f\" width=\"50\" height=\"50\" usercard=\"id=2004370851&usercardkey=weibo_mp\"\/>\n  <\/a>\n <\/dt>\n <dd class=\"content\">\n  <p node-type=\"feed_list_content\">\n <a nick-name=\"\u9ad8\u627f\u661f\" href=\"http:\/\/weibo.com\/ethangaostudio\" target=\"_blank\" title=\"