日期:2014-05-17 浏览次数:20937 次
private static String regex = "http://www.yifujx.com/cn/showclass\\.asp\\?id=\\d{1,2}&pid=\\d{1,2}"; private static String urlRegex = "http://www.yifujx.com/cn/showProduct\\.asp\\?ID=\\d{1,2}"; public static void main(String[] args) throws Exception { // Parser parser = Parser.createParser("http://www.yifujx.com/cn/", "gb2312"); Parser parser = new Parser("http://www.yifujx.com/cn/"); HtmlPage htmlPage = new HtmlPage(parser); parser.visitAllNodesWith(htmlPage); NodeList nodeList = htmlPage.getBody(); NodeFilter nodeFilter = new TagNameFilter("A"); nodeList = nodeList.extractAllNodesThatMatch(nodeFilter, true); int size = nodeList.size(); for(int i=0; i<size; i++){ LinkTag linkTag = (LinkTag)nodeList.elementAt(i); String link = linkTag.getLink(); if( link.matches(regex) ){ System.out.println( link + "\t" + linkTag.getChildrenHTML()); } } }?