日期:2014-05-17 浏览次数:20744 次
网页的标题(Title)、Keywords 和 Description 这三项对于建立网页索引非常重要。以下代码用 htmlparser 对网页进行解析,得到这三个值。
// Parse a local HTML file with HTMLParser and print its title plus the
// "Keywords" and "Description" <meta> values.
try {
    // Accept a node when it is either a <title> tag or a <meta> tag.
    NodeFilter titleFilter = new TagNameFilter("title");
    NodeFilter metaFilter = new TagNameFilter("meta");
    OrFilter titleOrMeta = new OrFilter();
    titleOrMeta.setPredicates(new NodeFilter[] {titleFilter, metaFilter});

    Parser parser = new Parser();
    parser.setURL("D:\\test.html");

    NodeList matches = parser.extractAllNodesThatMatch(titleOrMeta);
    for (int i = 0; i < matches.size(); i++) {
        Tag tag = (Tag) matches.elementAt(i);
        if (tag instanceof MetaTag) {
            // <meta> tags without a name attribute (e.g. http-equiv ones) are skipped.
            String name = tag.getAttribute("name");
            if (name != null) {
                if (name.equalsIgnoreCase("Keywords")) {
                    System.out.println("Keywords : " + tag.getAttribute("content"));
                } else if (name.equalsIgnoreCase("Description")) {
                    System.out.println("Description : " + tag.getAttribute("content"));
                }
            }
        } else if (tag instanceof TitleTag) {
            // Tag.getText() is the raw markup between '<' and '>' (the tag name and
            // attributes), not the element body; getTitle() returns the title string.
            System.out.println("Title : " + ((TitleTag) tag).getTitle());
        }
    }
} catch (Exception e) {
    e.printStackTrace();
}
?