// 日期:2014-05-20 浏览次数:20820 次
/**
 * Reads a local HTML file into memory and prints the image URL of each
 * {@code ImageTag} node that the parser returns for it.
 */
public class ParseTest {

    /** File that is parsed when no path is supplied on the command line. */
    private static final String DEFAULT_LOCAL_FILE =
            "C:\\Documents and Settings\\Administrator\\桌面\\mzw.htm";

    /**
     * Loads the HTML file, runs it through the parser with an
     * {@code ImageTag} class filter, and prints one image URL per line to
     * standard output.
     *
     * @param args optional; when {@code args[0]} is present it is used as the
     *             path of the HTML file, otherwise {@link #DEFAULT_LOCAL_FILE}
     *             is used
     */
    public static void main(String[] args) {
        String localFile = (args != null && args.length > 0) ? args[0] : DEFAULT_LOCAL_FILE;

        // Load the whole local HTML file into a String.
        String html = HtmlToStr(localFile);
        if (html.isEmpty()) {
            // HtmlToStr has already reported any read failure; there is nothing to parse.
            System.err.println("No HTML content could be read from: " + localFile);
            return;
        }

        Parser parser = new Parser();
        try {
            parser.setInputHTML(html);
            NodeList nodes = parser.parse(new NodeClassFilter(ImageTag.class));
            for (int i = 0; i < nodes.size(); i++) {
                ImageTag imageTag = (ImageTag) nodes.elementAt(i);
                System.out.println(imageTag.getImageURL());
            }
        } catch (ParserException e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads a text file line by line and returns its content with every line
     * terminated by {@code "\n"} (including the last one).
     *
     * <p>This method is best-effort and never throws: if opening or reading
     * the file fails, the stack trace is printed and whatever was read up to
     * that point (possibly the empty string) is returned.
     *
     * <p>NOTE(review): {@code FileReader} decodes bytes with the JVM's default
     * charset, so the result depends on the platform configuration. If the
     * file's encoding is known, consider decoding with an explicit charset.
     *
     * @param filePath path of the file to read
     * @return the file content, or the portion read before a failure occurred
     */
    public static String HtmlToStr(String filePath) {
        StringBuilder html = new StringBuilder();
        // try-with-resources closes the reader even when readLine() throws.
        try (BufferedReader reader = new BufferedReader(new FileReader(filePath))) {
            String line;
            while ((line = reader.readLine()) != null) {
                html.append(line).append('\n');
            }
        } catch (Exception e) {
            // Deliberately broad: keeps the original "never throws" contract
            // (e.g. a null path is reported here instead of propagating).
            e.printStackTrace();
        }
        return html.toString();
    }
}