// Date: 2014-05-19  Views: 20728
package cn.yq; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.net.MalformedURLException; import java.net.URL; import org.htmlparser.Node; import org.htmlparser.NodeFilter; import org.htmlparser.Parser; import org.htmlparser.filters.NodeClassFilter; import org.htmlparser.filters.OrFilter; import org.htmlparser.tags.ImageTag; import org.htmlparser.tags.LinkTag; import org.htmlparser.util.NodeList; import org.htmlparser.util.ParserException; public class URLParse { public static void main(String[] args) { try { Parser parser = new Parser("http://slide.sports.sina.com.cn/euro2012/slide_2_31402_29988.html"); parser.setEncoding("gb2312"); //提取链接 NodeFilter frameFilter = new NodeFilter() { public boolean accept(Node node) { if (node.getText().startsWith("frame src=")) { return true; } else { return false; } } }; //过滤出图片 OrFilter orFilter = new OrFilter(new NodeClassFilter(LinkTag.class), new NodeClassFilter(ImageTag.class)); OrFilter linkFilter = new OrFilter(orFilter, frameFilter); NodeList nodelist = parser.extractAllNodesThatMatch(linkFilter); //循环取得image标签 for (int i = 0; i < nodelist.size(); i++) { Node tag = nodelist.elementAt(i); if (tag instanceof ImageTag) { ImageTag image = (ImageTag) nodelist.elementAt(i); String urlstr = image.getImageURL(); InputStream is; OutputStream os; int len; if(urlstr.endsWith("jpg") || urlstr.endsWith("gif")){ System.out.println(urlstr); //下载到本地目录 byte[] buf = new byte[102400]; try { URL url = new URL(urlstr); String suffix = urlstr.substring(urlstr.lastIndexOf("."),urlstr.length()); try { is = url.openConnection().getInputStream(); os = new FileOutputStream("d:/downloadimage/" + i + suffix); while((len = is.read(buf)) != -1){ os.write(buf,0,len); } os.close(); is.close(); } catch (IOException e) { e.printStackTrace(); } } catch (MalformedURLException e) { e.printStackTrace(); }