// Date: 2014-05-16; view count: 20404
package com.th.spider.test; import java.io.BufferedOutputStream; import java.io.FileOutputStream; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.http.HttpEntity; import org.apache.http.HttpResponse; import org.apache.http.HttpStatus; import org.apache.http.client.HttpClient; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.DefaultHttpClient; import org.apache.http.params.CoreConnectionPNames; import org.apache.http.util.EntityUtils; import org.jsoup.Connection; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; public class Exmaple3 { private static final Log log = LogFactory.getLog(Exmaple3.class); /** * 抓取图片存放目录 */ private static final String PIC_DIR = "/home/li/pic"; /** * 链接超时 */ private static final int TIME_OUT = 5000; static void go3(String url) throws Exception { Connection conn= Jsoup.connect(url); Document doc = conn.get(); Elements links = doc.select("div.piclist img[src]"); for(int i=0;i<links.size();i++){ Element element = links.get(i); final String imgUrl = element.attr("src"); log.info(imgUrl); Thread.sleep(500); new Thread(new Runnable() { public void run() { try { save(imgUrl); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } }).start(); } } static void go2(String url) throws Exception { Connection conn= Jsoup.connect(url); Document doc = conn.get(); Elements links = doc.select("div.cc a[href]"); for(int i=0;i<links.size();i++){ Element element = links.get(i); final String dirUrl = "http://www.3lian.com"+element.attr("href"); log.info(dirUrl); Thread.sleep(500); new Thread(new Runnable() { public void run() { try { Connection conn= Jsoup.connect(dirUrl); Document doc = conn.get(); Elements images = doc.select("div.mb_jjnr img[src]"); for(int j=0;j<images.size();j++){ Element img = images.get(j); String imgUrl = img.attr("src"); log.info(imgUrl); 
save(imgUrl); } } catch (Exception e) { e.printStackTrace(); } } }).start(); } } /** * 处理帖子URL * @param url * @throws Exception */ static void go(String url) throws Exception { // JSOP创建链接 Connection conn = Jsoup.connect(url); // 请求返回整个文档对象 Document doc = conn.post(); // 选择所有class=zoom 的img标签对象 Elements imgs = doc.select("img[class=zoom]"); // 循环每个img标签 for (int i = 0; i < imgs.size(); i++) { Element img = imgs.get(i); // 取得图片的下载地址 String picURL = doc.baseUri() + img.attr("file"); log.info(picURL); // 保存图片 save(picURL); } } //<img src="static/image/common/none.gif" file="data/attachment/forum/201105/08/174412nz3jq4z90s33s2t0.jpg" width="770" class="zoom" onclick="zoom(this, this.src)" id="aimg_180565" onmouseover="showMenu({'ctrlid':this.id,'pos':'12'})" alt="img_src_29620.jpg" title