// Date: 2014-05-16  Views: 20416
package com.fenghuo.html;
import java.io.IOException;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class AnalyzeHtml {
	/**
	 * Example program to list article links from a blog's list view.
	 *
	 * <p>Fetches the page with jsoup, selects every element carrying the
	 * {@code link_title} class, and for each such element that has exactly
	 * one descendant prints the link URL followed by the element's text.
	 *
	 * @param args command-line arguments (unused)
	 * @throws IOException if fetching the page fails
	 */
	public static void main(String[] args) throws IOException {
		String csdn = "http://blog.csdn.net";
		String blog = "http://blog.csdn.net/w695050167";
		String url = blog + "?viewmode=list";

		Connection connection = Jsoup.connect(url);
		// Request timeout in milliseconds.
		// NOTE(review): 500 ms is tight for a remote site; raise it if fetches time out.
		connection.timeout(500);
		// Send a browser-like User-Agent so the server does not see a Java client;
		// per the original author's note, CSDN does not allow Java programs to access it.
		connection.header("User-Agent", "Mozilla/4.0 (compatible; MSIE 5.0; Windows XP; DigExt)");
		// Perform the GET and parse the returned HTML into a Document.
		Document doc = connection.get();

		// Walk the title elements of the parsed document.
		Elements links = doc.select(".link_title");
		for (Element e : links) {
			// getAllElements() includes e itself, so a size of 2 means e has
			// exactly one descendant element.
			if (e.getAllElements().size() != 2) {
				continue;
			}

			// first() returns null when nothing matched; skip instead of
			// hitting a NullPointerException on attr().
			Element ae = e.select("a[href]").first();
			if (ae == null) {
				continue;
			}

			String href = ae.attr("href");
			// Only prepend the site root when the href is not already absolute,
			// otherwise the printed URL would be malformed.
			boolean absolute = href.startsWith("http://") || href.startsWith("https://");
			System.out.println(absolute ? href : csdn + href);

			String text = e.text();
			System.out.println(text);
		}
	}
}
