日期:2014-05-16 浏览次数:20337 次
package com.fenghuo.html; import java.io.IOException; import org.jsoup.Connection; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; public class AnalyzeHtml { /** * Example program to list links from a URL. */ public static void main(String[] args) throws IOException { String csdn = "http://blog.csdn.net"; String blog = "http://blog.csdn.net/w695050167"; String url = blog + "?viewmode=list"; Connection connection = Jsoup.connect(url); connection.timeout(500);//设置连接超时时间 //给服务器发消息头,告诉服务器,俺不是java程序。CSDN不允许java程序访问 connection.header("User-Agent","Mozilla/4.0 (compatible; MSIE 5.0; Windows XP; DigExt)"); Document doc = connection.get();//获取返回的html的document对象 //解析document对象 Elements links = doc.select(".link_title"); for (Element e : links) { if (e.getAllElements().size() == 2) { Element ae = e.select("a[href]").first(); String href = ae.attr("href"); System.out.println(csdn + href); String text = e.text(); System.out.println(text); } } } }