日期:2014-05-16 浏览次数:20456 次
import java.io.IOException;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

/**
 * Minimal jsoup demo: fetches a web page with browser-like request headers
 * and prints the text content of its {@code <body>} to standard output.
 */
public class JsoupTest {

    /**
     * Fetches the page and prints its body text.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // No leading/trailing whitespace: the string is handed to Jsoup.connect as-is.
        String url = "http://slashdot.org/";
        try {
            Document doc = Jsoup.connect(url)
                    .header("User-Agent",
                            "Mozilla/5.0 (Windows; U; Windows NT 5.2) Gecko/2008070208 Firefox/3.0.1")
                    .header("Accept", "text/html,application/xhtml+xml")
                    .header("Accept-Language", "zh-cn,zh;q=0.5")
                    .header("Accept-Charset", "GB2312,utf-8;q=0.7,*;q=0.7")
                    .get();
            Element body = doc.body();
            System.out.println(body.text());
        } catch (IOException e) {
            // Demo program: report the I/O failure and fall through to exit.
            e.printStackTrace();
        }
    }
}
// Shortest form: connect to url and fetch the parsed document without setting any extra request headers.
doc = Jsoup.connect(url).get();
// Parse a document from a local file: "UTF-8" is passed as the charset name and an empty string as the base URL.
// NOTE(review): url is used here as a file-system path rather than a web address — confirm against the surrounding code.
String baseUrl = ""; File input = new File(url); Document doc = Jsoup.parse(input, "UTF-8", baseUrl);
<div id="bodycol"><div id="jobheadertop"> </div><div id="jobheader"><img border="0" src="./102708474_files/pixel.gif" alt="DiSalvo LLC" id="companyLogo" class="logo" onerror="removeLogo()"><p id="companyNameHeader" style="display: block; ">DiSalvo LLC recruiting</p> <div id="subicons"><img src="./102708474_files/pixel(1).gif" height="1" width="1" alt="" style="margin:0px"></div><div style="clear:both;height:1px"> </div><div id="jobheaderbottom"> </div></div><div id="jobwrappertop2"> </div><div id="jobwrapper"> <div id="jobsummary"> <div id="jobsummary_content"> <h2>Job Summary</h2> <dl> <dt>Company</dt> <dd><span class="wrappable">DiSalvo LLC recruiting</span></dd> <dt>Location</dt> <dd><span class="wrappable">Tigard, OR 97223</span></dd> <dt>Industries</dt> <dd><span class="wrappable">All</span></dd> <dt>Job Type</dt> <dd class="multipledd"><span class="wrappable">Full Time</span></dd><dd class="multipleddlast"><span class="wrappable"> Employee</span></dd> <dt>Years of Experience</dt> <dd><span class="wrappable">2+? to 5 Years</span></dd> <dt>Career Level</dt> <dd><span class="wrappable">Experienced (Non-Manager)</span></dd> <dt>Salary</dt> <dd><span class="wrappable">$47,000.?00 - $49,000.?00 /?year<br>$7k per year expense acct, medical, dental, 401K, uncapped commissions</span></dd> </dl> </div> </div> <div id="jobcopy"> <h1>Sales Representative</h1> <h2>About the Job</h2> <div id="jobBodyContent">
// Fragment: a field declaration followed by statements from a method whose header is not shown here.

// CSS selector for the <div id="jobsummary"> element of the job page.
private String seletorJobSum = "div#jobsummary";

// Select the job-summary section beneath element; give up (return null) when the page has none.
Elements elements = element.select(seletorJobSum);
if (elements.isEmpty()) {
    return null;
}
Element section = elements.first();