日期:2014-05-16 浏览次数:20527 次
JSOUP解析页面的方法
import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; public class MatchAmazon { public static MatchInfo getProductCode(ProductInfo productInfo){ String scriptCode = productInfo.getScriptCode(); Document doc = null; MatchInfo opponInfo = new MatchInfo(); try { doc = Jsoup.connect("http://www.amazon.cn/s/ref=nb_sb_noss?__mk_zh_CN=%E4%BA%9A%E9%A9%AC%E9%80%8A%E7%BD%91%E7%AB%99&url=search-alias%3Daps&field-keywords="+scriptCode).userAgent("Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.2.19) Gecko/20110707 Firefox/3.6.19").get(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } String detail = ""; if (null != doc) { Elements doc1 = doc.select("div#atfResults.list"); if (null != doc1) { detail = doc1.html(); } } if (detail == "" || detail.equals("")) { opponInfo.setId(productInfo.getId()); opponInfo.setProductId(productInfo.getProductId()); opponInfo.setProductName(productInfo.getProductName()); opponInfo.setScriptCode(scriptCode); opponInfo.setProductCode(productInfo.getProductCode()); return opponInfo; } else { String productCode = detail.substring(detail.indexOf("name=") + 6, detail.indexOf("\">")); String productName = doc.select("div#result_0.result div.data h3.title").first().text(); System.out.println("productCode:" + productCode + "\tproductName:" + productName); opponInfo.setId(productInfo.getId()); opponInfo.setProductId(productInfo.getProductId()); opponInfo.setProductName(productInfo.getProductName()); opponInfo.setScriptCode(scriptCode); opponInfo.setOpponProductCode(productCode); opponInfo.setProductCode(productInfo.getProductCode()); opponInfo.setOpponProductName(productName); return opponInfo; } } public static void exportAmazonToExcel(String filePath, Integer startIndex, Integer length, String fileName) throws Exception{ CsvImporter csvImporter = new CsvImporter(filePath); List<ProductInfo> dataList = csvImporter.nextRows(startIndex,length); List<MatchInfo> result = new ArrayList<MatchInfo>(); for (ProductInfo productInfo : dataList) { MatchInfo matchResult = getProductCode(productInfo); if (null != matchResult) { if (matchResult.getOpponProductCode() == "") { matchResult = getProductCode(productInfo); result.add(matchResult); } result.add(matchResult); } } System.out.println(result.size()); try { MatchDtoToExcel.MatchInfo2Excel(result, fileName); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } }