日期:2014-05-16  浏览次数:20527 次

Jsoup的应用

Jsoup解析页面的方法

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Searches amazon.cn for a product's script code with jsoup, extracts a
 * product code and title from the result page, and exports the collected
 * {@link MatchInfo} rows through {@code MatchDtoToExcel}.
 */
public class MatchAmazon {

	/** Search URL prefix; the script code is appended as the keyword value. */
	private static final String SEARCH_URL = "http://www.amazon.cn/s/ref=nb_sb_noss?__mk_zh_CN=%E4%BA%9A%E9%A9%AC%E9%80%8A%E7%BD%91%E7%AB%99&url=search-alias%3Daps&field-keywords=";

	/** User-Agent header sent with every search request. */
	private static final String USER_AGENT = "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.2.19) Gecko/20110707 Firefox/3.6.19";

	/**
	 * Fetches the search page for {@code productInfo}'s script code and builds
	 * a {@link MatchInfo} from it.
	 *
	 * <p>The returned object always carries the id, product id, product name,
	 * script code and product code copied from {@code productInfo}. The
	 * opponent product code/name are only set when they could be extracted
	 * from the page; a failed request or an unparsable page yields a result
	 * without them instead of an exception.
	 *
	 * @param productInfo the product to look up; must not be {@code null}
	 * @return a new {@link MatchInfo}, never {@code null}
	 */
	public static MatchInfo getProductCode(ProductInfo productInfo){
		String scriptCode = productInfo.getScriptCode();
		MatchInfo opponInfo = new MatchInfo();
		copyProductFields(productInfo, scriptCode, opponInfo);

		Document doc = null;
		try {
			// NOTE(review): scriptCode is appended without URL-encoding; codes
			// containing spaces or reserved characters may produce a bad
			// request. Confirm what callers pass before encoding here.
			doc = Jsoup.connect(SEARCH_URL + scriptCode).userAgent(USER_AGENT).get();
		} catch (IOException e) {
			// Best effort: a failed request is reported and treated as "no match".
			e.printStackTrace();
		}
		if (null == doc) {
			return opponInfo;
		}

		Elements resultList = doc.select("div#atfResults.list");
		String detail = (null != resultList) ? resultList.html() : "";
		if (null == detail || detail.length() == 0) {
			return opponInfo;
		}

		String productCode = extractProductCode(detail);
		if (null == productCode) {
			// Markup did not contain the expected name="...\"> pattern.
			return opponInfo;
		}

		// first() returns null on an empty selection, so check before using it.
		Elements titles = doc.select("div#result_0.result div.data h3.title");
		String productName = titles.isEmpty() ? null : titles.first().text();
		System.out.println("productCode:" + productCode + "\tproductName:" + productName);

		opponInfo.setOpponProductCode(productCode);
		if (null != productName) {
			opponInfo.setOpponProductName(productName);
		}
		return opponInfo;
	}

	/**
	 * Reads {@code length} rows starting at {@code startIndex} from the CSV at
	 * {@code filePath}, looks each one up via {@link #getProductCode}, and
	 * writes the results with {@code MatchDtoToExcel.MatchInfo2Excel}.
	 *
	 * <p>A product whose first lookup yields no opponent product code is
	 * looked up one more time; each product contributes exactly one row.
	 * Failures of the Excel export are printed and not rethrown.
	 *
	 * @param filePath   path passed to {@code CsvImporter}
	 * @param startIndex first row passed to {@code CsvImporter.nextRows}
	 * @param length     row count passed to {@code CsvImporter.nextRows}
	 * @param fileName   target name passed to the Excel exporter
	 * @throws Exception if reading the CSV or a lookup fails
	 */
	public static void exportAmazonToExcel(String filePath, Integer startIndex, Integer length, String fileName) throws Exception{
		CsvImporter csvImporter = new CsvImporter(filePath);
		List<ProductInfo> dataList = csvImporter.nextRows(startIndex,length);
		List<MatchInfo> result = new ArrayList<MatchInfo>();
		for (ProductInfo productInfo : dataList) {
			MatchInfo matchResult = getProductCode(productInfo);
			if (lacksOpponProductCode(matchResult)) {
				// Retry once; the second result replaces the first.
				matchResult = getProductCode(productInfo);
			}
			if (null != matchResult) {
				result.add(matchResult);
			}
		}
		System.out.println(result.size());
		try {
			MatchDtoToExcel.MatchInfo2Excel(result, fileName);
		} catch (Exception e) {
			// Export problems are reported but deliberately not propagated.
			e.printStackTrace();
		}
	}

	/** Copies the fields that every result carries from the source product. */
	private static void copyProductFields(ProductInfo productInfo, String scriptCode, MatchInfo target) {
		target.setId(productInfo.getId());
		target.setProductId(productInfo.getProductId());
		target.setProductName(productInfo.getProductName());
		target.setScriptCode(scriptCode);
		target.setProductCode(productInfo.getProductCode());
	}

	/**
	 * Returns the text between the first {@code name="} and the next
	 * {@code ">} in {@code html}, or {@code null} if that pattern is absent.
	 */
	private static String extractProductCode(String html) {
		int marker = html.indexOf("name=");
		if (marker < 0) {
			return null;
		}
		int start = marker + 6; // skip past name= and the opening quote
		int end = html.indexOf("\">", start);
		if (end < 0) {
			return null;
		}
		return html.substring(start, end);
	}

	/** True when {@code info} is null or has a null/empty opponent product code. */
	private static boolean lacksOpponProductCode(MatchInfo info) {
		if (null == info) {
			return true;
		}
		String code = info.getOpponProductCode();
		return null == code || code.length() == 0;
	}
}