// Date: 2014-05-19 | Page views: 20856
package com.glht.search.action; import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.net.MalformedURLException; import java.net.URL; import java.util.regex.Matcher; import java.util.regex.Pattern; public class Ask { /** * @param args */ public static void main(String[] args) { getSuNing("http://www.suning.com/webapp/wcs/stores/servlet/odeSearch?storeId=10052&catalogId=10051&categoryId=20394&langId=-7&suggestionWordList=%5b%5d&isCatalogSearch=1&isList=0&sortType=0¤tPage=0"); getSuNing("http://www.suning.com/webapp/wcs/stores/servlet/odeSearch?storeId=10052&catalogId=10051&categoryId=20321&langId=-7&suggestionWordList=%5b%5d&isCatalogSearch=1&isList=0&sortType=0¤tPage=0"); } /* * 获取苏宁的一个分类页面的产品 */ public static void getSuNing(String url) { String content = Utils.getURLContent(url, "utf-8"); System.out.println("内容获取完成"); StringBuilder regStr = new StringBuilder(); regStr.append("class=\"pro_img\".*?"); regStr.append("<a.*?href=\"(.*?)\""); regStr.append(".*?src=\"(.*?)\".*?alt=\"(.*?)\""); regStr.append(".*?<em>(\\d+.?\\d+)</em>"); Pattern pattern = Pattern.compile(regStr.toString(), Pattern.DOTALL); Matcher matcher = pattern.matcher(content); int count = 0; while (matcher.find()) { System.out.println("第" + (++count) + "个"); } System.out.println("分析完成========="); } /* * 获取页面的内容 */ public static String getURLContent(String url, String encoding) { if (url == null || "".equals(url.trim())) return null; StringBuffer content = new StringBuffer(); try { // 新建URL对象 URL u = new URL(url); InputStream in = new BufferedInputStream(u.openStream()); InputStreamReader theHTML = new InputStreamReader(in, encoding != null ? encoding : "gb2312"); int c; while ((c = theHTML.read()) != -1) { content.append((char) c); } } // 处理异常 catch (MalformedURLException e) { System.err.println(e); } catch (IOException e) { System.err.println(e); } return content.toString(); } }