日期:2014-05-20 浏览次数:21061 次
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Small scraping demo: downloads a product page and prints the pieces of it
 * that match a fixed set of regular expressions.
 */
public class test {

    /** Encoding used when the caller does not supply one. */
    private static final String DEFAULT_ENCODING = "gb2312";

    /**
     * Patterns applied to the page, in this order. Capture group 1 of every
     * match is printed.
     */
    private static final Pattern[] PRODUCT_PATTERNS = {
        Pattern.compile("<h1>(.*?)</h1>"),                // product name
        Pattern.compile("src=\"(.*?b\\.jpg)\""),          // product picture
        Pattern.compile("class=\"num\".*?(\\d+\\.\\d+)"), // price
    };

    /**
     * Runs the demo against a fixed product URL.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String url = "http://product.dangdang.com/product.aspx?product_id=20689512";
        new test().spiderProduct(url);
    }

    /**
     * Downloads the page at {@code url} (decoded as gb2312) and prints, one per
     * line on {@code System.out}, capture group 1 of every match of the name
     * pattern, then the picture pattern, then the price pattern.
     *
     * <p>Previously the three patterns were assigned to the same variable in
     * turn, so only the last one (price) was ever applied. A null or blank
     * {@code url} is now a no-op instead of a {@link NullPointerException}.
     *
     * @param url address of the product page; may be null or blank
     */
    public void spiderProduct(String url) {
        String content = getURLContent(url, DEFAULT_ENCODING);
        if (content == null) {
            // getURLContent returns null for a null/blank URL: nothing to scan.
            return;
        }
        for (Pattern pattern : PRODUCT_PATTERNS) {
            Matcher matcher = pattern.matcher(content);
            while (matcher.find()) {
                System.out.println(matcher.group(1));
            }
        }
    }

    /**
     * Reads everything available at {@code url} and returns it as a string.
     *
     * <p>Failures are best-effort: a malformed URL or an I/O error is printed to
     * {@code System.err} and whatever was read up to that point (possibly the
     * empty string) is returned.
     *
     * @param url      address to read; may be null or blank
     * @param encoding charset name used to decode the bytes; gb2312 when null
     * @return the decoded content, or {@code null} when {@code url} is null or blank
     */
    public String getURLContent(String url, String encoding) {
        if (url == null || "".equals(url.trim())) {
            return null;
        }
        StringBuilder content = new StringBuilder();
        String charsetName = encoding != null ? encoding : DEFAULT_ENCODING;
        // Both resources are declared separately so the stream is still closed
        // when the reader cannot be created (e.g. unsupported charset name).
        try (InputStream in = new BufferedInputStream(new URL(url).openStream());
             InputStreamReader reader = new InputStreamReader(in, charsetName)) {
            char[] buffer = new char[4096];
            int read;
            while ((read = reader.read(buffer)) != -1) {
                content.append(buffer, 0, read);
            }
        } catch (MalformedURLException e) {
            System.err.println(e);
        } catch (IOException e) {
            System.err.println(e);
        }
        return content.toString();
    }
}
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * A product scraped from a web page: its name, picture URL and price, all kept
 * as the raw strings captured from the HTML.
 *
 * <p>Instances are built either directly or through {@link #createItem(String)}
 * / {@link #getItem(String)}, which download the page and extract the fields
 * with regular expressions. A field that could not be found is {@code null}.
 */
class ProductItem {
    /** Encoding used when the caller does not supply one. */
    private static final String DEFAULT_ENCODING = "gb2312";

    /** Product name: text of the first {@code <h1>} element. */
    private static final Pattern NAME_PATTERN = Pattern.compile("<h1>(.*?)</h1>");

    /** Product picture: first {@code src} attribute ending in {@code b.jpg}. */
    private static final Pattern PICTURE_PATTERN = Pattern.compile("src=\"(.*?b\\.jpg)\"");

    /** Price: first decimal number following {@code class="num"} on the same line. */
    private static final Pattern PRICE_PATTERN = Pattern.compile("class=\"num\".*?(\\d+\\.\\d+)");

    /**
     * Name, picture and price in a single expression (groups 1, 2, 3). DOTALL
     * lets the gaps between the fields span line breaks.
     */
    private static final Pattern COMBINED_PATTERN = Pattern.compile(
            "<h1>(.*?)</h1>.*?src=\"(.*?b\\.jpg)\".*?num\".*?(\\d+\\.\\d+).*", Pattern.DOTALL);

    String name;
    String picture;
    String price;

    /**
     * Creates an item from already-extracted values.
     *
     * @param name    product name, may be null
     * @param picture product picture URL, may be null
     * @param price   product price as captured text, may be null
     */
    public ProductItem(String name, String picture, String price) {
        this.name = name;
        this.picture = picture;
        this.price = price;
    }

    public String getPrice() {
        return price;
    }

    public void setPrice(String price) {
        this.price = price;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public String getPicture() {
        return picture;
    }

    public void setPicture(String picture) {
        this.picture = picture;
    }

    /**
     * Downloads the page (decoded as gb2312) and extracts name, picture and
     * price independently, each with its own pattern, taking the first match.
     *
     * @param urlString address of the product page; may be null or blank
     * @return an item whose fields are null wherever nothing matched; all
     *         fields are null when {@code urlString} is null or blank
     */
    public static ProductItem createItem(String urlString) {
        String content = getURLContent(urlString, DEFAULT_ENCODING);
        if (content == null) {
            // Null/blank URL: previously this fell through to a NullPointerException.
            return new ProductItem(null, null, null);
        }
        String name = firstGroup(NAME_PATTERN, content);
        String picture = firstGroup(PICTURE_PATTERN, content);
        String price = firstGroup(PRICE_PATTERN, content);
        return new ProductItem(name, picture, price);
    }

    /**
     * Downloads the page (decoded as gb2312) and extracts name, picture and
     * price with one combined pattern, so either all three fields are set or
     * none is.
     *
     * @param urlString address of the product page; may be null or blank
     * @return an item with all three fields set when the combined pattern
     *         matched, otherwise an item whose fields are all null
     */
    public static ProductItem getItem(String urlString) {
        String name = null, picture = null, price = null;
        String content = getURLContent(urlString, DEFAULT_ENCODING);
        if (content != null) {
            Matcher matcher = COMBINED_PATTERN.matcher(content);
            // The pattern ends in ".*" under DOTALL, so a match always runs to
            // the end of the input and there can be at most one.
            if (matcher.find()) {
                name = matcher.group(1);
                picture = matcher.group(2);
                price = matcher.group(3);
            }
        }
        return new ProductItem(name, picture, price);
    }

    /**
     * Reads everything available at {@code urlString} and returns it as a string.
     *
     * <p>Failures are best-effort: a malformed URL or an I/O error is printed to
     * {@code System.err} and whatever was read up to that point (possibly the
     * empty string) is returned.
     *
     * @param urlString address to read; may be null or blank
     * @param encoding  charset name used to decode the bytes; gb2312 when null
     * @return the decoded content, or {@code null} when {@code urlString} is
     *         null or blank
     */
    public static String getURLContent(String urlString, String encoding) {
        if (urlString == null || "".equals(urlString.trim())) {
            return null;
        }
        StringBuilder content = new StringBuilder();
        String charsetName = encoding != null ? encoding : DEFAULT_ENCODING;
        // Both resources are declared separately so the stream is still closed
        // when the reader cannot be created (e.g. unsupported charset name).
        try (InputStream in = new BufferedInputStream(new URL(urlString).openStream());
             InputStreamReader reader = new InputStreamReader(in, charsetName)) {
            char[] buffer = new char[4096];
            int read;
            while ((read = reader.read(buffer)) != -1) {
                content.append(buffer, 0, read);
            }
        } catch (MalformedURLException e) {
            System.err.println(e);
        } catch (IOException e) {
            System.err.println(e);
        }
        return content.toString();
    }

    /** Returns capture group 1 of the first match of {@code pattern}, or null when there is none. */
    private static String firstGroup(Pattern pattern, String content) {
        Matcher matcher = pattern.matcher(content);
        return matcher.find() ? matcher.group(1) : null;
    }

    @Override
    public String toString() {
        return "name = " + name + " \npicture = " + picture + " \nprice = "
                + price;
    }
}
/**
 * Entry point for the scraping demo: fetches one fixed product page twice,
 * once per extraction strategy offered by {@code ProductItem}, and prints
 * each result.
 */
public class dsfdsf {
    /**
     * Prints the item produced by {@code ProductItem.createItem} and then the
     * item produced by {@code ProductItem.getItem} for the same URL.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        final String productUrl = "http://product.dangdang.com/product.aspx?product_id=20689512";

        // Strategy 1: one pattern per field.
        System.out.println(ProductItem.createItem(productUrl));

        // Strategy 2: a single combined pattern.
        System.out.println(ProductItem.getItem(productUrl));
    }
}