日期:2014-05-20 浏览次数:20814 次
public class Aa extends TestCase {
private String localFile = "d:/tu.html";
public void testFetchPage() throws Exception {
for (int i = 1; i < 51; i++){
try {
// HttpClient主要负责执行请求
HttpClient httpclient = new DefaultHttpClient();
// 利用HTTP GET向服务器发起请求
HttpGet get = new HttpGet(
"http://www.yhd.com/ctg/s2/c23586-0/#page=1&sort=1");
// 获得服务器响应的的所有信息
HttpResponse response = httpclient.execute(get);
// 获得服务器响应回来的消息体(不包括HTTP HEAD)
HttpEntity entity = response.getEntity();
if (entity != null) {
InputStream is = entity.getContent();
// 将InputStream转换为Reader,并使用缓冲读取,提高效率,同时可以按行读取内容
IOUtils.copy(is, new FileOutputStream(localFile));
}
// 释放所有的链接资源,一般在所有的请求处理完成之后,才需要释放
httpclient.getConnectionManager().shutdown();
} catch (ClientProtocolException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
// 提取有关价格的信息
public void testParse02() throws Exception {
String html = IOUtils.toString(new FileInputStream(localFile), "UTF-8");
// 把文件内容读出来
List<Span> number = ParseUtils.parseTags(html, Span.class, "class", "color_red");
for(Span span:number){
String line = span.getStringText();
line=line.replace("[包邮]","");
line=line.replaceAll("\\s*", "");
line=line.replace("¥", "");
if(!line.isEmpty()) {
System.out.println( line);}
}
}
}
// Downloads listing pages 1 to 14 from mall.jumei.com and prints the text of
// every matching <span> (presumably the item price), numbered consecutively
// across all pages.
// NOTE(review): no connect/read timeout is set on the connection, so a
// stalled server blocks this loop indefinitely.

// Compiled once, because the expression is identical for every page.
Pattern pricePattern = Pattern.compile("<span style=\"font-size:18px; font-weight:bolder; color:#ed145b;\">(.*?)</span");
// Running count of matches printed so far, across all pages.
int j = 0;
for (int i = 1; i < 15; i++) {
    String urlString = "http://mall.jumei.com/products/0-19-0-11-" + i + ".html";
    URL url = new URL(urlString);
    URLConnection con = url.openConnection();

    // Collect the raw bytes first and decode them in one step afterwards.
    // Decoding each 1024-byte chunk on its own corrupts any multi-byte
    // character that straddles a chunk boundary.
    // (Fully qualified name: the snippet's import list is not visible.)
    java.io.ByteArrayOutputStream body = new java.io.ByteArrayOutputStream();
    try (InputStream is = con.getInputStream()) {
        byte[] bs = new byte[1024];
        int len;
        while ((len = is.read(bs)) != -1) {
            body.write(bs, 0, len);
        }
    }
    // Decoded with the platform default charset, as before.
    // NOTE(review): pass the page's real charset to toString(...) once it
    // has been confirmed.
    String content = body.toString();

    Matcher m = pricePattern.matcher(content);
    while (m.find()) {
        j++;
        // The original replaced "¥" with itself, which does nothing.
        // Presumably the first literal was the HTML entity before the snippet
        // was published; confirm against the page source.
        System.out.println("第" + j + "个------->" + m.group(1).replace("&yen;", "¥"));
    }
}