日期:2014-05-17  浏览次数:20629 次

HttpClient 与 HtmlParser 完美融合的简单实例
第一步当然是去网上下载 Jar 包啦,相信这个大家应该都会吧!

搜索 HttpClient 下载
搜索 HtmlParser 下载

简单实例代码:

package com.fldyown.advertisement;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.util.EntityUtils;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.util.NodeList;

/**
 * Minimal example that combines Apache HttpClient with HTML Parser:
 * it downloads one page and prints the {@code href} attribute of every
 * {@code <a>} tag found in it.
 *
 * @author 强
 * @version v1.00 (created 2011-5-17)
 */
public class Advertisement {

    /**
     * Fetches a fixed Baidu search-results URL and writes the {@code href}
     * of each anchor tag to standard output, each followed by a blank line.
     *
     * <p>Any exception raised while fetching or parsing is caught and its
     * stack trace is printed; nothing is rethrown.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        HttpClient httpclient = new DefaultHttpClient();
        try {
            HttpGet httpget = new HttpGet(
                    "http://www.baidu.com/s?wd=%B7%E3%C1%F8%B5%FB%D4%CF");
            HttpResponse response = httpclient.execute(httpget);
            HttpEntity entity = response.getEntity();
            if (entity != null) {
                String html = EntityUtils.toString(entity);

                // Parser.createParser always treats the string as markup.
                // The Parser(String) constructor instead interprets its
                // argument as a URL/file name unless it starts with '<'.
                Parser parser = Parser.createParser(html, null);

                // Parse the whole document first, then search it recursively
                // for <a> tags at any depth.
                NodeList nodeList = parser.parse(null);
                NodeFilter filter = new TagNameFilter("a");
                NodeList list = nodeList.extractAllNodesThatMatch(filter, true);
                for (int i = 0; i < list.size(); i++) {
                    LinkTag tag = (LinkTag) list.elementAt(i);
                    System.out.println(tag.getAttribute("href") + "\n");
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Release the connections held by the client's connection
            // manager; without this they are never closed explicitly.
            httpclient.getConnectionManager().shutdown();
        }
    }
}


这个程序很有用:很多时候我们需要从网页中筛选出自己关心的数据(如图片链接等),这时就可以用到它!