// Date: 2014-05-17  Views: 20796
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.util.EntityUtils;
/**
 * Small experiment that fetches two Baidu result pages for the same query and
 * prints the first "宿...执" fragment found in each response, so the rendering
 * of the character "埇" can be compared between the two pages.
 */
public class Test {
    /**
     * Charset used to decode the response bodies.
     *
     * <p>"埇" (bytes 0x88 0xAC in the query string) is a GBK extension character
     * that is not part of GB2312, so decoding with "gb2312" turns it into
     * garbage. GBK is a superset of GB2312 and decodes both pages correctly.
     * NOTE(review): assumes both pages are served in a GB2312/GBK-family
     * encoding - confirm if further URLs are added.
     */
    private static final String PAGE_CHARSET = "GBK";

    /**
     * Requests every test URL, decodes the body as {@link #PAGE_CHARSET} and
     * prints the fragment matched by the comparison regex.
     *
     * <p>Any exception aborts the remaining URLs and is printed to stderr.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            String[] urlAry = new String[]{
                // Baidu web search: "埇" was displayed correctly
                "http://www.baidu.com/s?cl=3&wd=%CB%DE%88%AC%D6%B4",
                // Baidu Zhidao: "埇" was garbled when the body was decoded as gb2312
                "http://zhidao.baidu.com/q?word=%CB%DE%88%AC%D6%B4&lm=0&fr=search&ct=17&pn=0&tn=ikaslist&rn=10"
            };
            for (String queryURL : urlAry) {
                DefaultHttpClient client = new DefaultHttpClient();
                try {
                    HttpGet httpget = new HttpGet(queryURL);
                    HttpResponse response = client.execute(httpget);
                    HttpEntity entity = response.getEntity();
                    // Decode the raw bytes ourselves: EntityUtils.toString(entity, cs)
                    // only uses cs as a fallback, so a "charset=gb2312" response
                    // header would still force the lossy GB2312 decoder.
                    String returnText = new String(EntityUtils.toByteArray(entity), PAGE_CHARSET);
                    // Page source (uncomment to dump the whole response)
                    // System.out.println(returnText);
                    // Extract the part to compare with a regular expression
                    getTextByRule(".*?(宿.*?执).*", returnText);
                } finally {
                    // Release the connection even when the request or decoding fails.
                    client.getConnectionManager().shutdown();
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints capturing group 1 of the first match of a regex in a string.
     * Prints nothing when the regex does not match.
     *
     * @param parttern regular expression; must contain at least one capturing group
     * @param str      text to search
     */
    public static void getTextByRule(String parttern, String str) {
        Pattern p = Pattern.compile(parttern);
        Matcher matcher = p.matcher(str);
        if (matcher.find()) {
            System.out.println(matcher.group(1));
        }
    }
}