求助:采用java抓取网页,总是出现乱码,请大侠指点迷津,谢谢
如题,相关java源程序附在后面,怎么调试也是出现乱码,恳请大牛指点,谢谢了。
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import
java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.net.URL;
import java.net.URLConnection;
public class DownPage {
public static void main(String args[]) throws
IOException{
//抓取的页面地址
String urlStr = "http://esf.zz.soufun.com/house/a21-i32/";
URL url = new URL(urlStr);
URLConnection connection = url.openConnection();
//获得该网页的编码
String ss = connection.getContentType();
System.out.println(ss.substring(ss.indexOf("=")+1));
//查InputStreamReader的构造方法,gb2312为该页面的编码
BufferedReader br = new BufferedReader(new InputStreamReader(url.openStream(),ss.substring(ss.indexOf("=")+1)));
File file = new File("d:/a.html");
BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file)));
if(br != null){
String s = null;
while((s = br.readLine())!=null){
String sss = new String(s.getBytes("gb2312"),"windows-31J");
System.out.println(s);
System.out.println(sss);
bw.write(s);
bw.flush();
}
bw.close();
br.close();
}
}
}