网页抓取:抓取 UTF-8 编码的页面会乱码,怎么解决?谢谢各位!
如题,先谢谢各位了。抓取下面这个页面会乱码,但是抓取 GB2312 编码的页面正常,怎么解决呢?
/**
 * Minimal page-fetching demo: validates a URL string, downloads the page it
 * points to, and echoes the decoded text to standard output.
 *
 * <p>Types that the original code did not reference are written fully
 * qualified so that no additional import lines are required.
 */
public class test1 {

    /** Scheme prefix a URL string must start with (compared case-insensitively). */
    private static final String HTTP_PREFIX = "http://";

    /** Parameter name that introduces the charset inside a Content-Type value. */
    private static final String CHARSET_PARAM = "charset=";

    /**
     * Downloads the resource at {@code pageUrl} and returns it as text.
     *
     * <p>The bytes are decoded with the charset declared in the response's
     * Content-Type header, or with UTF-8 when none is declared. Decoding with
     * the platform default charset instead garbles UTF-8 pages on any machine
     * whose default is a different encoding.
     *
     * <p>Lines are concatenated without their line terminators. The resulting
     * text is also printed to standard output.
     *
     * @param pageUrl the URL to fetch; may be {@code null}
     * @return the page text, or {@code null} if {@code pageUrl} is
     *         {@code null} or an I/O error occurs (the error's stack trace is
     *         printed to standard error)
     */
    private String downloadPage(URL pageUrl) {
        if (pageUrl == null) {
            return null;
        }
        try {
            java.net.URLConnection connection = pageUrl.openConnection();
            java.nio.charset.Charset charset = charsetOf(connection.getContentType());
            // try-with-resources closes the reader and the underlying stream
            // even when reading fails half-way through.
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(connection.getInputStream(), charset))) {
                StringBuilder pageBuffer = new StringBuilder();
                String line;
                while ((line = reader.readLine()) != null) {
                    pageBuffer.append(line);
                }
                String page = pageBuffer.toString();
                System.out.print(page);
                return page;
            }
        } catch (java.io.IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Extracts the charset from a Content-Type value such as
     * {@code text/html; charset=gb2312}.
     *
     * @param contentType the Content-Type value; may be {@code null}
     * @return the declared charset, or UTF-8 when the value is {@code null},
     *         has no charset parameter, or names a charset that is illegal or
     *         not supported by this JVM
     */
    private static java.nio.charset.Charset charsetOf(String contentType) {
        java.nio.charset.Charset fallback = java.nio.charset.StandardCharsets.UTF_8;
        if (contentType == null) {
            return fallback;
        }
        for (String part : contentType.split(";")) {
            String param = part.trim();
            if (!param.regionMatches(true, 0, CHARSET_PARAM, 0, CHARSET_PARAM.length())) {
                continue;
            }
            String name = param.substring(CHARSET_PARAM.length()).trim();
            // The value may be quoted, e.g. charset="utf-8".
            if (name.length() >= 2 && name.startsWith("\"") && name.endsWith("\"")) {
                name = name.substring(1, name.length() - 1);
            }
            try {
                return java.nio.charset.Charset.forName(name);
            } catch (IllegalArgumentException e) {
                // Illegal or unsupported charset name: use the fallback.
                return fallback;
            }
        }
        return fallback;
    }

    /**
     * Converts {@code url} to a {@link URL} if it starts with
     * {@code http://} (case-insensitive).
     *
     * @param url the URL string to check; may be {@code null}
     * @return the parsed URL, or {@code null} if {@code url} is {@code null},
     *         does not start with {@code http://}, or cannot be parsed
     */
    private URL verifyUrl(String url) {
        if (url == null
                || !url.regionMatches(true, 0, HTTP_PREFIX, 0, HTTP_PREFIX.length())) {
            return null;
        }
        try {
            return new URL(url);
        } catch (java.net.MalformedURLException e) {
            return null;
        }
    }

    /**
     * Fetches a fixed sample page and prints its text to standard output.
     *
     * @param agr command-line arguments (unused)
     */
    public static void main(String agr[]) {
        test1 st = new test1();
        String url = "http://j.peopledaily.com.cn/94476/94637/6524482.html";
        URL pageUrl = st.verifyUrl(url);
        if (pageUrl == null) {
            System.err.println("Invalid URL: " + url);
            return;
        }
        st.downloadPage(pageUrl);
    }
}
------解决方案--------------------
InputStreamReader 在不指定编码时使用平台默认编码(中文 Windows 上通常是 GBK),所以 GB2312 页面正常而 UTF-8 页面乱码。读取时显式指定编码即可:
new InputStreamReader(pageUrl.openStream(), "UTF-8")