日期:2014-05-20 浏览次数:20932 次
package modelframeworkdemo;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Date;

/**
 * Demo that opens a fixed URL, prints the response metadata exposed by
 * {@link URLConnection}, and then dumps the body one character per line.
 */
public class CSHtml {

    /**
     * Fetches the hard-coded address and prints its headers and content to
     * standard output. Any failure is printed rather than propagated.
     */
    void display() {
        try {
            String addr = "http://www.pw.utc.com/vgn-ext-templating/v/PWSearch?keyWord=engine";
            URL url = new URL(addr);
            // openConnection() only creates the connection object; connect() performs the request.
            URLConnection c = url.openConnection();
            c.connect();

            // Response metadata, all taken from the URLConnection.
            System.out.println("内容类型: " + c.getContentType());
            System.out.println("内容长度: " + c.getContentLength());
            System.out.println("创建日期: " + new Date(c.getDate()));
            System.out.println("最后修改日期: " + new Date(c.getLastModified()));
            System.out.println("终止日期: " + new Date(c.getExpiration()));

            // The length of a reader's toString() says nothing about the stream,
            // so no extra "length" line is printed; Content-Length is reported above.
            // try-with-resources closes the reader and the stream even if reading fails.
            try (InputStream is = c.getInputStream();
                 BufferedReader br = new BufferedReader(
                         new InputStreamReader(is, charsetOf(c.getContentType())))) {
                int msg;
                while ((msg = br.read()) != -1) {
                    System.out.println(msg + " " + (char) msg);
                }
                // Reading again after end of stream still yields -1.
                System.out.println(br.read());
            }
        } catch (Exception e) {
            System.out.println(e);
        }
    }

    /**
     * Picks the charset named in a Content-Type header value.
     *
     * @param contentType header value such as {@code text/html; charset=GBK}; may be null
     * @return the declared charset, or UTF-8 when none is declared or the name is not supported
     */
    private static Charset charsetOf(String contentType) {
        final String key = "charset=";
        if (contentType != null) {
            for (String part : contentType.split(";")) {
                String token = part.trim();
                if (token.regionMatches(true, 0, key, 0, key.length())) {
                    String name = token.substring(key.length()).trim();
                    if (name.length() >= 2 && name.startsWith("\"") && name.endsWith("\"")) {
                        name = name.substring(1, name.length() - 1);
                    }
                    try {
                        return Charset.forName(name);
                    } catch (IllegalArgumentException ignored) {
                        // Malformed or unsupported charset name: fall back to the default below.
                    }
                }
            }
        }
        return StandardCharsets.UTF_8;
    }

    public static void main(String[] args) {
        CSHtml app = new CSHtml();
        app.display();
    }
}
//读取指定url的内容并转换为xml文档 public Document doQuery1(String url)throws Exception{ URL u = new URL(url); HttpURLConnection conn = (HttpURLConnection)(u.openConnection()); // conn.setDoOutput(false); conn.setDoInput(true); conn.setUseCaches(false); conn.setRequestMethod("GET");//GET conn.connect(); int code = conn.getResponseCode(); if(code!=HttpURLConnection.HTTP_OK){ throw new Exception("远程没有返回正确结果,返回【"+code+"】。"); } //反馈.. /* //直接把结果打印出来 InputStream in = conn.getInputStream(); BufferedReader br=new BufferedReader(new InputStreamReader(in)); String t=null; while((t=br.readLine())!=null){ System.out.println(t); } return null; */ //正常解析 InputStream in = conn.getInputStream(); Document doc=Sys.loadXML(in); return doc; }
------解决方案--------------------
我并不认为楼主的方法有什么不妥,担心是缓冲区大小的原因,所以,换了个方法读。
别说我BT,这样可以不用考虑编码问题,如果有乱码那就修改控制台的编码和网页编码相同就行了。
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;

/**
 * Demo that fetches a fixed URL and copies the raw response bytes to standard
 * output, so no character decoding happens inside the program.
 */
public class CSHtml {

    /**
     * Opens the hard-coded address and streams its body to {@code System.out}
     * in chunks. Any failure is printed rather than propagated.
     */
    void display() {
        try {
            String addr = "http://www.pw.utc.com/vgn-ext-templating/v/PWSearch?keyWord=engine";
            URL url = new URL(addr);
            URLConnection c = url.openConnection();
            c.connect();

            // try-with-resources closes the stream even when read() or write() throws.
            try (InputStream is = c.getInputStream()) {
                byte[] b = new byte[102400];
                int i;
                // read() may return fewer bytes than the buffer holds; write only what was read.
                while ((i = is.read(b, 0, b.length)) != -1) {
                    System.out.write(b, 0, i);
                }
            }
            // Make sure the raw bytes written above reach the console.
            System.out.flush();
        } catch (Exception e) {
            System.out.println(e);
        }
    }

    public static void main(String[] args) {
        CSHtml app = new CSHtml();
        app.display();
    }
}