日期:2014-05-16  浏览次数:20319 次

java、jsp获取网页源码内容的三种方法
一、GetURL.java
import java.io.*;
import java.net.*;

/**
 * Command-line utility that downloads the content of a URL.
 *
 * <p>Usage: {@code java GetURL <URL> [<filename>]}. With one argument the
 * bytes are copied to standard output; with two arguments they are written
 * to the named file.
 */
public class GetURL {
    /**
     * Copies the bytes served at {@code args[0]} to {@code args[1]} or, when
     * no file name is given, to {@code System.out}.
     *
     * <p>Any failure is reported on {@code System.err} followed by the usage
     * line; no exception escapes this method.
     *
     * @param args the URL to read, optionally followed by an output file name
     */
    public static void main(String[] args) {
        InputStream in = null;
        OutputStream out = null;
        // True only when this method opened the output itself. System.out is
        // shared by the whole JVM and must never be closed here.
        boolean ownsOut = false;
        try {
            // Check the command-line arguments.
            if ((args.length != 1) && (args.length != 2)) {
                throw new IllegalArgumentException("Wrong number of args");
            }

            URL url = new URL(args[0]);   // Build the URL.
            in = url.openStream();        // Open a stream to that URL.
            if (args.length == 2) {       // Pick the appropriate output stream.
                out = new FileOutputStream(args[1]);
                ownsOut = true;
            } else {
                out = System.out;
            }

            // Copy the bytes to the output stream.
            byte[] buffer = new byte[4096];
            int bytesRead;
            while ((bytesRead = in.read(buffer)) != -1) {
                out.write(buffer, 0, bytesRead);
            }
            // System.out is not closed below, so push its buffered bytes now.
            out.flush();
        } catch (Exception e) {
            System.err.println(e);
            System.err.println("Usage: java GetURL <URL> [<filename>]");
        } finally {
            // Close each stream independently so that a failure (or a null
            // reference) on one never prevents the other from being released.
            closeAndReport(in);
            if (ownsOut) {
                closeAndReport(out);
            }
        }
    }

    /** Closes {@code resource} if it is non-null, reporting any failure on stderr. */
    private static void closeAndReport(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            System.err.println("Failed to close stream: " + e);
        }
    }
}

运行方法:
C:\java>java GetURL http://127.0.0.1:8080/kj/index.html index.html

二、geturl.jsp

<%@ page import="java.io.*" contentType="text/html;charset=gb2312" %>
<%@ page language="java" import="java.net.*"%>


<%
        // Fetches http://127.0.0.1:8080/kj/morejava.jsp and writes every
        // non-empty line of the response to html\morejava.html under the
        // web application's real root path. Failures are printed to
        // System.err; nothing is rethrown to the page.
        // NOTE(review): both the reader and the writer use the platform
        // default charset, while the page declares gb2312 - confirm they agree.
        String htmpath = null;
        BufferedReader in = null;
        InputStreamReader isr = null;
        InputStream is = null;
        PrintWriter pw = null;
        HttpURLConnection huc = null;
        try {
            htmpath = getServletContext().getRealPath("/") + "html\\morejava.html";
            pw = new PrintWriter(htmpath);
            URL url = new URL("http://127.0.0.1:8080/kj/morejava.jsp"); // Build the URL.
            huc = (HttpURLConnection) url.openConnection();
            is = huc.getInputStream();
            isr = new InputStreamReader(is);
            in = new BufferedReader(isr);
            String line = null;
            while ((line = in.readLine()) != null) {
                // Skip blank lines so the saved file contains only content lines.
                if (line.length() == 0) {
                    continue;
                }
                pw.println(line);
            }
        } catch (Exception e) {
            System.err.println(e);
        } finally {
            // Release every resource independently: a null reference or a
            // failing close() on one must not skip the others. In particular
            // pw must always be closed so buffered output reaches the file.
            if (pw != null) {
                pw.close();
            }
            if (in != null) {
                try { in.close(); } catch (IOException e) { System.err.println(e); }
            }
            if (isr != null) {
                try { isr.close(); } catch (IOException e) { System.err.println(e); }
            }
            if (is != null) {
                try { is.close(); } catch (IOException e) { System.err.println(e); }
            }
            if (huc != null) {
                huc.disconnect();
            }
        }
%>
        OK--,创建文件成功

三、HttpClient.java

import java.io.*;
import java.net.*;

public class HttpClient {
??? public static void main(String[] args) {
??????? try {
??????????? // 检查命令行参数
??????????? if ((args.length != 1) && (args.length != 2))
??????????????? throw new IllegalArgumentException("Wrong number of args");
???????????
??????????? OutputStream to_file;
??????????? if (args.length == 2)
????????????????? to_file = new FileOutputStream(args[1]);//输出到文件
??????????? else
????????????????? to_file = System.out;//输出到控制台
???????????
??????????
??????????? URL url = new URL(args[0]);
??????????? String protocol = url.getProtocol();
??????????? if (!protocol.equals("http"))
?????????????? throw new IllegalArgumentException("Must use 'http:' protocol");
??????????? String host = url.getHost();
??????????? int port = url.getPort();
??????????? if (port == -1) port = 80;
??????????? String filename = url.getFile();

??????????? Socket socket = new Socket(host, port);//打开一个socket连接

??????????? InputStream from_server = socket.getInputStream();//获取输入流
??????????? PrintWriter to_s