日期:2014-05-17  浏览次数:20708 次

Java 抓取网站 HTML

import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;

/**
 * Fetches the HTML source of a resource through its URL and prints it to
 * standard output.
 *
 * <p>The content is always decoded as UTF-8; the charset announced by the
 * server (if any) is not consulted.
 */
public class GetHtmlByUrl {

	/** Page downloaded by {@link #getHtmlbyurl()}. */
	private static final String ITEYE_URL = "http://www.iteye.com";

	/** Page downloaded by {@link #gethtmlbyurlcon()}. */
	private static final String CSDN_URL = "http://www.csdn.org";

	/** Name of the charset used to decode the downloaded bytes. */
	private static final String CHARSET = "utf-8";

	/** Size in bytes of the buffer used for each read from the stream. */
	private static final int BUFFER_SIZE = 1024;

	/**
	 * Downloads {@code http://www.iteye.com} and prints its HTML source to
	 * standard output. An I/O failure (including a malformed URL) is reported
	 * by printing the stack trace; no exception is propagated.
	 */
	public void getHtmlbyurl()
	{
		printHtml(ITEYE_URL);
	}

	/**
	 * Downloads {@code http://www.csdn.org} and prints its HTML source to
	 * standard output. An I/O failure (including a malformed URL) is reported
	 * by printing the stack trace; no exception is propagated.
	 */
	public static void gethtmlbyurlcon()
	{
		printHtml(CSDN_URL);
	}

	/**
	 * Reads the complete content behind {@code address} and decodes it as
	 * UTF-8.
	 *
	 * @param address textual URL of the resource to read; any protocol
	 *                supported by {@link URL} may be used
	 * @return the whole content as a string, empty if the resource is empty
	 * @throws IOException if {@code address} is not a valid URL (reported as
	 *                     {@code MalformedURLException}, a subclass of
	 *                     {@code IOException}) or if connecting or reading fails
	 */
	public static String fetchHtml(String address) throws IOException
	{
		// Read from the connection that was opened, so only one connection
		// is made per call.
		URLConnection connection = new URL(address).openConnection();
		// try-with-resources closes the stream even when a read fails.
		try (InputStream in = connection.getInputStream()) {
			ByteArrayOutputStream collected = new ByteArrayOutputStream();
			byte[] buffer = new byte[BUFFER_SIZE];
			int count;
			while ((count = in.read(buffer)) != -1) {
				// Keep only the bytes actually read in this iteration; the
				// rest of the buffer holds data from earlier reads.
				collected.write(buffer, 0, count);
			}
			// Decode once at the end so that a multi-byte character split
			// across two reads is not corrupted.
			return collected.toString(CHARSET);
		}
	}

	/**
	 * Prints the content behind {@code address} to standard output, or the
	 * stack trace of the failure if it cannot be read.
	 */
	private static void printHtml(String address)
	{
		try {
			System.out.println(fetchHtml(address));
		} catch (IOException e) {
			// Best-effort demo behaviour: report the failure and carry on.
			// MalformedURLException is an IOException, so it is covered too.
			e.printStackTrace();
		}
	}

	/** Entry point: prints the page fetched by {@link #gethtmlbyurlcon()}. */
	public static void main(String[] args) {
		GetHtmlByUrl.gethtmlbyurlcon();
	}
}