?
HttpURLConnection 和HttpClient+Jsoup处理标签抓取页面和模拟登录
博客分类: httpclient
HttpURLConnectionHttpClientJsoup
HttpURLConnection抓取
Java代码 收藏代码
package com.app.html;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
public class Html {
private static final String loginURL = "http://login.goodjobs.cn/index.php/action/UserLogin";
private static final String forwardURL = "http://user.goodjobs.cn/dispatcher.php/module/Personal/?skip_fill=1";
/**
* 获取登录页面请求
* @param loginUrl登录URL
* @param params登录用户名/密码参数
* @throws Exception
*/
public static String createHtml(String...params)throws Exception{
URL url = new URL(loginURL);
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
conn.setDoOutput(true);
loginHtml(conn, params);
return forwardHtml(conn,url);
}
/**
* 登录页面
* @param conn
* @param params登录用户名/密码参数
* @throws Exception
*/
private static void loginHtml(HttpURLConnection conn, String... params)
throws Exception {
OutputStreamWriter out = new OutputStreamWriter(conn.getOutputStream(), "GBK");
StringBuffer buff=new StringBuffer();
buff.append("memberName="+URLEncoder.encode(params[0], "UTF-8"));//页面用户名
buff.append("&password="+URLEncoder.encode(params[1],"UTF-8"));//页面密码
out.write(buff.toString());//填充参数
out.flush();
out.close();
}
/**
* 转向到定向的页面
* @param conn连接对象
* @param url重新定向请求URL
* @param toUrl定向到页面请求URL
* @throws Exception
*/
public static String forwardHtml(HttpURLConnection conn,URL url)throws Exception{
//重新打开一个连接
String cookieVal = conn.getHeaderField("Set-Cookie");
url = new URL(forwardURL);
conn = (HttpURLConnection) url.openConnection();
conn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
conn.setRequestProperty("User-Agent","Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; Foxy/1; .NET CLR 2.0.50727;MEGAUPLOAD 1.0)");
conn.setFollowRedirects(false);//置此类是否应该自动执行 HTTP 重定向
// 取得cookie,相当于记录了身份,供下次访问时使用
if (cookieVal != null) {
//发送cookie信息上去,以表明自己的身份,否则会被认为没有权限
conn.setRequestProperty("Cookie", cookieVal);
}
conn.connect();
InputStream in = conn.getInputStream();
BufferedReader buffReader = new BufferedReader( new InputStreamReader(in,"GBK"));
String line = null;
String content = "";
while ((line = buffReader.readLine()) != null) {
content +="\n" +line;
}
//IOUtils.write(result, new FileOutputStream("d:/index.html"),"GBK");
write(content, "d:/forward.html");
buffReader.close();
return content;
}
/**
*
* @param content
* @param htmlPath
* @return
*/
public static boolean write(String content, String htmlPath) {
boolean flag = true;
try {
Writer out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(htmlPath), "GBK"));
out.write("\n" + content);
out.close();
} catch (FileNotFoundException ex) {
ex.printStackTra