日期:2014-05-17  浏览次数:20911 次

【爬虫】使用Post方法爬取网页,但是获取到的是原网页
从cnblogs的站内搜索网页提交post表单,爬取cnblogs返回的博客链接。将带有搜索结果的页面保存到文件中。
Java code

package postMethod;

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Path;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.methods.PostMethod;

/**
 * Submits the search form of zzk.cnblogs.com with an HTTP POST and stores the
 * returned page in a local file.
 *
 * <p>NOTE(review): this still posts to the site root, exactly as the original
 * did. The reply further down in this thread requests
 * {@code http://zzk.cnblogs.com/s?w=java} with GET instead, which suggests the
 * root URL does not serve search results - confirm against the site.
 */
public class Post {
    /** URL the form is posted to. */
    private static final String SEARCH_URL = "http://zzk.cnblogs.com/";

    /** File (relative to the working directory) that receives the response body. */
    private static final String OUTPUT_FILE = "aaaa.html";

    /** Chunk size used when copying the response body to disk. */
    private static final int BUFFER_SIZE = 8192;

    private static final HttpClient httpClient = new HttpClient();

    /**
     * Posts the search keyword, prints the HTTP status code and, when the
     * status is 200, writes the response body to {@link #OUTPUT_FILE}.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if the request fails or the response cannot be
     *         read or written; failures are propagated instead of being
     *         printed and ignored
     */
    public static void main(String[] args) throws IOException
    {
        PostMethod postMethod = new PostMethod(SEARCH_URL);
        // The search box on the page is named "w".
        NameValuePair[] postData = { new NameValuePair("w", "java") };
        postMethod.addParameters(postData);

        try {
            int statusCode = httpClient.executeMethod(postMethod);
            System.out.println(statusCode);

            if (statusCode == HttpStatus.SC_OK) {
                saveResponseBody(postMethod, OUTPUT_FILE);
            }
        } finally {
            // Always hand the connection back, even when the request or the copy fails.
            postMethod.releaseConnection();
        }
    }

    /**
     * Copies the response body of an executed method into a file.
     *
     * @param method   an already executed POST method
     * @param filename path of the file to (over)write
     * @throws IOException if reading the body or writing the file fails
     */
    private static void saveResponseBody(PostMethod method, String filename) throws IOException
    {
        try (InputStream input = method.getResponseBodyAsStream()) {
            if (input == null) {
                // No body available: nothing to save.
                return;
            }
            try (OutputStream output = new FileOutputStream(filename)) {
                byte[] buffer = new byte[BUFFER_SIZE];
                int count;
                // read() signals end-of-stream with -1; a 0x00 byte is valid data
                // and must not end the copy.
                while ((count = input.read(buffer)) != -1) {
                    output.write(buffer, 0, count);
                }
            }
        }
    }
}



返回码是200,搜索框的ID和name都是w,程序执行后查看保存的文件,发现还是原来的搜索主页,没有搜索结果。
各路大神求教啊~~

------解决方案--------------------
改用GET方式:不要向搜索首页提交POST表单,而是直接请求带查询参数的搜索结果地址(例如 http://zzk.cnblogs.com/s?w=java)。

Java code

   private static HttpClient httpClient = new HttpClient();
    public static void main(String[] args) throws IOException
    {
        String path = "http://zzk.cnblogs.com/s?w=java";
        InputStream input = null;
        OutputStream output = null;
        // TODO Auto-generated method stub
        //得到post方法
        GetMethod postMethod = new GetMethod(path);
        //设置post方法的参数
//        NameValuePair[] postData = new NameValuePair[1];
//        postData[0] = new NameValuePair("w","java");
//        postMethod.addParameters(postData);
        
        //执行,返回状态码
        int statusCode = 0;
        try {
            statusCode = httpClient.executeMethod(postMethod);
        } catch (HttpException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
        System.out.println(statusCode);
        //针对状态码进行处理
        
        if(statusCode == HttpStatus.SC_OK)
        {
            input = postMethod.getResponseBodyAsStream();
            String filename = "D:/aaaa.html";

            output = new FileOutputStream(filename);
            
            int tempByte = -1;
            while ((tempB