日期:2012-02-19  浏览次数:20565 次

 都说阿里巴巴有“无法采集”“防采集”的神话,今天就用张老师讲的 cURL 采集方法写了一段模拟浏览器的代码。没有不可能,只有不去做,哈哈。

<?php

// Disable PHP's maximum execution time (0 means no limit), presumably so the
// cURL request further down is not aborted if the remote server is slow.
set_time_limit(0);

/**
 * Build a random string made of digits and lowercase ASCII letters.
 *
 * Characters are picked with mt_rand(), which is not cryptographically
 * secure, so the result must not be used as a secret.
 *
 * @param int $length Number of characters to generate. Defaults to 26;
 *                    zero or a negative value yields an empty string.
 *
 * @return string Random string over [0-9a-z] with the requested length.
 */
function _rand($length = 26) {

    $chars = "0123456789abcdefghijklmnopqrstuvwxyz";

    $max = strlen($chars) - 1;

    // Deliberately no mt_srand() call: PHP seeds the generator on its own.
    // Re-seeding from the fractional part of microtime() on every call
    // restricted the seed to 0..999999 (at most one million distinct
    // results), made calls within the same microsecond return identical
    // strings, and reset the generator state for every other caller.

    $string = '';

    for ($i = 0; $i < $length; $i++) {
        $string .= $chars[mt_rand(0, $max)];
    }

    return $string;

}

// Random token produced by _rand(); nothing in the visible script reads it.
$HTTP_SESSION=_rand();

// Host and path of the page to download. The path already contains
// percent-encoded bytes, so it is passed to cURL unchanged.
$HTTP_Server="search.china.alibaba.com";

$HTTP_URL="/company/k-%CB%AE%CB%AE%CB%AE_n-y.html";

$ch = curl_init();

curl_setopt($ch, CURLOPT_URL, "http://".$HTTP_Server.$HTTP_URL);

// Have curl_exec() return the response body instead of echoing it directly.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

// Send a browser-like User-Agent header (an MSIE 6.0 identification string).
curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)");

$res = curl_exec($ch);

// curl_exec() returns false on failure. Capture the error text before the
// handle is closed; print_r(false) would otherwise print nothing at all.
$error = ($res === false) ? curl_error($ch) : '';

curl_close($ch);

if ($res === false) {
    echo "cURL request failed: ".$error."\n";
} else {
    print_r($res);
}

?>