日期:2014-05-17  浏览次数:20576 次

PHP采集代码
采集http://www.01job.cn/asp/itjob.asp该页面中职位列表头三条的记录 
看了很多的PHP采集教程了 还是不会写 所以这里请教了....
以下是我的代码,运行时出错了: 
<?php
// Driver script: scrape the job-listing page and print the extracted fragment.
// Uses the full "<?php" open tag because short tags ("<?") are disabled on many installs.
include("function.php");
$url="http://www.01job.cn/asp/itjob.asp";
// Delimiters for each field to extract: field name => begin/end markers.
$ft = array();
$ft["title"]["begin"]="<table width='620' border='0' cellspacing='0' cellpadding='5'>";
$ft["title"]["end"]="</table>";
// Replacement rules applied after extraction; none are needed here, but the
// variable must exist so pick() is not handed an undefined value.
$th = array();
$rs=pick($url,$ft,$th);
echo $rs["title"];
// Only "title" is configured in $ft, so "content" may be absent from the result;
// fall back to an empty string rather than reading an undefined index.
echo "<br />内容:".(isset($rs["content"]) ? $rs["content"] : "");
?>
function.php
<?php
/**
 * Fetch the raw contents of a page.
 *
 * @param string $url Location handed straight to file_get_contents().
 * @return string|false Whatever file_get_contents() yields for $url.
 */
function fetch_urlpage_contents($url)
{
    return file_get_contents($url);
}

/**
 * Return the text located between the first occurrence of $begin and the
 * nearest following $end inside $c (matched case-insensitively).
 *
 * Both markers are treated as literal text: preg_quote() escapes every regex
 * metacharacter as well as the "/" delimiter. The "s" modifier lets "." match
 * newlines, so fragments that span several lines (e.g. a whole <table>) are
 * found; without it a multi-line fragment never matches.
 *
 * @param string $begin Literal start marker.
 * @param string $end   Literal end marker.
 * @param string $c     Text to search.
 * @return string The captured fragment, or "" when nothing matches.
 */
function fetch_match_contents($begin,$end,$c)
{
    $pattern = "/" . preg_quote($begin, "/") . "(.*?)" . preg_quote($end, "/") . "/is";

    // preg_match() yields 1 on a match, 0 on no match and false on an engine error.
    if (preg_match($pattern, (string) $c, $rs) === 1) {
        return $rs[1];
    }

    return "";
}

/**
 * Escape a literal string so it can be embedded in a "/"-delimited PCRE pattern.
 *
 * The previous hand-rolled version replaced "$" with "\$" written inside double
 * quotes, which PHP itself collapses to a bare "$", so "$" was never escaped,
 * and no other metacharacter (".", "?", "+", "(", ...) was handled at all.
 * preg_quote() escapes all of them plus the delimiter passed as its second argument.
 *
 * @param string $str Literal text.
 * @return string $str with regex metacharacters and "/" backslash-escaped.
 */
function change_match_string($str)
{
    return preg_quote($str, "/");
}

/**
 * Scrape a page and extract one fragment per configured field.
 *
 * @param string $url Location passed to fetch_urlpage_contents().
 * @param array  $ft  Field name => array("begin" => ..., "end" => ...) markers
 *                    passed to fetch_match_contents().
 * @param array  $th  Optional. Field name => array(search => replacement) pairs
 *                    applied, in order, to that field's extracted text.
 * @return array Field name => extracted (and post-processed) text; an empty
 *               array when $ft is empty.
 */
function pick($url,$ft,$th=array())
{
    $c = fetch_urlpage_contents($url);

    // Initialise the result so an empty $ft returns array() instead of an undefined variable.
    $rs = array();

    foreach ($ft as $key => $value) {
        $rs[$key] = fetch_match_contents($value["begin"], $value["end"], $c);

        // Fields without replacement rules are simply skipped; isset() also
        // tolerates callers that pass null/undefined for $th.
        if (isset($th[$key]) && is_array($th[$key])) {
            foreach ($th[$key] as $old => $new) {
                $rs[$key] = str_replace($old, $new, $rs[$key]);
            }
        }
    }

    return $rs;
}

?>


------解决方案--------------------
<?
/**
 * Fetch a page, retrying while the response is empty.
 *
 * Makes up to ten attempts and stops at the first one whose trimmed body is
 * non-empty. Warnings from file_get_contents() are suppressed with "@".
 *
 * @param string $url Location handed to file_get_contents().
 * @return string|false Result of the last attempt made.
 */
function fetch_urlpage_contents($url)
{
    $attempts = 0;

    do {
        $c = @file_get_contents($url);
        $attempts++;
    } while (trim($c) == "" && $attempts < 10);

    return $c;
}

/**
 * Return the text between the first occurrence of $begin and the first
 * occurrence of $end that follows it (plain, case-sensitive substring search).
 *
 * @param string $begin Start marker.
 * @param string $end   End marker.
 * @param string $c     Text to search.
 * @return string The enclosed text, or "" when either marker is empty or not found.
 */
function fetch_match_contents($begin,$end,$c)
{
    $c = (string) $c;
    if ($begin === "" || $end === "") {
        return "";
    }

    // strpos() returns 0 for a marker at the very start of the text, so the
    // "not found" test must compare against false rather than "> 0".
    $beginPos = strpos($c, $begin);
    if ($beginPos === false) {
        return "";
    }

    // Look for the end marker only after the begin marker; an earlier stray
    // occurrence of $end must not hide the real one.
    $contentPos = $beginPos + strlen($begin);
    $endPos = strpos($c, $end, $contentPos);
    if ($endPos === false) {
        return "";
    }

    return substr($c, $contentPos, $endPos - $contentPos);
}


/**
 * Scrape a page and extract one fragment per configured field.
 *
 * @param string $url Location passed to fetch_urlpage_contents().
 * @param array  $ft  Field name => array("begin" => ..., "end" => ...) markers
 *                    passed to fetch_match_contents().
 * @param array  $th  Optional. Field name => array(search => replacement) pairs
 *                    applied, in order, to that field's extracted text.
 * @return array Field name => extracted (and post-processed) text; an empty
 *               array when $ft is empty.
 */
function pick($url,$ft,$th=array())
{
    $c = fetch_urlpage_contents($url);

    // Initialise the result so an empty $ft returns array() instead of an undefined variable.
    $rs = array();

    foreach ($ft as $key => $value) {
        $rs[$key] = fetch_match_contents($value["begin"], $value["end"], $c);

        // Fields without replacement rules are simply skipped; isset() also
        // tolerates callers that pass null/undefined for $th.
        if (isset($th[$key]) && is_array($th[$key])) {
            foreach ($th[$key] as $old => $new) {
                $rs[$key] = str_replace($old, $new, $rs[$key]);
            }
        }
    }

    return $rs;
}

?>

<html>
<head>
<title>caiji</title>
<meta http-equiv="Content-Type" content="text/html; charset=gb2312">
</head>
<body>
<?php
// Driver script: scrape the job-listing page and dump the text found between
// <title> and </title>. Uses the full "<?php" open tag because short tags
// ("<?") are disabled on many installs.
$url="http://www.01job.cn/asp/itjob.asp";
// Delimiters for each field to extract: field name => begin/end markers.
$ft = array();
$ft["title"]["begin"]="<title>";
$ft["title"]["end"]="</title>";
// No post-extraction replacements are needed; pass an explicit empty map
// rather than an undefined variable.
$th = array();
$rs=pick($url,$ft,$th);
print_r($rs);
?> 
</body>
</html>
------解决方案--------------------
写个例子。。
PHP code

// Download the listing page; file_get_contents() returns false on failure.
$content = file_get_contents('http://www.01job.cn/asp/itjob.asp');

$arr = array();
if ($content !== false) {
    // Capture the link text of every <a ... class="Pos"> anchor.
    //  - [^>]*? keeps the attribute scan inside a single opening tag.
    //  - (.*?) is lazy, so each match stops at the nearest </a> instead of
    //    swallowing everything up to the last </a> on the line.
    //  - the "s" modifier lets link text that wraps across lines still match.
    preg_match_all('/<a [^>]*? class="Pos">(.*?)<\/a>/s', $content, $arr);
}
print_r($arr);