日期:2014-05-17  浏览次数:20653 次

用stream_系列函数实现远程文件本地化,如何防止卡死
还是我前两天写的代码,已经用于采集内容了,今天用来改进采集到的内容中的图片等远程文件的本地化,有时候还是会卡死,高手看看如何能防止卡死。
代码如下,这段代码仍然提供两种方式实现本地化以便比较,事实证明这种流方式的实现效率确实提高了。
PHP code

<?php
// Directory containing this script, with forward slashes and a trailing slash.
$dir = strtr(dirname(__FILE__), '\\', '/') . '/';

// --- Benchmark 1: fetch each remote file one after another with copy(). ---
$timeStart = microtime(true);
$data = '';
$urls = array(
    'http://www.jxcms.com/upload/2011/0518/0337319304.jpg',
    'http://www.jxcms.com/upload/2011/0310/0951568835.jpg',
    'http://www.tzksgs.com/upload/banner1.jpg',
    'http://www.tzksgs.com/upload/banner2.jpg'
);
foreach ($urls as $url) {
    // Local copies from this pass are prefixed with "a_".
    $localName = 'a_' . basename($url);
    copy($url, $dir . $localName);
}
$timeEnd = microtime(true);
printf("Spend time: %s second(s)\n", $timeEnd - $timeStart);
echo '<br>';

// Restart the clock for the stream-based pass that follows.
$timeStart = microtime(true);
/**
 * Fetch several HTTP(S) URLs concurrently over raw stream sockets.
 *
 * A request is written to every URL up front; the responses are then read
 * as they arrive using stream_select(). The function gives up on all
 * still-pending transfers once no socket has produced data for $timeout
 * seconds, so a stalled server can no longer block the caller forever.
 *
 * @param array $urls Map of id => absolute http:// or https:// URL.
 * @return array Map of id => response body. The body is '' when the response
 *               was not a 200, was malformed, or did not complete in time.
 *               Ids whose URL has no host or whose connection failed are absent.
 */
function getMoreContent($urls) {
    $timeout = 30; // seconds: connect timeout and maximum idle time while reading
    $rs = array();
    $sockets = array();
    // HTTP_USER_AGENT does not exist under CLI/cron, so fall back to a fixed value.
    $userAgent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : 'PHP';
    foreach ($urls as $id => $url) {
        $tmp = parse_url($url);
        if (!is_array($tmp) || empty($tmp['host'])) {
            continue; // not an absolute URL: nothing to connect to
        }
        $host = $tmp['host'];
        $path = isset($tmp['path']) ? $tmp['path'] : '/';
        if (!empty($tmp['query'])) {
            $path .= '?' . $tmp['query'];
        }
        $isHttps = isset($tmp['scheme']) && strtolower($tmp['scheme']) === 'https';
        $defaultPort = $isHttps ? 443 : 80;
        $port = empty($tmp['port']) ? $defaultPort : (int) $tmp['port'];
        // https needs the ssl:// transport; a bare TCP socket to port 443 cannot speak TLS.
        $transport = $isHttps ? 'ssl' : 'tcp';
        // The Host header carries the port only when it is not the scheme default.
        $hostHeader = ($port === $defaultPort) ? $host : "$host:$port";
        $fp = stream_socket_client("$transport://$host:$port", $errno, $errstr, $timeout);
        if ($fp) {
            // Bound every individual read as well, so fread() itself cannot block indefinitely.
            stream_set_timeout($fp, $timeout);
            $rs[$id] = '';
            $sockets[$id] = $fp;
            // HTTP/1.0 keeps the server from answering with chunked transfer encoding,
            // which this function does not decode.
            fwrite($fp, "GET $path HTTP/1.0\r\nHost: $hostHeader\r\nUser-Agent: $userAgent\r\nConnection: Close\r\n\r\n");
        }
    }
    // Wait for the responses to come back in.
    while (count($sockets)) {
        $read = $sockets;
        // stream_select() takes these by reference, so they must be real variables.
        $write = null;
        $except = null;
        $ready = stream_select($read, $write, $except, $timeout);
        if ($ready === false || $ready === 0) {
            // Error, or nothing arrived within $timeout seconds: abandon whatever is
            // still pending instead of looping forever.
            foreach ($sockets as $pendingId => $pending) {
                fclose($pending);
                $rs[$pendingId] = '';
            }
            break;
        }
        // Readable sockets either have data for us or have reached end-of-stream.
        foreach ($read as $r) {
            $id = array_search($r, $sockets, true);
            if ($id === false) {
                continue;
            }
            $data = fread($r, 8192);
            if ($data === false || $data === '') {
                // An empty read means EOF or a read timeout; only EOF is a complete response.
                $complete = feof($r);
                fclose($r);
                $parts = explode("\r\n\r\n", $rs[$id], 2);
                // Check the status line itself, not any "200" that happens to appear in a header.
                $ok = $complete
                    && count($parts) === 2
                    && preg_match('#^HTTP/\d+(?:\.\d+)?\s+200\b#', $parts[0]) === 1;
                $rs[$id] = $ok ? $parts[1] : '';
                unset($sockets[$id]);
            } else {
                $rs[$id] .= $data;
            }
        }
    }
    return $rs;
}
// --- Benchmark 2: fetch all files concurrently via getMoreContent(). ---
$rs = getMoreContent($urls);
foreach ($rs as $k => $v) {
    // Local copies from this pass are prefixed with "b_"; write errors are suppressed.
    $target = $dir . 'b_' . basename($urls[$k]);
    @file_put_contents($target, $v);
}
$timeEnd = microtime(true);
printf("Spend time: %s second(s)\n", $timeEnd - $timeStart);

?>





------解决方案--------------------
由于php没有计时器,所以你不可能中断一个没有中断接口的函数的执行
所以底层函数都只适合于理想的条件下

我依然建议你使用多道的curl来完成你的工作
PHP code
// Download several URLs in parallel with curl_multi and save each one under
// its base name in the current working directory.
$urls = array(
 'http://www.jxcms.com/upload/2011/0518/0337319304.jpg',
 'http://www.jxcms.com/upload/2011/0310/0951568835.jpg',
 'http://www.tzksgs.com/upload/banner1.jpg',
 'http://www.tzksgs.com/upload/banner2.jpg'
);
$mh = curl_multi_init();
$conn = array();

foreach ($urls as $i => $url) {
       $conn[$i] = curl_init($url);
       curl_setopt($conn[$i], CURLOPT_RETURNTRANSFER, 1);
       // Bound both the connect phase and the whole transfer so that one
       // stalled server cannot keep the script waiting forever.
       curl_setopt($conn[$i], CURLOPT_CONNECTTIMEOUT, 10);
       curl_setopt($conn[$i], CURLOPT_TIMEOUT, 30);
       curl_multi_add_handle($mh, $conn[$i]);
}

do {
  // Older libcurl versions ask to be called again immediately.
  do {
    $status = curl_multi_exec($mh, $active);
  } while ($status === CURLM_CALL_MULTI_PERFORM);
  if ($status !== CURLM_OK) {
    break;
  }
  // Inside this loop you have the chance to abort the run; curl_getinfo()
  // exposes details about each connection.
  if ($active && curl_multi_select($mh, 1.0) === -1) {
    // select() could not wait on the sockets; sleep briefly instead of spinning.
    usleep(100000);
  }
} while ($active);

// Collect which transfers finished without a curl-level error.
$succeeded = array();
while (($info = curl_multi_info_read($mh)) !== false) {
  if ($info['result'] === CURLE_OK) {
    $i = array_search($info['handle'], $conn, true);
    if ($i !== false) {
      $succeeded[$i] = true;
    }
  }
}

foreach ($urls as $i => $url) {
  $fn = basename($url);
  // Only save complete 200 responses; a timed-out or error response is not the image.
  if (isset($succeeded[$i]) && (int) curl_getinfo($conn[$i], CURLINFO_HTTP_CODE) === 200) {
    file_put_contents($fn, curl_multi_getcontent($conn[$i]));
  }
  curl_multi_remove_handle($mh, $conn[$i]);
  curl_close($conn[$i]);
}
curl_multi_close($mh);