日期:2014-05-17  浏览次数:20478 次

[原创] 为找房方便，将 sohu 焦点的 RSS 改成全文的。
房价貌似快要涨了，俺最近天天加班，俺媳妇来负责找房源，可她那狗屎公司无法上网，只好用 iPad 加载 RSS 离线看，结果狗屎 sohu 焦点的 RSS 不是全文 RSS。为了老婆找房方便，写了点代码，用于生成全文的 RSS，希望借此机会尝试一下用代码改善生活的感觉。不废话了，直接贴代码。
PHP code

<?php
// Entry script: downloads the summary-only RSS feed, replaces every item's
// description with the full article body and prints the rebuilt feed.

// The payload is an RSS (XML) document encoded as UTF-8, so announce it as XML.
header("Content-type: application/xml; charset=utf-8");
// Fetching every linked article is slow; raise the execution time limit.
// (The ini key is "max_execution_time"; a misspelled key is silently ignored.)
ini_set("max_execution_time", 3600);
$url = "http://sz.focus.cn/common/xml/rss/news/hot.php";

$newItemArr = parseItems( $url );
// Rebuild the <item> markup, with the downloaded article body as description.
$xmlItemContent = makeRssItemString( $newItemArr );

// Fixed head and tail of the RSS document.
// The XML declaration has to be the very first bytes of the output, so the
// string must not begin with a newline. The declared encoding is UTF-8 to
// agree with the Content-type header and with the UTF-8 item content.
$xmlPre = '<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="/common/xml/xsl/rss.xsl" media="all"?>
<?xml-stylesheet type="text/css" href="/common/xml/css/rss.css" media="all"?>

<rss version="2.0"
xmlns:focus="http://house.focus.cn"
xmlns:F="http://house.focus.cn"
docType="rss"
F:docType="rss"
>
<channel
pageIndex=""
pageSize="20"
recordCount=""
F:pageIndex=""
F:pageSize="20"
F:recordCount="">
<image>
<title><![CDATA[热点新闻-搜狐焦点网深圳站 ]]></title>
<link>http://sz.focus.cn/newscenter/xwsy.html</link>
<url>http://images.house.focus.cn/img/newhouselogo.gif</url>
</image>
<title>热点新闻-搜狐焦点网深圳站</title>
<link>http://sz.focus.cn/newscenter/xwsy.html</link>
<description><![CDATA[提供最全面最及时的中国房地产新闻资讯 ]]></description>
<copyright>Copyright 2012, sohu.com Inc., all rights reserved</copyright>
<language>zh-cn</language>
<lastBuildDate>Mon, 02 Jul 2012 21:44:21 +0800</lastBuildDate>
<pubDate>Mon, 02 Jul 2012 21:44:21 +0800</pubDate>
<category>地产综合</category>
<ttl>30</ttl>';
$xmlAppend = '</channel></rss>';


$xmlOutput = $xmlPre . $xmlItemContent . $xmlAppend ;
echo $xmlOutput;


/**
 * @param $url
 * @return array itemObj
 */
/**
 * Loads the RSS feed at $url and returns its items, each one enriched by
 * makeRssItemObj().
 *
 * An unreachable or malformed feed, or a feed without any <item>, yields an
 * empty array instead of warnings caused by dereferencing a failed load.
 *
 * @param string $url location of the RSS feed (anything simplexml_load_file accepts)
 * @return array list of item objects as returned by makeRssItemObj()
 */
function parseItems( $url )
{
    // Collect libxml parse errors internally so a broken feed does not leak
    // warnings into the generated XML output; restore the caller's setting.
    $previousSetting = libxml_use_internal_errors( true );
    $xmlObj = simplexml_load_file( $url );
    libxml_clear_errors();
    libxml_use_internal_errors( $previousSetting );

    // simplexml_load_file() returns false when the feed cannot be loaded.
    if ( $xmlObj === false ) {
        return array();
    }

    // Nothing to do when the document has no channel/item nodes.
    if ( !isset( $xmlObj->channel->item ) ) {
        return array();
    }

    $resultArr = array();

    foreach ( $xmlObj->channel->item as $item ) {
        $resultArr[] = makeRssItemObj( $item );
    }

    return $resultArr;
}



/**
 * @param $item
 * @return itemObj
 */
/**
 * Replaces an item's description with the article body scraped from the page
 * the item links to.
 *
 * The page is downloaded with fileGetContent() (defined elsewhere in this
 * file), converted from gb2312 to utf-8, and the markup between
 * id="newscontent"> and the following <div class="clear"> is taken as the body.
 * When the download, the conversion or the pattern match fails, the item is
 * returned with its original description untouched.
 *
 * @param object $item feed item exposing ->link and ->description
 * @return object the same item, possibly with an updated description
 */
function makeRssItemObj( $item )
{
    // If curl is not available, fileGetContent can be implemented with
    // file_get_contents or a raw socket connection instead.
    $urlContent = fileGetContent( $item->link );

    // NOTE(review): assumes fileGetContent returns the page body as a string
    // and something else (e.g. false) on failure - confirm against its definition.
    if ( !is_string( $urlContent ) || $urlContent === '' ) {
        return $item;
    }

    // Best-effort conversion: iconv raises a notice and may return false when
    // the page contains bytes that are not valid gb2312.
    $utf8Content = @iconv( "gb2312", "utf-8", $urlContent );
    if ( $utf8Content === false ) {
        return $item;
    }

    // Ungreedy (U) match so the capture stops at the first clearing div.
    $matched = preg_match(
        "/id=\"newscontent\">([\d\D]+)<div\s+class=\"clear\">/iU",
        $utf8Content,
        $matchArr
    );

    // Only overwrite the summary when the article body was actually found;
    // reading $matchArr[1] without a match would be an undefined offset.
    if ( $matched === 1 ) {
        $item->description = $matchArr[1];
    }

    return $item;
}

/**
 * @param $itemObjArr
 * @return string
 */
/**
 * Serialises a list of item objects into a string of RSS <item> elements.
 *
 * Title and description are wrapped in CDATA sections; link and pubDate are
 * emitted as escaped element text. The author is always "mu_rain".
 *
 * @param array $itemObjArr objects exposing ->title, ->link, ->description and ->pubDate
 * @return string concatenated <item>...</item> markup, "" for an empty list
 */
function makeRssItemString( $itemObjArr )
{
    $result = "";

    foreach ( $itemObjArr as $itemObj ) {
        // A literal "]]>" would terminate the CDATA section early, so split it
        // across two adjacent CDATA sections.
        $title       = str_replace( ']]>', ']]]]><![CDATA[>', (string) $itemObj->title );
        $description = str_replace( ']]>', ']]]]><![CDATA[>', (string) $itemObj->description );

        // Element text must not contain raw "&" or "<" (query strings in links
        // commonly do), otherwise the feed is not well-formed XML.
        $link    = htmlspecialchars( (string) $itemObj->link, ENT_NOQUOTES, 'UTF-8' );
        $pubDate = htmlspecialchars( (string) $itemObj->pubDate, ENT_NOQUOTES, 'UTF-8' );

        $result .= "<item>";
        $result .= "<title><![CDATA[ $title ]]></title>";
        $result .= "<link>$link</link>";
        $result .= "<description><![CDATA[ $description ]]></description>";
        $result .= "<author>mu_rain</author>";
        $result .= "<pubDate>$pubDate</pubDate>";
        // Close the element; emitting a second opening <item> here left every
        // item unclosed and the whole feed malformed.
        $result .= "</item>";
    }

    return $result;
}


// ------------------------------------
/**
 * regulary show the string or object or json.
 * 规格化显示
 *
 * @param  $str    对象的实例
 * @package        P
 * @subpackage    String
 * @category    Putils
 * @author