日期:2014-05-17 浏览次数:20801 次
class grep extends Controller {
// Configuration properties of this controller. None of them is read in the
// visible part of this class; presumably the parent Controller consumes
// them — TODO confirm against Controller.

// Table name; presumably the backing database table for this controller.
var $tableName = 'grep';
// Page size; presumably the number of rows per listing page — verify.
var $pagesize =31;
// Looks like an SQL ORDER BY fragment: grep_order descending, then grep_id descending.
var $order_string = "grep_order desc,grep_id desc";
// Presumably the column used when filtering/searching the list — TODO confirm.
var $filter_field = "grep_title";
// Presumably the column checked for duplicate values before saving — TODO confirm.
var $check_repeat_field = "grep_title";
// Empty list: no buttons are configured for this controller.
var $buttons = array(
);
// Descriptive label for this controller; the text translates to "[Crawl novels]".
var $description = "[爬取小说]";
function index()
{
//get the story list
$story_model = "story_model";
$this->load->model($story_model);
$where = array("story_id < 445");
$rows_story = $this->$story_model->get($where);
foreach ($rows_story as $key=>$val_story):
if($key < 237) continue;
$url = "http://www.xiaoxiaoshuo.net/".$val_story->storycate_vtitle."/".$val_story->story_vtitle;
$src_content = file_get_contents($url);
$src_content = iconv("GBK","utf-8//IGNORE",$src_content);
$src_content = str_replace("/style=\"border-width:0px\s*1px\s*1px\s*0px;border-color:#C8D8B8;border-style:solid;padding:3px;float:left;width:313px;\"/i","",$src_content);
$src_content = str_replace("style=\"BORDER-RIGHT: #c8d8b8 1px solid; PADDING-RIGHT: 3px; BORDER-TOP: #c8d8b8 0px solid; PADDING-LEFT: 3px; FLOAT: left; PADDING-BOTTOM: 3px; BORDER-LEFT: #c8d8b8 0px solid; WIDTH: 313px; PADDING-TOP: 3px; BORDER-BOTTOM: #c8d8b8 1px solid\"","",$src_content);
$src_content = preg_replace("/title=\"[^\"]*\"/iU","",$src_content);
$src_content = preg_replace("/title=\"[^\"]*\"/iU","",$src_content);
$src_content = preg_replace("/<LI[^>]*>/iU","",$src_content);
$src_content = preg_replace("/<\/LI[^>]*>/iU","",$src_content);
$src_content = preg_replace("/<a(?!href)[\d\D]*href/iU","<a href",$src_content);
$src_content = preg_replace('/\s(?=\s)/', '', $src_content);
$src_content = preg_replace('/[\n\r\t]/', ' ', $src_content);
$src_content = preg_replace('/[\n\r\t]/', ' ', $src_content);
$src_content = str_replace("http://www.xiaoxiaoshuo.net/yanqingxiaoshuo2/tijiaxinniang/","",$src_content);
preg_match_all("/<td\s*bgcolor=\"#EDF5EA\"([\d\D]*)<\/ul>/iU",$src_content,$arr_dstorycate);
$dstorycate_arr = $arr_dstorycate[1];
foreach ($dstorycate_arr as $key_dstorycate => $val_dstory_cate)
{
preg_match_all("/<font\s*color=\"#000000\">([^<]*)<\/font>/i",$val_dstory_cate,$dcate_title);
$datacate["dstorycate_pid"] = $val_story->story_id;
$datacate["dstorycate_title"] = $dcate_title[1][0];
//获取类别对象,记将之前的类别标置为已下载
$dtitle =$datacate["dstorycate_title"];
$obj_storycate = $this->check_dcate($dtitle,$val_story );
//pr($obj_storycate);
if($obj_storycate->dstorycate_ishot == 1)
{
$this->log( "<font color = gray>已此章节已抓取完 $val_story->story_title - $dtitle </font>,跳过");