日期:2014-05-17 浏览次数:20526 次
// Strings are handled as UTF-8 internally; everything printed is passed
// through ob_iconv_handler, which re-encodes the output buffer to GBK.
iconv_set_encoding('internal_encoding', 'utf-8');
iconv_set_encoding('output_encoding', 'gbk');
ob_start('ob_iconv_handler');

// Input file handed to T::parse(), which converts its contents from GBK to UTF-8.
$fn = '如何在一篇文章内拆分和匹配关键字_例文.txt';

// Parse the file and dump whatever parse() returns.
$p = new T();
$ar = $p->parse($fn);
print_r($ar);

// Debugging aid: uncomment to also dump the parser's $dat property.
//print_r($p->dat);
class T {
var $maxlen = 4;//最大组词长度
var $dat = array();
var $dict = array();
/**
 * Return the document character at the current position plus $offs.
 *
 * Returns false instead of a character when:
 *  - the requested position is at or past the end of the document,
 *  - $offs has reached the maximum word length ($this->maxlen), or
 *  - the character is one of the delimiters stored in $this->dict
 *    (filled in by parse()).
 *
 * @param int $offs Look-ahead distance from the current position $this->i.
 * @return string|false The character, or false when none is usable.
 */
function get($offs=0) {
    // Bounds check first, so the array access below never goes out of range.
    if($this->i + $offs >= $this->len || $offs >= $this->maxlen) return false;
    $ch = $this->doc[$this->i + $offs];
    // Strict comparison: both sides are strings, so no type juggling is wanted.
    if(in_array($ch, $this->dict, true)) return false;
    return $ch;
}
function parse($filename) {
$this->dict = explode('
------解决方案--------------------
', iconv('gbk', 'utf-8', ',
------解决方案--------------------
。
------解决方案--------------------
;
------解决方案--------------------
:
------解决方案--------------------
“
------解决方案--------------------
”
------解决方案--------------------
?'));
$s = file_get_contents($filename);
$s = iconv('gbk', 'utf-8', $s);
preg_match_all('/./u', $s, $r);
$this->doc = $r[0];
$this->i = 0;
$this->len = count($this->doc);
while($this->i < $this->len) {
if(($ch = $this->get()) !== false) {