中文 分词 概念 和 实现 代码 Chinese word segmentation concept and code 2014/8/16 Baofa Chinese word segmentation system Download
create after data[title,content] Chinese word segmentation system et_conversation[$Id,title] new table et_cnword[$Id,title,content] et_post[$Id,content] Chinese words converted to urlencode search[keyword] urlencode matching works with fulltext! [association of $Id] Chinese word segmentation system Chinese keyword converted to urlencode [view] show result concept Insert Search
/**
 * Segment a text with PSCWS4 and return the resulting words as a
 * space-separated index string.
 *
 * The segmenter is configured with the UTF-8 rule file and the 'utf8'
 * charset, so $words is expected to be UTF-8 text.
 *
 * @param string $words Text to segment.
 * @return string Unique words longer than one byte, in order of first
 *                appearance, each followed by a single space; '' when no
 *                word qualifies.
 */
public static function get_idx($words)
{
    // require_once, not require: this method may be called several times in
    // one request, and re-including a class definition is a fatal error.
    require_once PATH_LIBRARY.'/scws/pscws4.class.php';

    $so = new PSCWS4();
    $so->set_dict(PATH_LIBRARY.'/scws/dict/dict.xdb');
    $so->set_rule(PATH_LIBRARY.'/scws/etc/rules.utf8.ini');
    $so->set_charset('utf8');
    $so->set_ignore(true);

    // Feed the caller's text (the original passed an undefined $key here,
    // so nothing was ever segmented).
    $so->send_text($words);

    $output = '';
    // get_result() hands back the segments batch by batch until it returns
    // a falsy value.
    while ($tmp = $so->get_result()) {
        foreach ($tmp as $item) {
            $output .= $item['word'].' ';
        }
    }
    $so->close();

    // Drop empty pieces left by the split, then drop duplicate words
    // (array_unique keeps the first occurrence of each word).
    $data = array_unique(array_filter(explode(' ', $output)));

    // Words are stored as-is; an earlier variant stored each word
    // urlencode()d with the '%' characters stripped.
    $data_code = '';
    foreach ($data as $ss) {
        // strlen() counts bytes: single-byte words are skipped, while a
        // single multi-byte UTF-8 character still passes.
        if (strlen($ss) > 1) {
            $data_code .= $ss.' ';
        }
    }

    return $data_code;
}
// Trailing caption in the source text: "Chinese word segmentation system Class"
/**
 * Insert a question into `questions` and its word-segmented copy into
 * `questions_idx` under the same id.
 *
 * @param string $title    Question title.
 * @param string $detail   Question body; tags are stripped before it is
 *                         segmented for the index.
 * @param int    $askerid  Id of the asking user.
 * @return mixed Whatever $this->db->query() returned for the INSERT into
 *               `questions`.
 */
public function add($title, $detail, $askerid)
{
    $date = NOW;
    // $askerid is interpolated unquoted, so force it to an integer.
    $askerid = (int) $askerid;

    // NOTE(review): $title and $detail are interpolated exactly as received;
    // confirm callers escape them (or move this to the db layer's
    // parameter binding if it has one).
    $sql = 'INSERT INTO questions '.
           '(title, detail, askerid, date) '.
           'values '.
           "('$title', '$detail', $askerid, '$date')";
    $result = $this->db->query($sql);

    // Do not write an index row for a question that was not stored.
    // Assumes query() signals failure with false -- TODO confirm.
    if ($result === false) {
        return $result;
    }

    $id = $this->db->lastId();

    // The index strings are produced here, so nobody upstream can have
    // escaped them; addslashes() is a stopgap until the db layer's own
    // escaping is used.
    $title_idx  = addslashes(CWS::get_idx($title));
    $detail_idx = addslashes(CWS::get_idx(strip_tags($detail)));

    $sql = 'INSERT INTO questions_idx '.
           '(id, title, detail) '.
           'values '.
           "($id, '$title_idx', '$detail_idx')";
    $this->db->query($sql);

    return $result;
}
// Trailing caption in the source text: "Chinese word segmentation system Insert"
/**
 * Full-text search over the segmented question index.
 *
 * The keyword is segmented with CWS::get_idx() and the resulting words are
 * matched against questions_idx (title, detail); matching rows are joined
 * back to `questions` by id.
 *
 * @param string $word   Raw search keyword.
 * @param mixed  $limit  Passed through unchanged to $this->db->getAll().
 * @return mixed Whatever $this->db->getAll() returns for the query.
 */
public function search($word, $limit)
{
    // The segmented terms are generated here and then placed inside a quoted
    // SQL literal, so escape them; addslashes() is a stopgap until the db
    // layer's own escaping is used.
    $terms = addslashes(CWS::get_idx($word));

    $sql = "SELECT A.title, A.detail, askerid, date ".
           "FROM questions as A, questions_idx as B ".
           "WHERE A.id = B.id ".
           "AND MATCH (B.title, B.detail) AGAINST ('$terms')";

    return $this->db->getAll($sql, $limit);
}
// Trailing caption in the source text: "Chinese word segmentation system Search"