<?php
|
|
/**
|
|
* TrieTree.php
|
|
* PHP implementation of the trie (prefix tree) algorithm.
|
|
* @version 150609:4
|
|
* @author karminski <code.karminski@outlook.com>
|
|
*
|
|
* @changes
|
|
* 150609:4 fix reference issue, ADD importTree() retval.
|
|
* 150528:3 add subContain().
|
|
*/
|
|
|
|
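/**
 * class TrieTree
 *
 * A trie (prefix tree) of UTF-8 words. Every stored word can carry a list of
 * type tags. Words can be inserted, removed and looked up, and a text can be
 * scanned for occurrences of the stored words.
 */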
|
|
class TrieTree{

    // const
    const SHORT_SEARCH     = true;
    const NOT_SHORT_SEARCH = false;

    /**
     * Node key that marks "a stored word ends at this node".
     * Earlier revisions used null as the key; PHP casts a null array key
     * to '', so trees built by those revisions use the same key.
     */
    const END_KEY = '';

    /** Node key that holds the type list of the word ending at this node. */
    const TYPE_KEY = '__type';

    /** A space inside a stored word acts as a wildcard gap while scanning. */
    const WILDCARD = ' ';

    // properties

    /**
     * Root node of the trie.
     *
     * Every node is an array keyed by a single character whose value is the
     * child node. A node at which a word ends additionally holds
     * END_KEY => array() and TYPE_KEY => the word's type list.
     *
     * @var array
     */
    public $tree = array();

    /**
     * Drop every stored word.
     *
     * @return void
     */
    public function flushTree(){
        $this->tree = array();
    }

    /**
     * Replace the current trie with a previously exported one.
     *
     * @param  array $tree tree as returned by exportTree()
     * @return bool  true when a non-empty tree was imported; false when the
     *               tree is empty or not an array (the trie is then empty)
     */
    public function importTree(&$tree){
        if(!is_array($tree)){
            // A non-array root would make every later lookup fail.
            $this->tree = array();
            return false;
        }
        $this->tree = $tree;
        return !empty($this->tree);
    }

    /**
     * Return the raw trie, e.g. for caching it and feeding it back to
     * importTree() later.
     *
     * @return array
     */
    public function exportTree(){
        return $this->tree;
    }

    /**
     * Insert a batch of words.
     *
     * @param  array $data list of rows shaped like
     *                     array('word' => string, 'type' => 'a,b,c');
     *                     'type' may also be an array or be missing
     * @return void
     */
    public function insertAll($data){
        if(!is_array($data) && !($data instanceof Traversable)){
            return;
        }
        foreach($data as $line){
            if(!isset($line['word'])){
                continue; // nothing to insert for this row
            }
            $type = isset($line['type']) ? $line['type'] : array();
            if(!is_array($type)){
                $type = explode(",", (string)$type);
            }
            $this->insert($line['word'], $type);
        }
    }

    /**
     * Insert one word. Surrounding whitespace is trimmed; inserting a word
     * that already exists overwrites its type list.
     *
     * @param  string $utf8_str word to store
     * @param  array  $type     type tags; empty entries are dropped, the keys
     *                          of the remaining entries are preserved
     * @return bool   true when the word was stored, false when it was empty
     */
    public function insert($utf8_str = "", $type = array()){
        // unset empty type
        $types = array();
        foreach((array)$type as $serial => $line){
            if(!empty($line)){
                $types[$serial] = $line;
            }
        }

        $chars = self::utf8Split(trim((string)$utf8_str));
        if(empty($chars)){
            // An empty word would put an end marker on the root node.
            return false;
        }

        // insert tree
        $node = &$this->tree;
        foreach($chars as $c){
            if(!isset($node[$c]) || !is_array($node[$c])){
                $node[$c] = array(); // insert new char
            }
            $node = &$node[$c];
        }
        $node[self::END_KEY]  = array(); // end of word
        $node[self::TYPE_KEY] = $types;  // fill type
        unset($node);                    // drop the reference into the tree

        return true;
    }

    /**
     * Remove one word. Only that word is removed: words that share a prefix
     * with it, or that are longer or shorter than it, stay in the trie.
     * Surrounding whitespace is trimmed, mirroring insert().
     *
     * @param  string $utf8_str word to remove
     * @return bool   true when the word was stored and has been removed
     */
    public function remove($utf8_str){
        $chars = self::utf8Split(trim((string)$utf8_str));
        if(empty($chars)){
            return false;
        }
        return self::removePath($this->tree, $chars, 0);
    }

    /**
     * Recursive worker of remove(): deletes the end marker of the word and
     * then prunes nodes that became empty on the way back up.
     *
     * @param  array &$node node reached after $depth characters
     * @param  array $chars characters of the word
     * @param  int   $depth number of characters already consumed
     * @return bool  true when the word was found below $node and removed
     */
    private static function removePath(&$node, $chars, $depth){
        if(!is_array($node)){
            return false;
        }
        if($depth === count($chars)){
            if(!array_key_exists(self::END_KEY, $node)){
                return false; // only a prefix of longer words
            }
            unset($node[self::END_KEY], $node[self::TYPE_KEY]);
            return true;
        }
        $c = $chars[$depth];
        if(!array_key_exists($c, $node)){
            return false;
        }
        if(!self::removePath($node[$c], $chars, $depth + 1)){
            return false;
        }
        if(empty($node[$c])){
            unset($node[$c]); // no other word runs through this child
        }
        return true;
    }

    /**
     * Walk the trie along $chars and list every stored word below the node
     * that is reached. The walk is read-only.
     *
     * @param  array $chars characters of the prefix; a null or '' entry
     *                      ends the prefix
     * @return array list of stored words starting with the prefix, empty
     *               when there is none
     */
    private function _find($chars){
        $node   = $this->tree;
        $prefix = "";
        foreach($chars as $c){
            if($c === null || $c === self::END_KEY){
                break;
            }
            if(!is_array($node) || !array_key_exists($c, $node)){
                return array();
            }
            $node    = $node[$c];
            $prefix .= $c;
        }
        $words = array();
        self::collectWords($node, $prefix, $words);
        return $words;
    }

    /**
     * Append every word stored below $node to $words.
     *
     * @param  mixed  $node   trie node (anything but an array is ignored)
     * @param  string $prefix characters leading to $node
     * @param  array  &$words result list
     * @return void
     */
    private static function collectWords($node, $prefix, &$words){
        if(!is_array($node)){
            return;
        }
        foreach($node as $key => $child){
            // Numeric characters come back as integer keys, hence the
            // strict comparisons.
            if($key === self::TYPE_KEY){
                continue; // payload, not a child node
            }
            if($key === self::END_KEY){
                $words[] = $prefix;
                continue;
            }
            self::collectWords($child, $prefix . $key, $words);
        }
    }

    /**
     * List the words stored in a (sub)tree as one string in which every
     * word is followed by a comma, e.g. "abc,abd,".
     *
     * @param  array        $tree    trie node to start from
     * @param  string       $results prefix put in front of every word
     * @return string|false the list ("" when the tree holds no word), or
     *                      false when $tree is empty or not an array
     */
    public function recrusiveStr($tree, $results){
        if(empty($tree) || !is_array($tree)){
            return false;
        }
        $words = array();
        self::collectWords($tree, (string)$results, $words);
        $r = "";
        foreach($words as $word){
            $r .= $word . ",";
        }
        return $r;
    }

    /**
     * List the stored words that start with the given prefix. An empty
     * prefix lists every stored word. The trie is not modified.
     *
     * @param  string $utf8_str prefix to look up
     * @return array  list of matching words, empty when there is none
     */
    public function find($utf8_str){
        return $this->_find(self::utf8Split($utf8_str));
    }

    /**
     * Scan $chars for words stored below $Tree and record them.
     *
     * Every position of the text is tried as the start of a word. When the
     * node that has been reached has a WILDCARD child, the remainder of that
     * word is searched for in the whole text by a recursive short search
     * (first hit only) on the wildcard subtree.
     *
     * @param  array $chars        characters of the text, with END_KEY
     *                             appended as the last element
     * @param  int   $len          count($chars)
     * @param  array $Tree         node to match from
     * @param  array $hit_tmp      characters matched before a wildcard; they
     *                             prefix every hit found by this call
     * @param  array &$hit_words   hits, keyed by word:
     *                             array('hits' => int, 'type' => array)
     * @param  bool  $short_search return on the first hit
     * @return bool  true when this call recorded at least one hit
     */
    private static function subContain($chars, $len, $Tree, $hit_tmp, &$hit_words, $short_search){
        if(!is_array($Tree)){
            return false;
        }
        $found  = false;
        $prefix = $hit_tmp;

        for($i = 0; $i < $len; $i++){
            $c = $chars[$i];
            if(!array_key_exists($c, $Tree)){
                continue;
            }
            $node    = $Tree[$c];
            $hit_tmp = $prefix; // start this candidate from a clean state

            for($j = $i + 1; $j < $len && is_array($node); $j++){
                $hit_tmp[] = $chars[$j - 1];

                // if end match
                if(array_key_exists(self::END_KEY, $node)){
                    $hitWord = implode("", $hit_tmp);
                    if(!empty($hit_words[$hitWord])){
                        $hit_words[$hitWord]['hits']++;
                    }else{
                        $hit_words[$hitWord] = array(
                            'hits' => 1,
                            'type' => isset($node[self::TYPE_KEY]) ? $node[self::TYPE_KEY] : array(),
                        );
                    }
                    $found = true;
                    if($short_search){
                        return true;
                    }
                }

                // if wildcard: look for the rest of the word anywhere in the
                // text, without leaving the current node, so that words
                // sharing this prefix are still matched below.
                if(array_key_exists(self::WILDCARD, $node)){
                    $wild_tmp   = $hit_tmp;
                    $wild_tmp[] = self::WILDCARD;
                    if(self::subContain($chars, $len, $node[self::WILDCARD], $wild_tmp, $hit_words, true)){
                        $found = true;
                        if($short_search){
                            return true;
                        }
                    }
                }

                // if miss match
                $c = $chars[$j];
                if(!array_key_exists($c, $node)){
                    break;
                }
                $node = $node[$c];
            }
        }

        return $found;
    }

    /**
     * Check whether a text contains at least one stored word.
     *
     * @param  string $utf8_str     text to scan
     * @param  array  &$hit_words   receives the hits keyed by word, each as
     *                              array('hits' => occurrences, 'type' => types);
     *                              entries already present are kept and counted up
     * @param  bool   $short_search stop at the first hit (see SHORT_SEARCH)
     * @return bool   true when this text produced at least one hit
     */
    public function contain($utf8_str, &$hit_words = array(), $short_search = false){
        if(!is_array($hit_words)){
            $hit_words = array();
        }
        $chars   = self::utf8Split($utf8_str);
        $chars[] = self::END_KEY; // lets a word ending on the last char match

        return self::subContain($chars, count($chars), $this->tree, array(), $hit_words, (bool)$short_search);
    }

    /**
     * Check a list of texts and report whether any of them contains a
     * stored word.
     *
     * @param  array $str_array texts to scan
     * @return bool  true as soon as one text contains a stored word
     */
    public function containAll($str_array){
        if(!is_array($str_array) && !($str_array instanceof Traversable)){
            return false;
        }
        foreach($str_array as $str){
            $hit_words = array();
            // Only the yes/no answer is needed, so the first hit is enough.
            if($this->contain($str, $hit_words, self::SHORT_SEARCH)){
                return true;
            }
        }
        return false;
    }

    /**
     * Split a UTF-8 string into its characters.
     *
     * Stands in for the external String::utf8Split() helper used by earlier
     * revisions: a class named "String" cannot be declared on PHP 7 and
     * later. NOTE(review): assumes that helper also returned one array
     * element per UTF-8 character - confirm against the old helper.
     *
     * @param  string $utf8_str
     * @return array  list of characters; bytes when the input is not valid UTF-8
     */
    private static function utf8Split($utf8_str){
        $utf8_str = (string)$utf8_str;
        if($utf8_str === ''){
            return array();
        }
        $chars = preg_split('//u', $utf8_str, -1, PREG_SPLIT_NO_EMPTY);
        if($chars === false){
            // preg_split() fails on malformed UTF-8; fall back to bytes.
            $chars = str_split($utf8_str);
        }
        return $chars;
    }

}
|
|
|