<?php
Wind::import('SRV:word.srv.filter.PwFilterAction');

/**
 * Word filter implemented as a byte-level trie (DFA) matcher.
 *
 * @author jinlong.panjl <jinlong.panjl@aliyun-inc.com>
 * @copyright 2003-2103 phpwind.com
 * @license http://www.phpwind.com
 * @version $Id$
 * @package wind
 */
class PwFilterDfa extends PwFilterAction {

	/**
	 * Trie nodes, indexed by node id (0 is the root).
	 *
	 * Each node is array(0 => bool isTerminal, 1 => array(byteValue => childNodeId)).
	 * Terminal nodes built by createData() additionally carry 2 => type and
	 * 3 => replacement text.
	 *
	 * @var array
	 */
	public $nodes;

	/**
	 * @param array|string $nodes previously built trie; ignored when empty
	 */
	public function __construct($nodes = '') {
		$nodes && $this->nodes = $nodes;
	}

	/**
	 * Builds the trie from a word list and stores it in $this->nodes.
	 *
	 * Each entry is either a bare word or "word|type|replacement"
	 * (see split()). Words are indexed byte by byte.
	 *
	 * @param array $words
	 * @return array the generated node table
	 */
	public function createData($words) {
		$this->nodes = array(array(false, array())); // node 0 is the root
		$next = 1; // id that the next new node will receive
		foreach ($words as $word) {
			$cur = 0; // start every word at the root
			list($word, $type, $replace) = $this->split($word);
			for ($len = strlen($word), $i = 0; $i < $len; $i++) {
				$c = ord($word[$i]);

				if (isset($this->nodes[$cur][1][$c])) { // edge already exists, follow it
					$cur = $this->nodes[$cur][1][$c];
					continue;
				}
				$this->nodes[$next] = array(false, array()); // create the child node
				$this->nodes[$cur][1][$c] = $next; // link it from its parent
				$cur = $next;
				$next++;
			}
			$this->nodes[$cur][0] = true; // the word ends here
			$this->nodes[$cur][2] = $type;
			$this->nodes[$cur][3] = trim($replace);
		}
		return $this->nodes;
	}

	/**
	 * Splits a dictionary entry on "|" into its parts.
	 *
	 * The result always has at least three elements (word, type, replacement)
	 * so it can be destructured safely: the type defaults to 0 when the entry
	 * contains no "|", and a missing replacement defaults to ''.
	 *
	 * @param string $str
	 * @return array
	 */
	public function split($str) {
		if (strpos($str, '|') === false) {
			return array($str, 0, '');
		}
		return array_pad(explode('|', $str), 3, '');
	}

	/**
	 * Serializes the node table into "<$this->file>/word.txt".
	 *
	 * NOTE(review): $this->file is not defined in this class; presumably it is
	 * provided by PwFilterAction - confirm.
	 *
	 * @param array $nodes
	 */
	public function saveData($nodes) {
		WindFolder::mkRecur($this->file);
		WindFile::write($this->file . '/word.txt', serialize($nodes));
	}

	/**
	 * Tells whether the text contains at least one word from the trie.
	 *
	 * @param string $s
	 * @return bool
	 */
	public function check($s) {
		return count($this->_dfaScan($s, true)) > 0;
	}

	/**
	 * Finds every (non-overlapping) dictionary word in the text.
	 *
	 * @param string $s
	 * @return array array(minType, matches) where matches maps the byte offset
	 *               of each hit to the matched substring, and minType is the
	 *               min() of the matched words' types (null when nothing matched)
	 */
	public function match($s) {
		$ret = array();
		$types = array();
		foreach ($this->_dfaScan($s, false) as $hit) {
			list($start, $length, $node) = $hit;
			$types[] = isset($this->nodes[$node][2]) ? $this->nodes[$node][2] : null;
			$ret[$start] = substr($s, $start, $length);
		}
		$minType = $types ? min($types) : null;
		return array($minType, $ret);
	}

	/**
	 * Replaces every dictionary word found in the text by its replacement.
	 *
	 * Matches are located on the original text and the result is assembled
	 * separately, so replacements whose length differs from the matched word
	 * do not shift the offsets of later matches.
	 *
	 * @param string $s text to search
	 * @return string text with the replacements applied
	 */
	public function replace($s) {
		$hits = $this->_dfaScan($s, false);
		if (!$hits) {
			return $s;
		}
		$out = '';
		$pos = 0; // first byte of $s not yet copied to $out
		foreach ($hits as $hit) {
			list($start, $length, $node) = $hit;
			$out .= substr($s, $pos, $start - $pos);
			$out .= isset($this->nodes[$node][3]) ? $this->nodes[$node][3] : '';
			$pos = $start + $length;
		}
		$out .= substr($s, $pos);
		return $out;
	}

	/**
	 * Scans the text and returns the dictionary words it contains.
	 *
	 * For every start position the trie is walked byte by byte; the first
	 * terminal node reached yields a hit and scanning resumes right after it.
	 * When the walk fails - including when the text ends in the middle of a
	 * partial match - the start position advances by one character and the
	 * walk is retried from there.
	 *
	 * @param string $s
	 * @param bool $firstOnly stop after the first hit
	 * @return array list of array(startOffset, byteLength, nodeId)
	 */
	private function _dfaScan($s, $firstOnly = false) {
		$s = (string) $s;
		$hits = array();
		$isUTF8 = $this->_dfaIsUtf8();
		$len = strlen($s);
		$p = 0; // start offset of the current attempt
		while ($p < $len) {
			$cur = 0;
			$end = -1; // offset of the last byte of the hit, -1 when none
			for ($i = $p; $i < $len; $i++) {
				$c = ord($s[$i]);
				if (!isset($this->nodes[$cur][1][$c])) {
					break;
				}
				$cur = $this->nodes[$cur][1][$c];
				if (!empty($this->nodes[$cur][0])) {
					$end = $i;
					break;
				}
			}
			if ($end >= 0) {
				$hits[] = array($p, $end - $p + 1, $cur);
				if ($firstOnly) {
					return $hits;
				}
				$p = $end + 1;
				continue;
			}
			// Nothing starts at $p. In non-UTF-8 charsets two consecutive high
			// bytes are stepped over together as one double-byte character.
			if (!$isUTF8 && ord($s[$p]) > 127 && $p + 1 < $len && ord($s[$p + 1]) > 127) {
				$p += 2;
			} else {
				$p += 1;
			}
		}
		return $hits;
	}

	/**
	 * Tells whether the configured site charset is UTF-8.
	 *
	 * @return bool
	 */
	private function _dfaIsUtf8() {
		$charset = str_replace('-', '', strtolower(Wekit::V('charset')));
		return $charset == 'utf8';
	}
}