<?php 
class SplitWord
{
	var $RankDic = Array();
	var $OneNameDic = Array();
	var $TwoNameDic = Array();
	var $NewWord = Array();
	var $SourceString = '';
	var $ResultString = '';
	var $SplitChar = ' '; //ָ
	var $SplitLen = 4; //ʳ
	var $EspecialChar = "||";
	var $NewWordLimit = "|||||||||||||||";
	
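	//Alternation of unit words. SplitRMM() and ParOther() test whether a token
	//matches this pattern so that it can be attached to the token before it.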
	var $CommonUnit = "|||ʱ||||Ԫ||ǧ|||λ|";
	
	var $CnNumber = "|||||||||||||||||||||||||||||||| ||||||||||||||||||||||||||||||||";
	var $CnSgNum = "һ|||||||||ʮ||ǧ|||";
	var $MaxLen = 13; //ʵ 7 ֵ֣Ϊֽ
	var $MinLen = 3;  //С 2 ֵ֣Ϊֽ
	var $CnTwoName = "ľ Ϲ  ԯ       ˾ͽ ˾ Ϲ ŷ       Ľ ˾ ĺ    ʸ ξ ";
	var $CnOneName = "Ǯ֣ʩſײϻκսлˮ˸ɷ³ΤﻨԬۺʷƷѦ׺ޱϺʱƤ뿵ԪƽҦۿëױ갼Ʒɴ̸éܼףϯǿ·¦Σͯչ÷ʢֵĲ﷮֧¾̹¬ĪѸɽӦڵʯ޼ťϻ½춻κӷഢɾθڽ͹ȳȫۭﱩղҶ˾۬輻ӡް׻̨Ӷ̼׿ɳܲ˫ݷ̷̼Ƚ۪ӺSɣţͨ༽ۣũ±ׯֳ̲Ľϰ°θ߾ӺⲽĿܹ»ڶŹεԽ¡ʦ˹ǼĿɳᳲ";
  //------------------------------
  //PHP 4 style constructor
  //------------------------------
  // Method named after the class (the PHP 4 constructor convention).
  // It only forwards $loaddic to __construct(), so both construction
  // conventions run the same initialisation.
  function SplitWord($loaddic=true)
  {
    $this->__construct($loaddic);
  }
  //------------------------------
  //PHP 5 style constructor
  //------------------------------
  // Constructor. When $loaddic is true it builds the lookup tables:
  //   OneNameDic - every 2-byte unit of $this->CnOneName becomes a key
  //   TwoNameDic - every space-separated entry of $this->CnTwoName becomes a key
  //   RankDic    - read from the dictionary file, one "word rank" pair per
  //                line, stored as RankDic[byte length of word][word] = rank
  // When $loaddic is false nothing is loaded, so IsWord() is false for every
  // word and GetRank() returns 0.
  // If the dictionary file cannot be opened, RankDic is simply left empty
  // (fopen() itself reports the warning).
  function __construct($loaddic=true)
  {
    if(!$loaddic) return;

    // Walk the name string two bytes at a time. A trailing odd byte is
    // ignored instead of reading one byte past the end of the string.
    $oneName = $this->CnOneName;
    $oneLen = strlen($oneName);
    for($i=0; $i+1<$oneLen; $i+=2){
      $this->OneNameDic[$oneName[$i].$oneName[$i+1]] = 1;
    }
    $twoname = explode(" ",$this->CnTwoName);
    foreach($twoname as $n){ $this->TwoNameDic[$n] = 1; }
    unset($twoname);
    // The raw strings are not needed once the tables exist.
    unset($this->CnTwoName);
    unset($this->CnOneName);

    // Load the word/rank dictionary. l_path is a constant defined outside
    // this file.
    $dicfile = l_path.'./inc/wwwdic.csv';
    $fp = fopen($dicfile,'r');
    if($fp === false) return;
    // Compare against false explicitly so that a falsy line (e.g. "0")
    // does not end the loop early.
    while(($line = fgets($fp,64)) !== false){
      // Strip the line ending so it does not become part of the rank value.
      $ws = explode(' ',rtrim($line,"\r\n"));
      // Skip lines that do not carry both a word and a rank.
      if(count($ws) < 2) continue;
      $this->RankDic[strlen($ws[0])][$ws[0]] = $ws[1];
    }
    fclose($fp);
  }
  
  //--------------------------
  //Release the dictionary
  //--------------------------
  // Drops the word/rank dictionary by unsetting the RankDic property.
  // The name tables (OneNameDic / TwoNameDic) are not touched.
  function Clear()
  {
    unset($this->RankDic);
  }
  
  //----------------------------
  //Set the source string
  //----------------------------
  // Stores $str as the text to segment: it is normalised by ReviseString(),
  // trimmed, and any previous result is cleared.
  function SetSource($str)
  {
    $normalized = $this->ReviseString($str);
    $this->SourceString = trim($normalized);
    $this->ResultString = "";
  }
  
  //-----------------------------
  //Check whether a string starts with a single-byte (non double-byte) character
  //-----------------------------
  // Tells whether $str starts with a single-byte character.
  // Only the first byte is inspected.
  // Returns "" for an empty string, false when the first byte is above 0x80,
  // true otherwise.
  function NotGBK($str)
  {
    if($str=="") return "";
    $firstByte = ord($str[0]);
    return !($firstByte>0x80);
  }
  //-----------------------------
  //RMM (reverse maximum matching) segmentation entry point
  //-----------------------------
  // Segments text and returns the tokens joined by $this->SplitChar.
  // $str        text to segment; when it is "" the text previously stored by
  //             SetSource() is used
  // $tryNumName forwarded to RunRMM()
  // $tryDiff    forwarded to RunRMM()
  // Returns "" when there is nothing to segment. The result is also kept in
  // $this->ResultString; tokens are prepended to whatever that property
  // already holds (SetSource() resets it).
  function SplitRMM($str="",$tryNumName=true,$tryDiff=true){
  	if($str!="") $this->SetSource(trim($str));
  	if($this->SourceString=="") return "";
  	//Normalise the source text (SetSource() has already done this once for a fresh $str)
  	$this->SourceString = $this->ReviseString($this->SourceString);
  	//Break the normalised text into space-separated chunks
  	$spwords = explode(" ",$this->SourceString);
  	$spLen = count($spwords);
  	$spc = $this->SplitChar;
  	//Walk the chunks from last to first, prepending each result
  	for($i=($spLen-1);$i>=0;$i--){
  		if(trim($spwords[$i])=="") continue;
  		//Chunk starts with a single-byte character: keep it as one token
  		if($this->NotGBK($spwords[$i])){
  			$this->ResultString = $spwords[$i].$spc.$this->ResultString;
  		}
  		else
  		{
  		  //$c is the first two bytes of the chunk; a one-byte chunk is kept as is
  		  if(isset($spwords[$i][1])) $c = $spwords[$i][0].$spwords[$i][1];
  		  else{
  		  	$this->ResultString = $spwords[$i].$spc.$this->ResultString;
  		  	continue;
  		  }
  		  //Numeric value of the two bytes, used for the range test below
  		  $n = hexdec(bin2hex($c));
  		  if($c=="") //NOTE(review): the literal appears empty here, presumably lost to encoding damage; as written this never matches a 2-byte $c
  		  { $this->ResultString = $spwords[$i].$spc.$this->ResultString; }
  		  else if($n>0xA13F && $n < 0xAA40) //First byte pair lies in the range ReviseString() treats as symbols: keep the chunk whole
  		  { $this->ResultString = $spwords[$i].$spc.$this->ResultString; }
  		  else //Everything else is double-byte text to segment
  		  {
  		  	if(strlen($spwords[$i]) <= $this->SplitLen)
  		  	{
  		  		//Short chunk: if it ends with one of the EspecialChar alternatives, split that ending off
  		  		//NOTE(review): "$" binds only to the last alternative of the pattern, and $regs[0] is re-used unquoted
  		  		if(preg_match("/".$this->EspecialChar."$/",$spwords[$i],$regs)){
  		  				$spwords[$i] = preg_replace("/".$regs[0]."$/","",$spwords[$i]).$spc.$regs[0];
  		  		}
  		  		//If the chunk matches CommonUnit (and is not the first chunk), glue it to the chunk before it
  		  		//NOTE(review): "^" binds only to the first alternative of the pattern
  		  		if(!preg_match("/^".$this->CommonUnit."/",$spwords[$i]) || $i==0){
  		  			$this->ResultString = $spwords[$i].$spc.$this->ResultString;
  		  		}else{
  		  			$this->ResultString = $spwords[$i-1].$spwords[$i].$spc.$this->ResultString; 
  		  			$i--;
  		  		}
  		  	}
  		  	else{ 
  		  		//Longer chunk: dictionary-based segmentation
  		  		$this->ResultString = $this->RunRMM($spwords[$i],$tryNumName,$tryDiff).$spc.$this->ResultString;
  		  	}
  		  }
  	  }
  	}
  	//Collapse runs of spaces into a single space
  	$this->ResultString = preg_replace("/ {1,}/"," ",$this->ResultString);
  	//return $this->ParNumber($this->ResultString);
  	return $this->ResultString;
  }
  //------------------------
  //Number post-processing (currently a pass-through that only re-joins the tokens)
  //------------------------
  // Re-joins the space-separated tokens of $str with $this->SplitChar.
  // Empty tokens are dropped and every kept token is preceded by the
  // separator, so a non-empty result starts with the separator.
  // Returns "" for an empty $str.
  function ParNumber($str){
    if($str == "") return "";
    $separator = $this->SplitChar;
    $joined = "";
    foreach(explode(' ',$str) as $token){
      if($token=="") continue;
      $joined .= $separator.$token;
    }
    return $joined;
  }
  //-------------------------------
  //Recognise numbers (with units) and names in a token list
  //--------------------------------
  // Post-processes the tokens produced by RunRMM(), gluing together tokens
  // that look like a number with a unit or like a name.
  // $WordArray  tokens in reverse text order (as RunRMM() builds them); the
  //             loop walks from the last element to the first, which puts
  //             the output back in text order
  // Returns the tokens joined by $this->SplitChar, without a leading separator.
  function ParOther($WordArray)
  {
    $wlen = count($WordArray)-1;
    $rsStr = "";
    $spc = $this->SplitChar;
    for($i=$wlen;$i>=0;$i--)
    {
      //Number: the token matches one of the CnSgNum alternatives
      if(preg_match("/".$this->CnSgNum."/",$WordArray[$i])){
        $rsStr .= $spc.$WordArray[$i];
        //Glue a following unit word onto the number...
        //NOTE(review): "^" binds only to the first alternative of CommonUnit
        if($i>0 && preg_match("/^".$this->CommonUnit."/",$WordArray[$i-1]))
        { $rsStr .= $WordArray[$i-1]; $i--; }
        else{
          //...otherwise glue on any run of further number tokens
          while($i>0 && preg_match("/".$this->CnSgNum."/",$WordArray[$i-1]))
          { $rsStr .= $WordArray[$i-1]; $i--; }
        }
        continue;
      }
      //4-byte token found in TwoNameDic: glue on up to two following 2-byte tokens
      if(strlen($WordArray[$i])==4 && isset($this->TwoNameDic[$WordArray[$i]]))
      {
        $rsStr .= $spc.$WordArray[$i];
        if($i>0&&strlen($WordArray[$i-1])==2){
          $rsStr .= $WordArray[$i-1];$i--;
          if($i>0&&strlen($WordArray[$i-1])==2){ $rsStr .= $WordArray[$i-1];$i--; }
        }
      }
      //2-byte token found in OneNameDic: glue on up to two following 2-byte
      //tokens, stopping at one that matches EspecialChar
      else if(strlen($WordArray[$i])==2 && isset($this->OneNameDic[$WordArray[$i]]))
      {
        $rsStr .= $spc.$WordArray[$i];
        if($i>0&&strlen($WordArray[$i-1])==2){
           if(preg_match("/".$this->EspecialChar."/",$WordArray[$i-1])) continue;
           $rsStr .= $WordArray[$i-1];$i--;
           if($i>0 && strlen($WordArray[$i-1])==2 &&
            !preg_match("/".$this->EspecialChar."/",$WordArray[$i-1]))
           { $rsStr .= $WordArray[$i-1];$i--; }
        }
      }
      //Ordinary token
      else{
        $rsStr .= $spc.$WordArray[$i];
      }
    }
    //Strip the leading separator. It is quoted so that a SplitChar that is a
    //regex metacharacter (or the "/" delimiter) is matched literally.
    $rsStr = preg_replace("/^".preg_quote($spc,"/")."/","",$rsStr);
    return $rsStr;
  }
   //Dictionary-based right-to-left segmentation of a single chunk
  // Segments one chunk by scanning from its end towards its start and, at
  // each position, taking the longest candidate that IsWord() accepts.
  // $str        the chunk to segment (byte string)
  // $tryNumName when true the tokens are post-processed by ParOther()
  // $tryDiff    when true the joined result is post-processed by TestDiff()
  // Returns the tokens joined by $this->SplitChar.
  function RunRMM($str,$tryNumName=true,$tryDiff=true)
  {
  	$spc = $this->SplitChar;
  	$spLen = strlen($str);
  	$rsStr = "";
  	$okWord = "";
  	$tmpWord = "";
  	$WordArray = Array();
  	//$i is the index of the last byte not yet consumed; tokens are collected in reverse text order
  	for($i=($spLen-1);$i>=0;)
  	{
  		//At most MinLen+1 bytes remain: finish the head of the string and stop
  		if($i<=$this->MinLen){
  			if($i==1){
  			  //Exactly two bytes left: one token
  			  $WordArray[] = substr($str,0,2);
  		  }else
  			{
  			   //Take the first MinLen+1 bytes as one word if the dictionary knows it,
  			   //otherwise emit bytes 2-3 and bytes 0-1 as two tokens
  			   //NOTE(review): this always reads MinLen+1 bytes from offset 0, whatever $i is - confirm behaviour for odd byte counts
  			   $w = substr($str,0,$this->MinLen+1);
  			   if($this->IsWord($w)){
  			   	$WordArray[] = $w;
  			   }else{
  				   $WordArray[] = substr($str,2,2);
  				   $WordArray[] = substr($str,0,2);
  			   }
  		  }
  			$i = -1; break;
  		}
  		//Longest candidate to try: MaxLen, or everything that is left
  		if($i>=$this->MaxLen) $maxPos = $this->MaxLen;
  		else $maxPos = $i;
  		$isMatch = false;
  		//Candidates end at byte $i and are $j+1 bytes long, shrinking by two bytes per step
  		for($j=$maxPos;$j>=0;$j=$j-2){
  			 $w = substr($str,$i-$j,$j+1);
  			 if($this->IsWord($w)){
  			 	$WordArray[] = $w;
  			 	$i = $i-$j-1;
  			 	$isMatch = true;
  			 	break;
  			 }
  		}
  		//No dictionary word ends here: emit the last two bytes as a token
  		//($i is greater than MinLen at this point, so with the default MinLen of 3 the $i>1 test holds)
  		if(!$isMatch){
  			if($i>1) {
  				$WordArray[] = $str[$i-1].$str[$i];
  				$i = $i-2;
  			}
  		}
  	}//End For
  	
  	//Optional number/name recognition; otherwise just join the tokens back in text order
  	if($tryNumName)
  	{ $rsStr = $this->ParOther($WordArray); }
  	else{
  		$wlen = count($WordArray)-1;
  		for($i=$wlen;$i>=0;$i--){
  	  	$rsStr .= $spc.$WordArray[$i];
  	  }
  	}
  	
  	//Optional ambiguity pass over adjacent tokens
  	if($tryDiff) $rsStr = $this->TestDiff(trim($rsStr));
  	
  	return $rsStr;
  }
  
  //----------------------------
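  //Automatic summary (unfinished: the method below produces no result)
  //$keyword is a space-separated keyword list, e.g. one returned by GetIndexText
  //NOTE(review): the rest of the original note is unreadable (encoding damage); presumably it advised against passing too many keywords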
  //----------------------------
  // Unfinished auto-summary helper. It re-normalises $this->SourceString and
  // builds a regex alternation from the keywords, but does not yet use
  // either and returns nothing.
  // $str     currently unused; the text comes from $this->SourceString
  // $keyword space-separated keyword list
  // $strlen  currently unused
  function AutoDescription($str,$keyword,$strlen)
  {
    $this->SourceString = $this->ReviseString($this->SourceString);
    //Break the normalised text into space-separated chunks
    $spwords = explode(" ",$this->SourceString);
    //Use the $keyword argument; the class has no "keywords" property.
    $keywords = explode(" ",(string)$keyword);
    $regstr = "";
    foreach($keywords as $k=>$v)
    {
      if($v=="") continue;
      //Skip keywords that are a single double-byte character
      if(ord($v[0])>0x80 && strlen($v)<3) continue;
      if($regstr=="") $regstr .= "($v)";
      else $regstr .= "|($v)";
    }

    //TODO: the summary extraction over $spwords was never implemented.
    foreach($spwords as $v)
    {

    }
  }
  
  //----------------------------------
  //Ambiguity pass over a segmentation result
  //----------------------------------
  // Ambiguity pass over a segmentation result. Adjacent tokens are examined
  // in pairs, and the boundary between them is moved when the dictionary
  // prefers a different split.
  // $str  tokens separated by single spaces
  // Returns the tokens joined by $this->SplitChar; every token is preceded by
  // the separator, so a non-empty result starts with it. Returns "" for an
  // empty or blank $str.
  function TestDiff($str){
    $str = preg_replace("/ {1,}/"," ",$str);
    if($str == ""||$str == " ") return "";
    $ws = explode(' ',$str);
    $wlen = count($ws);
    $spc = $this->SplitChar;
    $reStr = "";
    for($i=0;$i<$wlen;$i++){
      //Last token: there is no neighbour to compare with
      if($i>=($wlen-1)) {
        $reStr .= $spc.$ws[$i];
      }
      else{
        //Two identical tokens in a row are merged into one token.
        //Strict comparison: both operands are strings, and loose comparison
        //would treat numeric strings such as "07" and "7" as equal.
        if($ws[$i]===$ws[$i+1]){
          $reStr .= $spc.$ws[$i].$ws[$i+1];
          $i++; continue;
        }
        //A 2-byte token followed by a token of 3 to 7 bytes
        if(strlen($ws[$i])==2 && strlen($ws[$i+1])<8 && strlen($ws[$i+1])>2){
          $addw = $ws[$i].$ws[$i+1];
          $t = 6;
          $testok = false;
          //Try the first 6, then the first 4 bytes of the pair as one word
          while($t>=4){
            $w = substr($addw,0,$t);
            if($this->IsWord($w)
            && ($this->GetRank($w) > $this->GetRank($ws[$i+1])*2) ){
               //$w took 2 bytes from the current token and $t-2 bytes from
               //the next one, so the remainder is the next token from
               //offset $t-2 onwards.
               $limitW = (string)substr($ws[$i+1],$t-2);
               if($limitW!="") $reStr .= $spc.$w.$spc.$limitW;
               else $reStr .= $spc.$w;
               $testok = true;
               break;
            }
            $t = $t-2;
          }
          if(!$testok) $reStr .= $spc.$ws[$i];
          else $i++;
        }
        //Both tokens are between 3 and 7 bytes long
        else if(strlen($ws[$i])>2 && strlen($ws[$i])<8
        && strlen($ws[$i+1])>2 && strlen($ws[$i+1])<8)
        {
          $t21 = substr($ws[$i+1],0,2);
          $t22 = substr($ws[$i+1],0,4);
          //Current token plus the first 2 bytes of the next one is a word
          if($this->IsWord($ws[$i].$t21)){
            if(strlen($ws[$i])==6||strlen($ws[$i+1])==6){
              $reStr .= $spc.$ws[$i].$t21.$spc.substr($ws[$i+1],2,strlen($ws[$i+1])-2);
              $i++;
            }else{
              $reStr .= $spc.$ws[$i];
            }
          }
          //Next token is 6 bytes: try current token plus its first 4 bytes
          else if(strlen($ws[$i+1])==6){
            if($this->IsWord($ws[$i].$t22)){
              $reStr .= $spc.$ws[$i].$t22.$spc.$ws[$i+1][4].$ws[$i+1][5];
              $i++;
            }else{ $reStr .= $spc.$ws[$i]; }
          }
          //Next token is 4 bytes: move its first 2 bytes to the current token
          //when that word clearly outranks the next token
          else if(strlen($ws[$i+1])==4){
            $addw = $ws[$i].$ws[$i+1];
            $t = strlen($ws[$i+1])-2;
            $testok = false;
            while($t>0){
              $w = substr($addw,0,strlen($ws[$i])+$t);
              if($this->IsWord($w)
               && ($this->GetRank($w) > $this->GetRank($ws[$i+1])*2) )
              {
                 $limitW = substr($ws[$i+1],$t,strlen($ws[$i+1])-$t);
                 if($limitW!="") $reStr .= $spc.$w.$spc.$limitW;
                 else $reStr .= $spc.$w;
                 $testok = true;
                 break;
              }
              $t = $t-2;
            }
            if(!$testok) $reStr .= $spc.$ws[$i];
            else $i++;
          }
          else
          { $reStr .= $spc.$ws[$i]; }

        }
        //Any other combination of lengths is left alone
        else{
          $reStr .= $spc.$ws[$i];
        }
      }
    }//End For
    return $reStr;
  }
  //---------------------------------
  //Check whether the dictionary contains a word
  //---------------------------------
  // Tells whether $okWord is in the loaded dictionary.
  // Words longer than $this->MaxLen bytes are rejected without a lookup;
  // otherwise the word is looked up in RankDic under its byte length.
  function IsWord($okWord){
    $byteLen = strlen($okWord);
    return ($byteLen <= $this->MaxLen) && isset($this->RankDic[$byteLen][$okWord]);
  }
  //------------------------------
  //Normalise a string: separate punctuation, single-byte runs and double-byte runs
  //------------------------------
  // Normalises $str before segmentation by inserting $this->SplitChar between
  // runs of different character kinds. The string is scanned byte by byte;
  // $prechar remembers the kind of the previous character.
  // Returns the normalised string, or '' for an empty $str.
  function ReviseString($str)
  {
  	$spc = $this->SplitChar;
    $slen = strlen($str);
    if($slen==0) return '';
    $okstr = '';
    $prechar = 0; // 0 = whitespace/start, 1 = single-byte word character, 2 = double-byte text, 3 = punctuation/symbol
    for($i=0;$i<$slen;$i++){
      //Single-byte character
      if(ord($str[$i]) < 0x81)
      {
        //Whitespace and control bytes collapse into a single separator
        if(ord($str[$i]) < 33){
          //$str[$i]!="\r"&&$str[$i]!="\n"
          if($prechar!=0) $okstr .= $spc;
          $prechar=0;
          continue; 
        //Anything outside the word-character set is punctuation: emitted as its own token
        }else if(preg_match("/[^0-9a-zA-Z@\.%#:\\/\\&_-]/",$str[$i]))
        {
          if($prechar==0)
          {	$okstr .= $str[$i]; $prechar=3;}
          else
          { $okstr .= $spc.$str[$i]; $prechar=3;}
        }else
        {
          //Word character: starts a new token after double-byte text or punctuation
        	if($prechar==2||$prechar==3)
        	{ $okstr .= $spc.$str[$i]; $prechar=1;}
        	else
        	{ 
        	  //NOTE(review): this pattern is the literal sequence "@#%:" and cannot match a single character, so the first branch is never taken
        	  if(preg_match("/@#%:/",$str[$i])){ $okstr .= $str[$i]; $prechar=3; }
        	  else { $okstr .= $str[$i]; $prechar=1; }
        	}
        }
      }
      else{
        //High byte: separate it from a preceding single-byte word character or punctuation
        if($prechar!=0 && $prechar!=2) $okstr .= $spc;
        //Consume it together with the next byte; a lone trailing high byte is dropped
        if(isset($str[$i+1])){
          $c = $str[$i].$str[$i+1];
          
          //Byte pairs matching CnNumber are converted by GetAlabNum()
          //NOTE(review): CnNumber appears damaged by encoding in this copy - confirm the pattern
          if(preg_match("/".$this->CnNumber."/",$c))
          { $okstr .= $this->GetAlabNum($c); $prechar = 2; $i++; continue; }
          
          //Numeric value of the two bytes; pairs inside the range below are handled as symbols
          $n = hexdec(bin2hex($c));
          if($n>0xA13F && $n < 0xAA40)
          {
            //NOTE(review): the two literals compared with $c below appear empty, presumably lost to encoding damage; as written neither branch can match a 2-byte $c
            if($c==""){
            	if($prechar!=0) $okstr .= $spc." ";
            	else $okstr .= " ";
            	$prechar = 2;
            }
            else if($c==""){
            	$okstr .= " ";
            	$prechar = 3;
            }
            else{
            	//Any other symbol becomes its own token
            	if($prechar!=0) $okstr .= $spc.$c;
            	else $okstr .= $c;
            	$prechar = 3; 
            }
          }
          else{
            //Ordinary double-byte character: appended to the current run
            $okstr .= $c;
            $prechar = 2;
          }
          $i++;
        }
      }//end of double-byte handling
    }//end of scan loop
    return $okstr;
  }
  //-----------------------------------------
	//New-word detection (stub); the argument is an already segmented string
	//----------------------------------------
  // Placeholder for new-word detection: returns $str unchanged.
  // $maxlen is accepted but not used.
  function FindNewWord($str,$maxlen=6)
  {
    return $str;
  }
  //----------------------------------------------
  //Build a de-duplicated index string; the argument is an already segmented string
  //--------------------------------------------------
  // Builds an index string from an already segmented string: tokens are
  // de-duplicated, counted, and listed from most to least frequent.
  // $okstr  space-separated tokens
  // $ilen   -1 (default): keep only tokens whose GetRank() is above 500;
  //         otherwise: keep adding tokens while the result stays shorter
  //         than $ilen bytes
  // Returns the selected tokens separated by single spaces ("" if none).
  function GetIndexText($okstr,$ilen=-1)
  {
    if($okstr=="") return "";
    $ws = explode(" ",$okstr);
    $okstr = "";
    //Token => occurrence count. This must start as an array: starting from a
    //string breaks the keyed writes below on PHP 7.1 and later.
    $wks = array();
    foreach($ws as $w)
    {
      $w = trim($w);
      //Skip tokens shorter than 2 bytes
      if(strlen($w)<2) continue;
      //Skip tokens made only of digits, ':' and '-'
      if(!preg_match("/[^0-9:-]/",$w)) continue;
      //Skip a single double-byte character
      if(strlen($w)==2&&ord($w[0])>0x80) continue;
      if(isset($wks[$w])) $wks[$w]++;
      else $wks[$w] = 1;
    }
    if(count($wks)==0) return "";

    //Most frequent tokens first
    arsort($wks);
    if($ilen==-1)
    {
      foreach($wks as $w=>$v)
      {
        if($this->GetRank($w)>500) $okstr .= $w." ";
      }
    }
    else
    {
      foreach($wks as $w=>$v){
        if((strlen($okstr)+strlen($w)+1)<$ilen) $okstr .= $w." ";
        else break;
      }
    }
    return trim($okstr);
  }
  //---------------------
  //Get the rank (frequency value) stored for a word
  //--------------------
  // Returns the value stored for $w in RankDic (indexed by the word's byte
  // length, then by the word), or 0 when the word is not in the dictionary.
  function GetRank($w){
    $byteLen = strlen($w);
    return isset($this->RankDic[$byteLen][$w]) ? $this->RankDic[$byteLen][$w] : 0;
  }
  //----------------------------
  //Map a double-byte digit, sign or letter to its single-byte equivalent
  //---------------------------
  // Looks $fnum up in the $nums table and returns the character at the same
  // position in $fnums (digits, "+-%.", a-z, A-Z). Returns $fnum unchanged
  // when it is not found in the table.
  // NOTE(review): every entry of $nums appears empty (or a single space) in
  // this copy - presumably the double-byte forms were lost to encoding
  // damage; restore them from a correctly encoded copy of the file.
  function GetAlabNum($fnum)
  {
	  $nums = array("","","","","","","",
	  "","","","","","","",
	  "","","","","","","","","","","","","",
	  "","","","",""," ","","","","","","","",
	  "","","","","","","","","","","","","",
	  "","","","","","","","","","","","","");
	  // Single-byte replacements, position-for-position with $nums
	  $fnums = "0123456789+-%.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
	  // Linear search; the first loosely-equal entry wins
	  for($i=0;$i<count($nums);$i++){
	  	if($nums[$i]==$fnum) return $fnums[$i];
	  }
	  return $fnum;
  }
}//End Class
//Byte-oriented substring helper for double-byte text; cn_substr() below wraps it
// Cuts a piece out of a byte string without splitting a double-byte
// character (any byte above 127 is taken together with the byte after it).
// $str      source byte string
// $slen     maximum length of the result in bytes; 0, or a value that runs
//           past the end of the string, means "up to the end"
// $startdd  byte offset to start from (default 0)
// Returns the piece, or "" when $startdd is at or beyond the end of $str.
function cn_substrGb($str,$slen,$startdd=0){
	$total = strlen($str);
	if($total < $startdd+1) return "";
	if($total < $startdd + $slen || $slen==0) $slen = $total - $startdd;
	$lastPos = $startdd + $slen - 1;

	$out = "";
	$pending = "";   // character read in the previous step, not yet emitted
	$pos = 0;
	while($pos < $total)
	{
		// Emit the previous character once the scan is past the start offset
		if($startdd==0 || $pos > $startdd) $out .= $pending;

		// Read one character: two bytes for a high byte, one otherwise
		if(ord($str[$pos])>127){
			if($total>$pos+1) $pending = $str[$pos].$str[$pos+1];
			$pos++;
		}
		else{
			$pending = $str[$pos];
		}

		// Reached the last wanted byte: add the final character only if it fits
		if($pos >= $lastPos){
			if(strlen($out)+strlen($pending) <= $slen) $out .= $pending;
			break;
		}
		$pos++;
	}
	return $out;
}
// Charset-aware wrapper around cn_substrGb().
// $str, $slen, $startdd are passed through to cn_substrGb().
// When the application charset is 'utf-8' and the utf82gb()/gb2utf8()
// converters are available, $str is converted to GB first, cut, and the
// result converted back; otherwise $str is cut as it is.
// NOTE(review): $charset used to be an undefined local here, so the UTF-8
// branch could never run. This assumes the application publishes its charset
// in the global $charset - confirm against the including script.
function cn_substr($str,$slen,$startdd=0){
	global $charset;
	$isUtf8 = isset($charset) && $charset === 'utf-8';
	// The converters are defined outside this file; fall back to the plain
	// byte cut when they are not loaded.
	if($isUtf8 && function_exists('utf82gb') && function_exists('gb2utf8')){
		return gb2utf8(cn_substrGb(utf82gb($str),$slen,$startdd));
	}
	return cn_substrGb($str,$slen,$startdd);
}
?>