===== PSCWS4 - 说明文档 =====
$Id: readme.txt,v 1.2 2008/12/21 04:37:59 hightman Exp $

[ 关于 PSCWS4 ]

PSCWS4 是由 hightman 于 2007 年开发的纯 PHP 代码实现的简易中文分词系统第四版的简称。

PSCWS 是英文 PHP Simple Chinese Words Segmentation 的头字母缩写，它是 SCWS 项目的前身。
现 SCWS 已作为 FTPHP 项目的一个子项目继续发展，现已于 2008-12 重新修订并整理发布。

SCWS 是一套开源并且免费的中文分词系统，提供优秀易用的 PHP 接口。
项目主页：http://www.ftphp.com/scws

PSCWS4 在算法、功能以及词典/规则文件格式上，完全兼容于 C 编写的 libscws，类库的方法完全兼容
于 scws 的 php 扩展中的预定义类的方法。

İ㷨N-Ĵ·ŷǿĹʶֵרʣCʵ
İ汾ٶȺЧʾǳߣƼʹáPSCWS4  SCWS(C)  PHP ʵ֣ٶȽ


[ 性能评估 ]

 test.php ķִе, ϵͳ FreeBSD 6.2 , CPU Ϊǿ 3.0G

PSCWS4 - Ϊ 80, 535 ı,  ʱ 30 
         ִʾ 95.60%, ٻ 90.51% (F-1: 0.93)

ͬȳı scws-1.0 (PHP չʽ) ʱ 0.65 (CΪ 0.17).
    ǿҽ߸ scws-1.0 (C)

[ 文件结构 ]

  ļ                                         ʹñ?
  --------------------------------------------------------------
  dict/dict.xdb        - XDB ʽʵ              (Ҫļ)
  pscws2.class.php     - PSCWS ڶ  (Ҫļ)
  pscws3.class.php     - PSCWS   (Ҫļ)   
  dict.class.php       - ʵ              (Ҫļ)
  xdb_r.class.php      - XDB ʽȡ            (Ҫļ)

  demo.php             - ʾļ, ֧ web/ (ѡ)
  readme.txt           - ˵ļ                  (ѡ)


[ 使用说明 ]

PSCWS4 类对应的文件为 pscws4.class.php。在 PHP 代码中的调用方法如下：

// 加入头文件
require '/path/to/pscws4.class.php';

// 建立分词类对象, 参数为字符集, 默认为 gbk, 可在后面调用 set_charset 改变
$pscws = new PSCWS4('utf8');

//
// , 趨һЩִʲѡ, set_dict Ǳ, ʶҪ set_rule 
//
// : set_charset, set_dict, set_rule, set_ignore, set_multi, set_debug, set_duality ... ȷ
// 
$pscws->set_dict('/path/to/etc/dict.xdb');
$pscws->set_rule('/path/to/etc/rules.ini');

// 分词调用 send_text() 将待分词的字符串传入, 紧接着循环调用 get_result() 方法取回一系列分好的词
// 直到 get_result() 返回 false 为止
// صĴһ, : word ʱ, idf (), off textеƫ, len , attr 
//

$pscws->send_text($text);
while ($some = $pscws->get_result())
{
   foreach ($some as $word)
   {
      print_r($word);
   }
}

// 在 send_text 之后可以调用 get_tops() 返回分词结果的词语按权重统计的前 N 个词
// 常用于提取关键词, 参数用法参见下面的详细介绍.
// صԪһ, ְ: word ʱ, weight , times , attr 
$tops = $pscws->get_tops(10, 'n,v');
print_r($tops);

--- 类方法完全手册 ---
(注: 构造函数可传入字符集作为参数, 这与另外调用 set_charset 效果是一样的)

class PSCWS4 {

  void set_charset(string charset);
  ˵趨ִʴʵ䡢򼯡ıַַϵͳȱʡ gbk ּ
  أޡ
  charset 趨ַĿǰֻ֧ utf8  gbkעbig5 Ҳ gbk 
  ע⣺Ҫзֵıʵ䣬ļߵַͳһΪ charset ֵ
  
  bool set_dict(string dict_path);
  ˵÷ִõĴʵļ
  dict_path Ǵʵ··ȫ·
  أɹ true ʧܷ false
  д WARNING Ĵʾ
  
  void set_rule(string rule_path);
  ˵趨ִõ´ʶ򼯣ʱʶ𣩡
  أޡ
  rule_path ǹ򼯵··ȫ·
  
  void set_ignore(bool yes);
  ˵趨ִʷؽʱǷȥһЩı֮ࡣ
  أޡ
  yes 趨ֵΪ true вرţΪ false ᷵أȱʡΪ false
  
  void set_multi(int mode);
  ˵趨ִʷؽʱǷ񸴺Ϸָ硰йˡءйˣйˡʡ
  أޡ
  mode 趨ֵ1 ~ 15
        λ 1 | 2 | 4 | 8 ֱʾ: ̴ | Ԫ | Ҫ | е
	
  void set_duality(bool yes);
  ˵趨ǷɢԶԶִַʷۺϡ
  أޡ
  yes 趨ֵΪ true жֻԶַ۷֣Ϊ false 򲻴ȱʡΪ false

  void set_debug(bool yes);
  ˵÷ִʹǷN-Pathִʹ̵ĵϢ
  yes 趨ֵΪ true ִʹжڶ·ַָʾϢ
  أޡ
  
  void send_text(string text);
  ˵趨ִҪиı
  أޡ
  text ıݡ
  ע⣺ִбʱȼشʵ͹ļѡ
  
  mixed get_result(void);
  ˵ send_text 趨ıݣһϵкõĴʻ㡣
  أɹкõĴʻɵ飬 ޸ʻ㣬 false
  ޡ
  注意：每次切割后本函数应该循环调用，直到返回 false 为止，因为程序每次返回的词数是不确定的。
        返回的词汇包含的键值有：word (string, 词本身) idf (float, 逆文本词频) off (int, 在文本中的位置) attr (string, 词性)
	
  mixed get_tops([int limit [, string attr]]);
  ˵ send_text 趨ıݣϵͳؼʻб
  أɹкõĴʻɵ飬 ޸ʻ㣬 false
  limit ѡصĴʵȱʡ 10
        attr ѡһϵдɵַ֮԰ǵĶŸ
             ʾصĴԱбУ~ͷʾȡԱ벻бУ
	     ȱʡΪգȫԣˡ
	     
  string version(void);
  ˵رš
  أ汾ţַ
  ޡ
  
  void close(void);
  ˵رͷԴʹýֹøúϵͳԶա
  أޡ
  ޡ
};

[ 关于词典 ]

PSCWS4 使用的是 XDB 格式词典，与 C 版的 libscws 完全兼容。

ṩĬϴʵͨõĻϢʻ㼯Լ 28 ʡҪƴʵ
;ϵܻշѡ

[ 注意事项 ]

PSCWS4 由纯 PHP 代码实现，不需要任何外部扩展支持，但效率一般，建议选用 C 版编写的扩展。

PSCWS4 õڸְ汾 PHP4  PHP5 ϣ֧ GBKUTF-8 ȿַ

ṩصĴʵ Intel ܹƽ̨ģŵܹĻпܻ⵼
дȫ󣨵͵磺Sparc ܹ Solaris/SunOS У뼰ʱ
ϵѰ

[ ϵ ]

SCWS Ŀվhttp://www.ftphp.com/scws
ҵĸ Emailhightman2@yahoo.com.cn   һֱţлл

--

2008.12.21 - hightman
