<?php
/*
 * @copyright   Leyun internet Technology(Shanghai)Co.,Ltd
 * @license     http://www.dzzoffice.com/licenses/license.txt
 * @package     DzzOffice
 * @version     DzzOffice Beta2 2013.12.30
 * @link        http://www.dzzoffice.com
 * @author      zyx(zyx@dzz.cc)
 */

/**
 * Lightweight scraper that extracts basic metadata (charset, title,
 * keywords, description, favicon) from the page at a given URL.
 *
 * The page body is fetched through the project helper
 * dzz_file_get_contents() and cached on the instance. Text results are
 * passed through the project helper diconv() together with the detected
 * page charset (presumably converting to the site charset - confirm
 * against diconv()'s definition).
 */
class caiji{
	
	private $_url;
	private $_content='';
	private $_charset='';
	private $_meta=array();
	private $_metaLoaded=false;
	private $_title='';
	
	/**
	 * @param string $url Address of the page to inspect.
	 */
	public function __construct($url){
		$this->_url = $url; 
	}
	
	/**
	 * Returns the page body, fetching it when nothing is cached yet.
	 *
	 * An empty result is not cached, so a later call tries again.
	 *
	 * @return string
	 */
	private function loadContent(){
		if(!$this->_content) $this->_content=(string)dzz_file_get_contents($this->_url);
		return $this->_content;
	}
	
	/**
	 * Detects the character set of the page.
	 *
	 * Looks for a charset declaration inside a <meta> tag first (quoted
	 * with double quotes, single quotes, or unquoted), then falls back to
	 * mb_detect_encoding(), and finally to 'UTF-8'.
	 *
	 * @return string Charset name as declared/detected.
	 */
	public function getCharset(){
		$content=$this->loadContent();
		// Capture only the charset token itself so that a missing or
		// single-quoted value cannot swallow the markup that follows it.
		if(preg_match('/<meta\b[^>]*?\bcharset\s*=\s*["\']?\s*([a-z0-9_.:\-]+)/i',$content,$matches)){
			$this->_charset=trim($matches[1]);
		}
		// No explicit charset declared in the head: guess from the content.
		if(!$this->_charset){
			$chararr=array('GB2312','GBK','UTF-8','BIG5','iso-8859-1');
			 
			if(function_exists('mb_detect_encoding') && $char=mb_detect_encoding($content,$chararr)){
				$this->_charset=$char;
			}
		}
		if(!$this->_charset) $this->_charset='UTF-8';
		return $this->_charset;
	}
	
	/**
	 * Returns the text of the page's <title> element with tags stripped,
	 * passed through diconv() with the page charset.
	 *
	 * @return string Title, or '' when the page has no <title>.
	 */
	public function getTitle(){
		$content=$this->loadContent();
		// Allow attributes on <title> and inline markup inside it; the
		// markup is removed by strip_tags() below.
		if(preg_match('/<title\b[^>]*>(.*?)<\/title>/si',$content,$matches)){
			if(!$this->_charset) $this->getCharset();
			$this->_title=diconv(strip_tags($matches[1]),$this->_charset);
		}
		return $this->_title;
	}
	
	/**
	 * @return string Content of the "keywords" meta tag, or '' when absent.
	 */
	public function getKeywords(){
		return $this->metaValue('keywords');
	}
	
	/**
	 * @return string Content of the "description" meta tag, or '' when absent.
	 */
	public function getDescription(){
		return $this->metaValue('description');
	}
	
	/**
	 * Returns one meta tag value with tags stripped and passed through
	 * diconv(), or '' when the tag is missing or empty.
	 *
	 * @param string $name Lower-case meta tag name.
	 * @return string
	 */
	private function metaValue($name){
		$meta=$this->getMeta();
		// empty() also covers the case where get_meta_tags() failed and
		// returned false, and avoids an undefined-index notice.
		if(empty($meta[$name])) return '';
		if(!$this->_charset) $this->getCharset();
		return diconv(strip_tags($meta[$name]),$this->_charset);
	}
	
	/**
	 * Returns the page's meta tags as produced by get_meta_tags().
	 *
	 * The URL is read at most once per instance, even when the page has
	 * no meta tags or the read fails.
	 *
	 * @return array|false Result of get_meta_tags() (false on failure).
	 */
	public function getMeta(){
		if(!$this->_metaLoaded){
			$this->_meta=get_meta_tags($this->_url,true);
			$this->_metaLoaded=true;
		}
		return $this->_meta;
	}
	
	/**
	 * Locates a favicon for the page.
	 *
	 * Tries /favicon.ico on the URL's host and on its "www." variant, then
	 * scans every <link> tag of the page for rel="icon", "shortcut",
	 * "shortcut icon" or "icon shortcut". Each candidate is verified with
	 * the project helper check_remote_file_exists().
	 *
	 * @return string URL of the first candidate that exists, or ''.
	 */
	public function getFavicon(){
		$parseurl=parse_url($this->_url);
		if(!is_array($parseurl) || empty($parseurl['host'])) return '';
		$scheme=isset($parseurl['scheme'])?$parseurl['scheme']:'http';
		// NOTE(review): https is downgraded to http here and below, as in
		// the original code; presumably check_remote_file_exists() or its
		// consumers could not handle https - confirm before changing.
		if(strtolower($scheme)=='https') $scheme='http';
		$host=$parseurl['host'];
		$bare=preg_replace('/^www\./i','',$host);
		
		$candidates=array(
			$scheme.'://'.$host.'/favicon.ico',
			$scheme.'://www.'.$bare.'/favicon.ico'
		);
		foreach(array_unique($candidates) as $candidate){
			if(check_remote_file_exists($candidate)) return $candidate;
		}
		
		$content=$this->loadContent();
		if(!preg_match_all('/<link\b[^>]*>/is',$content,$tags)) return '';
		// Inspect every <link> tag: the icon link is usually preceded by
		// stylesheet links, so looking at the first one only is not enough.
		foreach($tags[0] as $tag){
			if(!preg_match('/(?<![\w-])rel\s*=\s*(["\'])\s*(?:shortcut\s+icon|icon\s+shortcut|icon|shortcut)\s*\1/i',$tag)) continue;
			if(!preg_match('/(?<![\w-])href\s*=\s*(["\'])\s*(.+?)\s*\1/is',$tag,$m)) continue;
			$href=trim($m[2]);
			// Inline data: icons are not remote files that can be checked.
			if(stripos($href,'data:')===0) continue;
			// Protocol-relative URL: give it the page's scheme.
			if(strpos($href,'//')===0) $href=$scheme.':'.$href;
			$purl=parse_url($href);
			if(empty($purl['host'])){
				// Path without host: resolve against the site root on both
				// the bare host and its www. variant.
				$ico0=$scheme.'://'.preg_replace('#/{2,}#','/',$bare.'/'.$href);
				$ico1=$scheme.'://'.preg_replace('#/{2,}#','/','www.'.$bare.'/'.$href);
			}else{
				$ico0=$ico1=preg_replace('/^https/i','http',$href);
			}
			if(check_remote_file_exists($ico1)) return $ico1;
			if($ico0!==$ico1 && check_remote_file_exists($ico0)) return $ico0;
		}
		return '';
	}
}

?>
