<?php
/**
 * Scraper adapter for the Hao123 novel site.
 *
 * Extracts book metadata, the chapter list, chapter content, cover images and
 * the "recently updated" list by fetching pages from the site and running
 * regular expressions over the HTML. Fetching and text clean-up are delegated
 * to Site_Base (getFile, analyzeContent, analyzeChapterName, setChapterArray,
 * getBookId) and to the global helpers analyzeText() and getFile().
 */
class Site_Hao123 extends Site_Base
{
    /** @var string Base URL of the target site, without a trailing slash. */
    public static $siteUrl = 'http://www.hao123.se';

    /** @var string Display name of the target site. */
    public static $siteName = 'Hao123';

    /** @var boolean Not read in this class. NOTE(review): presumably consumed by the crawler driver - confirm. */
    public static $continuance = true;

    /** @var boolean Not read in this class. NOTE(review): presumably consumed by the crawler driver - confirm. */
    public static $fetchNew = false;

    /** @var string|null HTML of the book info page (converted to UTF-8), or null until getInfoFile() has run. */
    protected $_infoFiles = null;

    /** @var string Numeric directory segment that precedes the book id in the site's file URLs. */
    protected $_typeId = '';

    /** @var string|null Cached or explicitly configured chapter list URL. */
    protected $_chapterListUrl = null;

    /** @var array Not read in this class. */
    protected $_denyBooks = array();

    /**
     * Creates the adapter and extracts the numeric book id from the detail URL.
     *
     * @param string $detailUrl URL of the form ".../articleinfo.php?id=<digits>"
     */
    public function __construct($detailUrl)
    {
        parent::__construct($detailUrl);

        // A URL without an id yields book id 0, which is what intval() of the
        // missing match produced before (minus the undefined-index notice).
        $this->_bookId = 0;
        if (preg_match('/articleinfo\.php\?id=(\d+)/i', $detailUrl, $matches))
        {
            $this->_bookId = intval($matches[1]);
        }
    }

    /**
     * Fetches the book info page and caches both its HTML and the type id
     * found in the first ".../files/article/html/<typeId>/<bookId>/" link.
     *
     * @return void
     */
    public function getInfoFile()
    {
        $infoUrl = self::siteUrl() . '/modules/article/articleinfo.php?id=' . $this->getBookId();
        $this->_infoFiles = $this->getFile($infoUrl, 'gbk', 'utf-8');

        $pattern = '/files\/article\/html\/(\d+)\/' . $this->getBookId() . '\//is';
        $this->_typeId = '';
        if (preg_match($pattern, (string) $this->_infoFiles, $typeid))
        {
            $this->_typeId = $typeid[1];
        }
    }

    /**
     * Returns the book title found between "《" and "》" on the info page.
     *
     * @return string Trimmed title, or an empty string when it cannot be found
     */
    public function getBookName()
    {
        if (preg_match('/<strong>《(.*?)》/i', (string) $this->_infoFiles, $book))
        {
            return trim($book[1]);
        }

        return '';
    }

    /**
     * Returns the author name from the info page.
     *
     * @return string Trimmed author name, or the default '网络转载' when none is found
     */
    public function getAuthor()
    {
        $found = preg_match('/target=\"_blank\">(.*?)<\/a><\/em>/i', (string) $this->_infoFiles, $author);
        if ($found && !empty($author[1]))
        {
            return trim($author[1]);
        }

        return '网络转载';
    }

    /**
     * Returns the book synopsis: the content of the first <p> element of the
     * info page, passed through the global analyzeText() helper.
     *
     * @return string
     */
    public function getBookInfo()
    {
        $intro = '';
        // Only the first paragraph is used, so a single preg_match is enough.
        if (preg_match('/<p>(.*?)\<\/p>/is', (string) $this->_infoFiles, $info))
        {
            $intro = $info[1];
        }

        return analyzeText($intro);
    }

    /**
     * Returns the writing status of the book.
     *
     * @return int 1 when the "写作进程" span contains '完', otherwise 0
     */
    public function getStatus()
    {
        $found = preg_match('/<span>写作进程(:|：)(.*?)\<\/span>/i', (string) $this->_infoFiles, $status);
        if ($found && false !== strpos(trim($status[0]), '完'))
        {
            return 1;
        }

        return 0;
    }

    /**
     * Returns the chapter list URL, building and caching it on first use.
     *
     * @return string
     */
    public function getChapterListUrl()
    {
        if (is_null($this->_chapterListUrl))
        {
            $this->_chapterListUrl = self::siteUrl() . '/files/article/html/' . $this->_getTypeId() . '/' . $this->_bookId . '/';
        }

        return $this->_chapterListUrl;
    }

    /**
     * Overrides the chapter list URL.
     *
     * @param string $url
     * @return void
     */
    public function setChapterListUrl($url)
    {
        $this->_chapterListUrl = $url;
    }

    /**
     * Fetches the chapter list page and extracts every chapter link.
     *
     * The result is also handed to setChapterArray().
     *
     * @return array array('hasVolume' => false, 'chapters' => array of array('url' => ..., 'title' => ...))
     */
    public function getChapterList()
    {
        $html = $this->getFile($this->getChapterListUrl(), 'gb18030', 'utf-8');

        $chapters = array();
        // With the "u" modifier preg_match_all() returns false on malformed
        // UTF-8; the truthiness check leaves the chapter list empty in that
        // case instead of indexing into a missing result.
        $pattern = '/<dd>\<a\s*href=\"(.*?)\"\s*title=\".*?\">(.*?)\<\/a>\<\/dd>/isu';
        if (preg_match_all($pattern, (string) $html, $links, PREG_SET_ORDER))
        {
            foreach ($links as $link)
            {
                $chapters[] = array('url' => $link[1], 'title' => $link[2]);
            }
        }

        $list = array('hasVolume' => false, 'chapters' => $chapters);
        $this->setChapterArray($list);

        return $list;
    }

    /**
     * Extracts the chapter title from a chapter page.
     *
     * @param string $chapterContent Raw chapter page HTML
     * @return string Title with the words '正文', '作品' and '分卷阅读' removed, run through analyzeChapterName()
     */
    public function getChapterName($chapterContent)
    {
        $rawName = '';
        if (preg_match('/<span class=\"newstitle\">(.*?)\<\/span>/i', (string) $chapterContent, $chaptername))
        {
            $rawName = $chaptername[1];
        }

        return $this->analyzeChapterName(str_replace(array('正文', '作品', '分卷阅读'), '', $rawName));
    }

    /**
     * Extracts the chapter text from a raw chapter page.
     *
     * @param string $chapterContent Raw chapter page HTML
     * @return string Content of the first <div id="booktext"> run through analyzeContent()
     */
    public function filterChapterContent($chapterContent)
    {
        $body = '';
        // Only the first matching block is used, so a single preg_match is enough.
        if (preg_match('/<div id=\"booktext\">(.*?)\<\/div>/is', (string) $chapterContent, $content))
        {
            $body = $content[1];
        }

        return $this->analyzeContent($body);
    }

    /**
     * Fetches a single chapter page.
     *
     * @param string $contentUrl
     * @return string Page content as returned by getFile() (gb18030 converted to utf-8)
     */
    public function getChapterContent($contentUrl)
    {
        return $this->getFile($contentUrl, 'gb18030', 'utf-8');
    }

    /**
     * Builds the absolute URL of a chapter page.
     *
     * @param string $chapterHtml Chapter file name as found in the chapter list
     * @return string
     */
    public function getChapterContentUrl($chapterHtml)
    {
        return self::siteUrl() . '/files/article/html/' . $this->_getTypeId() . '/' . $this->getBookId() . '/' . $chapterHtml;
    }

    /**
     * Detects image references (gif/jpg under /DownFiles or /attachment) in a chapter.
     *
     * @param string $chapterContent
     * @return array|boolean List of matched image paths, or false when there are none
     */
    public function isImage($chapterContent)
    {
        if (preg_match_all('/\/(DownFiles|attachment)(.*?)\.(gif|jpg)/i', $chapterContent, $images))
        {
            return $images[0];
        }

        return false;
    }

    /**
     * Builds the absolute URL of an image.
     *
     * @param string $image Site-relative image path
     * @return string
     */
    public function getImageUrl($image)
    {
        return self::siteUrl() . $image;
    }

    /**
     * Looks up the cover image on the cached info page.
     *
     * @return array|boolean array('url' => absolute URL, 'fileext' => extension), or false when no cover is found
     */
    public function getCoverUrl()
    {
        $pattern = '/\/files\/article\/image\/' . $this->_typeId . '\/' . $this->_bookId . '\/' . $this->_bookId . 's\.(gif|jpg|png|bmp)/i';
        if (!preg_match($pattern, (string) $this->_infoFiles, $cover))
        {
            return false;
        }

        // The pattern only matches "<bookId>s.<ext>", so the captured group is
        // exactly the file extension. This replaces array_pop(explode(...)),
        // which passed a non-variable by reference, and the 'nocover.jpg'
        // check, which could never be true for a name of that shape.
        return array('url' => self::siteUrl() . $cover[0], 'fileext' => $cover[1]);
    }

    /**
     * Searches the target site for a book.
     *
     * @param string $bookName   Book title (UTF-8)
     * @param string $authorName Author name (UTF-8)
     * @return string|boolean Book info URL, or false when nothing suitable is found
     */
    public static function searchBook($bookName, $authorName)
    {
        // The search form expects a GBK-encoded keyword.
        $searchKey = iconv('utf-8', 'gbk', $bookName);
        if (false === $searchKey)
        {
            return false;
        }

        $handle = curl_init();
        if (false === $handle)
        {
            return false;
        }
        curl_setopt($handle, CURLOPT_URL, self::siteUrl() . '/modules/article/search.php');
        curl_setopt($handle, CURLOPT_RETURNTRANSFER, 1);
        // Response headers are kept in the output so the redirect can be inspected below.
        curl_setopt($handle, CURLOPT_HEADER, 1);
        curl_setopt($handle, CURLOPT_POST, 1);
        curl_setopt($handle, CURLOPT_POSTFIELDS, array(
            'searchtype' => 'articlename',
            'searchkey' => $searchKey,
        ));
        $output = curl_exec($handle);
        curl_close($handle);

        if (empty($output))
        {
            return false;
        }

        // A single hit makes the site answer with a redirect to the info page,
        // so look for that Location header first.
        $infoUrlPattern = preg_quote(self::siteUrl() . '/modules/article/articleinfo.php?id=', '/') . '\d+';
        if (preg_match('/Location:\s(' . $infoUrlPattern . ')/i', $output, $redirect))
        {
            return $redirect[1];
        }

        // Several hits: scan the result rows for one that mentions both the
        // title and the author.
        if (!preg_match_all('/<tr>(.*?)\<\/tr>/is', $output, $rows))
        {
            return false;
        }

        foreach ($rows[1] as $row)
        {
            $row = iconv('gbk', 'utf-8', $row);
            if (false === $row)
            {
                continue;
            }
            if (false === strpos($row, $bookName) || false === strpos($row, $authorName))
            {
                continue;
            }
            // Rows without a book id are skipped rather than producing a URL
            // that ends in "id=".
            if (preg_match('/articleinfo\.php\?id=(\d+)/i', $row, $url))
            {
                return self::siteUrl() . '/modules/article/articleinfo.php?id=' . $url[1];
            }
        }

        return false;
    }

    /**
     * Decides whether fetched chapter content is real text worth saving.
     *
     * @param string $content Chapter text
     * @return boolean false when the text looks like a "coming soon" placeholder
     */
    public function isCanSave($content)
    {
        $content = str_replace('&nbsp;', ' ', $content);
        // \s already covers spaces, tabs and line breaks.
        $content = preg_replace('/\s+/', '', $content);
        $len = mb_strlen($content, 'utf-8');

        $hasShortPlaceholder = false !== strpos($content, '手打中') || false !== strpos($content, '文字版稍后');

        // NOTE(review): '文字稍后' rejects content of any length, while the two
        // markers above only count when the text is shorter than 500
        // characters. This mirrors the original operator precedence; confirm
        // whether the length limit was meant to apply to all three markers.
        if (($len < 500 && $hasShortPlaceholder) || false !== strpos($content, '文字稍后'))
        {
            return false;
        }

        return true;
    }

    /**
     * Returns the URL of the "recently updated" list, mainly used to monitor updates.
     *
     * @return string
     */
    public static function getListUrl()
    {
        return self::siteUrl() . '/modules/article/toplist.php?sort=lastupdate';
    }

    /**
     * Returns one page of the "recently updated" list.
     *
     * NOTE(review): this static method calls a global getFile() function, not
     * the instance method used elsewhere in the class - confirm it exists.
     *
     * @param int $page
     * @return array Map of book info URL => book name
     */
    public static function getNewList($page = 1)
    {
        $url = self::getListUrl() . '&page=' . $page;
        $content = iconv('gbk', 'utf-8', getFile($url));

        $result = array();
        if (false === $content)
        {
            return $result;
        }

        $pattern = '/<h1>\s*\<a\s*href=\"(.*?)\"\s*target=\"_blank\">\s*(.*?)\<\/a>\s*\<\/h1>/is';
        if (preg_match_all($pattern, $content, $links, PREG_SET_ORDER))
        {
            foreach ($links as $link)
            {
                // Links that carry no book id are skipped; previously they all
                // collapsed onto a single "...?id=" key.
                if (preg_match('/html\/\d+\/(\d+)/is', $link[1], $m))
                {
                    $result[self::siteUrl() . '/modules/article/articleinfo.php?id=' . $m[1]] = $link[2];
                }
            }
        }

        return $result;
    }

    /**
     * Resolves the type id (the directory segment before the book id).
     *
     * Order of preference: the cached value, the value embedded in an already
     * known chapter list URL, and finally the value parsed from the info page.
     *
     * @return string Type id, or an empty string when it cannot be determined
     */
    private function _getTypeId()
    {
        $typeid = $this->_typeId;
        if (!empty($typeid))
        {
            return $typeid;
        }

        if (!is_null($this->_chapterListUrl))
        {
            $pattern = '/files\/article\/html\/(\d+)\/' . $this->getBookId() . '\//is';
            if (preg_match($pattern, $this->getChapterListUrl(), $tmp))
            {
                $typeid = $tmp[1];
            }
        }

        // '0' is a legitimate type id even though empty('0') is true.
        if ($typeid !== '0' && empty($typeid))
        {
            if (is_null($this->_infoFiles))
            {
                // getInfoFile() fills $_infoFiles and $_typeId itself and
                // returns nothing; assigning its result would wipe the HTML
                // that was just fetched.
                $this->getInfoFile();
            }

            $typeid = $this->_typeId;
        }

        return $typeid;
    }

    /**
     * Returns the regex fragments (without delimiters) that match watermark
     * and advertisement text to be stripped from chapter content.
     *
     * @return array
     */
    public function getContentRegex()
    {
        return array(
            '☆(.*)☆',
            '♀(.*)♀',
            '︴(.*)︴',
            '<div style=\'display:none\'>(.*?)<\/div>',
            '<div style=\"display:none\">(.*?)<\/div>',
            '(h|H|Ｈ|ん)(.*?)更新最快',
            '全文字(.*?)(SＥ|se|SE)',
            '手机看书(.*?)更新最快',
            // The square brackets and dots are literal text in the watermark;
            // unescaped, "[...]" is a character class and never matches it.
            '更新最快hao123中文网\[www\.hao123\.se\]',
            'hao123中文网\[hao123\.se\] 更新最快',
            '(h|H|Ｈ|ん)aｏ(.*?)(更新|最快)',
            '(Ｈ|h|H)a(.*?)更\s?新\s?最\s?快',
            'h.*a.*文.*网',
        );
    }

    /**
     * Chapter-name regex hook; this site defines no patterns.
     *
     * @return null
     */
    public function getChaperNameRegex()
    {
        return null;
    }

    /**
     * Returns the base URL of the target site.
     *
     * @return string
     */
    public static function siteUrl()
    {
        return self::$siteUrl;
    }

    /**
     * Returns the display name of the target site.
     *
     * @return string
     */
    public static function siteName()
    {
        return self::$siteName;
    }
}