<?php
/**
 * Scraper class for http://www.dukankan.com
 * @author yzxh24
 */
class site_Dukankan extends Site_Base
{
    public static $siteUrl = 'http://www.dukankan.com';
    
    public static $siteName = '读看看';
    
    public $lockTime = 1800;
    
    protected $_infoFiles = '';
    
    protected $_typeId = '';
    
    protected $_chapterListUrl = null;
    
    protected $_denyBooks = array();

    private $contentIsImage = false;
    
    /**
     * Create a collector for a single book.
     *
     * The book id is taken from the first run of digits found in the detail URL.
     *
     * @param string $detailUrl Detail-page URL; handed to the parent constructor unchanged.
     */
    public function __construct($detailUrl)
    {
        parent::__construct($detailUrl);

        // A URL without digits used to read an undefined $matches[1]; the id
        // still ends up as 0 in that case, but without the undefined-offset notice.
        $this->_bookId = preg_match('/(\d+)/', $detailUrl, $matches) ? intval($matches[1]) : 0;
    }
    
	/**
	 * Download the book's detail page and cache it for the other getters.
	 *
	 * The page at {siteUrl}/Book/{bookId}.html is fetched through getFile()
	 * with the 'gbk' and 'utf-8' arguments and stored in $_infoFiles.
	 * $_typeId is not populated here.
	 *
	 * @return void
	 */
	public function getInfoFile()
	{
		$detailPageUrl = sprintf('%s/Book/%s.html', self::siteUrl(), $this->_bookId);

		$this->_infoFiles = $this->getFile($detailPageUrl, 'gbk', 'utf-8');
	}
 	
	/**
	 * Extract the book title from the cached detail page.
	 *
	 * Reads the text between 《 and 》 inside <span class="booktitle">.
	 *
	 * @return string Title, or an empty string when the markup is not found
	 *                (for example when $_infoFiles is still empty).
	 */
	public function getBookName()
	{
		if (!preg_match('/<span class=\"booktitle\">《(.*?)》\<\/span>/is', $this->_infoFiles, $bookname))
		{
			// A failed match leaves $bookname without index 1; do not read it.
			return '';
		}

		return $bookname[1];
	}
 	
	/**
	 * Extract the author name from the cached detail page.
	 *
	 * Looks for the link to /Author/WB/{bookId}.aspx and returns its text.
	 *
	 * @return string Trimmed author name, or an empty string when the link is not found.
	 */
	public function getAuthor()
	{
		// Quote the id so it can never be interpreted as regex syntax.
		$bookId = preg_quote($this->getBookId(), '/');
		$pattern = '/<a href=\"\/Author\/WB\/' . $bookId . '\.aspx\">(.*?)\<\/a>/is';

		if (!preg_match($pattern, $this->_infoFiles, $author))
		{
			// A failed match leaves $author without index 1; do not read it.
			return '';
		}

		return trim($author[1]);
	}
	
	/**
	 * Extract the book synopsis from the cached detail page.
	 *
	 * Takes the content of <div id="CrbsSum"> up to its closing "<br /></div>"
	 * and passes it through analyzeText().
	 *
	 * @return string Whatever analyzeText() returns for the synopsis; an empty
	 *                string is passed to it when the block is not found.
	 */
	public function getBookInfo()
	{
		// Guard the capture: on a failed match $bookinfo[1] does not exist.
		$summary = preg_match('/<div id=\"CrbsSum\">(.*?)\<br \/>\<\/div>/is', $this->_infoFiles, $bookinfo)
			? $bookinfo[1]
			: '';

		return analyzeText($summary);
	}
	
	/**
	 * Determine whether the book is finished.
	 *
	 * Reads the first <li class="l2"> that follows <ul class="h1"> on the cached
	 * detail page and compares its text with the site's "completed" labels.
	 *
	 * @return int 1 when the label is '完成' or '完结' (finished), 0 otherwise,
	 *             including when the status markup is not found.
	 */
	public function getStatus()
	{
		// Only the first occurrence is relevant, so a single preg_match is enough;
		// checking its result avoids reading a capture that does not exist.
		if (!preg_match('/<ul class=\"h1\">(.*?)\<li class=\"l2\">(.*?)\<\/li>/is', $this->_infoFiles, $status))
		{
			return 0;
		}

		return in_array($status[2], array('完成', '完结'), true) ? 1 : 0;
	}
	
	/**
	 * Return the URL of the book's chapter index.
	 *
	 * The URL is built once as {siteUrl}/Html/{bookId}/Index.shtml and then
	 * remembered; a value stored earlier (for example through
	 * setChapterListUrl()) is returned as-is.
	 *
	 * @return string
	 */
	public function getChapterListUrl()
	{
		if ($this->_chapterListUrl === null)
		{
			$this->_chapterListUrl = self::siteUrl() . '/Html/' . $this->getBookId() . '/Index.shtml';
		}

		return $this->_chapterListUrl;
	}

   	/**
   	 * Store the chapter-index URL in $_chapterListUrl.
   	 *
   	 * A non-null value stored here is what getChapterListUrl() returns
   	 * instead of building the default URL.
   	 *
   	 * @param string $url
   	 * @return void
   	 */
    public function setChapterListUrl($url)
    {
        $this->_chapterListUrl = $url;
    }
    
	/**
	 * Fetch the chapter index and collect every chapter link on it.
	 *
	 * The index page is downloaded via getFile() ('gb18030' / 'utf-8'), each
	 * <li><a href="N.shtml" title="...">title</a></li> entry is turned into a
	 * url/title pair, and the result is also handed to setChapterArray().
	 *
	 * @return array array('hasVolume' => false, 'chapters' => array of
	 *               array('url' => relative file name, 'title' => link text))
	 */
	public function getChapterList()
	{
		$indexHtml = $this->getFile($this->getChapterListUrl(), 'gb18030', 'utf-8');
		preg_match_all('/<li>\<a\s*href=\"(\d+\.shtml)\" title=\".*?\">(.*?)\<\/a>\<\/li>/isu', $indexHtml, $matches);

		$hrefs = $matches[1];
		$titles = $matches[2];

		$chapters = array();
		if ($hrefs)
		{
			foreach ($hrefs as $position => $href)
			{
				$chapters[] = array('url' => $href, 'title' => $titles[$position]);
			}
		}

		$list = array(
			'hasVolume' => false,
			'chapters' => $chapters,
		);
		$this->setChapterArray($list);

		return $list;
	}
    
	/**
	 * Extract the chapter title from a downloaded chapter page.
	 *
	 * Takes the text of <span class="newstitle">, removes the boilerplate words
	 * '正文', '作品' and '分卷阅读', and passes the rest to analyzeChapterName().
	 *
	 * @param string $chapterContent Raw chapter page HTML.
	 * @return string Result of analyzeChapterName(); it receives an empty string
	 *                when the title markup is not found.
	 */
	public function getChapterName($chapterContent)
	{
		// Guard the capture: on a failed match $chaptername[1] does not exist.
		$rawTitle = preg_match('/<span class=\"newstitle\">(.*?)\<\/span>/is', $chapterContent, $chaptername)
			? $chaptername[1]
			: '';

		return $this->analyzeChapterName(str_replace(array('正文','作品','分卷阅读'), '', $rawTitle));
	}

    /**
     * Remove only the named HTML tags from a string, leaving all other markup alone.
     *
     * The previous patterns were written against "<!--tag-->"-style text and
     * therefore never matched an ordinary "<tag>" or "</tag>"; they now target
     * real tags, with the tag name quoted and bounded so that 'b' does not
     * also hit "<br>".
     *
     * @param string       $str          Markup to clean.
     * @param string|array $tags         Tag names as an array (array('b', 'i')) or as a
     *                                   string such as '<b><i>' or 'b'.
     * @param bool         $stripContent When true, whole elements (tag pair plus the text
     *                                   between them) are removed; otherwise only the tags.
     * @return string
     */
    protected function strip_only_tags($str, $tags, $stripContent = false)
    {
        if (!is_array($tags))
        {
            // '<b><i>' becomes array('b', 'i', ''); a bare 'b' becomes array('b').
            $tags = explode('>', str_replace('<', '', $tags));
        }

        foreach ($tags as $tag)
        {
            $tag = trim($tag);
            if ('' === $tag)
            {
                continue;
            }

            $name = preg_quote($tag, '#');

            if ($stripContent)
            {
                // Non-greedy, so text between two separate elements survives.
                $str = preg_replace('#<' . $name . '\b[^>]*>.*?</' . $name . '\s*>#is', '', $str);
            }

            // Remove any remaining opening, closing or self-closing tag of that name.
            $str = preg_replace('#</?' . $name . '\b[^>]*>#is', '', $str);
        }

        return $str;
    }
	/**
	 * Cut the chapter text out of a raw chapter page and clean it up.
	 *
	 * Steps:
	 *  1. take everything between <DIV id=content> and the following <center><FONT;
	 *  2. remember in $contentIsImage whether that fragment looks like an image chapter;
	 *  3. drop all tags except <br>;
	 *  4. remove trailing <br> tags, run analyzeContent(), then remove trailing
	 *     <br> tags, &nbsp; entities and whitespace.
	 *
	 * @param string $chapterContent Raw chapter page HTML.
	 * @return string Cleaned chapter text ('' is fed through the same steps when
	 *                the content block is not found).
	 */
	public function filterChapterContent($chapterContent)
	{
		// Guard the capture: on a failed match there is nothing to index.
		$result = preg_match('/<DIV\s*id=content>(.*?)\<center>\<FONT/is', $chapterContent, $content)
			? $content[1]
			: '';

		// Record the verdict for THIS chapter. The flag used to be set to true
		// and never cleared, so one image chapter made isCansave() reject every
		// later chapter handled by the same instance.
		$this->contentIsImage = $this->isImage($result);

		$result = strip_tags($result, '<br/><br>');

		// rtrim($s, "<br>") treats its second argument as a character mask: it
		// removed any trailing '<', 'b', 'r' or '>' (eating real text and leaving
		// fragments such as "<br/"). Strip whole trailing tags instead.
		$trimmed = preg_replace('/(?:\s*<br\s*\/?>)+\s*$/i', '', $result);
		if (null !== $trimmed)
		{
			$result = $trimmed;
		}

		$result = $this->analyzeContent($result);

		// Same problem with rtrim($s, "&nbsp;"): remove complete trailing
		// entities, <br> tags and whitespace rather than individual characters.
		$trimmed = preg_replace('/(?:<br\s*\/?>|&nbsp;|\s)+$/i', '', $result);

		// preg_replace() yields null on a PCRE error; keep the untrimmed text then.
		return null === $trimmed ? $result : $trimmed;
	}
	
	/**
	 * Download one chapter page.
	 *
	 * @param string $contentUrl URL passed to getFile() together with the
	 *                           'gb18030' and 'utf-8' arguments.
	 * @return string Whatever getFile() returns for that URL.
	 */
	public function getChapterContent($contentUrl)
	{
		$pageHtml = $this->getFile($contentUrl, 'gb18030', 'utf-8');

		return $pageHtml;
	}
	
	/**
	 * Build the absolute URL of a chapter page.
	 *
	 * @param string $chapterHtml Chapter file name, appended after {siteUrl}/Html/{bookId}/.
	 * @return string
	 */
	public function getChapterContentUrl($chapterHtml)
	{
		$segments = array(
			self::siteUrl() . '/Html',
			$this->getBookId(),
			$chapterHtml,
		);

		return implode('/', $segments);
	}

    /**
     * Tell whether chapter markup refers to an image instead of text.
     *
     * The content counts as an image when it contains a file name made of six
     * or more digits with a gif/jpg/png extension, or the name "front.gif"
     * (both checks are case-insensitive).
     *
     * @param string $chapterContent
     * @return boolean
     */
    public function isImage($chapterContent)
    {
        $imageMarkers = array(
            '/[0-9]{6,}\.(gif|jpg|png)/is',
            '/front\.gif/is',
        );

        foreach ($imageMarkers as $marker)
        {
            if (preg_match($marker, $chapterContent))
            {
                return true;
            }
        }

        return false;
    }

    /**
     * Decide whether chapter content is worth saving.
     *
     * Content is rejected when it (or the chapter last flagged through
     * $contentIsImage) is an image, or when nothing is left after removing
     * plain spaces.
     *
     * @param string $content
     * @return boolean
     */
    public function isCansave($content)
    {
        if ($this->contentIsImage || $this->isImage($content))
        {
            return false;
        }

        $withoutSpaces = str_replace(' ', '', $content);

        return !empty($withoutSpaces);
    }

    /**
     * Turn a site-relative image path into an absolute URL.
     *
     * @param string $image Path appended directly to the site URL.
     * @return string
     */
    public function getImageUrl($image)
    {
        return sprintf('%s%s', self::siteUrl(), $image);
    }

    /**
     * Find the cover image on the cached detail page.
     *
     * The image inside <div id="CrbtlBookImg"> is used only when its path
     * contains 'DownFiles' and its file name is not the 'noimg.gif' placeholder.
     *
     * @return array|false array('url' => absolute image URL, 'fileext' => text after
     *                     the last dot of the file name), or false when no usable
     *                     cover is found.
     */
    public function getCoverUrl()
    {
        if (!preg_match('/<div\sid=\"CrbtlBookImg\">\<img\ssrc=\"(.*?)\"/is', $this->_infoFiles, $cover))
        {
            return false;
        }

        $path = $cover[1];
        if (false === strpos($path, 'DownFiles'))
        {
            return false;
        }

        $segments = explode('/', $path);
        $name = end($segments);
        if (empty($name) || 'noimg.gif' == $name)
        {
            return false;
        }

        // array_pop(explode(...)) passed a temporary by reference ("Only variables
        // should be passed by reference"); keep the pieces in a variable instead.
        $pieces = explode('.', $name);

        return array(
            'url' => self::siteUrl() . $path,
            'fileext' => end($pieces),
        );
    }
	
	/**
	 * Search the target site for a book and return its index URL.
	 *
	 * Posts the book name (converted to gb18030) to /Book/Search.aspx, then
	 * scans every <div id="CListTitle"> block of the response for one that
	 * contains both the book name and the author name.
	 *
	 * @param string $bookName   Book title, UTF-8.
	 * @param string $authorName Author name, UTF-8.
	 * @return string|boolean Absolute URL ({siteUrl}/Book/{id}/Index.aspx) of the first
	 *                        matching result, or false when the request fails or
	 *                        nothing matches.
	 */
	public static function searchBook($bookName, $authorName)
	{
		$searchUrl = self::siteUrl() . '/Book/Search.aspx';
		$post = array(
			'SearchClass' => 1,
			'SearchKey' => iconv('utf-8', 'gb18030', $bookName),
		);

		$handle = curl_init();
		if (false === $handle)
		{
			return false;
		}
		curl_setopt($handle, CURLOPT_URL, $searchUrl);
		curl_setopt($handle, CURLOPT_RETURNTRANSFER, 1);
		curl_setopt($handle, CURLOPT_POST, 1);
		curl_setopt($handle, CURLOPT_POSTFIELDS, $post);
		$output = curl_exec($handle);
		curl_close($handle);

		if (empty($output) || !preg_match_all('/<div id=\"CListTitle\">(.*?)\<\/div>/i', $output, $matchs))
		{
			return false;
		}

		$urlPattern = '/Book\/\d+\/Index\.aspx/i';

		foreach ($matchs[1] as $match)
		{
			$match = iconv('gb18030', 'utf-8', $match);
			if (false === $match)
			{
				// Entry could not be converted; it cannot be compared with the UTF-8 names.
				continue;
			}

			if (false === strpos($match, $bookName) || false === strpos($match, $authorName))
			{
				continue;
			}

			// Only return when the entry really carries a book URL; previously a
			// missing URL produced the bogus result "{siteUrl}/".
			if (preg_match($urlPattern, $match, $url))
			{
				return self::siteUrl() . '/' . $url[0];
			}
		}

		return false;
	}
    
	/**
	 * Return the URL of the site's book list page (used by getNewList()).
	 *
	 * @return string
	 */
	public static function getListUrl()
	{
		$base = self::siteUrl();

		return $base . '/book/showbooklist.aspx';
	}
 	
 	/**
 	 * Return the list of regular-expression fragments for this site's chapter text.
 	 *
 	 * Each entry is a pattern body without delimiters or modifiers. The
 	 * entries are returned in the order written below; entries prefixed with
 	 * // are disabled.
 	 *
 	 * NOTE(review): presumably the caller wraps each fragment in delimiters
 	 * and removes every match (site watermarks / advertising text) from the
 	 * chapter content — confirm against the code that consumes this list.
 	 * NOTE(review): several late entries are very broad (for example the one
 	 * matching from any 'w' to the next 'm' or 't'); confirm that is intended
 	 * before reordering or reusing them.
 	 *
 	 * @return array List of pattern strings.
 	 */
 	public function getContentRegex()
 	{
 	    return array(
 	        '(\（|\()?电( ){0,}脑( ){0,}阅( ){0,}读(.*?)(сΝ|om|ｃｎ|Сｎ|m|M|М|ＣＮ|c-n|Ｏｍ|ｍ|оМ)(\)|）)?',
            //'(\(|《)?(1|１|⑴)\W+(6|６|⑹)(.*?)(сΝ|om|ｃｎ|Сｎ|m|M|М|ＣＮ|c-n|Ｏｍ|ｍ|оМ)(\)|》)?',
        	'(\()?全文字(.*?)(學網|学网)(\)|）)?',
        	'(<|《)?16(k|K)小(说|說)网(.*?)\.(сΝ|om|ｃｎ|Сｎ|ＣＮ|c-n|M|оМ)(>|》)',
        	'(\()?本书转载(.*?)(сΝ|om|ｃｎ|Сｎ|m|M|М|ＣＮ|c-n|оМ)[\)]?',
        	'１６(.*?)小(说|說)网',
        	'(\()?(小说整理|电脑阅读)(.*?)(сΝ|om|ｃｎ|Сｎ|m|M|М|ＣＮ|c-n|оМ)(\)|）)?',
        	'(\（|\()?电脑看小说(.*?)(сΝ|om|ｃｎ|Сｎ|m|M|М|ＣＮ|c-n|ｍ|оМ)(\)|）)?',
        	'(\（|\()?更\/新\/(最|超)\/快(.*?)(сΝ|om|ｃｎ|Сｎ|m|M|М|ＣＮ|c-n|ｍ|оМ)(\)|）)?',
        	'(\（|\()?请记住我们(.*?)(сΝ|om|ｃｎ|Сｎ|m|M|М|ＣＮ|c-n|ｍ|оМ)(\)|）)?',
        	'(\（|\()?web用户(.*?)(сΝ|om|ｃｎ|Сｎ|m|M|М|ＣＮ|c-n|Ｏｍ|ｍ|оМ)(\)|）)?',
 	        //'(1|①|１|l)(６|6|⑥|⑹|б)(.*?)(最快|整理|首发)',
            '1⑹(.*?)(整理|首发)',
        	'[x|X][s|S]?(.*?)学网[\)]?',
        	'本(書|书)(.*?)(學|学)(網|网)',
        	'更\/新\/最\/快',
            //'更新最快(.*?)\]',
            '手机轻松(.*?)整理',
            'wap(.*?)\|(k|K)',
            '(\(|\（|\[)www.16kbook.com(\)|\）|\])',
            '(手机访问|手机看小说|ｗ-α-р|ｗｗ`ｗ|ｗ-а-р|ωωω|ｗｗｗ|ｗａｐ|⑴⑹|ω)(.*?)(сΝ|om|ｃｎ|Сｎ|m|M|М|ＣＮ|c-n|Ｏｍ|ｍ|оМ)[\)]?',
            '\[ww(.*?)m\]',
            '本章节由(.*?)书友上传',
            '╔(.*?)╝',
            '一七一(.*?)章节',
            '七路中文',
            '\/(ω|ｗ|w|W)(.*?)(ｍ|М|m|M|t)\/',
            '(ω|ｗ|w|W)(.*?)(ｍ|М|m|M|t)',
            '(\（|\()(.*?)手打(\)|）)',
            '\*\*(.*?)网(.*?)下载\*\*',
            'xt点com',
            'xt(.*?)子书',
            '(\(|\（|\[)请记住我们的网址(\)|\）|\])',
 	    );
 	}
 	
 	/**
 	 * Return the regular-expression fragments for this site's chapter names.
 	 *
 	 * No pattern is currently enabled for this site, so the list is empty.
 	 *
 	 * @return array
 	 */
 	public function getChaperNameRegex()
 	{
 	    $patterns = array();

 	    return $patterns;
 	}
 	
 	/**
 	 * Read one page of the site's book list.
 	 *
 	 * Downloads {listUrl}?page={page} with the global getFile() helper,
 	 * converts it from gbk to utf-8 and collects every book link of the form
 	 * <a target="_blank" href="/Book/N.html"><font color="#006699">name</font></a>.
 	 *
 	 * @param int $page Page number appended as the "page" query parameter.
 	 * @return array Map of absolute book URL => book name.
 	 */
 	public static function getNewList($page = 1)
 	{
 	    $pageUrl = self::getListUrl() . '?page=' . $page;
 	    $listHtml = iconv('gbk', 'utf-8', getFile($pageUrl));

 	    preg_match_all('/<a\s*target=\"_blank\"\s*href=\"(\/Book\/\d+\.html)\">\<font\s*color=\"#006699\">(.*?)\<\/font>\<\/a>/is', $listHtml, $links, PREG_SET_ORDER);

 	    $books = array();
 	    foreach ($links as $link)
 	    {
 	        // $link[1] is the relative book path, $link[2] the displayed name.
 	        $books[self::siteUrl() . $link[1]] = $link[2];
 	    }

 	    return $books;
 	}

    /**
     * Base URL of the target site, read from self::$siteUrl.
     *
     * @return string
     */
    public static function siteUrl()
    {
        $baseUrl = self::$siteUrl;

        return $baseUrl;
    }

    /**
     * Display name of the target site, read from self::$siteName.
     *
     * @return string
     */
    public static function siteName()
    {
        $displayName = self::$siteName;

        return $displayName;
    }
}