<?php
/**
 * Scraper for the novel site www.3yt.com.
 *
 * All page parsing is regex based and therefore tightly coupled to the
 * site's current HTML markup.
 *
 * @author yzxh24
 */
class Site_Sanyt extends Site_Base
{
    /** @var string Base URL of the target site (no trailing slash). */
    public static $siteUrl = 'http://www.3yt.com';

    /** @var string Display name of the target site. */
    public static $siteName = '若雨中文';

    public static $fetchNew = false;

    // Original author's note: "mainly fetching images, no replacement".
    // NOTE(review): the flag is nevertheless true - confirm how Site_Base interprets it.
    public $isReplaceFetch = true;

    public $replaceLimit = 4;

    public $fetchImage = true;

    // NOTE(review): presumably seconds (30 minutes) - confirm against Site_Base.
    public $lockTime = 1800;

    protected $_denyBooks = array(6128,15920,20537);

    /** @var string Cached HTML of the book detail page, filled by getInfoFile(). */
    protected $_infoFiles = '';

    /** @var string Cached category id used in the site's chapter paths, see _getTypeId(). */
    protected $_typeId = '';

    /** @var string|null Explicit chapter list URL; null means "derive it from the book id". */
    protected $_chapterListUrl = null;

    /**
     * @param string $detailUrl URL of the book detail page, expected to contain "txt<ID>".
     */
    public function __construct($detailUrl)
    {
        parent::__construct($detailUrl);

        // A URL without the "txt<ID>" part yields book id 0 (same result as before,
        // but without reading an undefined array index).
        $this->_bookId = preg_match('/txt(\d+)/i', $detailUrl, $matches) ? intval($matches[1]) : 0;
    }

    /**
     * Downloads the book detail page and caches it for the getters below.
     *
     * The page is requested with 'gbk' and 'utf-8' passed to getFile()
     * (presumably source and target encoding - see Site_Base::getFile()).
     *
     * @return void
     */
    public function getInfoFile()
    {
        $infoUrl = self::siteUrl() . '/txt' . $this->getBookId() . '.html';
        $this->_infoFiles = $this->getFile($infoUrl, 'gbk', 'utf-8');
    }

    /**
     * Extracts the book title from the cached detail page.
     *
     * @return string Trimmed title, or '' when the markup is not found.
     */
    public function getBookName()
    {
        return trim($this->_firstGroup('/<span\s*class=\"STYLE1\">(.*?)\<\/span>/is', $this->_infoFiles));
    }

    /**
     * Extracts the author name from the cached detail page.
     *
     * @return string Trimmed author name, or '' when the markup is not found.
     */
    public function getAuthor()
    {
        return trim($this->_firstGroup('/<font\s*color=\"#7777FF\">\<u>(.*?)\<\/u>\<\/font>/is', $this->_infoFiles));
    }

    /**
     * Extracts the book synopsis from the cached detail page.
     *
     * @return string
     */
    public function getBookInfo()
    {
        $raw = $this->_firstGroup('/<\/u>\<\/font>\<\/a>\<\/font>(.*?)\<\/td>/is', $this->_infoFiles);

        $info = analyzeText(strip_tags($raw));

        // Strip embedded .aspx links. The match is lazy so that only each URL is
        // removed, not everything between the first "http://" and the last ".aspx".
        $info = preg_replace('/http:\/\/(.*?)\.aspx/is', '', $info);

        return $info;
    }

    /**
     * Determines the writing status of the book.
     *
     * The "写作进程" (writing progress) block of the detail page is searched
     * for the character "完".
     *
     * @return int 1 when the progress block contains "完", otherwise 0.
     */
    public function getStatus()
    {
        $found = preg_match('/<div\s*align=\"center\">写作进程：(.*?)\<\/div>/is', $this->_infoFiles, $matches);
        if ($found && !empty($matches[0]) && false !== strpos($matches[0], '完'))
        {
            return 1;
        }

        return 0;
    }

    /**
     * Returns the URL of the chapter list page.
     *
     * An URL injected through setChapterListUrl() wins; otherwise the book
     * detail page URL is used and memoised.
     *
     * @return string
     */
    public function getChapterListUrl()
    {
        if (is_null($this->_chapterListUrl))
        {
            // Legacy scheme, kept for reference:
            // self::siteUrl() . '/Html/Book/' . $this->_getTypeId() . '/' . $this->getBookId() . '/Index.shtml'
            $this->_chapterListUrl = self::siteUrl() . '/txt' . $this->getBookId() . '.html';
        }

        return $this->_chapterListUrl;
    }

    /**
     * Overrides the chapter list URL.
     *
     * @param string $url
     * @return void
     */
    public function setChapterListUrl($url)
    {
        $this->_chapterListUrl = $url;
    }

    /**
     * Collects the URL and title of every chapter of the book.
     *
     * The result is also handed to setChapterArray().
     *
     * @return array array('hasVolume' => false, 'chapters' => array(array('url' => ..., 'title' => ...), ...));
     *               'chapters' is an empty array when no chapter link is found.
     */
    public function getChapterList()
    {
        $list = array('hasVolume' => false);
        // Initialised up front so that a page without chapter links produces an
        // empty list instead of an undefined variable.
        $chapters = array();

        $chapterListContent = $this->getFile($this->getChapterListUrl());
        $pattern = '/<div\s*id=\"list\"><a\s*href=\"(\/Html\/Book\/\d+\/\d+\/\d+\.shtml)\"\s*target=\"_blank\"\s*id=\"BookHtml\">(.*?)\<\/a>\<\/div>/is';
        if (preg_match_all($pattern, $chapterListContent, $matches))
        {
            foreach ($matches[1] as $key => $path)
            {
                $chapters[] = array(
                    'url'   => self::siteUrl() . $path,
                    'title' => trim(strip_tags($matches[2][$key])),
                );
            }
        }
        $list['chapters'] = $chapters;

        $this->setChapterArray($list);

        return $list;
    }

    /**
     * Extracts the chapter title from a raw chapter page.
     *
     * @param string $chapterContent Raw HTML of the chapter page.
     * @return string
     */
    public function getChapterName($chapterContent)
    {
        $title = $this->_firstGroup('/<span class=\"newstitle\">(.*?)\<\/span>/is', $chapterContent);

        // Remove boilerplate words the site prepends to chapter titles.
        return $this->analyzeChapterName(str_replace(array('正文','作品','分卷阅读'), '', $title));
    }

    /**
     * Reduces a raw chapter page to its actual content.
     *
     * @param string $chapterContent Raw HTML of the chapter page.
     * @return string Newline separated image URLs for image chapters, otherwise the cleaned text.
     */
    public function filterChapterContent($chapterContent)
    {
        // Drop hidden blocks first so their text does not leak into the chapter.
        $chapterContent = preg_replace('/<div style=[\'|\"]display:none[\'|\"]>(.*?)\<\/div>/is', '', $chapterContent);
        $body = $this->_firstGroup('/<div\s*id=\"BookText\"\s*align=\"left\">(.*?)\<div\s*class=\"box_02\">/is', $chapterContent);

        if ($this->isImage($body))
        {
            preg_match_all('/<img\s*src=\"(.*?)\"/is', $body, $images);

            return implode("\n", $images[1]);
        }

        return strip_tags(str_replace(array('&lt;', '&gt;'), array('<', '>'), $this->analyzeContent($body)));
    }

    /**
     * Downloads one chapter page and normalises it to UTF-8.
     *
     * The encoding is taken from the "charset=" declaration inside the page:
     * UTF-8 pages are returned untouched, GB2312/GBK/GB18030 pages are converted
     * (GB18030 is used as source because it is a superset of the other two).
     * Previously UTF-8 pages were wrongly converted from GBK and GBK pages were
     * not converted at all.
     *
     * @param string $contentUrl
     * @return string|false Page content; false when iconv() fails on a GB page.
     */
    public function getChapterContent($contentUrl)
    {
        $content = $this->getFile($contentUrl);

        // Accepts both charset=gb2312 and charset="gb2312".
        $declared = strtolower($this->_firstGroup('/charset=[\'\"]?([\w-]+)/i', $content));

        if ('' === $declared || false !== strpos($declared, 'utf-8'))
        {
            return $content;
        }

        foreach (array('gb2312', 'gbk', 'gb18030') as $gbCharset)
        {
            if (false !== strpos($declared, $gbCharset))
            {
                return iconv('GB18030', 'utf-8', $content);
            }
        }

        return $content;
    }

    /**
     * Returns the full URL of a chapter page.
     *
     * getChapterList() already produces absolute URLs, so the value is passed through.
     *
     * @param string $chapterHtml
     * @return string
     */
    public function getChapterContentUrl($chapterHtml)
    {
        return $chapterHtml;
    }

    /**
     * Tells whether the content refers to image files (chapter delivered as pictures).
     *
     * @param string $chapterContent
     * @return boolean True when a "<digits>.gif|jpg|png" file name occurs in the content.
     */
    public function isImage($chapterContent)
    {
        return (bool) preg_match('/\d+\.(gif|jpg|png)/is', $chapterContent);
    }

    /**
     * Tells whether the filtered chapter content is worth saving.
     *
     * Any non-empty content (text or image list) is accepted.
     *
     * @param string $content
     * @return boolean
     */
    public function isCanSave($content)
    {
        return !empty($content);
    }

    /**
     * Returns the full URL of an image.
     *
     * @param string $image
     * @return string The value is passed through unchanged.
     */
    public function getImageUrl($image)
    {
        return $image;
    }

    /**
     * Downloads the binary content of an image.
     *
     * @param string $imageUrl
     * @return string|false Image bytes, or false when the transfer fails.
     */
    public function getImage($imageUrl)
    {
        $ch = curl_init();
        if (false === $ch)
        {
            return false;
        }

        curl_setopt($ch, CURLOPT_URL, $imageUrl);
        curl_setopt($ch, CURLOPT_HEADER, 0);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_USERAGENT, 'compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)');
        $contents = curl_exec($ch);
        // Release the handle; it used to be leaked on every download.
        curl_close($ch);

        return $contents;
    }

    /**
     * Finds the cover image of the book on the cached detail page.
     *
     * @return array|false array('url' => absolute URL, 'fileext' => extension),
     *                     or false when there is no real cover (missing markup,
     *                     path outside "DownFiles", or the "noimg.gif" placeholder).
     */
    public function getCoverUrl()
    {
        $path = $this->_firstGroup('/<div class=\"bortable wleft\">.*<img\s*src=\"(.*?)\"\s*width=\"210\"\s*height=\"280\"\s*\/>/is', $this->_infoFiles);
        if (empty($path) || false === strpos($path, 'DownFiles'))
        {
            return false;
        }

        $segments = explode('/', $path);
        $name = end($segments);
        if (empty($name) || 'noimg.gif' == $name)
        {
            return false;
        }

        // explode() result is stored in a variable first: end()/array_pop() take
        // their argument by reference and must not receive a function result.
        $nameParts = explode('.', $name);
        $fileext = end($nameParts);

        return array('url' => self::siteUrl() . $path, 'fileext' => $fileext);
    }

    /**
     * Searches the target site for a book.
     *
     * The site's search form is posted with the GB18030 encoded book name; the
     * first result table mentioning both the book name and the author name wins.
     *
     * @param string $bookName   UTF-8 book name.
     * @param string $authorName UTF-8 author name.
     * @return string|boolean Absolute book URL, or false when nothing suitable is found.
     */
    public static function searchBook($bookName, $authorName)
    {
        $searchUrl = self::siteUrl() . '/Book/Search.aspx';
        $post = array(
            'SearchClass' => 1,
            'SearchKey' => iconv('utf-8','gb18030',$bookName),
        );

        $handle = curl_init();
        curl_setopt($handle, CURLOPT_URL, $searchUrl);
        curl_setopt($handle, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($handle, CURLOPT_POST, 1);
        curl_setopt($handle, CURLOPT_POSTFIELDS, $post);
        $output = curl_exec($handle);
        curl_close($handle);

        if (empty($output) || !preg_match_all('/<table width=\"93%\" border=\"0\" align=\"center\">(.*?)\<\/table>/is', $output, $matchs))
        {
            return false;
        }

        $urlPattern = '/Book\/\d+\/Index\.aspx/i';

        foreach ($matchs[1] as $match)
        {
            $match = iconv('gb18030', 'utf-8', $match);
            if (false === strpos($match, $bookName) || false === strpos($match, $authorName))
            {
                continue;
            }

            // A result without a book link is skipped; it used to yield the
            // bogus URL "<siteUrl>/".
            if (preg_match($urlPattern, $match, $url))
            {
                return self::siteUrl() . '/' . $url[0];
            }
        }

        return false;
    }

    /**
     * Returns the URL of the "recently updated" listing, used for update monitoring.
     *
     * @return string
     */
    public static function getListUrl()
    {
        return self::siteUrl() . '/Book/ShowBooklist.aspx';
    }

    /**
     * Returns the regex fragments (without delimiters) describing advertising
     * and watermark text of the site.
     *
     * NOTE(review): presumably applied by Site_Base while cleaning chapter text - confirm.
     *
     * @return array
     */
    public function getContentRegex()
    {
        return array(
            '(\（|\()?电( ){0,}脑( ){0,}阅( ){0,}读(.*?)(сΝ|om|ｃｎ|Сｎ|m|M|М|ＣＮ|c-n|Ｏｍ|ｍ|оМ)(\)|）)?',
            //'(\(|《)?(1|１|⑴)\W+(6|６|⑹)(.*?)(сΝ|om|ｃｎ|Сｎ|m|M|М|ＣＮ|c-n|Ｏｍ|ｍ|оМ)(\)|》)?',
            '(\()?全文字(.*?)(學網|学网)(\)|）)?',
            '(<|《)?16(k|K)小(说|說)网(.*?)\.(сΝ|om|ｃｎ|Сｎ|ＣＮ|c-n|M|оМ)(>|》)',
            '(\()?本书转载(.*?)(сΝ|om|ｃｎ|Сｎ|m|M|М|ＣＮ|c-n|оМ)[\)]?',
            '１６(.*?)小(说|說)网',
            '(\()?(小说整理|电脑阅读)(.*?)(сΝ|om|ｃｎ|Сｎ|m|M|М|ＣＮ|c-n|оМ)(\)|）)?',
            '(\（|\()?电脑看小说(.*?)(сΝ|om|ｃｎ|Сｎ|m|M|М|ＣＮ|c-n|ｍ|оМ)(\)|）)?',
            '(\（|\()?更\/新\/(最|超)\/快(.*?)(сΝ|om|ｃｎ|Сｎ|m|M|М|ＣＮ|c-n|ｍ|оМ)(\)|）)?',
            '(\（|\()?请记住我们(.*?)(сΝ|om|ｃｎ|Сｎ|m|M|М|ＣＮ|c-n|ｍ|оМ)(\)|）)?',
            '(\（|\()?web用户(.*?)(сΝ|om|ｃｎ|Сｎ|m|M|М|ＣＮ|c-n|Ｏｍ|ｍ|оМ)(\)|）)?',
            '1⑹(.*?)(整理|首发)',
            '[x|X][s|S]?(.*?)学网[\)]?',
            '本(書|书)(.*?)(學|学)(網|网)',
            '更\/新\/最\/快',
            '更新最快(.*?)\]',
            '手机轻松(.*?)整理',
            'wap(.*?)\|(k|K)',
            '(\(|\（|\[)www.16kbook.com(\)|\）|\])',
            '(手机访问|手机看小说|ｗ-α-р|ｗｗ`ｗ|ｗ-а-р|ωωω|ｗｗｗ|ｗａｐ|⑴⑹|ω)(.*?)(сΝ|om|ｃｎ|Сｎ|m|M|М|ＣＮ|c-n|Ｏｍ|ｍ|оМ|СΟΜ)[\)]?',
            '\[ww(.*?)m\]',
            '本章节由(.*?)书友上传',
            '╔(.*?)╝',
            '\(www(.*?)com\小说网\)',
            '\(www(.*?)com\)',
            '83k',
            '泡(-|_|\*)?书(-|_|\*)?吧首\W发',
            '泡(-|_|\*)?书(-|_|\*)?吧',
            '(\(|（)ww(.*)c\s*o\s*m(\)|）)',
            'www.paoshu8.com',
            'paoshu8',
            '小说网',
            '若雨中文网',
            '若雨中文',
            '小说阅读下载尽在',
            '中文网更新超快小说更多(：|:)?',
            '最新最快更新的网站(：|:)?',
            '若\s*雨\s*中\s*文\s*网',
            'www.3yt.com',
            '3yt',
            'w\s*w\s*w\s*.\s*3\s*y\s*t\s*.\s*c\s*o\s*m'
        );
    }

    /**
     * Returns the regex fragments for cleaning chapter titles.
     *
     * No pattern is active for this site; the former ones are kept as comments.
     *
     * @return array
     */
    public function getChaperNameRegex()
    {
        return array(
            //'(|(w|ω|W)(.*))?(ㄧ|一|1|１|1|l|⑴|①)(.*)(m|M|Ｍ|М|ｍ|n|ｎ|m|网|網|传)',
            //'(www.|ωωω.)*(1|１|１|ㄧ)\s*(6|６|б)(.*)[com|ｃom|om|m]',
            //'(１|1|l|①|⑴)\s*(6|６|б)(.*)s',
        );
    }

    /**
     * Returns one page of the site's "recently updated" listing.
     *
     * The page is downloaded with the global getFile() function and converted
     * from GBK to UTF-8.
     *
     * @param int $page
     * @return array Map of absolute book URL => book name.
     */
    public static function getNewList($page = 1)
    {
        $url = self::getListUrl() . '?page=' . $page;
        $content = iconv('gbk', 'utf-8', getFile($url));

        $result = array();
        if (preg_match_all('/<a\s*href=\"(\/txt\d+\.html)\"\s*target=\"_blank\">\<font\scolor=\"\#006699\">(.*?)\<\/font>/is', $content, $match))
        {
            foreach ($match[1] as $key => $path)
            {
                $result[self::siteUrl() . $path] = trim(strip_tags($match[2][$key]));
            }
        }

        return $result;
    }

    /**
     * Resolves the site's category id of the book (the first number in
     * "/Html/Book/<category>/<bookId>/Index.shtml").
     *
     * Lookup order: cached value, explicitly set chapter list URL, book detail
     * page (downloaded on demand). A successful lookup is cached.
     *
     * @return string Category id, or '' when it cannot be determined.
     */
    private function _getTypeId()
    {
        if (!empty($this->_typeId))
        {
            return $this->_typeId;
        }

        $pattern = '/Html\/Book\/(\d+)\/' . $this->getBookId() . '\/Index\.shtml/is';
        $typeid = '';

        if (!is_null($this->_chapterListUrl))
        {
            $typeid = $this->_firstGroup($pattern, $this->getChapterListUrl());
        }

        // '0' is a valid category id although empty() treats it as empty.
        if ($typeid !== '0' && empty($typeid))
        {
            if (empty($this->_infoFiles))
            {
                $this->getInfoFile();
            }
            $typeid = $this->_firstGroup($pattern, $this->_infoFiles);
        }

        if ('' !== $typeid)
        {
            $this->_typeId = $typeid;
        }

        return $typeid;
    }

    /**
     * Maps the site's category name of the book to the local category id.
     *
     * @return int Local category id; 42 when the category is unknown or missing.
     */
    public function getType()
    {
        $type = trim($this->_firstGroup('/<a href=\"\/Book\/LN\/\d+\.aspx"><font\s*color=\"#333333\">(.*?)\<\/font>\<\/a>/is', $this->_infoFiles));

        // local category id => category names used by the target site.
        // '激情生活' used to be listed for both 15 and 39; only the first entry (15)
        // was ever reachable, so it is listed once.
        $categoryMap = array(
            12 => array('网络游戏', '幻想网游', '虚拟网游', '游戏生涯', '游戏异界'),
            27 => array('电子竞技', '游戏竞技', '体育竞技', '网游竟技'),
            6  => array('都市生活', '白领生涯', '商海沉浮', '都市激战', '现代文学', '官场沉浮', '黑道风云', '都市', '都市娱乐'),
            5  => array('都市异能', '都市重生'),
            3  => array('浪漫言情', '恩怨情仇', '都市言情', '言情小说', '言情'),
            2  => array('西方奇幻', '东方玄幻', '玄幻小说', '转世重生', '玄幻', '奇幻玄幻'),
            28 => array('玄幻魔法', '异术超能', '奇幻魔法', '变身情缘', '玄幻魔幻'),
            25 => array('异世大陆', '异界大陆'),
            11 => array('架空历史', '三国梦想', '历史穿越', '历史小说', '历史传记', '军事历史', '历史军事', '穿越女尊', '两宋元明', '架空', '秦汉三国'),
            1  => array('快意江湖', '传统武侠', '武侠同人', '美文同人', '历史武侠'),
            16 => array('奇幻修真', '现代修真'),
            26 => array('古典仙侠', '仙侠异能', '仙侠奇侠', '武侠仙侠', '浪子异侠', '洪荒封神'),
            8  => array('灵异鬼怪', '推理灵异', '推理侦探', '灵异神怪', '恐怖惊悚'),
            7  => array('科幻世界', '科幻灵异', '科幻冒险', '科幻动漫', '机器时代', '星际战争', '数字生命', '骇客时空', '末世危机'),
            4  => array('千千心结'),
            9  => array('外国经典', '中国名著'),
            10 => array('战争幻想', '战争风云', '军事战争', '现代战争'),
            13 => array('青春校园', '热血青春', '菁菁校园'),
            14 => array('纯爱耽美'),
            15 => array('都市情感', '激情生活'),
            39 => array('动漫同人'),
        );

        foreach ($categoryMap as $localId => $siteNames)
        {
            if (in_array($type, $siteNames, true))
            {
                return $localId;
            }
        }

        return 42;
    }

    /**
     * @return string Base URL of the target site.
     */
    public static function siteUrl()
    {
        return self::$siteUrl;
    }

    /**
     * @return string Display name of the target site.
     */
    public static function siteName()
    {
        return self::$siteName;
    }

    /**
     * Runs a regex and returns its first capture group.
     *
     * @param string $pattern Complete PCRE pattern including delimiters.
     * @param string $subject Text to search.
     * @return string Content of group 1, or '' when the pattern does not match.
     */
    private function _firstGroup($pattern, $subject)
    {
        if (preg_match($pattern, (string) $subject, $found) && isset($found[1]))
        {
            return $found[1];
        }

        return '';
    }
}