<?php
/**
 * 针对www.paoshu8.com的采集类
 * @author yzxh24
 */
class Site_Paoshuba extends Site_Base
{
    public static $siteUrl = 'http://www.paoshu8.com';
    
    public static $siteName = '泡书吧';
    
    public static $fetchNew = false;
    
    public $lockTime = 1800;
    
    protected $_infoFiles = '';
    
    protected $_typeId = '';
    
    protected $_chapterListUrl = null;
    
    /**
     * Creates the collector for one book and derives its numeric book ID.
     *
     * The ID is taken from the SECOND run of digits in the detail URL. For a
     * URL shaped like http://www.paoshu8.com/Book/123/Index.aspx the first
     * run is the "8" of the host name and the second one is the book ID.
     *
     * @param string $detailUrl URL of the book's detail page
     */
    public function __construct($detailUrl)
    {
        parent::__construct($detailUrl);

        // Default to 0 (the value intval() produced for a missing match
        // before) instead of reading an undefined offset when the URL
        // contains fewer than two digit runs.
        $this->_bookId = 0;
        if (preg_match_all('/\d+/', $detailUrl, $matches) >= 2)
        {
            $this->_bookId = intval($matches[0][1]);
        }
    }
    
	/**
	 * Downloads the book's detail page and caches it for the other getters.
	 *
	 * The page is requested with 'gbk' as input charset and 'utf-8' as output
	 * charset and stored in $_infoFiles. The category (type) ID is then parsed
	 * from the chapter list link (/Html/Book/<type>/<book>/List.shtm) found in
	 * that page and stored in $_typeId.
	 *
	 * @return void
	 */
	public function getInfoFile()
	{
		$infoUrl = self::siteUrl() . '/Book/' . $this->getBookId() . '/Index.aspx';
		$this->_infoFiles = $this->getFile($infoUrl, 'gbk', 'utf-8');

		$pattern = '/Html\/Book\/(\d+)\/' . $this->getBookId() . '\/List\.shtm/is';

		// When the link is missing keep the type ID at 0 (the value the old
		// intval() of an undefined offset produced) without touching a
		// capture group that does not exist.
		$this->_typeId = preg_match($pattern, $this->_infoFiles, $typeid) ? intval($typeid[1]) : 0;
	}

    /**/
    /**
     * Fetches a URL by delegating to self::proxyFetch().
     *
     * All three arguments are forwarded unchanged.
     *
     * @param string      $url        address to download
     * @param string|null $inCharset  forwarded to proxyFetch(); presumably the source charset
     * @param string|null $outCharset forwarded to proxyFetch(); presumably the target charset
     * @return mixed whatever proxyFetch() returns
     */
    public function getFile($url, $inCharset = null, $outCharset = null)
    {
        $response = self::proxyFetch($url, $inCharset, $outCharset);

        return $response;
    }
    /**/
 	
	/**
	 * Returns the book title, read from the first <h1> of the cached detail page.
	 *
	 * @return string trimmed title, or '' when the page contains no <h1>
	 */
	public function getBookName()
	{
		if (!preg_match('/<h1>(.*?)\<\/h1>/is', $this->_infoFiles, $bookname))
		{
			// Nothing matched: return the same empty string as before, but
			// without reading an undefined capture group.
			return '';
		}

		return trim($bookname[1]);
	}
 	
	/**
	 * Returns the author name from the cached detail page.
	 *
	 * The name is the text of the link that points at
	 * /Book/Search.aspx?SearchKey=...
	 *
	 * @return string trimmed author name, or '' when no such link exists
	 */
	public function getAuthor()
	{
		$pattern = '/<a\s*href=\"\/Book\/Search\.aspx\?SearchKey=.*?\">(.*?)\<\/a>/is';
		if (!preg_match($pattern, $this->_infoFiles, $author))
		{
			// Same result as before for a missing link, minus the read of
			// an undefined capture group.
			return '';
		}

		return trim($author[1]);
	}
	
	/**
	 * Returns the book synopsis from the cached detail page.
	 *
	 * The synopsis is the markup between the "内容简介：" label and the
	 * "小说关键字" heading; tags are stripped and the text is then passed
	 * through the global analyzeText() helper.
	 *
	 * @return string result of analyzeText() (an empty synopsis is passed in when the section is missing)
	 */
	public function getBookInfo()
	{
		$found = preg_match('/内容简介：<\/span>(.*?)<strong>小说关键字/is', $this->_infoFiles, $bookinfo);

		// strip_tags() turned the undefined capture into '' before; pass the
		// empty string explicitly so no undefined offset is read.
		$rawIntro = $found ? $bookinfo[1] : '';

		return analyzeText(strip_tags($rawIntro));
	}
	
	/**
	 * Reports whether the book is finished.
	 *
	 * Reads the "写作情况：" table cell of the cached detail page and treats
	 * the book as finished when that cell contains the character "完".
	 * (The original author noted that this site has no explicit "finished"
	 * marker, so in practice this may always yield 0.)
	 *
	 * @return int 1 when finished, 0 otherwise
	 */
	public function getStatus()
	{
		preg_match('/写作情况：(.*?)\<\/td>/is', $this->_infoFiles, $statusCell);

		$cellText = empty($statusCell[0]) ? '' : trim($statusCell[0]);

		return (false === strpos($cellText, '完')) ? 0 : 1;
	}
	
	/**
	 * Returns the URL of the book's chapter list page.
	 *
	 * On first use the URL is assembled from the site root, the category
	 * (type) ID and the book ID, and then remembered. A URL injected through
	 * setChapterListUrl() is returned as-is.
	 *
	 * @return string
	 */
	public function getChapterListUrl()
	{
		if (is_null($this->_chapterListUrl))
		{
			$this->_chapterListUrl = self::siteUrl() . '/Html/Book/'. $this->_getTypeId() . '/' . $this->getBookId() . '/List.shtm';
		}

		return $this->_chapterListUrl;
	}

   	/**
   	 * Sets the chapter list URL explicitly.
   	 *
   	 * Once set (non-null), getChapterListUrl() returns this value instead of
   	 * building one, and _getTypeId() tries to parse the category ID from it.
   	 *
   	 * @param string $url chapter list URL to use
   	 * @return void
   	 */
    public function setChapterListUrl($url)
    {
        $this->_chapterListUrl = $url;
    }
    
	/**
	 * Downloads the chapter list page and groups the chapter links by volume.
	 *
	 * A volume is every block that starts at <div id="NclassTitle"> and runs
	 * to the next </ul>. Within a block the volume title is the text before
	 * the first </div> (or before the second one when that is non-empty), and
	 * the chapters are the <li><a title=".." href="NNN.shtm">..</a></li> items.
	 * The result is also handed to setChapterArray().
	 *
	 * @return array array('hasVolume' => true, 'chapters' => list of array(volumeName => list of
	 *               array('url' => relative chapter file, 'title' => link text)));
	 *               the 'chapters' key is absent when no volume block was found
	 */
	public function getChapterList()
	{
		$list = array('hasVolume' => true);
		$chapterListContent = $this->getFile($this->getChapterListUrl(), 'gb18030', 'utf-8');
		preg_match_all('/<div\s*id=\"NclassTitle\">(.*?)\<\/ul>/isu', $chapterListContent, $matches);

		// Because of the /u modifier the match fails outright on invalid
		// UTF-8 and leaves no capture group; treat that as "no volumes".
		$volumeBlocks = empty($matches[1]) ? array() : $matches[1];

		foreach ($volumeBlocks as $volumeHtml)
		{
			preg_match_all('/(.*?)<\/div>/is', $volumeHtml, $mm);
			if (!empty($mm[1][1]))
			{
				$vName = $mm[1][1];
			}
			else
			{
				// Fall back to the first segment, or to '' when the block has
				// no </div> at all (previously an undefined offset).
				$vName = isset($mm[1][0]) ? $mm[1][0] : '';
			}
			$volumeName = trim(filter_content($vName));

			preg_match_all('/<li>\s*\<a\s*title=\".*?\"\s*href=\"(\d+\.shtm)\">(.*?)\<\/a>\<\/li>/is', $volumeHtml, $mmm);
			$chapters = array();
			if (!empty($mmm[1]))
			{
				// Distinct loop variable: the old code reused the outer
				// loop's $value here, which only worked by accident.
				foreach ($mmm[1] as $key => $chapterUrl)
				{
					$chapters[] = array('url' => $chapterUrl, 'title' => $mmm[2][$key]);
				}
			}
			$list['chapters'][][$volumeName] = $chapters;
		}
		$this->setChapterArray($list);

		return $list;
	}
    
	/**
	 * Extracts the chapter title from a chapter page.
	 *
	 * The title is the content of <span class="newstitle">; the boilerplate
	 * words "正文", "作品" and "分卷阅读" are removed before the text is
	 * passed to analyzeChapterName().
	 *
	 * @param string $chapterContent raw HTML of the chapter page
	 * @return string result of analyzeChapterName() (called with '' when the span is missing)
	 */
	public function getChapterName($chapterContent)
	{
		$found = preg_match('/<span class=\"newstitle\">(.*?)\<\/span>/is', $chapterContent, $chaptername);

		// str_replace() turned the undefined capture into '' before; pass
		// the empty string explicitly so no undefined offset is read.
		$rawName = $found ? $chaptername[1] : '';

		return $this->analyzeChapterName(str_replace(array('正文','作品','分卷阅读'), '', $rawName));
	}
	
	/**
	 * Cuts the chapter text out of a raw chapter page.
	 *
	 * Steps: remove hidden <div style="display:none"> blocks, take the
	 * content of <div id="BookText">, strip trailing line breaks, run the
	 * text through analyzeContent(), then strip trailing line breaks and
	 * &nbsp; entities again.
	 *
	 * The trailing clean-up removes whole tokens only (<br>, <br/>, <br />,
	 * &nbsp;, and whitespace between them). The previous implementation used
	 * rtrim($text, "<br>") / rtrim($text, "&nbsp;"), whose second argument is
	 * a set of characters, so it also chopped genuine trailing text made of
	 * the letters b, r, n, s, p or the characters ; & / < >.
	 *
	 * @param string $chapterContent raw HTML of the chapter page
	 * @return string cleaned chapter text ('' is fed to analyzeContent() when no BookText div exists)
	 */
	public function filterChapterContent($chapterContent)
	{
		$chapterContent = preg_replace('/<div style=[\'|\"]display:none[\'|\"]>(.*?)\<\/div>/is', '', $chapterContent);

		// The lazy capture ends at the first </div>, so it can never contain
		// one itself; the old follow-up removal of </div> was a no-op.
		$found = preg_match('/<div\s*id=\"BookText\">(.*?)\<\/div>/is', $chapterContent, $content);
		$result = $found ? $content[1] : '';

		// Possessive quantifier (++) keeps the engine from backtracking
		// through long runs of breaks.
		$result = preg_replace('/(?:<br\s*\/?>|\s)++$/i', '', $result);

		$result = $this->analyzeContent($result);

		$result = preg_replace('/(?:<br\s*\/?>|&nbsp;|\s)++$/i', '', $result);

		return $result;
	}
	
	/**
	 * Downloads one chapter page and converts it to UTF-8.
	 *
	 * The page is fetched without charset conversion; the conversion is then
	 * chosen from the charset declared inside the page itself.
	 *
	 * Leftover debugging statements (echo + die) that terminated the script
	 * before anything could be returned have been removed.
	 *
	 * @param string $contentUrl absolute URL of the chapter page
	 * @return string|false page content; false when iconv() fails
	 */
	public function getChapterContent($contentUrl)
	{
		$content = $this->getFile($contentUrl);

		if (!preg_match('/charset=(.*?)(\'|\")/i', $content, $charset))
		{
			// No charset declaration: return the page untouched, which is
			// what the branches below did for an empty charset as well.
			return $content;
		}

		$declared = strtolower($charset[1]);
		if (false !== strpos($declared, 'utf-8'))
		{
			// NOTE(review): converting from GBK when the page declares UTF-8
			// looks inverted. Kept as found because the site may mislabel
			// its GBK pages - confirm against a live chapter page.
			$content = iconv('gbk', 'utf-8', $content);
		}
		else if (false !== strpos($declared, 'gb2312'))
		{
			$content = iconv('GB18030', 'utf-8', $content);
		}

		return $content;
	}
	
	/**
	 * Builds the absolute URL of a chapter page.
	 *
	 * @param string $chapterHtml chapter file name as found in the chapter list (e.g. "123.shtm")
	 * @return string <site>/Html/Book/<typeId>/<bookId>/<chapterHtml>
	 */
	public function getChapterContentUrl($chapterHtml)
	{
		$bookDir = self::siteUrl() . '/Html/Book/' . $this->_getTypeId() . '/' . $this->getBookId();

		return $bookDir . '/' . $chapterHtml;
	}

    /**
     * Tells whether a chapter's content is delivered as an image.
     *
     * True when the content references an image file whose name contains at
     * least six consecutive digits before .gif/.jpg/.png, or references
     * "front.gif".
     *
     * @param string $chapterContent chapter content to inspect
     * @return boolean
     */
    public function isImage($chapterContent)
    {
        $hasNumberedImage = (bool) preg_match('/[0-9]{6,}\.(gif|jpg|png)/is', $chapterContent);

        return $hasNumberedImage || (bool) preg_match('/front\.gif/is', $chapterContent);
    }

    /**
     * Tells whether a chapter's content may be stored.
     *
     * Content is storable exactly when isImage() does not classify it as an
     * image chapter.
     *
     * @param string $chapterContent chapter content to inspect
     * @return boolean
     */
    public function isCanSave($chapterContent)
    {
        $isImage = $this->isImage($chapterContent);

        return $isImage ? false : true;
    }

    /**
     * Turns a site-relative image path into an absolute URL.
     *
     * The path is appended to the site root verbatim; no separator is added.
     *
     * @param string $image image path as found in the page
     * @return string
     */
    public function getImageUrl($image)
    {
        $siteRoot = self::siteUrl();

        return $siteRoot . $image;
    }

    /**
     * Locates the book's cover image on the cached detail page.
     *
     * Only images under /DownFiles/Book/BookCover/ ending in .gif or .jpg
     * are considered, and the "noimg.gif" placeholder is ignored.
     *
     * @return array|null array('url' => absolute image URL, 'fileext' => extension without the dot),
     *                    or null when no usable cover is present
     */
    public function getCoverUrl()
    {
        // Defined up front: the old code returned an undefined variable on
        // every path that found no usable cover.
        $return = null;

        preg_match('/<img src=\"(\/DownFiles\/Book\/BookCover\/.*?\.(gif|jpg))\"/is', $this->_infoFiles, $cover);
        if (empty($cover[1]) || false === strpos($cover[1], 'DownFiles'))
        {
            return $return;
        }

        $segments = explode('/', $cover[1]);
        $name = end($segments);
        if (!empty($name) && 'noimg.gif' != $name)
        {
            // end() takes its argument by reference, so the explode() result
            // must live in a variable first (array_pop(explode(...)) raised
            // "Only variables should be passed by reference").
            $nameParts = explode('.', $name);
            $return = array(
                'url' => self::siteUrl() . $cover[1],
                'fileext' => end($nameParts),
            );
        }

        return $return;
    }
	
	/**
	 * Searches the target site for a book and returns its detail page URL.
	 *
	 * Posts the title (converted to GB18030) to /Book/Search.aspx, then scans
	 * every <div id="CListTitle"> result for one that contains both the
	 * title and the author name and carries a /Book/<id>/Index.aspx link.
	 *
	 * Results that mention title and author but contain no detail link are
	 * now skipped; previously they produced the bogus URL "<site>/".
	 *
	 * @param string $bookName   book title (UTF-8)
	 * @param string $authorName author name (UTF-8)
	 * @return string|boolean absolute detail URL, or false when nothing matched or the request failed
	 */
	public static function searchBook($bookName, $authorName)
	{
		$searchUrl = self::siteUrl() . '/Book/Search.aspx';
		$post = array(
			'SearchClass' => 1,
			'SearchKey' => iconv('utf-8','gb18030',$bookName),
		);

		$handle = curl_init();
		curl_setopt($handle, CURLOPT_URL, $searchUrl);
		curl_setopt($handle, CURLOPT_RETURNTRANSFER, 1);
		curl_setopt($handle, CURLOPT_POST, 1);
		curl_setopt($handle, CURLOPT_POSTFIELDS, $post);
		$output = curl_exec($handle);
		curl_close($handle);

		if (empty($output) || !preg_match_all('/<div id=\"CListTitle\">(.*?)\<\/div>/i', $output, $matchs))
		{
			return false;
		}

		// The unused author/title patterns that interpolated raw user input
		// into a regex were dropped; matching is done with strpos() below.
		$urlPattern = '/Book\/\d+\/Index\.aspx/i';

		foreach ($matchs[1] as $match)
		{
			$match = iconv('gb18030', 'utf-8', $match);
			if (false === $match || false === strpos($match, $bookName) || false === strpos($match, $authorName))
			{
				continue;
			}

			if (preg_match($urlPattern, $match, $url))
			{
				return self::siteUrl() . '/' . $url[0];
			}
		}

		return false;
	}
    
	/**
	 * Returns the URL of the site's book listing page.
	 *
	 * getNewList() appends the page number to this URL to read the update list.
	 *
	 * @return string
	 */
	public static function getListUrl()
	{
		$listPath = '/Book/ShowBooklist.aspx';

		return self::siteUrl() . $listPath;
	}
 	
 	/**
 	 * Returns the regular-expression fragments defined for this site's chapter content.
 	 *
 	 * The entries are bare pattern bodies (no delimiters or modifiers). Most
 	 * of them describe watermark / advertisement text such as site names and
 	 * URLs written with look-alike characters.
 	 * NOTE(review): presumably consumed by the inherited analyzeContent() to
 	 * strip such text from chapters - confirm in Site_Base.
 	 *
 	 * @return array list of pattern strings
 	 */
 	public function getContentRegex()
 	{
 	    return array(
 	        '(\（|\()?电( ){0,}脑( ){0,}阅( ){0,}读(.*?)(сΝ|om|ｃｎ|Сｎ|m|M|М|ＣＮ|c-n|Ｏｍ|ｍ|оМ)(\)|）)?',
            //'(\(|《)?(1|１|⑴)\W+(6|６|⑹)(.*?)(сΝ|om|ｃｎ|Сｎ|m|M|М|ＣＮ|c-n|Ｏｍ|ｍ|оМ)(\)|》)?',
        	'(\()?全文字(.*?)(學網|学网)(\)|）)?',
        	'(<|《)?16(k|K)小(说|說)网(.*?)\.(сΝ|om|ｃｎ|Сｎ|ＣＮ|c-n|M|оМ)(>|》)',
        	'(\()?本书转载(.*?)(сΝ|om|ｃｎ|Сｎ|m|M|М|ＣＮ|c-n|оМ)[\)]?',
        	'１６(.*?)小(说|說)网',
        	'(\()?(小说整理|电脑阅读)(.*?)(сΝ|om|ｃｎ|Сｎ|m|M|М|ＣＮ|c-n|оМ)(\)|）)?',
        	'(\（|\()?电脑看小说(.*?)(сΝ|om|ｃｎ|Сｎ|m|M|М|ＣＮ|c-n|ｍ|оМ)(\)|）)?',
        	'(\（|\()?更\/新\/(最|超)\/快(.*?)(сΝ|om|ｃｎ|Сｎ|m|M|М|ＣＮ|c-n|ｍ|оМ)(\)|）)?',
        	'(\（|\()?请记住我们(.*?)(сΝ|om|ｃｎ|Сｎ|m|M|М|ＣＮ|c-n|ｍ|оМ)(\)|）)?',
        	'(\（|\()?web用户(.*?)(сΝ|om|ｃｎ|Сｎ|m|M|М|ＣＮ|c-n|Ｏｍ|ｍ|оМ)(\)|）)?',
            '1⑹(.*?)(整理|首发)',
        	'[x|X][s|S]?(.*?)学网[\)]?',
        	'本(書|书)(.*?)(學|学)(網|网)',
        	'更\/新\/最\/快',
            '更新最快(.*?)\]',
            '手机轻松(.*?)整理',
            'wap(.*?)\|(k|K)',
            '(\(|\（|\[)www.16kbook.com(\)|\）|\])',
            '(手机访问|手机看小说|ｗ-α-р|ｗｗ`ｗ|ｗ-а-р|ωωω|ｗｗｗ|ｗａｐ|⑴⑹|ω)(.*?)(сΝ|om|ｃｎ|Сｎ|m|M|М|ＣＮ|c-n|Ｏｍ|ｍ|оМ)[\)]?',
            '\[ww(.*?)m\]',
            '本章节由(.*?)书友上传',
            '\*(.*?)吧\*',
            '(\(|（|\*)(w|W|ｗ|ω)(.*?)(\)|）|\*)',
            '╔(.*?)╝',
 	        '\(www(.*?)com\小说网\)',
 	        '\(www(.*?)com\)',
 	        '(\\\|\/|\|\/\\\)(.*?)(\\\|\/|\/\\\|)',
 	        '(\|\/\\\)*p[-|_|~|\s]*a(.*?)(c|ｃ|Ｃ)[-|_|~|\s]*(o|ｏ)[-|_|~|\s]*(m|Ｍ)(\/\\\|)*',
 	        'p[-|_|~|\s]*a(.*?)(c|ｃ|Ｃ)[-|_|~|\s]*(o|ｏ)[-|_|~|\s]*(m|Ｍ)',
 	        '泡(-|_|\*)?书(-|_|\*)?吧首\W发',
 	        '泡(-|_|\*)?书(-|_|\*)?吧',
 	        '(\(|（)ww(.*)c\s*o\s*m(\)|）)',
 	        'www.paoshu8.com',
 	        'paoshu8',
            '(\(|（|\*)(w|W)(.*?)吧(\)|）|\*)'
 	    );
 	}
 	
 	/**
 	 * Returns the regular-expression fragments defined for chapter names.
 	 *
 	 * No pattern is active for this site, so the list is always empty.
 	 * NOTE(review): presumably consumed by the inherited
 	 * analyzeChapterName() - confirm in Site_Base.
 	 *
 	 * @return array empty list
 	 */
 	public function getChaperNameRegex()
 	{
 	    // Patterns that were tried for this site and then disabled; kept for reference:
 	    //'(|(w|ω|W)(.*))?(ㄧ|一|1|１|1|l|⑴|①)(.*)(m|M|Ｍ|М|ｍ|n|ｎ|m|网|網|传)',
 	    //'(www.|ωωω.)*(1|１|１|ㄧ)\s*(6|６|б)(.*)[com|ｃom|om|m]',
 	    //'(１|1|l|①|⑴)\s*(6|６|б)(.*)s',
 	    $patterns = array();

 	    return $patterns;
 	}
 	
 	/**
 	 * Reads one page of the site's book listing.
 	 *
 	 * Fetches getListUrl() with "?page=<n>", converts the page from GBK to
 	 * UTF-8 and collects every /Book/<id>/Index.aspx link whose label is
 	 * wrapped in <font color="#006699">.
 	 *
 	 * @param int $page page number of the listing
 	 * @return array map of absolute detail URL => book title (tags stripped, trimmed)
 	 */
 	public static function getNewList($page = 1)
 	{
 	    $html = iconv('gbk', 'utf-8', self::proxyFetch(self::getListUrl() . '?page=' . $page));

 	    // PREG_SET_ORDER yields one array(full match, url, label) per link.
 	    preg_match_all('/<a href="(\/Book\/\d+\/Index\.aspx)\"><font\s*color=\"#006699\">(.*?)\<\/font>\<\/a>/is', $html, $links, PREG_SET_ORDER);

 	    $books = array();
 	    foreach ($links as $link)
 	    {
 	        $books[self::siteUrl() . $link[1]] = trim(strip_tags($link[2]));
 	    }

 	    return $books;
 	}
    
	/**
	 * Returns the site's category (type) ID for the current book.
	 *
	 * Sources, in order:
	 *  1. the value already stored in $_typeId;
	 *  2. an explicitly set chapter list URL (/Html/Book/<type>/<book>/List.shtm);
	 *  3. the same link inside the detail page, which is downloaded on demand.
	 *
	 * @return int|string|null the ID (int from $_typeId, digit string when parsed here),
	 *                         or null when it cannot be determined
	 */
	private function _getTypeId()
	{
		$typeid = $this->_typeId;
		if (!empty($typeid))
		{
			return $typeid;
		}

		$pattern = '/Html\/Book\/(\d+)\/' . $this->getBookId() . '\/List\.shtm/is';

		if (!is_null($this->_chapterListUrl))
		{
			// _chapterListUrl is set, so getChapterListUrl() returns it
			// directly and does not call back into this method.
			$typeid = preg_match($pattern, $this->getChapterListUrl(), $tmp) ? $tmp[1] : null;
		}

		// A parsed '0' counts as a valid ID even though empty('0') is true.
		if ($typeid !== '0' && empty($typeid))
		{
			if (empty($this->_infoFiles))
			{
				$this->getInfoFile();
			}
			// null (not an undefined offset) when the link is missing.
			$typeid = preg_match($pattern, $this->_infoFiles, $tmp) ? $tmp[1] : null;
		}

		return $typeid;
	}
	
    /**
     * Maps the source site's category name to the local category ID.
     *
     * The category name is the text of the /Book/LN/<n>.aspx link on the
     * cached detail page. Names are compared exactly; anything unknown
     * (including a missing link) maps to 42.
     *
     * @return int local category ID
     */
    public function getType()
    {
        // Local category ID => source category names, in the order of the
        // former if/else chain. '激情生活' used to be listed under both 15
        // and 39; only the first (15) was ever reachable, so it is kept
        // there only.
        $typeMap = array(
            12 => array('网络游戏', '幻想网游', '虚拟网游', '游戏生涯', '游戏异界'),
            27 => array('电子竞技', '游戏竞技', '体育竞技', '网游竟技', '足球运动', '篮球运动'),
            6  => array('都市生活', '白领生涯', '商海沉浮', '都市激战', '现代文学', '官场沉浮', '黑道风云', '都市', '都市娱乐'),
            5  => array('都市异能', '都市重生'),
            3  => array('浪漫言情', '恩怨情仇', '都市言情', '言情小说', '言情'),
            2  => array('西方奇幻', '东方玄幻', '玄幻小说', '转世重生', '玄幻', '奇幻玄幻'),
            28 => array('玄幻魔法', '异术超能', '奇幻魔法', '变身情缘', '玄幻魔幻'),
            25 => array('异世大陆', '异界大陆'),
            11 => array('两晋隋唐', '两宋元明', '架空历史', '三国梦想', '历史穿越', '历史小说', '历史传记', '军事历史', '历史军事', '穿越女尊', '架空', '秦汉三国'),
            1  => array('快意江湖', '传统武侠', '武侠同人', '美文同人', '历史武侠'),
            16 => array('奇幻修真', '现代修真'),
            26 => array('古典仙侠', '仙侠异能', '仙侠奇侠', '武侠仙侠', '浪子异侠', '洪荒封神'),
            8  => array('灵异鬼怪', '推理灵异', '推理侦探', '灵异神怪', '恐怖惊悚'),
            7  => array('科幻世界', '科幻灵异', '科幻冒险', '科幻动漫', '机器时代', '星际战争', '数字生命', '骇客时空', '末世危机'),
            4  => array('千千心结'),
            9  => array('外国经典', '中国名著'),
            10 => array('战争幻想', '战争风云', '军事战争', '现代战争', '抗战烽火'),
            13 => array('青春校园', '热血青春', '菁菁校园'),
            14 => array('纯爱耽美'),
            15 => array('都市情感', '激情生活'),
            39 => array('动漫同人'),
        );

        // A missing category link yields '' (as trim() of the undefined
        // capture did before) and therefore the default ID.
        $found = preg_match('/<a href=\"\/Book\/LN\/\d+\.aspx\">(.*?)\<\/a>/is', $this->_infoFiles, $category);
        $type = $found ? trim($category[1]) : '';

        foreach ($typeMap as $typeId => $names)
        {
            if (in_array($type, $names, true))
            {
                return $typeId;
            }
        }

        return 42;
    }


    /**
     * Returns this site's root URL (the static $siteUrl of this class).
     *
     * @return string
     */
    public static function siteUrl()
    {
        return self::$siteUrl;
    }

    /**
     * Returns this site's display name (the static $siteName of this class).
     *
     * @return string
     */
    public static function siteName()
    {
        return self::$siteName;
    }
}
