<?php
namespace collect\site;

class Xiaoshuomm extends Base
{
    // Base URL of the source site; the literal host also appears in this class's scraping patterns.
    public static $siteUrl = 'http://www.xiaoshuomm.com';
    
    // Display name of the source site.
    public static $siteName = '小说MM';
    
    // NOTE(review): not read anywhere in this class; presumably a flag consumed by the
    // collector framework - confirm its meaning against Base.
    public static $continuance = true;
    
    // Raw content of the book's info page, filled by getInfoFile() and parsed by the getters.
    protected $_infoFiles = '';
    
    // Numeric directory segment that precedes the book id in the site's article URLs.
    protected $_typeId = '';
    
    // Cached chapter-list URL; null until computed by getChapterListUrl() or set explicitly.
    protected $_chapterListUrl = null;
    
    // NOTE(review): unused in this class; presumably a list of books to skip - confirm against Base.
    protected $_denyBooks = array();
    
    /**
     * Creates a collector for a single book and derives the book id from its detail URL.
     *
     * @param string $detailUrl Detail page URL; the id is taken from its "id=<digits>" part.
     */
    public function __construct($detailUrl)
    {
        parent::__construct($detailUrl);

        // A URL without "id=<digits>" leaves the id as null instead of raising an
        // undefined-offset notice on $match[1].
        $this->_bookId = preg_match('/id=(\d+)/i', (string) $detailUrl, $match) ? $match[1] : null;
    }
    
	/**
     * Fetches the book's info page and caches it for the other getters.
     *
     * The page is requested with a GBK to UTF-8 conversion and stored in $_infoFiles.
     * The numeric directory segment of the "点击阅读" (read) link is stored in $_typeId.
     *
     * @return void
     */
    public function getInfoFile()
    {
        $infoUrl = static::siteUrl() . '/modules/article/articleinfo.php?id=' . $this->getBookId();
        $this->_infoFiles = $this->getFile($infoUrl, 'gbk', 'utf-8');

        $pattern = '/<li><a\s*href=\"http:\/\/www\.xiaoshuomm\.com\/files\/article\/html\/(\d+)\/\d+\/\"\s*class=\"btnlink\">点击阅读\<\/a>\<\/li>/is';

        // When the read link is absent (failed download, changed layout) keep the type id
        // empty instead of reading an undefined $match[1].
        $this->_typeId = preg_match($pattern, (string) $this->_infoFiles, $match) ? $match[1] : '';
    }
 	
	/**
     * Extracts the book title from the cached info page.
     *
     * @return string Title with the "TXT下载" suffix removed, or '' when the title element is not found.
     */
    public function getBookName()
    {
        $pattern = '/<span\s*style=\"font-size:16px;\s*font-weight:\s*bold;\s*line-height:\s*150%\">(.*?)\<\/span>/i';
        if (!preg_match($pattern, (string) $this->_infoFiles, $book))
        {
            // No title element: return an empty name rather than touching an undefined offset.
            return '';
        }

        return str_replace('TXT下载', '', trim($book[1]));
    }
 	
	/**
     * Extracts the author name from the cached info page.
     *
     * @return string Trimmed author cell content, or '网络转载' when the cell is missing or empty.
     */
    public function getAuthor()
    {
        $pattern = '/<td width=\"25%\">作&nbsp;&nbsp;&nbsp;\s*者：\s*(.*?)\<\/td>/i';
        preg_match($pattern, $this->_infoFiles, $found);

        return empty($found[1]) ? '网络转载' : trim($found[1]);
    }
	
	/**
     * Extracts the book synopsis from the cached info page.
     *
     * Takes the text between the first "内容简介：" marker and the following </p>
     * and passes it through analyzeText().
     *
     * @return string Result of analyzeText(); it receives '' when no synopsis is found.
     */
    public function getBookInfo()
    {
        $summary = '';
        // Only the first occurrence is used, so a single preg_match is enough; the guard
        // avoids reading an undefined offset when the marker is missing.
        if (preg_match('/内容简介：(.*?)\<\/p>/is', (string) $this->_infoFiles, $info))
        {
            $summary = $info[1];
        }

        return analyzeText($summary);
    }
	
	/**
     * Reports whether the book is marked as finished on the info page.
     *
     * @return int 1 when the "文章状态" cell contains the character '完', otherwise 0.
     */
    public function getStatus()
    {
        preg_match('/<td>文章状态：\s*(.*?)\<\/td>/i', $this->_infoFiles, $status);
        if (empty($status[0]))
        {
            return 0;
        }

        return (false === strpos(trim($status[0]), '完')) ? 0 : 1;
    }
	
	/**
     * Returns the URL of the book's chapter index, building and caching it on first use.
     *
     * @return string
     */
    public function getChapterListUrl()
    {
        if (is_null($this->_chapterListUrl))
        {
            $this->_chapterListUrl = sprintf(
                '%s/files/article/html/%s/%s/',
                self::siteUrl(),
                $this->_getTypeId(),
                $this->getBookId()
            );
        }

        return $this->_chapterListUrl;
    }

   	/**
     * Overrides the chapter-list URL that getChapterListUrl() would otherwise compute.
     *
     * @param string $url Chapter index URL to use from now on.
     * @return void
     */
    public function setChapterListUrl($url)
    {
        $this->_chapterListUrl = $url;
    }
    
	/**
     * Downloads the chapter index and returns every chapter link found on it.
     *
     * The result is also handed to setChapterArray().
     *
     * @return array array('hasVolume' => false, 'chapters' => list of array('url' => ..., 'title' => ...));
     *               'chapters' is an empty array when the page has no chapter links.
     */
    public function getChapterList()
    {
        $baseUrl = $this->getChapterListUrl();
        $html = $this->getFile($baseUrl, 'gb18030', 'utf-8');

        // Start from an empty list so a page without links yields array() instead of an
        // undefined variable.
        $chapters = array();
        if (preg_match_all('/<a\s*href=\"(\d+\.html)\">(.*?)\<\/a>/is', (string) $html, $matches))
        {
            foreach ($matches[1] as $key => $file)
            {
                // Chapter links on the index are relative to the index URL.
                $chapters[] = array('url' => $baseUrl . $file, 'title' => $matches[2][$key]);
            }
        }

        $list = array('hasVolume' => false, 'chapters' => $chapters);
        $this->setChapterArray($list);

        return $list;
    }
	
	/**
     * Extracts the chapter title from a raw chapter page.
     *
     * Uses the content of the first <p> element, strips the "作品正文" / "正文" markers
     * and passes the rest through analyzeChapterName().
     *
     * @param string $chapterContent Raw chapter page.
     * @return string
     */
    public function getChapterName($chapterContent)
    {
        // Fall back to an empty title when the page has no <p> element instead of reading
        // an undefined offset.
        $title = preg_match('/<p>(.*?)\<\/p>/i', (string) $chapterContent, $chaptername) ? $chaptername[1] : '';

        return $this->analyzeChapterName(str_replace(array('作品正文', '正文'), '', $title));
    }
	
	/**
     * Cuts the chapter text out of a raw chapter page.
     *
     * Takes everything between <div id="content"> and the next </p>, turns "<p>" into
     * "<br/>", removes "</br>", and passes the result through analyzeContent().
     *
     * @param string $chapterContent Raw chapter page.
     * @return string
     */
    public function filterChapterContent($chapterContent)
    {
        // An unmatched page is treated as empty text rather than reading an undefined offset.
        $body = preg_match('/<div id=\"content\">(.*?)\<\/p>/is', (string) $chapterContent, $match) ? $match[1] : '';
        $content = str_replace(array("<p>", '</br>'), array('<br/>', ''), $body);

        return $this->analyzeContent($content);
    }
	
	/**
     * Downloads one chapter page, requesting a GB18030 to UTF-8 conversion.
     *
     * @param string $contentUrl Chapter page URL.
     * @return string Whatever getFile() returns for that URL.
     */
    public function getChapterContent($contentUrl)
    {
        return $this->getFile($contentUrl, 'gb18030', 'utf-8');
    }
	
	/**
     * Returns the site-specific regex fragments (without delimiters) describing
     * watermark / advertisement text found in chapter bodies.
     *
     * NOTE(review): presumably consumed by the content-cleaning code in Base - confirm.
     *
     * @return array List of pattern strings in priority order.
     */
    public function getContentRegex()
    {
        $patterns = array();

        $patterns[] = '\([ww](.+?)(小说|文字|更新).+?\)';
        $patterns[] = '手机阅读(.+?)(}|com|coｍ)';
        $patterns[] = '小(\^)说(\*|\^)?(mm|MM|mM|Mm)(.+?)(com|更新|coｍ|o m|更新!)';
        $patterns[] = '(ｗwｗ|wｗw|wwW|Www)(.+?)(提供|供)';
        $patterns[] = '免费文字更新';
        $patterns[] = '精彩小说';
        $patterns[] = '(ｗwｗ|wｗw|wwW|Www)\.xiaoshuomm\.(com|更新|coｍ|o m|更新!)';
        $patterns[] = '(ｗwｗ|wｗw|wwW|Www|www)\.(.+?)\.(com|更新|coｍ|o m|更新!)';
        $patterns[] = '小说\*(MM|mm|Mm|mM)书友整理提供';
        $patterns[] = '\[小说(.*?)(奉献|提供|更新)\]';
        $patterns[] = '文字更新最快';
        $patterns[] = '(\(|（)未完待续(.*?)网阅读！?(\)|）)';

        return $patterns;
    }
	
	/**
     * Returns the full URL of a chapter page.
     *
     * getChapterList() already stores absolute chapter URLs, so the value is returned unchanged.
     *
     * @param string $chapterHtml Chapter URL as stored in the chapter list.
     * @return string
     */
    public function getChapterContentUrl($chapterHtml)
    {
        return $chapterHtml;
    }

    /**
     * Detects image references inside a chapter page.
     *
     * @param string $chapterContent Raw chapter page.
     * @return array|boolean Every "/DownFiles...gif|jpg" or "/attachment...gif|jpg" path found,
     *                       or false when there is none.
     */
    public function isImage($chapterContent)
    {
        $found = preg_match_all('/\/(DownFiles|attachment)(.*?)\.(gif|jpg)/i', $chapterContent, $images);

        return $found ? $images[0] : false;
    }

    /**
     * Turns a site-relative image path into an absolute URL.
     *
     * @param string $image Image path as found by isImage().
     * @return string Site URL followed by $image.
     */
    public function getImageUrl($image)
    {
        $base = self::siteUrl();

        return $base . $image;
    }

    /**
     * Locates the book's cover image on the cached info page.
     *
     * Looks for "/files/article/image/<typeId>/<bookId>/<bookId>s.<ext>" with ext one of
     * gif, jpg, png or bmp.
     *
     * @return array|boolean array('url' => absolute cover URL, 'fileext' => extension as written
     *                       on the page), or false when no cover path is present.
     */
    public function getCoverUrl()
    {
        // Quote the ids so they are always matched literally inside the pattern.
        $typeId = preg_quote((string) $this->_typeId, '/');
        $bookId = preg_quote((string) $this->getBookId(), '/');
        $pattern = '/\/files\/article\/image\/' . $typeId . '\/' . $bookId . '\/' . $bookId . 's\.(gif|jpg|png|bmp)/i';

        if (!preg_match($pattern, (string) $this->_infoFiles, $cover))
        {
            return false;
        }

        // The extension is the capture group itself; this replaces array_pop(explode(...)),
        // which passed a temporary by reference. The file name always has the form
        // "<bookId>s.<ext>", so the former "nocover.jpg" comparison could never be true.
        return array('url' => self::siteUrl() . $cover[0], 'fileext' => $cover[1]);
    }
	
	/**
     * Searches the source site for a book by title and author.
     *
     * Posts the GBK-encoded title to the site's search script. A "Location:" header pointing
     * at a book page is treated as a single hit; otherwise each result table row is converted
     * to UTF-8 and the first row containing both the title and the author is used.
     *
     * @param string $bookName   Book title (UTF-8).
     * @param string $authorName Author name (UTF-8).
     * @return string|boolean Book page URL, or false when the request fails or nothing matches.
     */
    public static function searchBook($bookName, $authorName)
    {
        $siteUrl = self::siteUrl();
        $post = array(
            'searchtype' => 'articlename',
            'searchkey' => iconv('utf-8', 'gbk', $bookName),
        );

        $handle = curl_init();
        if (false === $handle)
        {
            return false;
        }
        curl_setopt($handle, CURLOPT_URL, $siteUrl . '/modules/article/search.php');
        curl_setopt($handle, CURLOPT_RETURNTRANSFER, 1);
        // Headers are kept in the output and redirects are not followed, so a redirect
        // to the single result can be detected below.
        curl_setopt($handle, CURLOPT_HEADER, 1);
        curl_setopt($handle, CURLOPT_POST, 1);
        curl_setopt($handle, CURLOPT_POSTFIELDS, $post);
        $output = curl_exec($handle);
        curl_close($handle);

        if (empty($output))
        {
            return false;
        }

        // Single result: the site answers with a redirect to the book page.
        $redirectPattern = '/Location:\s(' . preg_quote($siteUrl, '/') . '\/mminfo\/\d+\/\d+\.html)/i';
        if (preg_match($redirectPattern, $output, $redirect))
        {
            return $redirect[1];
        }

        // Several results: scan the table rows.
        if (!preg_match_all('/<tr>(.*?)\<\/tr>/is', $output, $rows))
        {
            return false;
        }

        foreach ($rows[1] as $row)
        {
            $row = iconv('gbk', 'utf-8', $row);
            if (false === $row)
            {
                // Row could not be converted; it cannot be compared against UTF-8 input.
                continue;
            }

            if (false === strpos($row, $bookName) || false === strpos($row, $authorName))
            {
                continue;
            }

            // Only accept a row that actually carries a book link; otherwise keep looking
            // instead of returning a URL built from a missing match.
            if (preg_match('/mminfo\/\d+\/\d+\.html/i', $row, $url))
            {
                return $siteUrl . '/' . $url[0];
            }
        }

        return false;
    }
    
	/**
     * Returns the URL of the site's "last updated" ranking, used to monitor updates.
     *
     * @return string
     */
    public static function getListUrl()
    {
        $path = '/modules/article/toplist.php?sort=lastupdate';

        return self::siteUrl() . $path;
    }
 	
 	/**
     * Returns the most recently updated books from the site's WAP ranking.
     *
     * Pages 1 and 2 of the "lastupdate" list are fetched and scanned for book links.
     *
     * NOTE(review): $page is accepted but not used; the two pages are fixed. Left as is
     * because callers may rely on always receiving the first two pages.
     * NOTE(review): getFile() is called as a plain function here, not as a method, and
     * without charset arguments - presumably a project helper; confirm the returned text
     * is UTF-8, since the pattern below contains UTF-8 literals.
     *
     * @param int $page Currently ignored.
     * @return array Map of info-page URL => book title.
     */
    public static function getNewList($page = 1)
    {
        // Both pages are built from the configured site URL; the second one used to
        // hard-code the host.
        $listUrls = array();
        foreach (array(1, 2) as $listPage)
        {
            $listUrls[] = static::siteUrl() . '/wap/article/toplist.php?class=0&sort=lastupdate&page=' . $listPage;
        }

        $content = '';
        foreach ($listUrls as $listUrl)
        {
            $content .= getFile($listUrl);
        }

        preg_match_all('/<a\s*href=\"articleinfo\.php\?id=(\d+)\">《(.*?)》\<\/a>/is', $content, $matches);

        $result = array();
        foreach ($matches[1] as $key => $bookId)
        {
            $infoUrl = static::siteUrl() . '/modules/article/articleinfo.php?id=' . $bookId;
            $result[$infoUrl] = $matches[2][$key];
        }

        return $result;
    }
 	
	/**
     * Resolves the numeric directory segment that precedes the book id in article URLs.
     *
     * Sources, in order: the value already stored in $_typeId, an explicitly set
     * chapter-list URL, and the info page (downloaded on demand).
     *
     * @return string The id ('0' is a valid value), or '' when it cannot be determined.
     */
    private function _getTypeId()
    {
        $typeId = $this->_typeId;
        // '0' is a legitimate directory name but counts as empty() in PHP.
        if ('0' === $typeId || !empty($typeId))
        {
            return $typeId;
        }

        $pattern = '/files\/article\/html\/(\d+)\/' . preg_quote((string) $this->getBookId(), '/') . '\//is';

        // getChapterListUrl() is only consulted when a URL is already stored; otherwise it
        // would call back into this method.
        if (!is_null($this->_chapterListUrl)
            && preg_match($pattern, (string) $this->getChapterListUrl(), $tmp))
        {
            return $tmp[1];
        }

        if (empty($this->_infoFiles))
        {
            // getInfoFile() returns nothing and fills $_infoFiles / $_typeId itself, so its
            // result must not be assigned (doing so wiped the page content).
            $this->getInfoFile();
            if ('0' === $this->_typeId || !empty($this->_typeId))
            {
                return $this->_typeId;
            }
        }

        if (preg_match($pattern, (string) $this->_infoFiles, $tmp))
        {
            return $tmp[1];
        }

        return '';
    }
	
    /**
     * Maps the source site's category label to the local category id.
     *
     * The label is read from the "类别" cell of the cached info page. Groups are checked in
     * the order listed and the first group containing the label wins.
     *
     * @return int Local category id; 42 when the label is missing or unknown.
     */
    public function getType()
    {
        // A missing category cell is treated as an empty label (falls through to 42)
        // instead of reading an undefined offset.
        $pattern = '/<td width=\"25%\">类&nbsp;&nbsp;&nbsp;\s*别：\s*(.*?)\<\/td>/i';
        $type = preg_match($pattern, (string) $this->_infoFiles, $category) ? trim($category[1]) : '';

        // local category id => source labels. Labels that were repeated in the former
        // if/else chain are listed once; '都市言情' stays in group 6 only, because that
        // group was always tested before group 3.
        $typeMap = array(
            1 => array('传统武侠', '武侠小说', '武侠修真', '国术武技', '谐趣武侠', '历史武侠', '武侠'),
            12 => array('网络游戏', '幻想网游', '网游动漫', '虚拟网游', '游戏生涯', '游戏异界', '游戏小说', '游戏'),
            27 => array('电子竞技', '游戏竞技', '体育竞技', '网游竟技', '篮球运动', '竞技小说', '足球运动', '弈林生涯', '竞技'),
            6 => array(
                '都市生活', '白领生涯', '都市言情', '商海沉浮', '都市激战', '官场沉浮', '娱乐明星', '宦海风云',
                '商战风云', '现实百态', '乡土小说', '职场励志', '都市小说', '合租情缘', '谍战特工', '都市',
            ),
            5 => array('都市异能', '都市重生', '异能奇术'),
            3 => array('浪漫言情', '恩怨情仇', '言情小说', '古代言情', '现代言情', '快意江湖'),
            2 => array(
                '西方奇幻', '玄幻魔法', '东方玄幻', '玄幻魔幻', '玄幻小说', '转世重生', '异界征战',
                '异类兽族', '领主贵族', '远古神话', '王朝争霸', '玄幻', '奇幻',
            ),
            28 => array('异术超能', '奇幻魔法', '变身情缘', '魔法校园'),
            25 => array('异世大陆', '异界大陆'),
            11 => array(
                '架空历史', '三国梦想', '历史军事', '历史穿越', '历史小说', '历史传记', '军事历史',
                '秦汉三国', '上古先秦', '两宋元明', '两晋隋唐', '清史民国', '外国历史', '历史',
            ),
            16 => array('奇幻修真', '现代修真'),
            26 => array('古典仙侠', '洪荒封神', '武侠仙侠', '浪子异侠', '仙侠小说', '仙侠'),
            8 => array(
                '灵异小说', '推理灵异', '恐怖灵异', '侦探推理', '推理侦探', '灵异神怪',
                '恐怖惊悚', '灵异奇谈', '灵异恐怖', '悬疑探险', '灵异',
            ),
            7 => array(
                '科幻世界', '科幻灵异', '科幻冒险', '科幻动漫', '机器时代', '星际战争', '数字生命', '骇客时空',
                '进化变异', '未来世界', '科幻小说', '时空穿梭', '古武机甲', '末世危机', '科幻',
            ),
            4 => array('千千心结'),
            9 => array('外国经典', '中国名著', '现代文学', '戏剧名著'),
            10 => array('战争幻想', '战争风云', '军事战争', '现代战争', '特种军旅', '抗战烽火', '军旅生活', '军事小说', '军事'),
            13 => array('青春校园', '热血青春', '菁菁校园', '青春'),
            14 => array('纯爱耽美'),
            15 => array('都市情感', '激情生活', '爱情婚姻'),
            39 => array('动漫同人', '武侠同人', '小说同人', '影视同人', '同人小说', '授权同人', '同人'),
            40 => array('影视剧本', '动漫剧本', '剧本'),
            41 => array(
                '美文小说', '诗词散曲', '美文散文', '散文诗词', '童话寓言', '休闲美文', '文集评论',
                '短篇小说', '杂文笔札', '游记杂文', '经典', '名著', '美文',
            ),
        );

        foreach ($typeMap as $localId => $labels)
        {
            if (in_array($type, $labels, true))
            {
                return $localId;
            }
        }

        // Anything unrecognised goes to the catch-all category.
        return 42;
    }
}