<?php
namespace collect\site;

/**
 * Collector for the 8yt novel site (http://www.8yt.org).
 *
 * Scrapes the book detail page, the chapter index and individual chapter
 * pages with regular expressions. Fetching, text clean-up and persistence
 * helpers (getFile(), analyzeContent(), analyzeChapterName(),
 * setChapterArray(), getBookTypeId()) are inherited from Base and are not
 * visible in this file.
 */
class Banyt extends Base
{
    /** @var string Root URL of the target site, without a trailing slash. */
    public static $siteUrl = 'http://www.8yt.org';

    /** @var string Short identifier of the target site. */
    public static $siteName = '8yt';

    /** @var int Lock time; not read in this class (presumably seconds, used by the caller - TODO confirm). */
    public $lockTime = 900;

    /** @var string Raw HTML of the book detail page, filled by getInfoFile(). */
    protected $_infoFiles = '';

    /** @var string|null Cached chapter index URL; null until resolved or set explicitly. */
    protected $_chapterListUrl = null;

    /** @var array Book ids listed as denied; not read in this class (presumably consumed by Base - TODO confirm). */
    protected $_denyBooks = array(9477);

    /**
     * Remembers the numeric book id found in the detail URL.
     *
     * @param string $detailUrl Detail page URL of the form ".../Book/<id>.aspx".
     */
    public function __construct($detailUrl)
    {
        parent::__construct($detailUrl);

        // A URL that does not match yields id 0, the same value intval() of a
        // missing capture produced before, but without an undefined-index notice.
        $this->_bookId = preg_match('/Book\/(\d+)\.aspx/i', $detailUrl, $matches)
            ? intval($matches[1])
            : 0;
    }

    /**
     * Downloads the book detail page (converted from GBK to UTF-8) and keeps
     * it in $_infoFiles for the other getters to parse.
     *
     * @return void
     */
    public function getInfoFile()
    {
        $infoUrl = self::$siteUrl . '/Book/' . $this->_bookId . '.aspx';
        $this->_infoFiles = $this->getFile($infoUrl, 'gbk', 'utf-8');
    }

    /**
     * Returns the book title taken from the first <h1> of the detail page.
     *
     * @return string Trimmed title, or '' when no <h1> is present.
     */
    public function getBookName()
    {
        // The closing-tag slash must be escaped: an unescaped "/" ends the
        // pattern early and PCRE rejects the remainder as unknown modifiers.
        // The capture is lazy so it stops at the first closing tag.
        if (!preg_match('/<h1>(.*?)<\/h1>/is', $this->_infoFiles, $bookname))
        {
            return '';
        }

        return trim($bookname[1]);
    }

    /**
     * Returns the author name from the detail page.
     *
     * @return string Trimmed author name, or '' when the markup is not found.
     */
    public function getAuthor()
    {
        if (!preg_match('/<font color="green">(.*?)<\/font><\/span>/i', $this->_infoFiles, $author))
        {
            return '';
        }

        return trim($author[1]);
    }

    /**
     * Returns the book synopsis from the detail page.
     *
     * @return string Result of analyzeText() on the intro block, or '' when
     *                the intro block is missing.
     */
    public function getBookInfo()
    {
        if (preg_match('/<div class="intro">(.*?)<\/div>/is', $this->_infoFiles, $bookinfo))
        {
            return analyzeText($bookinfo[1]);
        }

        return '';
    }

    /**
     * Returns the writing status of the book.
     *
     * @return int 0 when the page labels the book as serialising ("连载"),
     *             1 in every other case, including when no status is found.
     */
    public function getStatus()
    {
        $found = preg_match('/<font color="orange">(.*?)<\/font>/i', $this->_infoFiles, $status);

        if ($found && $status[1] === '连载')
        {
            return 0;
        }

        return 1;
    }

    /**
     * Returns the URL of the chapter index page, resolving and caching it on
     * first use.
     *
     * @return string
     */
    public function getChapterListUrl()
    {
        if (!is_null($this->_chapterListUrl))
        {
            return $this->_chapterListUrl;
        }

        $path = '';
        if (preg_match('/<div class="l"><a href="(\/Read\/\d+\/\d+\/Index\.shtml)" class="l btn"><span>阅读<\/span><\/a>/i', $this->_infoFiles, $match))
        {
            $path = trim($match[1]);
        }

        // When the "read" link is not found the bare site URL is cached, as
        // the previous implementation did.
        return $this->_chapterListUrl = self::$siteUrl . $path;
    }

    /**
     * Overrides the chapter index URL.
     *
     * @param string $url
     * @return void
     */
    public function setChapterListUrl($url)
    {
        $this->_chapterListUrl = $url;
    }

    /**
     * Fetches the chapter index and returns every chapter link in page order.
     *
     * Chapters are returned as one flat list; no grouping by volume is done
     * ('hasVolume' is always false).
     *
     * @return array array('hasVolume' => false, 'chapters' => list of
     *               array('url' => relative page name, 'title' => link text)).
     */
    public function getChapterList()
    {
        $list = array('hasVolume' => false);
        $indexHtml = $this->getFile($this->getChapterListUrl(), 'gb18030', 'utf-8');

        $chapters = array();
        if (preg_match_all('/<li><a href="(\d+\.shtml)" title=".*?">(.*?)<\/a><\/li>/is', $indexHtml, $links, PREG_SET_ORDER))
        {
            foreach ($links as $link)
            {
                $chapters[] = array('url' => $link[1], 'title' => $link[2]);
            }
        }

        $list['chapters'] = $chapters;
        $this->setChapterArray($list);

        return $list;
    }

    /**
     * Extracts the chapter title from a chapter page.
     *
     * @param string $chapterContent Chapter page HTML.
     * @return string Result of analyzeChapterName() on the title with the
     *                boilerplate words removed.
     */
    public function getChapterName($chapterContent)
    {
        // Lazy capture: a greedy one combined with the "s" flag would run up
        // to the last </div> of the page instead of the end of the title block.
        $rawTitle = preg_match('/<div class="Title">(.*?)<\/div>/is', $chapterContent, $chaptername)
            ? $chaptername[1]
            : '';

        return $this->analyzeChapterName(str_replace(array('正文','作品','分卷阅读'), '', $rawTitle));
    }

    /**
     * Extracts the chapter body from a raw chapter page and cleans it.
     *
     * @param string $chapterContent Chapter page HTML.
     * @return string Cleaned body with trailing "&nbsp;" / "<br>" padding and
     *                "&amp;&amp;&amp;&amp;" runs removed.
     */
    public function filterChapterContent($chapterContent)
    {
        $body = preg_match('/<div id="booktxt" class="booktxt">(.*?)<\/div>/is', $chapterContent, $content)
            ? $content[1]
            : '';

        $result = $this->analyzeContent($body);

        // rtrim()'s second argument is a character mask, not a suffix, so
        // rtrim($s, "&nbsp;") would also eat real trailing letters such as
        // "n", "s" or "p". Strip whole trailing tokens instead.
        $stripped = preg_replace('/(?:&nbsp;|<br\s*\/?>)+\z/i', '', $result);
        if ($stripped !== null)
        {
            $result = $stripped;
        }

        return str_replace('&amp;&amp;&amp;&amp;', '', $result);
    }

    /**
     * Downloads a single chapter page.
     *
     * @param string $contentUrl Absolute chapter URL.
     * @return string Page content after charset handling.
     */
    public function getChapterContent($contentUrl)
    {
        $content = $this->getFile($contentUrl, 'gb18030', 'utf-8');

        $declared = preg_match('/charset=(.*?)(\'|\")/i', $content, $charset)
            ? strtolower($charset[1])
            : '';

        // NOTE(review): the page was already requested as gb18030 -> utf-8,
        // yet a page that declares utf-8 is converted from gbk once more.
        // This is kept exactly as it was; confirm against getFile() whether
        // the second conversion is really intended.
        if (strpos($declared, 'utf-8') !== false)
        {
            $content = iconv('gbk', 'utf-8', $content);
        }

        return $content;
    }

    /**
     * Builds the absolute URL of a chapter from its page name.
     *
     * @param string $chapterHtml Page name as found in the chapter index, e.g. "123.shtml".
     * @return string Chapter index URL with "Index.shtml" replaced by the page name.
     */
    public function getChapterContentUrl($chapterHtml)
    {
        return str_replace('Index.shtml', $chapterHtml, $this->getChapterListUrl());
    }

    /**
     * Tells whether a chapter is delivered as images.
     *
     * @param string $chapterContent
     * @return boolean True when the content references "DownFiles/Book".
     */
    public function isImage($chapterContent)
    {
        return (bool) preg_match('/DownFiles\/Book/is', $chapterContent);
    }

    /**
     * Builds an absolute image URL.
     *
     * @param string $image Site-relative image path.
     * @return string
     */
    public function getImageUrl($image)
    {
        return self::siteUrl() . $image;
    }

    // NOTE: a getCoverUrl() override used to live here but was commented out;
    // this class does not define one.

    /**
     * Searches the target site for a book.
     *
     * NOTE(review): the URL returned has the form ".../Book/<id>/Index.aspx",
     * which the constructor's "Book/<id>.aspx" pattern does not match -
     * confirm how callers use the result.
     *
     * @param string $bookName   Book title (UTF-8).
     * @param string $authorName Author name (UTF-8).
     * @return string|boolean Absolute URL of the first result containing both
     *                        the title and the author, or false when none does.
     */
    public static function searchBook($bookName, $authorName)
    {
        $searchUrl = self::siteUrl() . '/Book/Search.aspx';
        $post = array(
            'SearchClass' => 1,
            'SearchKey' => iconv('utf-8','gb18030',$bookName),
        );

        $handle = curl_init();
        curl_setopt($handle, CURLOPT_URL, $searchUrl);
        curl_setopt($handle, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($handle, CURLOPT_POST, 1);
        curl_setopt($handle, CURLOPT_POSTFIELDS, $post);
        $output = curl_exec($handle);
        curl_close($handle);

        if (empty($output) || !preg_match_all('/<div id=\"CListTitle\">(.*?)\<\/div>/i', $output, $matchs))
        {
            return false;
        }

        $urlPattern = '/Book\/\d+\/Index\.aspx/i';

        foreach ($matchs[1] as $match)
        {
            // The response is GB-encoded; convert each hit before comparing
            // it with the UTF-8 search terms.
            $match = iconv('gb18030', 'utf-8', $match);
            if ($match === false)
            {
                continue;
            }

            if (false !== strpos($match, $bookName) && false !== strpos($match, $authorName))
            {
                // Skip a hit that carries no usable link rather than
                // returning a URL built from a missing match.
                if (preg_match($urlPattern, $match, $url))
                {
                    return self::siteUrl() . '/' . $url[0];
                }
            }
        }

        return false;
    }

    /**
     * Returns the URL of the "recently updated" list, used to monitor updates.
     *
     * @param int $page Page number of the list.
     * @return string
     */
    public static function getListUrl($page)
    {
        return self::$siteUrl . '/Book/ShowBookList.aspx?page=' . $page . '';
    }

    /**
     * Returns the patterns describing site watermarks and advertisements.
     *
     * The list is only returned here; how it is applied is not visible in
     * this file (presumably Base strips each pattern from chapter text -
     * TODO confirm). A few entries appear twice; they are kept because the
     * order and number of applications may matter to the consumer.
     *
     * @return array List of regex fragments without delimiters.
     */
    public function getContentRegex()
    {
        return array(
            'www.8yt.org',
            '全文字超速首发!',
            '八月天',
            '八月天小说网',
            '小说网（）',
            '8yt',
            '\[ww(.*?)m\]',
            '本章节由(.*?)书友上传',
            '╔(.*?)╝',
            '☆(.*)☆',
            '♀(.*)♀',
            '\(看小说(.*?)om\)',
            '更多手打(.*?)地址(:|：)?',
            '推荐阅读(：|:)',
            '最新最快更新的网站(：|:)?',
            '┏(.*?)┛',
            '【叶(.*?)】',
            '【(.*?)书】',
            '【悠(.*?)】',
            '看小说(.*?)子书',
            'xt\.com',
            '#(.*?)#',
            '[x|X][s|S]?(.*?)学网[\)]?',
            '(\()?(小说整理|电脑阅读)(.*?)(сΝ|om|ｃｎ|Сｎ|m|M|М|ＣＮ|c-n|оМ)(\)|）)?',
            '(\（|\()?电脑看小说(.*?)(сΝ|om|ｃｎ|Сｎ|m|M|М|ＣＮ|c-n|ｍ|оМ)(\)|）)?',
            '(\（|\()?更\/新\/(最|超)\/快(.*?)(сΝ|om|ｃｎ|Сｎ|m|M|М|ＣＮ|c-n|ｍ|оМ)(\)|）)?',
            '(\（|\()?请记住我们(.*?)(сΝ|om|ｃｎ|Сｎ|m|M|М|ＣＮ|c-n|ｍ|оМ)(\)|）)?',
            '(\（|\()?web用户(.*?)(сΝ|om|ｃｎ|Сｎ|m|M|М|ＣＮ|c-n|Ｏｍ|ｍ|оМ)(\)|）)?',
            '本(書|书)(.*?)(學|学)(網|网)',
            '更\/新\/最\/快',
            '手机轻松(.*?)整理',
            '\[ww(.*?)m\]',
            '本章节由(.*?)书友上传',
            '七路中文',
            '\/(ω|ｗ|w|W)(.*?)(ｍ|М|m|M|t)\/',
            '(ω|ｗ|w|W)(.*?)(ｍ|М|m|M|t)',
            '(\（|\()(.*?)手打(\)|）)',
            '\*\*(.*?)网(.*?)下载\*\*',
            'xt点com',
            'xt(.*?)子书',
            '小说阅读下载尽在小说网中文网更新超快小说更多',
            '小说网',
        );
    }

    /**
     * Returns the patterns to apply to chapter names.
     *
     * Currently empty: every pattern below is disabled and kept only for
     * reference.
     *
     * @return array
     */
    public function getChaperNameRegex()
    {
        return array(
            //'(|(w|ω|W)(.*))?(ㄧ|一|1|１|1|l|⑴|①)(.*)(m|M|Ｍ|М|ｍ|n|ｎ|m|网|網|传)',
            //'(www.|ωωω.)*(1|１|１|ㄧ)\s*(6|６|б)(.*)[com|ｃom|om|m]',
            //'(１|1|l|①|⑴)\s*(6|６|б)(.*)s',
        );
    }

    /**
     * Returns the books shown on one page of the update list.
     *
     * NOTE(review): getFile() is called here as a plain function, not as the
     * inherited method used elsewhere in this class - confirm such a function
     * exists in the project.
     *
     * @param int $page
     * @return array Map of absolute detail URL => book name; empty when the
     *               page cannot be fetched or the list markup is missing.
     */
    public static function getNewList($page = 1)
    {
        $result = array();
        $url = self::getListUrl($page);
        $content = getFile($url);
        $content = iconv('gbk', 'utf-8',$content);

        if (empty($content))
        {
            return $result;
        }

        // Without the list container there is nothing to parse.
        if (!preg_match('/<ul class="listbox" id="listbox">(.*?)\<\/ul>/is', $content, $matches))
        {
            return $result;
        }

        preg_match_all('/《<a href="(\/Book\/\d+\.aspx)" target="_blank">(.+?)<\/a>》/i', $matches[1], $match);
        foreach ($match[1] as $key => $value)
        {
            $result[self::$siteUrl.$value] = $match[2][$key];
        }

        return $result;
    }

    /**
     * Decides whether chapter content is worth saving.
     *
     * @param string $content Chapter text (UTF-8).
     * @return boolean False when, ignoring whitespace and "&nbsp;", the text
     *                 is empty, shorter than 100 characters, or contains
     *                 the word "decoration"; true otherwise.
     */
    public function isCanSave($content)
    {
        $content = str_replace('&nbsp;', ' ', $content);
        // \s already covers space, tab, newline and carriage return.
        $content = preg_replace('/\s+/', '', $content);

        if (empty($content))
        {
            return false;
        }

        if (mb_strlen($content, 'utf-8') < 100 || preg_match('/decoration/is', $content))
        {
            return false;
        }

        return true;
    }

    /**
     * Maps the category shown on the detail page to a local category.
     *
     * @return int Whatever getBookTypeId() returns for the category label
     *             ('' is passed when no category is found).
     */
    public function getTypeId()
    {
        $type = preg_match('/<i><a href="\/Book\/LC\/\d+\.aspx">(.*?)<\/a><\/i>/i', $this->_infoFiles, $match)
            ? trim($match[1])
            : '';

        return $this->getBookTypeId($type);
    }

    /**
     * Returns the root URL of the target site.
     *
     * @return string
     */
    public static function siteUrl()
    {
        return self::$siteUrl;
    }

    /**
     * Returns the short name of the target site.
     *
     * @return string
     */
    public static function siteName()
    {
        return self::$siteName;
    }
}