<?php
namespace collect\tools;

/**
 * 用来重新抓取一本书的章节，将保留原来章节的地址，多删少增
 */
require_once dirname(dirname(__FILE__)) . '/init.php';
use collect;

// Command-line arguments:
//   $argv[1]  ID of the book to re-collect (required, digits only)
//   $argv[2]  name of the target site to collect from (optional, defaults to 'Erwuba')
// Both are read defensively so that omitting them does not raise undefined-index notices.
$bookId = isset($argv[1]) ? trim($argv[1]) : '';
$targetSite = !empty($argv[2]) ? $argv[2] : 'Erwuba';

// The ID is later concatenated into SQL, so anything that is not a plain
// positive integer is rejected here together with the "missing" case.
if (empty($bookId) || !ctype_digit($bookId))
{
    echo "用法：php tools/ReCollectBook.php 小说ID 目标站点，其中目标站点为选\n";
    exit(0);
}

$book = getBookInfo($bookId);
if (empty($book))
{
    echo "小说不存在\n";
    // Nothing can be searched for without the book's name and author, so stop here.
    exit(0);
}

// Search the target site for the book by name and author.
$remoteUrl = \collect\Spider::search($targetSite, $book['bookname'], $book['author']);
if (false === $remoteUrl)
{
    echo "站点:".$targetSite." 搜不到小说《{$book['bookname']}》\n";
    exit(0);
}

// Build the site-specific spider and fetch the remote chapter list.
// NOTE(review): getInfoFile() is called for its side effects only; presumably it
// loads the book's info page before the chapter list is read — confirm.
$spiderObj = \collect\Spider::factory($remoteUrl, $targetSite);
$spiderObj->getInfoFile();
$remoteChapterList = $spiderObj->clearVolume($spiderObj->getChapterList());

// Treat a missing or malformed 'chapters' entry as "no chapters" instead of
// letting count() fail on a non-array value.
$remoteChapters = (isset($remoteChapterList['chapters']) && is_array($remoteChapterList['chapters']))
    ? $remoteChapterList['chapters']
    : array();
$remoteChapterCount = count($remoteChapters);

// Bail out before touching the database when there is nothing to sync.
if (!$remoteChapterCount)
{
    echo "没有获取到远程章节，退出\n";
    exit();
}

// Local chapters of the book ordered by their sort value; the position in this
// list is what gets matched against the position in the remote list.
$chapters = $db->select(array('chapterid'))->from(\Chapter::table())->where('bookid='.(int) $bookId)->asc('sort')->find();
$chapterCount = is_array($chapters) ? count($chapters) : 0;

// Walk the remote chapter list by position. A local chapter at the same
// position is overwritten in place (keeping its chapter ID); positions past the
// end of the local list are created as new chapters.
// $i is intentionally left at $remoteChapterCount after the loop because the
// surplus-chapter cleanup that follows reads it.
for ($i = 0; $i < $remoteChapterCount; $i++)
{
    $remoteChapter = $remoteChapters[$i];
    $remoteChapterName = $remoteChapter['title'];
    $remoteChapterUrl = $spiderObj->getChapterContentUrl($remoteChapter['url']);
    $isExisting = isset($chapters[$i]);

    if ($isExisting)
    {
        echo '覆盖 ' . $remoteChapterName . '...';
    }

    // Fetch the chapter body, retrying up to three times on an empty result.
    $chapterContent = null;
    for ($attempt = 0; $attempt < 3 && empty($chapterContent); $attempt++)
    {
        $chapterContent = $spiderObj->getChapterContent($remoteChapterUrl);
    }
    $content = $spiderObj->filterChapterContent($chapterContent);

    // Image chapters are not collected for now; skip this chapter and keep going.
    // (Previously the "new chapter" path terminated the whole script here even
    // though the message says "skip", which dropped every remaining chapter.)
    if ($spiderObj->isImage($content))
    {
        echo "发现图片章节，跳过\n";
        continue;
    }

    // Content rejected by the spider's save check is skipped as well.
    if (!$spiderObj->isCanSave($content))
    {
        echo "内容不符合，跳过\n";
        continue;
    }

    $data = array(
        'chaptername' => $spiderObj->analyzeChapterName($remoteChapterName),
        'content'     => $content,
    );

    if ($isExisting)
    {
        // Overwrite in place and renumber so that sort equals the 1-based position.
        $data['sort'] = $i + 1;
        \Chapter::single()->update($chapters[$i]['chapterid'], $bookId, $data);
        echo "完成\n";
    }
    else
    {
        // No local chapter at this position: append a new one.
        \Chapter::single()->create($bookId, $data);
    }
}

if ($remoteChapterCount < $chapterCount)
{
    // The local book has more chapters than the remote one. Overwritten chapters
    // were renumbered to sort = 1..$remoteChapterCount, so every chapter with a
    // larger sort value is surplus and gets deleted.
    echo '    有多余章节，开始删除...';

    // Both values are cast to int because they are concatenated into raw SQL.
    // $remoteChapterCount equals the loop counter's final value and is used
    // directly rather than relying on the leaked loop variable.
    $sql = 'SELECT chapterid FROM '.\Chapter::table()
         .' WHERE bookid = '.(int) $bookId
         .' AND sort > '.(int) $remoteChapterCount;
    $deleteChapterList = $db->setQueryString($sql)->find();
    if (!empty($deleteChapterList))
    {
        foreach ($deleteChapterList as $delete)
        {
            \Chapter::single()->delete($delete['chapterid'], $bookId);
        }
    }
    echo "完成\n";
}
