<?php

/*
 * Copyright (C) xgcms.com
 */

// Abort immediately unless FRAMEWORK_PATH has already been defined by whatever loaded this file.
!defined('FRAMEWORK_PATH') && exit('FRAMEWORK_PATH not defined.');

// Load the parent controller class that run_control (below) extends.
include APP_PATH.'control/caiji_common_control.class.php';

class run_control extends caiji_common_control {
	
	/**
	 * Build the controller on top of the shared collector base controller.
	 *
	 * @param array $conf Application configuration, forwarded by reference to the parent constructor.
	 */
	public function __construct(&$conf) {
		parent::__construct($conf);

		// Flag the "caiji" entry as active (presumably consumed by the templates for menu highlighting).
		$this->_checked['caiji'] = 'active';
	}
	
	/**
	 * Render the wrapper page that hosts a collection run.
	 *
	 * Reads the requested action (`a`), node id and paging/throttling options from
	 * the query string, builds the URL of the worker action and hands it to the
	 * `run_collect.htm` template together with the node configuration.
	 *
	 * GET parameters: a (colurl|colcontent|coltest), nid, page_start, page_end,
	 * xc, jg, nowpage. All numeric parameters are cast to int because they are
	 * interpolated into a URL that the template outputs.
	 */
	public function on_go(){
		// Whitelist of worker actions and the label shown for each of them.
		$labels = array(
			'colurl' => '采集网址',
			'colcontent' => '采集内容',
			'coltest' => '采集测试',
		);

		$action = core::gpc('a','G');
		if (!is_string($action) || !isset($labels[$action])) {
			$this->message('未知操作！');
			// Only reached if message() does not terminate the request.
			return;
		}

		$nid = intval(core::gpc('nid','G'));

		// Cast to int: these values end up verbatim inside the generated URL.
		$page_start = intval(core::gpc('page_start','G'));
		$page_end = intval(core::gpc('page_end','G'));
		$xc = intval(core::gpc('xc','G'));
		$jg = intval(core::gpc('jg','G'));

		$nowpage = intval(core::gpc('nowpage','G'));
		empty($nowpage) && $nowpage = $page_start;

		$node = $this->kv->get('node_'.$nid);
		$this->caiji_node->format($node);

		$url = "?run-$action-nid-$nid-page_start-$page_start-page_end-$page_end-xc-$xc-jg-$jg-nowpage-$nowpage.htm";

		$label = $labels[$action];
		$node_name = isset($node['name']) ? $node['name'] : '';

		$this->_title[] = '【'.$node_name.'】 '.$label;

		$this->view->assign('url',$url);
		$this->view->assign('node',$node);
		$this->view->assign('a',$label);
		$this->view->display('run_collect.htm');
	}
	/**
	 * Collect article URLs from the list pages of a node, one batch per request.
	 *
	 * Each request processes the list page(s) starting at `nowpage`, stores every
	 * newly discovered article URL (plus its thumbnail, if any) and then emits a
	 * meta-refresh to itself for the next batch. When `nowpage` passes `page_end`
	 * (or the node is flagged `is_content_page`) the browser is redirected to the
	 * content collection step instead.
	 *
	 * GET parameters: nid, page_start, page_end, xc (pages fetched in parallel),
	 * jg (refresh delay in seconds), nowpage. All are cast to int because they are
	 * echoed into HTML and URLs.
	 */
	public function on_colurl(){
		$nid = intval(core::gpc('nid','G'));

		$page_start = intval(core::gpc('page_start','G'));
		$page_end = intval(core::gpc('page_end','G'));
		$xc = intval(core::gpc('xc','G'));
		$jg = intval(core::gpc('jg','G'));

		$nowpage = intval(core::gpc('nowpage','G'));
		$nowpage < 1 && $nowpage = $page_start;

		$conf = $this->kv->get('node_'.$nid);
		$is_content_page = isset($conf['is_content_page']) && $conf['is_content_page'] == 1;

		// Page increment. Forced to at least 1 so a missing/zero par_num can
		// neither divide by zero below nor stall the page counter.
		$step = isset($conf['par_num']) ? intval($conf['par_num']) : 0;
		$step < 1 && $step = 1;

		if ($is_content_page) {
			$this->colurl_type2($page_start,$page_end,$conf);
		}

		if ($nowpage > $page_end || $is_content_page) {
			$next = '?run-go-a-colcontent-nid-' . $nid . '-page-1-xc-' . $xc . '-jg-'.$jg.'.htm';
			echo '网址采集完成，即将采集内容...';
			echo '<script language="JavaScript">
				  setTimeout(function() {
						if (window.top!=window.self){parent.location.href="'.$next.'";}else{window.location.href="'.$next.'";}
				  }, 3000);
				  </script>';
			exit;
		}
		$conf['pagesize_start'] = $nowpage;

		// Last page of this batch when fetching in parallel: xc further steps, capped at page_end.
		$batch_end = min($nowpage + $xc * $step, $page_end);

		// NOTE(review): the second argument looks like the highest page number to generate
		// (see on_coltest) - confirm. It is never lower than the original "$nowpage+30".
		$urls_list = $this->caiji_collect->url_list($conf, max($nowpage + 30, $batch_end));

		$url = '';
		$urls = array();
		if ($xc > 1) {
			// Parallel mode: fetch several list pages at once.
			$curl = array();
			for ($i = $nowpage; $i <= $batch_end; $i += $step) {
				if (!isset($urls_list[$i])) continue;
				$url = $curl[$i] = $urls_list[$i];
			}
			$nowpage = min($nowpage + $step * $xc, $page_end);

			$htmls = misc::multi_fetch_url($curl);

			foreach ((array)$htmls as $k => $html) {
				// Resolve links against the page they came from; fall back to the last
				// requested URL if the fetcher did not preserve the request keys.
				$page_url = isset($curl[$k]) ? $curl[$k] : $url;
				$html = $this->caiji_collect->get_charset($html,$conf);
				foreach ((array)$this->caiji_collect->get_url_lists($page_url,$html,$conf) as $found) {
					$urls[] = $found;
				}
			}
		} else {
			// Sequential mode: one list page per request.
			$url = isset($urls_list[$nowpage]) ? $urls_list[$nowpage] : '';
			$html = $this->caiji_collect->geturlcont($url);
			$html = $this->caiji_collect->get_charset($html,$conf);
			$urls = $this->caiji_collect->get_url_lists($url,$html,$conf);
		}

		// Progress figures for the progress template.
		$current = max(1, ($nowpage - $page_start) / $step);
		$total = ($page_end - $page_start + 1) / $step;
		$percent = $total > 0 ? min(100, round($current / $total * 100)) : 100;
		$percent .= '%';

		$node_name = isset($conf['name']) ? $conf['name'] : '';
		$this->_title[] = '【'.$node_name.'】 采集网址 '.$percent;
		$this->view->assign('current',$current);
		$this->view->assign('total',$total);
		$this->view->assign('percent',$percent);
		$this->view->assign('node',$conf);
		$this->view->display('run_progress.htm');

		echo "正在采集:<span style=\"color:#0000FF;\">" . htmlspecialchars($url, ENT_QUOTES, 'UTF-8') . '</span><br />';

		foreach ((array)$urls as $v) {
			// Entries without a URL cannot be de-duplicated or fetched later.
			if (empty($v['url'])) continue;

			// Scraped data is untrusted: escape before echoing.
			$link = htmlspecialchars($v['url'], ENT_QUOTES, 'UTF-8');

			$url_key = array('md5' => md5($v['url']), 'nid' => $nid);
			if ($this->caiji_urls->get_one($url_key)) {
				echo $link . '<font color=red>网址重复</font><br />';
				continue;
			}

			// When the source exposes an article id, skip articles this node already has.
			if (isset($v['aid'])) {
				$already_stored = false;
				foreach ((array)$this->caiji_content->get_list(array('aid' => $v['aid'])) as $av) {
					if ($av['nid'] == $nid) {
						$already_stored = true;
						break;
					}
				}
				if ($already_stored) {
					echo $link . '<font color=red>文章已存在</font><br />';
					continue;
				}
			}

			$img = isset($v['img']) ? $v['img'] : null;
			$title = isset($v['title']) ? $v['title'] : '';

			$contentid = $this->caiji_content->create(array(
				'aid' => isset($v['aid']) ? $v['aid'] : 0,
				'url' => $v['url'],
				'img' => $img,
				'title' => htmlspecialchars($title),
				'nid' => $nid,
				'status' => 0,
				'addtime' => $_SERVER['time'],
			));
			$this->caiji_urls->create($url_key);

			// Register the list thumbnail once per distinct image URL.
			if (!empty($img)) {
				$img_md5 = md5($img);
				$known = $this->caiji_images->get_one(array('md5' => $img_md5));
				if (empty($known)) {
					$this->caiji_images->create(array(
						'cid' => 0,
						'tid' => $contentid,
						'nid' => $nid,
						'image' => $img,
						'create_time' => $_SERVER['time'],
						'isthumb' => 1,
						'md5' => $img_md5,
					));
				}
			}

			echo $link . '<font color=green>成功采集</font><br />';
		}

		$nowpage = $nowpage + $step;

		// Persist the resume position on the node and refresh its cache.
		$newconf = $this->caiji_node->read($nid);
		$newconf['lastdate'] = $_SERVER['time'];
		$newconf['pagesize_start'] = $nowpage;
		$newconf['pagesize_end'] = $page_end;

		$this->caiji_node->update($newconf);
		$this->cache_update_node($nid);

		$next = "?run-colurl-nid-$nid-page_start-$page_start-page_end-$page_end-xc-$xc-jg-$jg-nowpage-$nowpage.htm";
		$msg = "正在采集" . $url;

		// The message is emitted in two contexts: HTML text and a JavaScript string literal.
		$msg_html = htmlspecialchars($msg, ENT_QUOTES, 'UTF-8');
		$msg_js = json_encode($msg);
		$msg_js === false && $msg_js = '""';

		echo '<html>
				  <head>
				  <meta http-equiv="Content-Language" content="zh-CN">
				  <meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8">
				  <meta http-equiv="refresh" content="'.$jg.';url=' . $next . '">
				  <title>' . $msg_html . '</title>
				  </head>
				  <body>
				  <a href="' . $next . '">点击采集下一页</a>
				 <script>window.parent.document.getElementById("current_page").value='.$msg_js.';</script>
				  </body>
				  </html>';
		exit;
	}
	/**
	 * Generate content URLs directly from the node's URL pattern.
	 *
	 * Used when the node is flagged `is_content_page`: every page number in
	 * [$page_start, $page_end] is substituted for `[page]` in `$conf['urlpage']`
	 * and stored as a pending content row, unless the URL is already known for
	 * this node.
	 *
	 * @param int   $page_start First page number (inclusive).
	 * @param int   $page_end   Last page number (inclusive).
	 * @param array $conf       Node configuration; reads `nid`, `par_num` and `urlpage`.
	 */
	private function colurl_type2($page_start,$page_end,$conf){
		$nid = $conf['nid'];

		// Page numbers are substituted into URLs and echoed, so force them to integers.
		$page_start = intval($page_start);
		$page_end = intval($page_end);

		// A zero or missing step would never advance the loop below.
		$step = isset($conf['par_num']) ? intval($conf['par_num']) : 0;
		$step < 1 && $step = 1;

		for ($i = $page_start; $i <= $page_end; $i += $step) {
			$url = str_replace('[page]', $i, $conf['urlpage']);
			$link = htmlspecialchars($url, ENT_QUOTES, 'UTF-8');

			// Exact match on the hash, the same lookup on_colurl() performs for this table.
			$url_key = array('md5' => md5($url), 'nid' => $nid);
			if ($this->caiji_urls->get_one($url_key)) {
				echo $link . '<font color=red>网址重复</font><br />';
				continue;
			}

			$this->caiji_content->create(array(
				'aid' => 0,
				'url' => $url,
				'img' => 0,
				'title' => 0,
				'nid' => $nid,
				'status' => 0,
				'addtime' => $_SERVER['time'],
			));
			$this->caiji_urls->create($url_key);

			echo $link . '<font color=green>成功生成网址</font><br />';
		}
	}
	
	/**
	 * Collect article content for the pending (status 0) rows of a node.
	 *
	 * Each request fetches up to `xc` pending rows, extracts the configured fields,
	 * registers the images found in them, queues any additional content pages and
	 * then emits a meta-refresh to itself. Once nothing is pending (or `num` has
	 * passed `count`) the browser is sent on to on_colcontent_spage().
	 *
	 * Status values written here: 1 = collected, 4 = failed, 5 = waiting for its
	 * extra pages to be collected.
	 *
	 * GET parameters: nid, page, xc (batch size), jg (refresh delay in seconds),
	 * count (total rows, computed on the first request), num (rows collected so far).
	 */
	public function on_colcontent(){
		$nid = intval(core::gpc('nid','G'));
		$page = intval(misc::page());
		$count = intval(core::gpc('count','G'));
		$num = intval(core::gpc('num','G'));

		$conf = $this->kv->get('node_'.$nid);

		$c = $this->caiji_content;

		$map = array('nid' => $nid, 'status' => 0);

		// intval() is defensive: both values are echoed into URLs/HTML below.
		$pagesize = intval(misc::page('xc'));
		$jg = intval(misc::page('jg'));

		empty($count) && $count = $c->get_count($map);

		$list = $c->get_list($map,0,$pagesize);

		$current = max(1,$num);
		if (empty($list) || $current > $count) {
			// Nothing left to do here: continue with the queued extra pages.
			$conurl = '?run-colcontent_spage-nid-'.$conf['nid'].'-jg-'.$jg.'-xc-'.$pagesize.'.htm';
			echo '<html>
				  <head>
				  <meta http-equiv="Content-Language" content="zh-CN">
				  <meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8">
				  <meta http-equiv="refresh" content="'.$jg.';url=' . $conurl . '">
				  <title>内容采集完成,即将采集分页...</title>
				  </head>
				  <body>
				  <a href="' . $conurl . '">内容采集完成,即将采集分页...</a>
				  </body>
				  </html>';
			exit;
		}

		$percent = round($current/$count*100);
		$percent = min(100,$percent);
		$percent .= '%';

		$this->_title[] = '【'.$conf['name'].'】 采集内容 '.$percent;
		$this->view->assign('current',$current);
		$this->view->assign('total',$count);
		$this->view->assign('percent',$percent);
		$this->view->assign('node',$conf);
		$this->view->display('run_progress.htm');

		// Matches <img> tags and captures the src value. The former /e (eval) modifier
		// was removed in PHP 7, so a callback is used instead; the callback also passes
		// $conf as a real array rather than the interpolated string "Array".
		$img_pattern = '/<img[^>]*src=[\'"]?([^>\'"\s]*)[\'"]?[^>]*>/i';
		$self = $this; // captured explicitly so the closures also work on PHP 5.3

		foreach ($list as $v) {
			if (empty($v['url'])) {
				$v['status'] = 4;
				$this->caiji_content->update($v);
				continue;
			}

			$link = htmlspecialchars($v['url'], ENT_QUOTES, 'UTF-8');

			$html = '';
			try {
				$html = $this->caiji_collect->geturlcont($v['url']);
			} catch (Exception $e) {
				$html = '';
			}

			$check = '';
			if (!empty($html)) {
				$html = $this->caiji_collect->get_charset($html,$conf);
				$check = $this->caiji_collect->check_content($html,$conf);
			}

			// A row that cannot be fetched or validated is marked failed. Leaving it at
			// status 0 would make every refresh pick it up again and never finish.
			if (empty($html) || empty($check)) {
				$v['status'] = 4;
				$this->caiji_content->update($v);
				echo $link.' <font color="red">采集失败</font> <br />';
				continue;
			}

			$html = $this->caiji_collect->get_content($html, $conf, $v['url']);

			$tid = $v['id'];
			$turl = $v['url'];
			$localize = function ($m) use ($self, $nid, $tid, $turl, $conf) {
				return $self->local_img($m[0], $m[1], $nid, $tid, $turl, $conf);
			};

			$title = isset($html['title']) ? $html['title'] : '';

			$data = array(
				'id' => $v['id'],
				'nid' => $conf['nid'],
				'title' => htmlspecialchars($title),
				'addtime' => $_SERVER['time'],
				'status' => 1,
				'data' => '',
			);

			foreach ((array)$html as $k => $v2) {
				if ($k === 'title') continue;

				// These fields are stored under their own key on the row.
				if (in_array($k, array('content_pages','content','reply'), true)) {
					if ($k !== 'content_pages') {
						$replaced = preg_replace_callback($img_pattern, $localize, $v2);
						$replaced !== null && $v2 = $replaced;
					}
					$data[$k] = $v2;
					continue;
				}

				// Every other field is serialised into the generic "data" value.
				$data['data'] .= $k . '[field]' . $v2 . '[_xgcms_]';
			}

			$replaced = preg_replace_callback($img_pattern, $localize, $data['data']);
			$replaced !== null && $data['data'] = $replaced;

			$data = array_merge($v,$data);

			// Queue the article's additional pages for on_colcontent_spage().
			if (!empty($data['content_pages'])) {
				$pagesurl = explode('[|]',$data['content_pages']);
				foreach ($pagesurl as $k => $pu) {
					$this->caiji_content_pages->create(array(
						'content_id' => $v['id'],
						'aid' => isset($v['aid']) ? $v['aid'] : 0,
						'url' => $pu,
						'img' => '',
						'title' => htmlspecialchars($v['title']),
						'nid' => $v['nid'],
						'status' => 0,
						'page' => $k+1,
						'addtime' => $_SERVER['time'],
					));
				}
				$data['status'] = 5; // waiting for its extra pages
				$data['totalpages'] = count($pagesurl);
			} else {
				$data['status'] = 1;
			}

			$c->update($data);
			$num++;
			echo $link . "<br /><span style=\"color:green\">成功采集《" . htmlspecialchars($title) . "》</span><br />";
		}

		$newconf = $this->caiji_node->read($nid);
		$newconf['lastdate'] = $_SERVER['time'];
		$this->caiji_node->update($newconf);
		$this->cache_update_node($nid);

		$next = '?run-colcontent-nid-' . $nid . '-page-' . ($page + 1) . '-xc-' . $pagesize . '-jg-'.$jg.'-count-'.$count.'-num-'.$num.'.htm';

		$msg = "正在采集第" . $page . "页";

		echo '<html>
			  <head>
			  <meta http-equiv="Content-Language" content="zh-CN">
			  <meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8">
			  <meta http-equiv="refresh" content="'.$jg.';url=' . $next . '">
			  <title>' . $msg . '</title>
			  </head>
			  <body>
			  <a href="' . $next . '">点击采集下一页</a>
			  </body>
			  </html>';
		exit;
	}
	/**
	 * Collect the queued additional pages of multi-page articles.
	 *
	 * Each request takes up to `xc` pending page rows of the node, fetches them and
	 * appends their content (and reply, when present) to the owning article,
	 * separated by `[_page_]`. A processed page row is deleted and the article's
	 * `totalpages` counter decremented; when it drops below 1 the article is marked
	 * collected (status 1). The request then refreshes itself until the queue is
	 * empty, at which point the browser is sent to the content index.
	 *
	 * GET parameters: nid, page, xc (batch size), jg (refresh delay in seconds), num.
	 */
	public function on_colcontent_spage(){
		$nid = intval(core::gpc('nid','G'));
		$num = intval(core::gpc('num','G'));

		// intval() is defensive: these values are echoed into URLs/HTML below.
		$page = intval(misc::page());
		$size = intval(misc::page('xc'));
		$jg = intval(misc::page('jg'));

		$conf = $this->kv->get('node_'.$nid);

		$map = array('nid' => $nid, 'status' => 0);

		$list = $this->caiji_content_pages->get_list($map,0,$size);

		if (empty($list)) {
			$conurl = '?content-index-nid-'.$conf['nid'].'.htm';
			echo '内容分页采集完成...<script language="JavaScript">
			setTimeout(function() {
		 			if (window.top!=window.self){parent.location.href="'. $conurl.'";}else{window.location.href="'.$conurl.'";}
}, 3000);
</script>';exit;
		}

		// Matches <img> tags and captures the src value. The former /e (eval) modifier
		// was removed in PHP 7, so a callback is used instead; the callback also passes
		// $conf as a real array rather than the interpolated string "Array".
		$img_pattern = '/<img[^>]*src=[\'"]?([^>\'"\s]*)[\'"]?[^>]*>/i';
		$self = $this; // captured explicitly so the closure also works on PHP 5.3

		foreach ($list as $v) {
			$url = $v['url'];
			$link = htmlspecialchars($url, ENT_QUOTES, 'UTF-8');

			$rs = $this->caiji_content->read($v['content_id']);
			if (!$rs) {
				// Orphaned page row: its article no longer exists.
				$this->caiji_content_pages->delete($v['id']);
				continue;
			}

			$html = '';
			try {
				$html = $this->caiji_collect->geturlcont($url);
			} catch (Exception $e) {
				$html = '';
			}

			$check = '';
			if (!empty($html)) {
				$html = $this->caiji_collect->get_charset($html,$conf);
				$check = $this->caiji_collect->check_content($html,$conf);
			}

			// A page that cannot be fetched or validated is dropped from the queue;
			// otherwise every refresh would pick it up again and never finish. The
			// article is finalised here too when this was its last outstanding page.
			if (empty($html) || empty($check)) {
				echo $link.' <font color="red">采集失败</font> <br />';
				$this->caiji_content_pages->delete($v['id']);
				$rs['totalpages']--;
				if ($rs['totalpages'] < 1) $rs['status'] = 1;
				$this->caiji_content->update($rs);
				continue;
			}

			$html = $this->caiji_collect->get_content($html,$conf,$url);

			$tid = $v['content_id'];
			$localize = function ($m) use ($self, $nid, $tid, $url, $conf) {
				return $self->local_img($m[0], $m[1], $nid, $tid, $url, $conf);
			};

			$content = isset($html['content']) ? $html['content'] : '';
			$replaced = preg_replace_callback($img_pattern, $localize, $content);
			$replaced !== null && $content = $replaced;
			$rs['content'] .= '[_page_]' . $content;

			if (isset($html['reply'])) {
				$reply = $html['reply'];
				$replaced = preg_replace_callback($img_pattern, $localize, $reply);
				$replaced !== null && $reply = $replaced;
				$rs['reply'] .= '[_page_]' . $reply;
			}

			$rs['totalpages']--;
			if ($rs['totalpages'] < 1) $rs['status'] = 1;
			$this->caiji_content->update($rs);
			$this->caiji_content_pages->delete($v['id']);

			// The title is scraped content, so escape it before echoing.
			$title = isset($html['title']) ? htmlspecialchars($html['title']) : '';
			echo $link . "成功采集《" . $title . "》第".intval($v['page'])."页<br />";
		}

		$page++;
		$next = '?run-colcontent_spage-nid-'.$nid.'-page-'.$page.'-xc-'.$size.'-jg-'.$jg.'-num-'.$num.'.htm';

		$msg = "正在采集第" . $page . "页";

		$newconf = $this->caiji_node->read($nid);
		$newconf['lastdate'] = $_SERVER['time'];
		$this->caiji_node->update($newconf);
		$this->cache_update_node($nid);

		echo '<html>
<head>
<meta http-equiv="Content-Language" content="zh-CN">
<meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8">
<meta http-equiv="refresh" content="'.$jg.';url=' . $next . '">
<title>' . $msg . '</title>
</head>
<body>
<a href="' . $next . '">点击采集下一页</a>
</body>
</html>';

	}
	/**
	 * Dry-run a node's rules and print what they extract, without storing anything.
	 *
	 * Fetches the first list page that returns content, prints the article URLs
	 * and titles found on it, then fetches the last of those articles and dumps
	 * the extracted fields into a textarea.
	 *
	 * GET parameters: nid.
	 */
	public function on_coltest(){
		$nid = intval(core::gpc('nid','G'));
		$conf = $this->kv->get('node_'.$nid);

		$urls_list = $this->caiji_collect->url_list($conf,$conf['pagesize_start']+5);

		// Initialised up front so an empty URL list does not leave them undefined.
		$html = '';
		$url2 = '';
		foreach ((array)$urls_list as $list_url) {
			$html = $this->caiji_collect->geturlcont($list_url);
			$html = $this->caiji_collect->get_charset($html,$conf);
			$url2 = $list_url;
			if (!empty($html)) break;
		}

		$urls = $this->caiji_collect->get_url_lists($url2,$html,$conf);
		if (empty($urls)) {
			$this->message('没有采集到网址列表，请检查规则是否正确！');
			// Only reached if message() does not terminate the request.
			return;
		}
		echo '<font style="color:red;font-weight:bold">采集到的网址：</font><br />';

		// The last URL of the list is the one used for the article test below.
		$url = '';
		foreach ($urls as $v) {
			$url = $v['url'];
			$title = isset($v['title']) ? $v['title'] : '';
			// Scraped values are untrusted: escape before echoing.
			echo htmlspecialchars($url, ENT_QUOTES, 'UTF-8').'《'.htmlspecialchars($title)."》<br />";
		}

		$html = $this->caiji_collect->geturlcont($url);
		$html = $this->caiji_collect->get_charset($html,$conf);
		$html = $this->caiji_collect->get_content($html,$conf,$url);
		echo '<br /><font style="color:red;font-weight:bold">文章采集测试:</font>'.htmlspecialchars($url, ENT_QUOTES, 'UTF-8')."<br /><br />";
		if (empty($html)) die('没有采集到文章内容，请检查规则是否正确！');

		// Escape the dump so a "</textarea>" inside scraped content cannot break out of
		// the textarea. The browser decodes the entities again for display.
		$dump = print_r($html, true);
		$safe = htmlspecialchars($dump, ENT_QUOTES, 'UTF-8');
		if ($safe === '' && $dump !== '') {
			// htmlspecialchars() returns '' for invalid UTF-8; fall back to a byte-wise escape.
			$safe = str_replace(array('&', '<', '>'), array('&amp;', '&lt;', '&gt;'), $dump);
		}

		echo '<textarea style="width:100%;height:500px;">';
		echo $safe;
		echo '</textarea>';
	}
	
	/**
	 * Make the src of one <img> tag absolute and register the image for the node.
	 *
	 * @param string $old    The complete matched <img> tag.
	 * @param string $img    The src value captured from that tag.
	 * @param mixed  $nid    Node id stored with the image record.
	 * @param mixed  $tid    Content id stored with the image record.
	 * @param string $url    URL of the page the tag was found on.
	 * @param mixed  $config Node configuration, passed through to download_img().
	 * @param int    $thumb  Value stored in the record's `isthumb` field.
	 * @return string The tag with its src rewritten, or $old unchanged when
	 *                either $old or $img is empty.
	 */
	public function local_img($old, $img,$nid,$tid,$url,$config,$thumb=0) {
		if (empty($old) || empty($img)) {
			return $old;
		}

		$resolved = self::download_img($old, $img, $url, $config);
		$old = $resolved['old'];
		$img = $resolved['img'];

		// Only create a record when no image with the same URL hash exists yet.
		$known = $this->caiji_images->get_one(array('md5' => md5($img)));
		if (empty($known)) {
			$record = array(
				'nid' => $nid,
				'tid' => $tid,
				'image' => $img,
				'create_time' => $_SERVER['time'],
				'isthumb' => $thumb,
				'md5' => md5($img),
			);
			$this->caiji_images->create($record);
		}

		return $old;
	}
	/**
	 * Convert an image address to an absolute URL, in preparation for downloading.
	 *
	 * @param string $old    The complete matched <img> tag.
	 * @param string $out    The image address found in that tag.
	 * @param string $url    URL of the page the tag was found on.
	 * @param mixed  $config Node configuration, passed through to url_check().
	 * @return array `old` => the tag with the address replaced, `img` => the resolved
	 *               address. Both are returned unchanged when either input is empty
	 *               or the address already contains "://".
	 */
	protected static function download_img($old, $out,$url,$config) {
		$result = array('old' => $old, 'img' => $out);

		$is_relative = strpos($out, '://') === false;
		if (empty($old) || empty($out) || !$is_relative) {
			return $result;
		}

		$absolute = self::url_check($out, $url, $config);
		$result['img'] = $absolute;
		$result['old'] = str_replace($out, $absolute, $old);

		return $result;
	}
	/**
	 * Resolve a possibly relative URL against the page it was found on.
	 *
	 * - URLs containing "://" and scheme-only URIs (data:, javascript:, ...) are
	 *   returned unchanged.
	 * - Protocol-relative URLs ("//host/path") receive the base URL's scheme.
	 * - Root-relative URLs ("/path") are prefixed with scheme, host and port.
	 * - Anything else is appended to `$config['page_base']` when that is set,
	 *   otherwise to the directory of the base URL.
	 *
	 * "./" and "../" segments are not normalised.
	 *
	 * @param string $url     URL to resolve.
	 * @param string $baseurl URL of the page $url was found on.
	 * @param mixed  $config  Node configuration; only `page_base` is read, and only
	 *                        when $config is an array.
	 * @return string The resolved URL.
	 */
	protected static function url_check($url, $baseurl, $config) {
		$url = (string)$url;

		// Already absolute, or a scheme-only URI that must not be treated as a path.
		if (strpos($url, '://') !== false || preg_match('/^[a-z][a-z0-9+.\-]*:/i', $url)) {
			return $url;
		}

		$urlinfo = parse_url($baseurl);
		is_array($urlinfo) || $urlinfo = array();

		$scheme = isset($urlinfo['scheme']) ? $urlinfo['scheme'] : 'http';
		$host = isset($urlinfo['host']) ? $urlinfo['host'] : '';
		// Keep a non-default port, otherwise resolved URLs point at the wrong server.
		isset($urlinfo['port']) && $host .= ':'.$urlinfo['port'];
		$path = isset($urlinfo['path']) ? $urlinfo['path'] : '/';

		$root = $scheme.'://'.$host;

		// Protocol-relative URL: only the scheme is missing.
		if (strncmp($url, '//', 2) === 0) {
			return $scheme.':'.$url;
		}

		// Root-relative URL.
		if (substr($url, 0, 1) === '/') {
			return $root.$url;
		}

		// An explicitly configured base takes precedence. The is_array() check also
		// covers callers that pass a non-array value here.
		if (is_array($config) && !empty($config['page_base'])) {
			return $config['page_base'].$url;
		}

		// Directory of the base URL. rtrim() avoids "host//" for root-level pages,
		// where dirname() itself returns "/".
		$dir = substr($path, -1) === '/' ? substr($path, 0, -1) : str_replace('\\', '/', dirname($path));

		return $root.rtrim($dir, '/').'/'.$url;
	}
	
}


?>