<?php

/*
 * Copyright (C) xgcms.com
 */

// This file is only meant to be loaded by the framework; stop if it is hit directly.
if (!defined('FRAMEWORK_PATH')) {
	exit('FRAMEWORK_PATH not defined.');
}

//include POST_PATH.'core/control/common_control.class.php';
include_once APP_PATH.'lib/string.func.php';

// Keep the job running after the client disconnects, with no execution time limit.
ignore_user_abort(true);
set_time_limit(0);
class cron_control extends  base_control {
	
	/**
	 * Initialises the controller and caches two request-time helpers in $_SERVER:
	 * `time_fmt` (formatted current time) and `time_today` (timestamp of local midnight).
	 *
	 * @param mixed $conf Configuration passed through to the parent constructor by reference.
	 */
	function __construct(&$conf) {
		parent::__construct($conf);

		// All date formatting below is done in the Asia/Shanghai time zone.
		date_default_timezone_set('Asia/Shanghai');

		$now = $_SERVER['time'];

		// "year-month-day hour:minute" of the current request time.
		$_SERVER['time_fmt'] = date('Y-n-d H:i', $now);

		// Timestamp of 00:00:00 on the same calendar day.
		$_SERVER['time_today'] = mktime(
			0,
			0,
			0,
			(int)date('n', $now),
			(int)date('j', $now),
			(int)date('Y', $now)
		);
	}
	/**
	 * Cron entry point: advances one collection task by up to three steps.
	 *
	 * The task is selected by the `id` request parameter. On each of the three passes
	 * the per-task runtime state is loaded (or built from the task's rule groups), the
	 * first rule that is not done and has not been picked in the last 35 seconds is
	 * handed to col_run(), and a line describing the result is added to the task's run
	 * log, which is sorted with misc::arrlist_multisort() and trimmed to 100 entries.
	 * When no rule can be picked the request ends with exit('no nid'); if every rule is
	 * done, the task's `lasttime` is stamped and the runtime state is deleted first.
	 *
	 * The request also ends silently when the task does not exist or when fewer than
	 * `septime` seconds have passed since `lasttime`.
	 *
	 * @return void
	 */
	public function on_index(){

		$id=intval(core::gpc('id'));

		$task=$this->caiji_auto->read($id);
		if(!$task) exit;
		if($_SERVER['time']-$task['lasttime']<$task['septime']) exit;

		// Log text for each stage name returned by col_run().
		$stage_labels=array(
			'url'=>' 采集网址',
			'content'=>' 采集内容',
			'content_pages'=>' 采集内容分页',
			'post'=>' 发布内容',
		);

		for($i=1;$i<=3;$i++){

			$task_run=$this->runtime->get('caiji_task_'.$id);

			if(!$task_run){
				// First pass for this run: collect every rule of the task's groups.
				$groups=core::json_decode($task['groups']);
				$node_list=$nids=array();
				foreach((array)$groups as $g){
					// Cast guards the array union against a non-array result.
					$node_list+=(array)$this->caiji_node->get_list(array('gid'=>$g));
				}
				foreach($node_list as $n){
					$nids[$n['nid']]=array('nid'=>$n['nid'],'time'=>0,'done'=>0);
				}
				$task_run=array('id'=>$id,'groups'=>$groups,'nids'=>$nids);
			}

			// Count finished rules and claim the first one that is idle or stale (>35s).
			$nid=$all_done=0;
			foreach($task_run['nids'] as &$v){
				if($v['done']) $all_done++;
				if($nid) continue;
				if(!$v['done'] && (!$v['time'] || ($_SERVER['time']-$v['time'])>35)){
					$nid=$v['nid'];
					$v['time']=$_SERVER['time'];
				}
			}
			// Break the reference so the last element is not aliased when the array
			// is copied into col_run() or rebuilt on the next pass.
			unset($v);

			$this->runtime->set('caiji_task_'.$id,$task_run);
			$logarr=(array)core::json_decode($task['runlog']);

			if(!$nid){
				if($all_done==count($task_run['nids'])){
					$task['lasttime']=$_SERVER['time'];
					$this->caiji_auto->update($task);
					$this->runtime->delete('caiji_task_'.$id);
				}
				exit('no nid');
			}

			$rs=$this->col_run($nid,$task_run);

			// Strict checks: col_run() also returns the integers 0 and 1, and a loose
			// 0=='url' is true before PHP 8, which mislabelled those results.
			$l='规则ID：'.$nid;
			if($rs===2){
				$l.=' 采集完成';
			}elseif(is_string($rs) && isset($stage_labels[$rs])){
				$l.=$stage_labels[$rs];
			}else{
				$l.=' running';
			}

			$t=date('m-d H:i:s',$_SERVER['time']);
			$logarr[]=array('time'=>$_SERVER['time'],'time_fmt'=>$t,'log'=>$l);
			misc::arrlist_multisort($logarr, 'time', 0);
			$logarr=array_slice($logarr,0,100);
			$task['runlog']=core::json_encode($logarr);
			$this->caiji_auto->update($task);

			sleep(10);
		}
	}
	
	/**
	 * Runs one stage of a collection rule and records the progress in the runtime store.
	 *
	 * Stages are tried in order: URL list, content, content sub-pages, publishing.
	 * The first stage whose status flag is still 0 is executed; when it reports 'ok'
	 * its flag is set and `nowpage` is reset to 1, otherwise `nowpage` takes the value
	 * the stage returned. When publishing reports 'ok' the rule's runtime state is
	 * deleted, its `lastdate` is stamped, and the rule is marked done in the task state.
	 *
	 * The `lock` flag is set only when the state is first created and is cleared at
	 * the end of every call that runs a stage.
	 *
	 * @param int   $nid      Rule (node) id.
	 * @param array $task_run Task runtime state with keys `id` and `nids`.
	 * @return int|string 0 if no state and no configuration exist, 1 if the state is
	 *                    locked, 2 if the rule finished, otherwise the stage name
	 *                    ('url', 'content', 'content_pages' or 'post').
	 */
	public function col_run($nid,$task_run){

		$conf=$this->kv->get('node_'.$nid);
		if(!$conf) $conf=$this->caiji_node->cache_update_node($nid);

		$cron=$this->runtime->get('node_cron_'.$nid);

		if(!$cron){
			if(!$conf) return 0;
			$cron=array(
				'nid'=>$nid,
				'status_url'=>0,
				'status_content'=>0,
				'status_content_pages'=>0,
				'status_post'=>0,
				'page_start'=>$conf['pagesize_start'],
				'page_end'=>$conf['pagesize_end'],
				'nowpage'=>$conf['pagesize_start'],
				'lock'=>1,
			);
			$this->runtime->set('node_cron_'.$nid,$cron);
		}elseif($cron['lock']){
			return 1;
		}

		$r=null;

		if(!$cron['status_url']){
			// Up to two list pages per call. The page pointer is advanced between the
			// two attempts; previously the second attempt re-read the same page.
			for($attempt=0;$attempt<2;$attempt++){
				$rs=$this->col_urls($cron,$conf);
				// Strict: a numeric page value must never compare equal to 'ok'.
				if($rs==='ok'){
					$cron['status_url']=1;
					$cron['nowpage']=1;
					break;
				}
				$cron['nowpage']=$rs;
			}
			$r='url';
		}elseif(!$cron['status_content']){
			$rs=$this->col_content($cron,$conf);
			if($rs==='ok'){
				$cron['status_content']=1;
				$cron['nowpage']=1;
			}else{
				$cron['nowpage']=$rs;
			}
			$r='content';
		}elseif(!$cron['status_content_pages']){
			$rs=$this->col_content_pages($cron,$conf);
			if($rs==='ok'){
				$cron['status_content_pages']=1;
				$cron['nowpage']=1;
			}else{
				$cron['nowpage']=$rs;
			}
			$r='content_pages';
		}elseif(!$cron['status_post']){
			$rs=$this->post($cron,$conf);
			if($rs==='ok'){
				// Rule finished: drop its state and stamp the rule record.
				$this->runtime->delete('node_cron_'.$nid);
				$newconf=$this->caiji_node->read($nid);
				$newconf['lastdate']=$_SERVER['time'];
				$this->caiji_node->update($newconf);
				$this->caiji_node->cache_update_node($nid);

				$task_run['nids'][$nid]['done']=1;
				$this->runtime->set('caiji_task_'.$task_run['id'],$task_run);
				return 2;
			}
			$cron['nowpage']=$rs;
			$r='post';
		}

		$cron['lock']=0;
		$this->runtime->set('node_cron_'.$nid,$cron);
		return $r;
	}
	/**
	 * Collects the article URLs of one list page and queues the new ones.
	 *
	 * When `$conf['is_content_page']` is 1 the list pages themselves are the content
	 * pages: they are all queued through colurl_type2() and the stage is finished.
	 * Otherwise the list page numbered `nowpage` is fetched, and every extracted URL
	 * that is not yet known for this rule gets a row in caiji_content (status 0), a
	 * row in caiji_urls (md5 of the URL) and, if it has a thumbnail that is not yet
	 * registered, a row in caiji_images.
	 *
	 * @param array $cron Rule runtime state (`nid`, `page_start`, `page_end`, `nowpage`).
	 * @param array $conf Rule configuration.
	 * @return string|int 'ok' when the stage is finished, otherwise the next page number.
	 */
	public function col_urls($cron,$conf){
		$nid=$cron['nid'];
		$page_start=$cron['page_start'];
		$page_end=$cron['page_end'];
		$nowpage=$cron['nowpage'];

		if($nowpage<1) $nowpage=$page_start;

		if($conf['is_content_page']==1){
			$this->colurl_type2($page_start,$page_end,$conf);
			return 'ok';
		}

		if($nowpage>$page_end){
			// Every list page has been visited.
			return 'ok';
		}

		// A step below 1 would keep the page pointer in place forever.
		$step=max(1,(int)$conf['par_num']);

		$conf['pagesize_start']=$nowpage;
		$urls_list=$this->caiji_collect->url_list($conf,$nowpage+30);

		$urls=array();
		if(isset($urls_list[$nowpage])){
			$url=$urls_list[$nowpage];
			$html=$this->caiji_collect->geturlcont($url);
			$html=$this->caiji_collect->get_charset($html,$conf);
			$urls=$this->caiji_collect->get_url_lists($url,$html,$conf);
		}

		foreach((array)$urls as $v){
			if(empty($v['url'])) continue;

			// Skip URLs already recorded for this rule.
			$md5url=md5($v['url']);
			if($this->caiji_urls->get_one(array('md5'=>$md5url,'nid'=>$nid))){
				continue;
			}

			// Skip articles whose source id is already stored for this rule.
			if(isset($v['aid'])){
				$known=false;
				foreach((array)$this->caiji_content->get_list(array('aid'=>$v['aid'])) as $av){
					if($av['nid']==$nid){
						$known=true;
						break;
					}
				}
				if($known) continue;
			}

			$img=isset($v['img'])?$v['img']:'';
			$title=isset($v['title'])?$v['title']:'';

			$contentid=$this->caiji_content->create(array(
				'aid'=>isset($v['aid'])?$v['aid']:0,
				'url'=>$v['url'],
				'img'=>$img,
				'title'=>htmlspecialchars($title),
				'nid'=>$nid,
				'status'=>0,
				'addtime'=>$_SERVER['time'],
			));
			$this->caiji_urls->create(array('md5'=>$md5url,'nid'=>$nid));

			if(!empty($img)){
				$img_md5=md5($img);
				$known_img=$this->caiji_images->get_one(array('md5'=>$img_md5));
				if(empty($known_img)){
					$this->caiji_images->create(array(
						'cid'=>0,
						'tid'=>$contentid,
						'nid'=>$nid,
						'image'=>$img,
						'create_time'=>$_SERVER['time'],
						'isthumb'=>1,
						'md5'=>$img_md5,
					));
				}
			}
		}

		return $nowpage+$step;
	}
	/**
	 * Queues every page of a paginated source as a content URL.
	 *
	 * For each page number from $page_start to $page_end (stepping by
	 * `$conf['par_num']`), `[page]` in `$conf['urlpage']` is replaced by the number.
	 * URLs not yet recorded for the rule get a caiji_content row (status 0) and a
	 * caiji_urls row holding the md5 of the URL.
	 *
	 * @param int   $page_start First page number.
	 * @param int   $page_end   Last page number (inclusive).
	 * @param array $conf       Rule configuration (`nid`, `par_num`, `urlpage`).
	 * @return void
	 */
	private function colurl_type2($page_start,$page_end,$conf){
		$nid=$conf['nid'];

		// A step of 0 or less would make the loop below run forever.
		$step=max(1,(int)$conf['par_num']);

		for($i=$page_start;$i<=$page_end;$i+=$step){
			$url=str_replace('[page]',$i,$conf['urlpage']);
			$md5url=md5($url);

			// Exact md5 match, the same lookup col_urls() uses.
			if($this->caiji_urls->get_one(array('md5'=>$md5url,'nid'=>$nid))){
				continue;
			}

			$this->caiji_content->create(array(
				'aid'=>0,
				'url'=>$url,
				'img'=>0,
				'title'=>0,
				'nid'=>$nid,
				'status'=>0,
				'addtime'=>$_SERVER['time'],
			));
			$this->caiji_urls->create(array('md5'=>$md5url,'nid'=>$nid));
		}
	}
	/**
	 * Collects the content of up to five queued articles (status 0) of a rule.
	 *
	 * For each row the page is fetched, charset-converted, validated with
	 * check_content() and parsed with get_content(). Rows with an empty URL or a
	 * failed check get status 4. If the parsed result lists sub-page URLs
	 * ('[|]'-separated `content_pages`), one caiji_content_pages row is created per
	 * sub-page and the article gets status 5 (waiting for sub-pages); otherwise it
	 * gets status 1 and is handed to post_content() immediately. `content` and
	 * `reply` are stored as columns; all other parsed fields are packed into `data`
	 * as `name[field]value[_xgcms_]`. Every <img> tag is passed through local_img().
	 *
	 * NOTE(review): rows whose fetch fails or returns nothing keep status 0 and are
	 * selected again on the next call — confirm that endless retrying is intended.
	 *
	 * @param array $cron Rule runtime state (`nid`, `nowpage`).
	 * @param array $conf Rule configuration.
	 * @return string|int 'ok' when no status-0 rows are left, otherwise `nowpage` + 1.
	 */
	public function col_content($cron,$conf){
		$nid=$cron['nid'];
		$page=$cron['nowpage'];

		$c=$this->caiji_content;

		$pagesize=5;
		$list=$c->get_list(array('nid'=>$nid,'status'=>0),0,$pagesize);

		if(empty($list)){
			return 'ok';
		}

		// The /e modifier previously used here was removed in PHP 7 and, where it
		// still ran, passed $conf as the string "Array". A callback avoids both.
		$img_pattern='/<img[^>]*src=[\'"]?([^>\'"\s]*)[\'"]?[^>]*>/i';
		$self=$this;

		foreach($list as $v){
			if(empty($v['url'])){
				$v['status']=4;
				$c->update($v);
				continue;
			}

			$html='';
			try{
				$html=$this->caiji_collect->geturlcont($v['url']);
			}catch(Exception $e){
				continue;
			}
			if(empty($html)) continue;

			$html=$this->caiji_collect->get_charset($html,$conf);
			$check=$this->caiji_collect->check_content($html,$conf);

			if(empty($check)){
				$v['status']=4;
				$c->update($v);
				continue;
			}

			$html=(array)$this->caiji_collect->get_content($html,$conf,$v['url']);

			$tid=$v['id'];
			$turl=$v['url'];
			$localize=function($m) use ($self,$nid,$tid,$turl,$conf){
				return $self->local_img($m[0],$m[1],$nid,$tid,$turl,$conf);
			};

			$data=array();
			$data['id']=$v['id'];
			$data['nid']=$conf['nid'];
			$data['title']=htmlspecialchars(isset($html['title'])?$html['title']:'');
			$data['addtime']=$_SERVER['time'];
			$data['status']=1;
			$data['data']='';
			$data['content_pages']=isset($html['content_pages'])?$html['content_pages']:'';

			if(!empty($data['content_pages'])){
				$pagesurl=explode('[|]',$data['content_pages']);
				foreach($pagesurl as $k=>$pu){
					$this->caiji_content_pages->create(array(
						'content_id'=>$v['id'],
						'aid'=>isset($v['aid'])?$v['aid']:0,
						'url'=>$pu,
						'img'=>'',
						'title'=>htmlspecialchars($v['title']),
						'nid'=>$v['nid'],
						'status'=>0,
						'page'=>$k+1,
						'addtime'=>$_SERVER['time'],
					));
				}
				$data['status']=5;                  // waiting for sub-pages
				$data['totalpages']=count($pagesurl); // number of sub-pages
			}

			foreach($html as $k=>$v2){
				if($k==='title') continue;

				if($k==='content' || $k==='reply'){
					$replaced=preg_replace_callback($img_pattern,$localize,$v2);
					// null signals a PCRE error; keep the untouched text in that case.
					$data[$k]=($replaced===null)?$v2:$replaced;
					continue;
				}
				if($k==='content_pages'){
					$data[$k]=$v2;
					continue;
				}
				$data['data'].=$k.'[field]'.$v2.'[_xgcms_]';
			}

			$replaced=preg_replace_callback($img_pattern,$localize,$data['data']);
			if($replaced!==null) $data['data']=$replaced;

			$data=array_merge($v,$data);
			if($data['status']==1){
				$posted=$this->post_content($data,$conf);
				// Only take over a real row; a non-array result must not reach update().
				if(is_array($posted)) $data=$posted;
			}
			$c->update($data);
		}
		return ($page+1);
	}
	/**
	 * Collects up to five queued sub-pages (status 0) and appends them to their article.
	 *
	 * Each sub-page is fetched, charset-converted, validated and parsed. Its content
	 * (and reply, if present) is appended to the parent caiji_content row after a
	 * `[_page_]` separator, with every <img> tag passed through local_img(). The
	 * parent's `totalpages` counter is decremented for every processed or rejected
	 * sub-page; when it drops below 1 the article gets status 1 and is handed to
	 * post_content(). Processed, rejected and orphaned sub-page rows are deleted.
	 *
	 * NOTE(review): the list is read without an explicit order, so sub-pages are
	 * appended in whatever order get_list() returns them — confirm against the model.
	 *
	 * @param array $cron Rule runtime state (`nid`, `nowpage`).
	 * @param array $conf Rule configuration.
	 * @return string|int 'ok' when no status-0 sub-pages are left, otherwise `nowpage` + 1.
	 */
	public function col_content_pages($cron,$conf){
		$nid=$cron['nid'];
		$page=$cron['nowpage'];

		$c=$this->caiji_content_pages;

		$pagesize=5;
		$list=$c->get_list(array('nid'=>$nid,'status'=>0),0,$pagesize);

		if(empty($list)){
			return 'ok';
		}

		// Callback form: the /e modifier used before was removed in PHP 7 and
		// passed $conf to local_img() as the string "Array" where it still ran.
		$img_pattern='/<img[^>]*src=[\'"]?([^>\'"\s]*)[\'"]?[^>]*>/i';
		$self=$this;

		foreach($list as $v){
			$rs=$this->caiji_content->read($v['content_id']);
			if(!$rs){
				// Parent article is gone: drop the orphaned sub-page.
				$c->delete($v['id']);
				continue;
			}

			$html='';
			try{
				$html=$this->caiji_collect->geturlcont($v['url']);
			}catch(Exception $e){
				continue;
			}
			if(empty($html)) continue;

			$html=$this->caiji_collect->get_charset($html,$conf);
			$check=$this->caiji_collect->check_content($html,$conf);

			if(empty($check)){
				$rs['totalpages']--;
				$this->caiji_content->update($rs);
				$c->delete($v['id']);
				continue;
			}

			$html=(array)$this->caiji_collect->get_content($html,$conf,$v['url']);

			$tid=$v['content_id'];
			$turl=$v['url'];
			$localize=function($m) use ($self,$nid,$tid,$turl,$conf){
				return $self->local_img($m[0],$m[1],$nid,$tid,$turl,$conf);
			};

			$content=isset($html['content'])?$html['content']:'';
			$replaced=preg_replace_callback($img_pattern,$localize,$content);
			if($replaced!==null) $content=$replaced;
			$rs['content']=(isset($rs['content'])?$rs['content']:'').'[_page_]'.$content;

			if(isset($html['reply'])){
				$reply=$html['reply'];
				$replaced=preg_replace_callback($img_pattern,$localize,$reply);
				if($replaced!==null) $reply=$replaced;
				$rs['reply']=(isset($rs['reply'])?$rs['reply']:'').'[_page_]'.$reply;
			}

			$rs['totalpages']--;
			if($rs['totalpages']<1){
				// Last sub-page collected: the article is complete and can be published.
				$rs['status']=1;
				$posted=$this->post_content($rs,$conf);
				// A non-array result must not replace the row passed to update().
				if(is_array($posted)) $rs=$posted;
			}
			$this->caiji_content->update($rs);
			$c->delete($v['id']);
		}
		return ($page+1);
	}
	/**
	 * Publishes one collected article and returns the row with its new status.
	 *
	 * The publish target is read from caiji_post by `$conf['post_id']`. The category
	 * id comes from the target's `catid`, or is looked up by `$conf['name']` when
	 * `post_to` is 2 (forum) or 5 (xgcms category model). With a category id the
	 * article is published locally — through caiji_xgpost->topic() when `post_to`
	 * is below 4, otherwise through caiji_xgcms->add(). Without one the data is sent
	 * to the target URL and the response is searched for `post_success`.
	 *
	 * Status 2 marks a successful publish, status 3 a failed one.
	 *
	 * @param array $v    Article row (caiji_content).
	 * @param array $conf Rule configuration (`post_id`, `name`).
	 * @return array The row $v; `status` is 2 or 3 after a publish attempt and is left
	 *               unchanged when `post_to` is 5 but the xgcms category model is missing.
	 */
	public function post_content($v,$conf){
		$post_info=$this->caiji_post->read($conf['post_id']);
		if(!$post_info){
			// No publish target: same outcome as a failed publish, without the request.
			$v['status']=3;
			return $v;
		}

		$post_url=$post_info['post_domain'].$post_info['post_url'];
		$fid=$post_info['catid'];

		if($post_info['post_to']==2){
			$fid=$this->forum->get_fid_by_name($conf['name']);
		}
		if($post_info['post_to']==5){
			$m_cate=$this->apps->m('xgcms','xgcms_category');
			if(!$m_cate){
				// Callers pass the result straight to update(), so hand back the row
				// untouched instead of 0.
				return $v;
			}
			$cate=$m_cate->get_one(array('catname'=>$conf['name']));
			$fid=$cate?$cate['catid']:0;
		}

		$postdata=$this->get_postdata($v,$post_info['post_data']);

		if($fid){
			if($post_info['post_to']<4){
				$id=$this->caiji_xgpost->topic($postdata,$fid);
			}else{
				$id=$this->caiji_xgcms->add($postdata,$fid);
			}
			$v['status']=$id?2:3;
		}else{
			$html=misc::fetch_url($post_url,5,$postdata);
			// stripos() returns 0 for a match at the start of the response, so it
			// has to be compared against false rather than used as a boolean.
			$published=!empty($html)
				&& !empty($post_info['post_success'])
				&& stripos($html,$post_info['post_success'])!==false;
			$v['status']=$published?2:3;
		}
		return $v;
	}
	/**
	 * Publishes up to five collected articles (status 1) of a rule.
	 *
	 * Selection depends on the target's `post_type`: 2 orders by id with direction
	 * -1 instead of the default 1, and 3 starts at a random offset. Each article is
	 * published through caiji_xgpost->topic() when a category id is available, or
	 * sent to the target URL otherwise, and is saved with status 2 (published) or
	 * 3 (failed).
	 *
	 * NOTE(review): unlike post_content(), this method does not handle `post_to` 5
	 * and never calls caiji_xgcms->add() — confirm whether that is intended.
	 *
	 * @param array $cron Rule runtime state (`nid`, `nowpage`).
	 * @param array $conf Rule configuration (`post_id`, `name`).
	 * @return string|int 'ok' when the publish target is missing or no status-1 rows
	 *                    are left, otherwise `nowpage` + 1.
	 */
	public function post($cron,$conf){
		$nid=$cron['nid'];
		$page=$cron['nowpage'];
		$xc=5; // batch size

		$post_info=$this->caiji_post->read($conf['post_id']);
		if(!$post_info) return 'ok';

		$this->caiji_node->format($conf);

		$map=array('nid'=>$nid,'status'=>1);

		$start=0;
		$order=array('id'=>1);
		if($post_info['post_type']==2){
			$order=array('id'=>-1);
		}
		if($post_info['post_type']==3){
			$total=$this->caiji_content->index_count($map);
			// Random window start; stays 0 when there is at most one batch.
			$start=($total>$xc)?rand(0,$total-$xc):0;
		}

		$list=$this->caiji_content->get_list($map,$start,$xc,$order);
		if(empty($list)){
			return 'ok';
		}

		$post_url=$post_info['post_domain'].$post_info['post_url'];

		$fid=$post_info['catid'];
		if($post_info['post_to']==2){
			$fid=$this->forum->get_fid_by_name($conf['name']);
		}

		foreach($list as $v){
			$postdata=$this->get_postdata($v,$post_info['post_data']);

			if($fid){
				$id=$this->caiji_xgpost->topic($postdata,$fid);
				$v['status']=$id?2:3;
			}else{
				$html=misc::fetch_url($post_url,5,$postdata);
				// stripos() yields 0 for a match at offset 0; compare against false.
				$published=!empty($html)
					&& !empty($post_info['post_success'])
					&& stripos($html,$post_info['post_success'])!==false;
				$v['status']=$published?2:3;
			}
			$this->caiji_content->update($v);
		}

		return $page+1;
	}
	/**
	 * Builds the publish payload for an article from a field template.
	 *
	 * The article's packed `data` column (`name[field]value[_xgcms_]...`) is unpacked
	 * into the row first. The template is split with misc::explode('=', '&', ...);
	 * each template value has its square brackets removed and, if it then names a
	 * key of the row, is replaced by that key's value. Other values are kept as
	 * literals. The result is passed through new_stripslashes().
	 *
	 * @param array  $arr  Article row, including the packed `data` column.
	 * @param string $post Field template.
	 * @return mixed Payload as returned by new_stripslashes().
	 */
	private function get_postdata($arr,$post){
		$packed=isset($arr['data'])?$arr['data']:'';

		foreach(explode('[_xgcms_]',$packed) as $vd){
			if(empty($vd)) continue;
			// Limit 2: a value that itself contains "[field]" must stay intact.
			$field=explode('[field]',$vd,2);
			if(count($field)<2) continue;
			$arr[$field[0]]=$field[1];
		}

		$data=array();
		foreach((array)misc::explode('=','&',$post) as $k=>$v){
			$v=str_replace(array('[',']'),'',$v);
			$data[$k]=isset($arr[$v])?$arr[$v]:$v;
		}

		return new_stripslashes($data);
	}
	/**
	 * Registers an image found in collected HTML and returns the (possibly rewritten) tag.
	 *
	 * The image address is made absolute through download_img(); if no caiji_images
	 * row with the md5 of that address exists yet, one is created.
	 *
	 * @param string $old    Matched <img> tag.
	 * @param string $img    Value of the tag's src attribute.
	 * @param int    $nid    Rule id.
	 * @param int    $tid    Id of the content row the image belongs to.
	 * @param string $url    URL of the page the tag was found on.
	 * @param array  $config Rule configuration.
	 * @param int    $thumb  Value stored in `isthumb`.
	 * @return string The tag, with its src replaced by the absolute address if it was relative.
	 */
	public function local_img($old, $img,$nid,$tid,$url,$config,$thumb=0) {
		// Nothing to register without both a tag and an address.
		if(empty($old) || empty($img)){
			return $old;
		}

		$resolved=self::download_img($old, $img,$url,$config);
		$old=$resolved['old'];
		$img=$resolved['img'];

		$known=$this->caiji_images->get_one(array('md5'=>md5($img)));
		if(empty($known)){
			$this->caiji_images->create(array(
				'nid'=>$nid,
				'tid'=>$tid,
				'image'=>$img,
				'create_time'=>$_SERVER['time'],
				'isthumb'=>$thumb,
				'md5'=>md5($img),
			));
		}

		return $old;
	}
	/**
	 * Converts an image address to an absolute URL, in preparation for downloading.
	 * Nothing is downloaded here; only the address and the tag text are rewritten.
	 *
	 * @param string $old    Matched <img> tag.
	 * @param string $out    Image address taken from the tag.
	 * @param string $url    URL of the page the tag was found on.
	 * @param array  $config Rule configuration, passed on to url_check().
	 * @return array `old` => tag (rewritten if the address was relative), `img` => image address.
	 */
	protected static function download_img($old, $out,$url,$config) {
		$result=array(
			'old'=>$old,
			'img'=>$out,
		);

		// Only addresses without a scheme need resolving.
		$needs_resolving=!empty($old) && !empty($out) && strpos($out, '://') === false;
		if($needs_resolving){
			$result['img']=self::url_check($out, $url, $config);
			$result['old']=str_replace($out, $result['img'], $old);
		}

		return $result;
	}
	/**
	 * Resolves a possibly relative URL against the page it was found on.
	 *
	 * - URLs containing `://` (and empty ones) are returned unchanged.
	 * - `//host/path` takes the scheme of the base URL.
	 * - `/path` is appended to the base URL's scheme, host and port.
	 * - Anything else is prefixed with `$config['page_base']` when that is set,
	 *   otherwise with the directory of the base URL.
	 *
	 * @param string $url     URL to resolve.
	 * @param string $baseurl URL of the page that contained $url.
	 * @param array  $config  Rule configuration; only `page_base` is read.
	 * @return string The resolved URL, or $url unchanged if $baseurl has no host.
	 */
	protected static function url_check($url, $baseurl, $config) {
		$url=(string)$url;
		if($url==='' || strpos($url, '://') !== false){
			return $url;
		}

		$urlinfo=parse_url($baseurl);
		if(!is_array($urlinfo) || empty($urlinfo['host'])){
			// Nothing to resolve against.
			return $url;
		}

		$scheme=isset($urlinfo['scheme'])?$urlinfo['scheme']:'http';
		// Keep a non-default port; dropping it would point at a different server.
		$root=$scheme.'://'.$urlinfo['host'].(isset($urlinfo['port'])?':'.$urlinfo['port']:'');

		if(substr($url,0,2)==='//'){
			// Protocol-relative: the host is already part of $url.
			return $scheme.':'.$url;
		}
		if($url[0]==='/'){
			return $root.$url;
		}
		if(is_array($config) && !empty($config['page_base'])){
			return $config['page_base'].$url;
		}

		$path=isset($urlinfo['path'])?$urlinfo['path']:'';
		if(substr($path,-1)==='/'){
			$dir=substr($path,0,-1);
		}else{
			$dir=str_replace('\\','/',dirname($path));
		}
		// dirname() gives "/" for root-level pages (and "." for a bare name); both
		// must collapse to nothing or the result would contain "//" or "/.".
		$dir=($dir==='.')?'':rtrim($dir,'/');

		return $root.$dir.'/'.$url;
	}
}

?>