<?php
// Spider module: admin controller for collection rules, URL harvesting, content fetching and publishing
class SpiderAction extends BaseAction {
	/**
	 * Extra initialisation for this controller (presumably invoked by
	 * BaseAction during set-up — confirm against the base class).
	 *
	 * Stores the category model on the controller and hands the result of
	 * get_category() to the view as `category`.
	 */
	protected function _initializeMore() {
		$this->categoryObj = M('category');
		$categories = get_category();
		$this->assign('category', $categories);
	}
	// Rule list
	/**
	 * Paginated list of spider rules (20 per page), rendered with the
	 * `spider` template.
	 *
	 * Request parameters: `page` — page number, cast to int.
	 */
	public function defaults() {
		$ruleModel = M('SpiderRule');
		$pageNo = intval(R('page'));
		$perPage = 20;

		$rows = $ruleModel->getList($pageNo, $perPage);
		$this->assign('data', $rows);

		// pages() is read after getList(), in the same order as before.
		$pager = $ruleModel->pages();
		$this->assign('pages', $pager);

		$this->display('spider');
	}
	// Add a rule
	/**
	 * Create a new spider rule.
	 *
	 * On submit the rule is saved through the SpiderRule model; on success the
	 * user is sent on to the content-rule screen of the new rule. Otherwise
	 * the form is shown, pre-filled with defaults read from the `spider`
	 * configuration.
	 */
	public function add() {
		if ($this->isSubmit) {
			$newRid = M('SpiderRule')->save();
			if (!$newRid) {
				$this->showMsg(L('OPERATE_FAIL'));
			} else {
				$this->showMsg(L('OPERATE_SUCCESS'), '?c=spider&a=content_rule&rid=' . $newRid);
			}
		}

		// Copy the configured defaults into the form data, key by key.
		$config = C('', 0, 'spider');
		$defaults = array();
		$keys = array('getnum', 'postnum', 'hitsmin', 'hitsmax', 'remotetype', 'passed', 'isdesc', 'unrepeat', 'pic1_default', 'sim_browser');
		foreach ($keys as $key) {
			$defaults[$key] = $config[$key];
		}

		$this->assign('data', $defaults);
		$this->display('spider');
	}
	// Edit a rule
	/**
	 * Edit an existing spider rule.
	 *
	 * On submit the rule is saved and a success/failure message is shown with
	 * a redirect to `$this->forward`; otherwise the rule identified by the
	 * `rid` request parameter is loaded into the `spider` template.
	 */
	public function edit() {
		$ruleModel = M('SpiderRule');

		if ($this->isSubmit) {
			if ($ruleModel->save()) {
				$message = L('OPERATE_SUCCESS');
			} else {
				$message = L('OPERATE_FAIL');
			}
			$this->showMsg($message, $this->forward);
		}

		$ruleId = intval(R('rid'));
		$ruleRow = $ruleModel->find($ruleId);
		$this->assign('data', $ruleRow);
		$this->display('spider');
	}
	// Content-extraction settings of a rule
	/**
	 * Show and save the content-extraction settings of one spider rule.
	 *
	 * On submit: the posted `af` (per-field begin/end markers) and `arp`
	 * (per-field old/new replacements) are reduced to the fields that have a
	 * `begin` marker, the selected field names are stored in the rule's
	 * `myfield` column, and everything is written to the rule cache
	 * `rule_<rid>` together with the page rule, the protect list and the rule
	 * row itself.
	 *
	 * Otherwise: the cached settings (or defaults built from the spider's
	 * field definitions) are assigned to the `spider` template.
	 *
	 * Request parameters: `rid`, and on submit `af`, `arp`, `protect`,
	 * `pagerule`.
	 */
	public function contentRule() {
		$spiderRuleObj = M('SpiderRule');
		$rid = intval(R('rid'));
		$data = $spiderRuleObj->find($rid);
		if (empty($data['rid'])) $this->showMsg(L('NOPARAM'));
		$cacheName = 'rule_' . $rid;
		if ($this->isSubmit) {
			$af = R('af');
			$arp = R('arp');
			// A missing or malformed submission must not break the loop or the
			// unset() below, so fall back to empty lists.
			if (!is_array($af)) $af = array();
			if (!is_array($arp)) $arp = array();
			$selected = array();
			foreach ($af as $k => $v) {
				// Fields without a begin marker are dropped from both lists.
				if (empty($v['begin'])) unset($af[$k], $arp[$k]);
				else $selected[] = $k;
			}
			$spiderRuleObj->setFields(array('myfield' => implode('|', $selected)), 'rid', $rid);
			$cache = array();
			$cache['af'] = Func::newStripslashes($af);
			$cache['arp'] = Func::newStripslashes($arp);
			// Stored as a list when given, as '' when empty (format kept as is).
			$cache['protect'] = R('protect') ? explode('(|)', stripslashes(R('protect'))) : '';
			$cache['pagerule'] = Func::newStripslashes(R('pagerule'));
			// NOTE(review): $data was loaded before setFields(), so the cached
			// 'set' row still carries the previous myfield value — confirm
			// whether consumers of the cache rely on it.
			$cache['set'] = $data;
			if (Cache::write($cacheName, $cache, 'rule', 'spider')) $this->showMsg(L('OPERATE_SUCCESS'), $this->forward);
			else $this->showMsg(L('OPERATE_FAIL'));
		}
		$rule = Cache::read($cacheName, 'rule', 0, 'spider');
		$spiderObj = $this->getSpider($rule);
		// Every field the spider knows about.
		$this->assign('all_fields', $spiderObj->fields);
		// Fields currently selected for collection. Read straight from the rule
		// row instead of extract()-ing the whole row into local scope.
		$myfield = empty($data['myfield']) ? $spiderObj->defaultFields : explode('|', $data['myfield']);
		$this->assign('myfield', $myfield);
		// Fields that can still be added (known fields minus the selected ones).
		$this->assign('select_fields', array_diff_key($spiderObj->fields, array_flip($myfield)));

		if (!empty($rule['af'])) {
			$af = $rule['af'];
			$arp = $rule['arp'];
			$pagerule = $rule['pagerule'];
			// The protect list is cached as '' when it was left empty, so only
			// join it when it really is a list.
			$protect = (isset($rule['protect']) && is_array($rule['protect'])) ? implode('(|)', $rule['protect']) : '';
		} else {
			// Nothing cached yet: build blank settings from the field definitions.
			$af = $arp = array();
			$pagerule = array('begin' => '', 'end' => '', 'ismerge' => 0);
			$protect = '';
			foreach ($spiderObj->fields as $k => $v) {
				$af[$k] = array('begin' => $v['value'], 'end' => '');
				$arp[$k] = array('old' => '', 'new' => '');
			}
		}
		// Extraction markers.
		$this->assign('af', $af);
		// Replacement pairs.
		$this->assign('arp', $arp);
		$this->assign('pagerule', $pagerule);
		$this->assign('protect', $protect);

		$this->assign('rid', $rid);
		$this->display('spider');
	}
	// Copy a rule
	/**
	 * Duplicate a spider rule.
	 *
	 * Loads the rule given by the `rid` request parameter, inserts it again
	 * under a freshly allocated id with the current time as `addtime`, and
	 * copies the rule's cache file to the new id.
	 */
	public function copy() {
		$spiderRuleObj = M('SpiderRule');
		$rid = intval(R('rid'));
		$data = $spiderRuleObj->find($rid);
		if (empty($data['rid'])) {
			// Without this check an unknown rid would insert an empty rule.
			$this->showMsg(L('NOPARAM'));
			return;
		}
		$data['rid'] = $spiderRuleObj->getId('rid');
		$data['addtime'] = TIME;
		if ($spiderRuleObj->create($data)->add()) {
			$s = Cache::getPath('rule_' . $rid, 'rule', 'spider');
			$d = Cache::getPath('rule_' . $data['rid'], 'rule', 'spider');
			// Best effort: the source rule may not have a cache file yet.
			// NOTE(review): the copied cache keeps the source rule's 'set' row
			// (old rid/addtime) — confirm whether it should be refreshed.
			@copy($s, $d);
			$this->showMsg(L('OPERATE_SUCCESS'), $this->referer);
		} else $this->showMsg(L('OPERATE_FAIL'));
	}
	// Export a rule
	/**
	 * Export a rule as a portable string.
	 *
	 * The cached rule `rule_<rid>` is converted from the application charset
	 * to UTF-8, serialized and base64-encoded, then shown in the `spider`
	 * template as `rule_str`.
	 */
	public function export() {
		$ruleId = intval(R('rid'));
		$cachedRule = Cache::read('rule_' . $ruleId, 'rule', 0, 'spider');
		$utf8Rule = Func::newIconv(C('APP_CHARSET', 1), 'utf-8', $cachedRule);
		$payload = base64_encode(serialize($utf8Rule));

		$this->assign('rule_str', $payload);
		$this->display('spider');
	}
	// Import a rule
	/**
	 * Import a rule from the string produced by export().
	 *
	 * The posted `rule` value is base64-decoded and unserialized, converted
	 * from UTF-8 to the application charset, inserted as a new SpiderRule row
	 * under a freshly allocated id, and written to the rule cache with the
	 * stored row as its `set` entry.
	 */
	public function import() {
		$spiderRuleObj = M('SpiderRule');
		if ($this->isSubmit) {
			$rule = base64_decode(R('rule', 'post'));
			// SECURITY NOTE(review): unserialize() on posted data allows PHP
			// object injection. Kept for compatibility with existing export
			// strings; consider a JSON-based format or restricting allowed
			// classes where the PHP version supports it.
			$cache = is_string($rule) ? unserialize($rule) : false;
			if (!is_array($cache) || empty($cache['set']) || !is_array($cache['set'])) {
				// Not a rule export: refuse instead of inserting a junk rule.
				$this->showMsg(L('OPERATE_FAIL'));
			} else {
				$cache = Func::newIconv('utf-8', C('APP_CHARSET', 1), $cache);
				$rid = $spiderRuleObj->getId('rid');
				$cache['set']['rid'] = $rid;
				$cache['set']['addtime'] = TIME;
				if ($spiderRuleObj->create($cache['set'])->add()) {
					// Cache the row as it was actually stored.
					$cache['set'] = $spiderRuleObj->find($rid);
					Cache::write('rule_' . $rid, $cache, 'rule', 'spider');
					$this->showMsg(L('OPERATE_SUCCESS'), '?c=spider');
				} else $this->showMsg(L('OPERATE_FAIL'));
			}
		}
		$this->display('spider');
	}
	// Delete rules
	/**
	 * Delete spider rule(s) through the SpiderRule model's del() and report
	 * the outcome; on failure the controller's `error` value is shown.
	 */
	public function del() {
		$deleted = M('SpiderRule')->del();
		if (!$deleted) {
			$this->showMsg($this->error);
		} else {
			$this->showMsg(L('OPERATE_SUCCESS'), $this->referer);
		}
	}
	// Test a rule
	/**
	 * Dry-run a rule without storing anything.
	 *
	 * Request parameters:
	 *  - `rid`  rule id (its cache must exist);
	 *  - `work` `test_url` lists the links found on the first start URL,
	 *           `test_content` extracts the fields from the page given in
	 *           the `url` request parameter.
	 *
	 * Results are rendered with the `spider_test` template.
	 */
	public function test() {
		$ruleId = intval(R('rid'));
		$rule = Cache::read('rule_' . $ruleId, 'rule', 0, 'spider');
		if (empty($rule)) {
			show_error(L('RULE_NOT_EXIST', 'spider'));
		}
		$this->assign('rid', $ruleId);

		$mode = R('work');
		$this->assign('work', $mode);

		if ($mode) {
			$spider = $this->getSpider($rule);
			// switch compares loosely, exactly like the former == chain.
			switch ($mode) {
				case 'test_url':
					if (empty($rule['set']['starturl'])) {
						show_error(L('SPIDER_URL_NOT_EXIST', 'spider'));
					}
					$set = $rule['set'];
					// Only the first of the newline-separated start URLs is tried.
					$startUrls = preg_split("/[\r\n]+/", $set['starturl']);
					$links = $spider->getLinks($startUrls[0], $set['listbegin'], $set['listend'], $set['urlin'], $set['urlout']);
					$this->assign('data', $links);
					break;

				case 'test_content':
					$extracted = $spider->getData(R('url'), $rule['af'], $rule['arp'], $rule['pagerule'], $rule['protect']);
					$lines = array();
					if (is_array($extracted)) {
						foreach ($extracted as $field => $value) {
							$lines[] = $spider->fields[$field]['name'] . "{$value}\r\n";
						}
					}
					$this->assign('content', implode('', $lines));
					break;
			}
		}

		$this->display('spider_test');
	}
	// Collect URLs
	/**
	 * Harvest article links from the rule's start URLs, one start URL per
	 * request.
	 *
	 * Each call processes start URL number `page`, stores the new links in
	 * SpiderUrl and then redirects to itself for the next start URL until all
	 * are done, at which point it returns to the page the run started from.
	 *
	 * Request parameters:
	 *  - `rid`      rule id;
	 *  - `page`     zero-based index into the start URL list;
	 *  - `totlecnt` links found so far, `totlenum` links stored so far;
	 *  - `forward`  base64 of the return URL (set by the first call).
	 */
	public function getUrl() {
		$rid = intval(R('rid'));
		$page = intval(R('page'));
		$totlecnt = intval(R('totlecnt'));
		$totlenum = intval(R('totlenum'));
		// The first call remembers where to go back to; later calls pass it on.
		$forward = $page ? R('forward') : base64_encode($this->referer);
		$rule = Cache::read('rule_' . $rid, 'rule', 0, 'spider');
		if (empty($rule['set']['starturl'])) show_error(L('SPIDER_URL_NOT_EXIST', 'spider'));
		$startUrl = preg_split("/[\r\n]+/", $rule['set']['starturl']);
		$pages = count($startUrl);
		if (!isset($startUrl[$page])) {
			$this->showMsg(L('NOPARAM'), $this->referer);
			return;
		}

		$spiderObj = $this->getSpider($rule);
		$links = $spiderObj->getLinks($startUrl[$page], $rule['set']['listbegin'], $rule['set']['listend'], $rule['set']['urlin'], $rule['set']['urlout']);
		$cnt = is_array($links) ? count($links) : 0;
		if (!$cnt) {
			$this->showMsg(L('OPERATE_FAIL'));
			return;
		}

		$spiderUrlObj = M('SpiderUrl');
		$num = 0;
		foreach ($links as $v) {
			$v['title'] = Func::shtmlspecialchars($v['title']);
			if ($rule['set']['unrepeat']) {
				// De-duplicate by title.
				$isInsert = $this->unrepeat($v['title']);
			} else {
				// De-duplicate by URL. The scraped URL is passed in the array
				// form of where() (as used elsewhere in this class) rather than
				// concatenated unquoted into the SQL string; presumably the
				// model quotes array conditions — confirm.
				$isInsert = $spiderUrlObj->field('urlid')->where(array('url' => $v['url']))->find() ? false : true;
			}
			if ($isInsert) {
				// Separate variable: the list being iterated is not overwritten.
				$row = array('title' => $v['title'],
					'catid' => $rule['set']['catid'],
					'rid' => $rid,
					'url' => $v['url'],
					'img' => $v['img'],
					'addtime' => TIME
					);
				$spiderUrlObj->create($row)->add();
				$num++;
			}
		}

		$page += 1;
		$totlecnt += $cnt;
		$totlenum += $num;
		if ($page < $pages) {
			// base64 may contain '+', '/' and '=', which must be URL-encoded to
			// survive the round trip through the query string.
			$go = "?c=spider&a=get_url&rid={$rid}&page={$page}&totlecnt={$totlecnt}&totlenum={$totlenum}&forward=" . urlencode($forward);
		} else {
			$go = base64_decode($forward);
		}
		$this->showMsg(sprintf(L('SPIDER_URL', 'spider'), $cnt, $num, $totlecnt, $totlenum), $go, 3000);
	}
	// Collect content
	/**
	 * Fetch and extract the content of harvested URLs, one batch per request.
	 *
	 * Each call takes up to `getnum` (rule setting) URLs of the rule that have
	 * `isdata = 0`, extracts their fields, caches successful results under the
	 * URL id and marks the rows `isdata = 1` (success) or `2` (failure). It
	 * then redirects to itself until `pages` batches are done.
	 *
	 * Request parameters: `rid`, `page`, `pages`, `successnum`.
	 */
	public function getContent() {
		@set_time_limit(0);
		$rid = intval(R('rid'));
		if (!$rid) {
			$this->showMsg(L('NOPARAM'), $this->referer);
			return;
		}

		$rule = Cache::read('rule_' . $rid, 'rule', 0, 'spider');
		// Same guard as test() and view(): nothing can be done without the rule.
		if (empty($rule)) show_error(L('RULE_NOT_EXIST', 'spider'));
		// A missing or zero batch size would divide by zero below.
		$batchSize = max(1, intval($rule['set']['getnum']));

		$spiderUrlObj = M('SpiderUrl');
		$page = max(intval(R('page')), 1);
		$pages = intval(R('pages'));
		if (!$pages) {
			// First request of a run: size the job and remember its start.
			$totle = $spiderUrlObj->where(array('rid' => $rid, 'isdata' => 0))->count();
			if (!$totle) $this->showMsg(L('SPIDER_IS_EMPTY', 'spider'));
			$pages = ceil($totle / $batchSize);
			Cache::write('task_get_' . $rid, array('totle' => $totle, 'start' => SYS_START_TIME, 'forward' => $this->referer), '', 'spider');
		}
		if ($page > $pages) $this->showMsg(L('NOPARAM'), $this->referer);

		// Always page 1: processed rows leave the isdata = 0 set.
		$urls = $spiderUrlObj->field(array('urlid', 'url'))->getList(1, $batchSize, array('rid' => $rid, 'isdata' => 0), 'addtime DESC');
		$num = $cnt = 0;
		$succes = $fail = array();
		$spiderObj = $this->getSpider($rule);
		// Fetch the raw pages for the whole batch in one call.
		$contents = $spiderObj->getMoreContent2($urls);
		if (!is_array($contents)) $contents = array();
		foreach ($contents as $k => $v) {
			$data = $spiderObj->getData($urls[$k]['url'], $rule['af'], $rule['arp'], $rule['pagerule'], $rule['protect'], $v);
			if (!empty($data['content'])) {
				Cache::write($urls[$k]['urlid'], $data, 'data', 'spider');
				$succes[] = $urls[$k]['urlid'];
				$num++;
			} else $fail[] = $urls[$k]['urlid'];
			$cnt++;
		}
		if (!empty($fail)) {
			$spiderUrlObj->setFields(array('addtime' => TIME, 'isdata' => 2), 'urlid', implode(',', $fail));
		}
		if (!$cnt) {
			$this->showMsg(L('SPIDER_IS_EMPTY', 'spider'), $this->referer);
			return;
		}
		if (!empty($succes)) {
			$spiderUrlObj->setFields(array('addtime' => TIME, 'isdata' => 1), 'urlid', implode(',', $succes));
		}

		$page += 1;
		// Cast first: the raw request value may be empty or non-numeric.
		$successnum = intval(R('successnum')) + $num;
		if ($page > $pages) {
			$cacheName = 'task_get_' . $rid;
			$cache = Cache::read($cacheName, '', 0, 'spider');
			$usetime = get_run_time($cache['start']); // elapsed time of the run
			Cache::del($cacheName, '', 'spider');
			$this->showMsg(sprintf(L('SPIDER_CONTENT_END', 'spider'), $cnt, $num, $cache['totle'], $successnum, $usetime), $cache['forward'], 4000);
		} else {
			$this->showMsg(sprintf(L('SPIDER_CONTENT', 'spider'), $cnt, $num), "?c=spider&a=get_content&rid={$rid}&pages={$pages}&page={$page}&successnum={$successnum}", 1000);
		}
	}
	// Collected URL list
	/**
	 * Paginated list of harvested URLs (20 per page), optionally filtered by
	 * rule.
	 *
	 * Also builds the `rule` template value: a `|`-joined list of
	 * `<rid>_<rule name>` entries for every rule that has harvested URLs.
	 *
	 * Request parameters: `page`, `rid` (optional filter).
	 */
	public function content() {
		$pageNo = intval(R('page'));
		$perPage = 20;
		$urlModel = M('SpiderUrl');

		// Distinct rule ids present in the URL table.
		$ruleRows = $urlModel->field('rid')->group('rid')->getList('', 0, '', 'rid');
		$ruleOptions = '';
		if (is_array($ruleRows)) {
			$ruleModel = M('SpiderRule');
			$labels = array();
			foreach ($ruleRows as $ruleRow) {
				$named = $ruleModel->field('name')->find($ruleRow['rid']);
				$labels[] = $ruleRow['rid'] . '_' . $named['name'];
			}
			$ruleOptions = implode('|', $labels);
		}

		$filter = '';
		if (R('rid')) {
			$filter = array('rid' => intval(R('rid')));
		}

		$rows = $urlModel->getList($pageNo, $perPage, $filter);
		$this->assign('data', $rows);
		$this->assign('pages', $urlModel->pages());
		$this->assign('rule', $ruleOptions);
		$this->display('spider');
	}
	// Delete collected records (URL rows and cached content)
	/**
	 * Delete harvested URLs together with their cached content.
	 *
	 * Request parameters: `selids` — comma-separated list of URL ids.
	 *
	 * The ids come straight from the request and end up in a cache key and in
	 * SQL, so they are reduced to positive integers first (assumes URL ids are
	 * positive integers, as the intval() use elsewhere in this class suggests).
	 */
	public function delAll() {
		$selids = R('selids');
		$ids = array();
		if ($selids) {
			$parts = is_array($selids) ? $selids : explode(',', $selids);
			foreach ($parts as $part) {
				if (!is_scalar($part)) continue;
				$id = intval($part);
				// Keyed by id so duplicates collapse.
				if ($id > 0) $ids[$id] = $id;
			}
		}
		if (empty($ids)) {
			$this->showMsg(L('NOPARAM'));
			return;
		}

		foreach ($ids as $urlid) {
			Cache::del($urlid, 'data', 'spider');
		}
		$spiderUrlObj = M('SpiderUrl');
		$spiderUrlObj->where('urlid IN (' . implode(',', $ids) . ')')->delete();
		$this->showMsg(L('OPERATE_SUCCESS'), $this->referer);
	}
	// Delete collected content only (URL rows are kept and reset)
	/**
	 * Discard the cached content of harvested URLs but keep the URL rows,
	 * resetting them to `isdata = 0` so they can be fetched again.
	 *
	 * Request parameters: `selids` — comma-separated list of URL ids.
	 *
	 * The ids come straight from the request and end up in a cache key and in
	 * SQL, so they are reduced to positive integers first (assumes URL ids are
	 * positive integers, as the intval() use elsewhere in this class suggests).
	 */
	public function delContent() {
		$selids = R('selids');
		$ids = array();
		if ($selids) {
			$parts = is_array($selids) ? $selids : explode(',', $selids);
			foreach ($parts as $part) {
				if (!is_scalar($part)) continue;
				$id = intval($part);
				// Keyed by id so duplicates collapse.
				if ($id > 0) $ids[$id] = $id;
			}
		}
		if (empty($ids)) {
			$this->showMsg(L('NOPARAM'));
			return;
		}

		foreach ($ids as $urlid) {
			Cache::del($urlid, 'data', 'spider');
		}
		$spiderUrlObj = M('SpiderUrl');
		$spiderUrlObj->where('urlid IN (' . implode(',', $ids) . ')')->create(array('isdata' => 0))->update();
		$this->showMsg(L('OPERATE_SUCCESS'), $this->referer);
	}
	// Empty all collected data
	/**
	 * Wipe everything that has been harvested: truncates the SpiderUrl table
	 * and removes the directory that holds the spider `data` cache files.
	 */
	public function truncate() {
		$urlModel = M('SpiderUrl');
		$table = $urlModel->getTableName();

		$db = $urlModel->db();
		$db->query('TRUNCATE TABLE ' . $db->addSpecialChar($table));

		// Any key works here: only the containing directory is of interest.
		$cacheDir = dirname(Cache::getPath('jxcms', 'data', 'spider'));
		FileUtil::del($cacheDir, 1);

		$this->showMsg(L('OPERATE_SUCCESS'), $this->referer);
	}
	// View / edit one collected item
	/**
	 * View, and optionally hand-edit, the content collected for one URL.
	 *
	 * On submit the posted `info[title]` / `info[content]` replace the cached
	 * values (the row is marked `isdata = 1` if it had no content before).
	 * When no content is cached, the page is fetched and extracted on the
	 * spot and the row is marked `isdata = 1` on success or `2` on failure.
	 *
	 * Request parameters: `urlid`, and on submit `info`.
	 */
	public function view() {
		$urlid = intval(R('urlid'));
		$urlModel = M('SpiderUrl');
		$record = $urlModel->find($urlid);

		$rule = Cache::read('rule_' . $record['rid'], 'rule', 0, 'spider');
		if (empty($rule)) {
			show_error(L('RULE_NOT_EXIST', 'spider'));
		}
		$spider = $this->getSpider($rule);

		$item = Cache::read($urlid, 'data', 0, 'spider');
		$byUrlId = 'urlid=' . $urlid;

		if ($this->isSubmit) {
			$posted = R('info', 'post');
			if (!$posted['content']) {
				$this->showMsg(L('OPERATE_FAIL'));
			} else {
				if (empty($item['content'])) {
					// First content for this URL: flag the row as collected.
					$urlModel->create(array('addtime' => TIME, 'isdata' => 1))->where($byUrlId)->update();
				}
				$item['title'] = stripslashes($posted['title']);
				$item['content'] = stripslashes($posted['content']);
				Cache::write($urlid, $item, 'data', 'spider');
				$this->showMsg(L('OPERATE_SUCCESS'), $this->forward);
			}
		}

		if (empty($item['content'])) {
			// Nothing cached: collect now and record the outcome on the row.
			$item = $spider->getData($record['url'], $rule['af'], $rule['arp'], $rule['pagerule'], $rule['protect']);
			$status = 2;
			if (!empty($item['content'])) {
				Cache::write($urlid, $item, 'data', 'spider');
				$status = 1;
			}
			$urlModel->create(array('addtime' => TIME, 'isdata' => $status))->where($byUrlId)->update();
		}

		$this->assign('fields', $spider->fields);
		$this->assign('urlid', $urlid);
		$this->assign('rule', $rule);
		$this->assign('data', $item);
		$this->display('spider');
	}
	/**
	 * Publish collected content into the target module, one batch per
	 * request.
	 *
	 * Each call takes up to `postnum` (rule setting) URLs of the rule with
	 * `isdata = 1`, hands each to the spider's post() together with the
	 * target module's model, deletes the successfully published rows and
	 * redirects to itself until `pages` batches are done. Per-item errors
	 * are appended to the status message.
	 *
	 * Request parameters: `rid`, `page`, `pages`, `successnum`.
	 */
	public function post() {
		@set_time_limit(0);
		$rid = intval(R('rid'));
		if (!$rid) {
			$this->showMsg(L('NOPARAM'));
			return;
		}

		$rule = Cache::read('rule_' . $rid, 'rule', 0, 'spider');
		// Same guard as test() and view(): nothing can be done without the rule.
		if (empty($rule)) show_error(L('RULE_NOT_EXIST', 'spider'));
		// A missing or zero batch size would divide by zero below.
		$batchSize = max(1, intval($rule['set']['postnum']));

		$spiderUrlObj = M('SpiderUrl');
		$page = max(intval(R('page')), 1);
		$pages = intval(R('pages'));
		if (!$pages) {
			// First request of a run: size the job and remember its start.
			$totle = $spiderUrlObj->where(array('rid' => $rid, 'isdata' => 1))->count();
			if (!$totle) $this->showMsg(L('POST_IS_EMPTY', 'spider'));
			$pages = ceil($totle / $batchSize);
			Cache::write('task_post_' . $rid, array('totle' => $totle, 'start' => SYS_START_TIME, 'forward' => $this->referer), '', 'spider');
		}
		if ($page > $pages) $this->showMsg(L('NOPARAM'), $this->referer);

		$order = $rule['set']['isdesc'] ? 'DESC' : 'ASC';
		// Always page 1: published rows are deleted, so the set shrinks.
		$urls = $spiderUrlObj->field(array('urlid', 'img'))->getList(1, $batchSize, array('rid' => $rid, 'isdata' => 1), 'urlid ' . $order);
		if (!is_array($urls)) $urls = array();
		$num = $cnt = 0;
		$success = $error = array();
		$spiderObj = $this->getSpider($rule);
		// "<Module>Spider" -> "<module>": the model of the module published into.
		$moduleObj = M(strtolower(substr(get_class($spiderObj), 0, -6)));
		foreach ($urls as $url) {
			if ($spiderObj->post($url, $moduleObj)) {
				$success[] = intval($url['urlid']);
				$num++;
			} else $error[] = $url['urlid'] . '-' . $spiderObj->error;
			$cnt++;
		}
		if (!$cnt) {
			$this->showMsg(L('POST_IS_EMPTY', 'spider'), $this->referer);
			return;
		}
		if ($num) {
			$spiderUrlObj->where('urlid IN (' . implode(',', $success) . ')')->delete();
		}

		$page += 1;
		// Cast first: the raw request value may be empty or non-numeric.
		$successnum = intval(R('successnum')) + $num;
		if ($page > $pages) {
			$cacheName = 'task_post_' . $rid;
			$cache = Cache::read($cacheName, '', 0, 'spider');
			$usetime = get_run_time($cache['start']); // elapsed time of the run
			Cache::del($cacheName, '', 'spider');
			$msg = sprintf(L('POST_CONTENT_END', 'spider'), $cnt, $num, $cache['totle'], $successnum, $usetime);
			if (!empty($error)) $msg .= '<br />' . implode('; ', $error);
			$this->showMsg($msg, $cache['forward'], 4000);
		} else {
			$msg = sprintf(L('SPIDER_CONTENT', 'spider'), $cnt, $num);
			if (!empty($error)) $msg .= '<br />' . implode('; ', $error);
			$this->showMsg($msg, "?c=spider&a=post&rid={$rid}&pages={$pages}&page={$page}&successnum={$successnum}", 1000);
		}
	}
	/**
	 * Build the spider object for a rule.
	 *
	 * The rule's category determines the module; the module's spider class
	 * `<Name>Spider` is loaded from `Lib/Spider/` in the module directory and
	 * instantiated with the rule.
	 *
	 * @param array $rule cached rule; `$rule['set']['catid']` is required
	 * @return object the module's spider instance
	 */
	private function getSpider($rule) {
		// A missing or incomplete rule falls through to the invalid-category error.
		$catid = isset($rule['set']['catid']) ? intval($rule['set']['catid']) : 0;
		$moduleid = get_category($catid, 'moduleid');
		if (!$moduleid) show_error(L('CATEGORY_INVALID'));
		$module = get_module($moduleid);
		// Derive the class name once so the imported file and the instantiated
		// class cannot disagree.
		$className = ucfirst($module['name']) . 'Spider';
		if (!import('Lib/Spider/' . $className, $module['dir'])) {
			show_error($module['title'] . L('SPIDER_PORT_NOT_EXIST', 'spider'));
		}
		return new $className($rule);
	}
	/**
	 * Title-based duplicate check.
	 *
	 * Titles are tracked as `|`-separated md5 hashes in bucket files chosen
	 * by the first two hex characters of the title's sha1. A bucket holds at
	 * most 300 hashes; when full, the oldest 50 are dropped.
	 *
	 * @param string $title title to check (tags are stripped first)
	 * @return int|false false when the title was seen before or the bucket
	 *                   could not be written; otherwise the number of bytes
	 *                   written, i.e. the title is new and now recorded
	 */
	private function unrepeat($title) {
		$title = strip_tags($title);
		$path = Cache::getPath(substr(sha1($title), 0, 2), '', 'spider');
		$content = is_file($path) ? file_get_contents($path) : '';
		$hash = md5($title);
		if ($content) {
			if (strpos($content, $hash) !== false) return false;
			$max = 300; // maximum number of hashes kept per bucket
			$hashlen = strlen($hash) + 1; // hash plus its '|' separator
			$maxnum = $hashlen * $max - 1;
			if ($maxnum <= strlen($content)) {
				// Bucket full: drop the oldest 50 entries. This previously read
				// from an undefined variable, which discarded the whole history.
				$start = $hashlen * 50;
				$content = substr($content, $start);
			}
			$content .= '|';
		}
		return file_put_contents($path, $content . $hash);
	}
}