<?php
/**
 * HTML and multibyte string helpers: a best-effort XSS filter, an HTML-aware
 * truncation routine, and charset-aware substring/length functions.
 *
 * NOTE: "String" is a reserved class name as of PHP 7.0, so this class can only
 * be declared on PHP 5.x. The name is kept because callers depend on it.
 */
class String
{
	/**
	 * Best-effort removal of active content from an HTML fragment.
	 *
	 * - Removes <script> and <iframe> elements (case-insensitive). An element
	 *   that is never closed is removed through the end of the input.
	 * - Prefixes every "on*" event-handler attribute inside an opening tag with
	 *   "xss_" so the browser no longer treats it as a handler.
	 *
	 * This is a regex filter, not an HTML parser: it does not neutralise
	 * "javascript:" URLs, CSS expressions or other vectors. Treat the result as
	 * reduced-risk, not as safe-by-construction.
	 *
	 * @param string $html Untrusted HTML fragment.
	 * @return string Filtered HTML; '' for blank input or when PCRE fails.
	 */
	public function xss($html)
	{
		if(trim($html) === '')
		{
			return '';
		}

		// Case-insensitive; tolerates "</script >" and unclosed elements.
		$html = preg_replace('/<(script|iframe)\b[\s\S]*?(?:<\/\1[^>]*>|\z)/i', '', $html);
		if($html === null)
		{
			// PCRE error (e.g. backtrack limit): fail closed instead of passing input through.
			return '';
		}

		// Match whole opening tags. Quoted attribute values are consumed as a unit
		// so a ">" inside a value does not end the tag early, and a tag or quote
		// left open at the end of the input is still covered.
		$html = preg_replace_callback(
			'/<[a-z](?:[^>"\']++|"[^"]*+(?:"|\z)|\'[^\']*+(?:\'|\z))*+(?:>|\z)/i',
			array(__CLASS__, 'neutralize_event_attrs'),
			$html
		);

		return $html === null ? '' : $html;
	}

	/**
	 * preg_replace_callback handler for xss(): renames on* attributes in one tag.
	 *
	 * @param array $match Match array; $match[0] is the full opening tag.
	 * @return string The tag with every "onxxx=" rewritten to "xss_onxxx=".
	 */
	static private function neutralize_event_attrs($match)
	{
		// An attribute name starts after whitespace, "/" or a closing quote.
		return preg_replace('/(?<=[\s"\'\/])on[a-z0-9]+(?=\s*=)/i', 'xss_$0', $match[0]);
	}

	/**
	 * Truncates an HTML fragment to at most $length characters of text while
	 * keeping the markup balanced.
	 *
	 * Tags and comments are copied through and do not count towards the limit.
	 * Elements still open when processing stops are closed at the end. A closing
	 * tag that does not match the innermost open element is dropped.
	 * Whitespace-only segments are skipped.
	 *
	 * Text handling depends on the innermost open element:
	 *  - pre: normalised through clear_pre()
	 *  - script / style: copied verbatim
	 *  - anything else: passed through the global function plain_text(), which
	 *    is not defined in this class and must be provided elsewhere.
	 *
	 * @param string $html   HTML fragment.
	 * @param int    $length Maximum number of text characters; any negative
	 *                       value (default -1) means no limit.
	 * @return string Processed HTML with "<p><br /></p>" paragraphs removed.
	 */
	public function sub_html($html, $length = -1)
	{
		$result = array();
		$stack = array();
		$sublen = 0;
		$unlimited = ($length < 0);

		// Comments are matched first so a ">" inside a comment does not split it.
		$segments = preg_split('/(<!--[\s\S]*?-->|<[^>]+?>)/si', $html, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);
		if($segments === false)
		{
			return '';
		}

		// HTML void elements: they never take a closing tag, so they must not be
		// pushed on the open-element stack.
		$half_open = array(
			'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
			'link', 'meta', 'param', 'source', 'track', 'wbr'
		);

		foreach($segments as $seg)
		{
			if(trim($seg) === '')
			{
				continue;
			}

			if(strncmp($seg, '<!--', 4) === 0)
			{
				// Comment: copy through untouched.
				$result[] = $seg;
			}
			else if(preg_match("!<([a-z0-9]+)[^>]*?/>!si", $seg))
			{
				// Self-closing tag: must not be truncated, never opens a scope.
				$result[] = $seg;
			}
			else if(preg_match("!</([a-z0-9]+)[^>]*?>!si", $seg, $match))
			{
				$tag = self::format_tag($match[1]);

				// Only emit a closing tag that matches the innermost open element.
				if(!empty($stack) && $stack[count($stack) - 1] === $tag)
				{
					array_pop($stack);
					$result[] = $seg;
				}
			}
			else if(preg_match("!<([a-z0-9]+)[^>]*?>!si", $seg, $match))
			{
				$tag = self::format_tag($match[1]);
				if(!in_array($tag, $half_open, true))
				{
					array_push($stack, $tag);
				}
				$result[] = $seg;
			}
			else if(preg_match("~<![^>]*>~", $seg))
			{
				// Declarations such as <!DOCTYPE ...>.
				$result[] = $seg;
			}
			else
			{
				// Text. The mode comes from the innermost element that is still
				// open, so text following "</pre>" or "</style>" is ordinary text.
				$context = empty($stack) ? '' : $stack[count($stack) - 1];

				if($context === 'pre')
				{
					$seg = $this->clear_pre($seg);
				}
				else if($context !== 'script' && $context !== 'style')
				{
					$seg = plain_text($seg);
				}

				if($unlimited)
				{
					$result[] = $seg;
					continue;
				}

				$seg_len = $this->mstrlen($seg);

				if($sublen + $seg_len <= $length)
				{
					$result[] = $seg;
					$sublen += $seg_len;
				}
				else
				{
					// Keep exactly the remaining budget; msubstr() adds "...".
					$result[] = $this->msubstr($seg, 0, $length - $sublen);
					break;
				}
			}
		}

		// Close whatever is still open, innermost first.
		while(!empty($stack))
		{
			$result[] = '</'.array_pop($stack).'>';
		}

		return preg_replace('!<p[^>]*?><br /></p>!si', '', join('', $result));
	}

	/**
	 * Normalises preformatted text: line endings become "\n", each tab becomes
	 * two spaces, and runs of consecutive newlines collapse to a single one.
	 *
	 * @param string $pre Text taken from inside a <pre> element.
	 * @return string Normalised text.
	 */
	public function clear_pre($pre)
	{
		$pre = str_replace(array("\r\n", "\r"), "\n", $pre);
		// Double quotes are required: '\t' in single quotes is a backslash and a "t".
		$pre = str_replace("\t", '  ', $pre);
		$pre = preg_replace('!\n{2,}!', "\n", $pre);
		return $pre;
	}

	/**
	 * Canonical form of a tag name: lower-cased and trimmed.
	 *
	 * @param string $tag Raw tag name.
	 * @return string Normalised tag name.
	 */
	static public function format_tag($tag)
	{
		return trim(strtolower($tag));
	}

	/**
	 * Charset-aware substring with an optional "..." marker.
	 *
	 * Uses mb_substr() when available, then iconv_substr(), then a regex-based
	 * fallback that knows utf-8, gb2312, gbk and big5 (any other charset is
	 * treated as utf-8 in the fallback).
	 *
	 * @param string   $str     Source string.
	 * @param int      $start   Character offset; negative counts from the end.
	 * @param int|null $length  Number of characters; null means "to the end".
	 * @param string   $charset Character set of $str.
	 * @param bool     $suffix  Append "..." when characters remain after the slice.
	 * @return string The slice, followed by "..." only if $suffix is true and
	 *                the slice stops before the end of $str.
	 */
	public function msubstr($str, $start = 0, $length = null, $charset = "utf-8", $suffix = true)
	{
		$total = self::char_count($str, $charset);
		if($length === null)
		{
			$length = $total;
		}

		if(function_exists("mb_substr"))
		{
			$slice = mb_substr($str, $start, $length, $charset);
		}
		elseif(function_exists('iconv_substr'))
		{
			$slice = iconv_substr($str, $start, $length, $charset);
		}
		else
		{
			$re['utf-8']  = "/[\x01-\x7f]|[\xc2-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xff][\x80-\xbf]{3}/";
			$re['gb2312'] = "/[\x01-\x7f]|[\xb0-\xf7][\xa0-\xfe]/";
			$re['gbk']    = "/[\x01-\x7f]|[\x81-\xfe][\x40-\xfe]/";
			$re['big5']   = "/[\x01-\x7f]|[\x81-\xfe]([\x40-\x7e]|[\xa1-\xfe])/";
			$key = strtolower($charset);
			$pattern = isset($re[$key]) ? $re[$key] : $re['utf-8'];
			preg_match_all($pattern, $str, $match);
			$slice = join("", array_slice($match[0], $start, $length));
		}

		if($slice === false)
		{
			$slice = '';
		}

		// Only flag truncation when text actually remains after the slice.
		$begin = $start < 0 ? max(0, $total + $start) : $start;
		$truncated = ($begin + self::char_count($slice, $charset)) < $total;

		return ($suffix && $truncated) ? $slice.'...' : $slice;
	}

	/**
	 * Number of characters (not bytes) in $str.
	 *
	 * @param string $str     String to measure.
	 * @param string $charset Character set of $str.
	 * @return int Character count.
	 */
	public function mstrlen($str, $charset = 'utf-8')
	{
		return self::char_count($str, $charset);
	}

	/**
	 * Shared character counter behind mstrlen() and msubstr().
	 *
	 * Prefers mb_strlen(), then iconv_strlen(); the last-resort regex counts
	 * well-formed UTF-8 sequences only and ignores $charset.
	 *
	 * @param string $str     String to measure.
	 * @param string $charset Character set of $str.
	 * @return int Character count; 0 when iconv cannot decode the input.
	 */
	static private function char_count($str, $charset = 'utf-8')
	{
		if(function_exists('mb_strlen'))
		{
			return mb_strlen($str, $charset);
		}

		if(function_exists('iconv_strlen'))
		{
			$length = iconv_strlen($str, $charset);
			return $length === false ? 0 : $length;
		}

		preg_match_all("/[\x01-\x7f]|[\xc2-\xdf][\x80-\xbf]|\xe0[\xa0-\xbf][\x80-\xbf]|[\xe1-\xef][\x80-\xbf][\x80-\xbf]|\xf0[\x90-\xbf][\x80-\xbf][\x80-\xbf]|[\xf1-\xf7][\x80-\xbf][\x80-\xbf][\x80-\xbf]/", $str, $ar);
		return count($ar[0]);
	}
}