﻿using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;

namespace SpaceBuilder.Common
{
    /// <summary>
    /// <see cref="IUrlParser"/> implementation that extracts player URL, title,
    /// description and thumbnail from the HTML of a tudou.com page.
    /// </summary>
    public class TudouUrlParser : IUrlParser
    {
        // Patterns are constant, so the Regex objects are built once and shared
        // instead of being re-parsed on every call.

        // Matches "iid_code = icode = '<code>'" or "lid_code = lcode = '<code>'" in the page script.
        private static readonly Regex CodeRegex = new Regex(
            @"(iid_code = icode =|lid_code = lcode =)+ '(?<lcode>[a-zA-Z\d-]*)'",
            RegexOptions.IgnoreCase);

        // Matches "iid:<digits>" in the page script.
        private static readonly Regex IidRegex = new Regex(
            @"iid:(?<iid>\d+)",
            RegexOptions.IgnoreCase);

        // Thumbnail URL on the tdimg.com image hosts. The dots are escaped so they
        // only match a literal '.', not an arbitrary character.
        private const string ThumbnailPattern =
            "(thumbnail = pic = '|,pic:\")?(?<thumbnail>http://i[0-9]?\\.tdimg\\.com/[0-9\\/]*/[a-zA-Z]\\.jpg)('|\")";

        private static readonly Regex ThumbnailRegex = new Regex(ThumbnailPattern);

        private static readonly Regex ThumbnailRegexIgnoreCase = new Regex(ThumbnailPattern, RegexOptions.IgnoreCase);

        #region IUrlParser Members

        /// <summary>
        /// Loads the page content for <paramref name="url"/> through
        /// <c>HttpCollects.GetHTMLContent</c> and fills <paramref name="info"/> with the
        /// media type (video), player URL, subject, body and thumbnail URL.
        /// </summary>
        /// <param name="url">Address of the page to parse.</param>
        /// <param name="info">
        /// Object to populate. It is left untouched when no page content is returned.
        /// </param>
        public void ParseUrl(string url, ref UrlInfo info)
        {
            string htmlContent = HttpCollects.GetHTMLContent(url);
            if (string.IsNullOrEmpty(htmlContent))
            {
                return;
            }

            info.MediaType = MediaTypes.Video;
            // Replace the page URL with the address of the embeddable player.
            info.PlayUrl = GetPlayerUrlString(htmlContent, url);
            info.Subject = HttpCollects.GetTitle(htmlContent, true);
            info.Body = HttpCollects.GetDescription(htmlContent, true);
            info.ThumbnailUrl = GetThumbnailUrlString(htmlContent, true);
        }

        #endregion

        /// <summary>
        /// Builds the player (.swf) address from the codes embedded in the page HTML.
        /// </summary>
        /// <param name="html">HTML content of the page.</param>
        /// <param name="url">Original page URL, used as the fallback result.</param>
        /// <returns>
        /// <c>http://www.tudou.com/l/{code}/&amp;iid={iid}/v.swf</c> when both a code and an
        /// <c>iid:</c> value are found; <c>http://www.tudou.com/v/{code}/v.swf</c> when only
        /// the code is found; otherwise <paramref name="url"/> unchanged (including when
        /// <paramref name="html"/> is null or empty).
        /// </returns>
        public string GetPlayerUrlString(string html, string url)
        {
            // Nothing to search: behave like "no match" instead of letting Regex.Match throw.
            if (string.IsNullOrEmpty(html))
            {
                return url;
            }

            Match codeMatch = CodeRegex.Match(html);
            if (!codeMatch.Success)
            {
                return url;
            }

            string lcode = codeMatch.Groups["lcode"].Value;

            Match iidMatch = IidRegex.Match(html);
            if (iidMatch.Success)
            {
                return string.Format("http://www.tudou.com/l/{0}/&iid={1}/v.swf", lcode, iidMatch.Groups["iid"].Value);
            }

            return string.Format("http://www.tudou.com/v/{0}/v.swf", lcode);
        }

        /// <summary>
        /// Extracts the thumbnail image address from the page HTML.
        /// </summary>
        /// <param name="html">HTML content of the page.</param>
        /// <param name="ignoreCase">Whether the pattern is matched case-insensitively.</param>
        /// <returns>
        /// The first matching thumbnail URL, or <see cref="string.Empty"/> when none is found
        /// or <paramref name="html"/> is null or empty.
        /// </returns>
        public string GetThumbnailUrlString(string html, bool ignoreCase)
        {
            // Nothing to search: behave like "no match" instead of letting Regex.Match throw.
            if (string.IsNullOrEmpty(html))
            {
                return string.Empty;
            }

            Regex reg = ignoreCase ? ThumbnailRegexIgnoreCase : ThumbnailRegex;
            Match match = reg.Match(html);

            return match.Success ? match.Groups["thumbnail"].Value : string.Empty;
        }
    }
}
