﻿using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;

namespace SpaceBuilder.Common
{
    /// <summary>
    /// Parses Tudou playlist page URLs
    /// (<c>http://www.tudou.com/playlist/playindex.do?lid=...</c>) into a player
    /// address, title, description and thumbnail address.
    /// </summary>
    /// <remarks>
    /// Two URL shapes are recognised:
    /// <list type="bullet">
    /// <item><description><c>?lid=N</c>: the player and thumbnail addresses are read
    /// from the downloaded page.</description></item>
    /// <item><description><c>?lid=N&amp;iid=N&amp;cid=N</c>: the player and thumbnail
    /// addresses are derived from the <c>iid</c> value without inspecting the page.</description></item>
    /// </list>
    /// Any other URL is passed through unchanged as the player address and yields an
    /// empty thumbnail address.
    /// </remarks>
    public class TudouPlayListUrlParser : IUrlParser
    {
        // Literal dots are escaped so that look-alike hosts (e.g. "wwwXtudou.com") do not match.

        // Playlist URL that carries only the lid parameter.
        private const string PlaylistOnlyUrlPattern = @"^http://www\.tudou\.com/playlist/playindex\.do\?lid=\d+$";

        // Playlist URL that also names a single item through iid (followed by cid).
        private const string PlaylistItemUrlPattern = @"^http://www\.tudou\.com/playlist/playindex\.do\?lid=\d+&iid=(?<iid>\d+)&cid=\d+$";

        // Player link as it appears inside a playlist page; the hyphen is placed last so it is unambiguously literal.
        private const string PagePlayerUrlPattern = @"http://www\.tudou\.com/l/(?<getcontent>[a-zA-Z\d_-]+)";

        // Thumbnail image link as it appears inside a playlist page.
        private const string PageThumbnailUrlPattern = @"http://i01\.img\.tudou\.com/data/imgs/i/(?<getcontent>[a-zA-Z\d/]+)\.jpg";

        #region IUrlParser Members

        /// <summary>
        /// Fills <paramref name="info"/> with the player address, media type, title,
        /// description and thumbnail address for <paramref name="url"/>.
        /// </summary>
        /// <param name="url">Page URL to parse.</param>
        /// <param name="info">Object that receives the parsed values.</param>
        /// <remarks>
        /// The page is downloaded once and the same HTML is reused for every value.
        /// When the download yields nothing, only <c>PlayUrl</c> and <c>MediaType</c> are set.
        /// </remarks>
        public void ParseUrl(string url, ref UrlInfo info)
        {
            // URL matching here is always case-insensitive.
            bool isPlaylistOnly = Regex.IsMatch(url, PlaylistOnlyUrlPattern, RegexOptions.IgnoreCase);

            string htmlContent;
            if (isPlaylistOnly)
            {
                // The player address has to be read from the page, so download first
                // and keep the HTML for the title/description/thumbnail below.
                htmlContent = HttpCollects.GetHTMLContent(url);
                string pagePlayerUrl = FindPlayerUrl(htmlContent, true);
                info.PlayUrl = string.IsNullOrEmpty(pagePlayerUrl) ? url : pagePlayerUrl;
            }
            else
            {
                // The player address comes from the URL alone; no page access is needed for it.
                info.PlayUrl = GetItemPlayerUrl(url);
                htmlContent = HttpCollects.GetHTMLContent(url);
            }

            info.MediaType = MediaTypes.Video;
            if (string.IsNullOrEmpty(htmlContent))
            {
                return;
            }

            info.Subject = HttpCollects.GetTitle(htmlContent, true);
            info.Body = HttpCollects.GetDescription(htmlContent, true);
            info.ThumbnailUrl = isPlaylistOnly
                ? FindThumbnailUrl(htmlContent, true)
                : GetItemThumbnailUrl(url, RegexOptions.IgnoreCase);
        }

        #endregion

        /// <summary>
        /// Converts a playlist page URL into a player address.
        /// </summary>
        /// <param name="url">Page URL.</param>
        /// <returns>
        /// For a lid-only URL: the player link found in the downloaded page, or
        /// <paramref name="url"/> when none is found. For a URL with <c>iid</c>: the
        /// outside-player SWF address built from that <c>iid</c>. For anything else:
        /// <paramref name="url"/> unchanged.
        /// </returns>
        public string GetPlayerUrlString(string url)
        {
            if (Regex.IsMatch(url, PlaylistOnlyUrlPattern, RegexOptions.IgnoreCase))
            {
                string pagePlayerUrl = GetHtmlPlayerUrlString(url, true);
                return string.IsNullOrEmpty(pagePlayerUrl) ? url : pagePlayerUrl;
            }

            return GetItemPlayerUrl(url);
        }

        /// <summary>
        /// Downloads the page at <paramref name="url"/> and extracts the player link from it.
        /// </summary>
        /// <param name="url">URL of the page to download.</param>
        /// <param name="ignoreCase">Whether the link is matched case-insensitively.</param>
        /// <returns>The player address, or an empty string when the page is empty or has no such link.</returns>
        public string GetHtmlPlayerUrlString(string url, bool ignoreCase)
        {
            return FindPlayerUrl(HttpCollects.GetHTMLContent(url), ignoreCase);
        }

        /// <summary>
        /// Resolves the thumbnail address for a playlist page URL.
        /// </summary>
        /// <param name="url">Page URL.</param>
        /// <param name="ignoreCase">Whether the URL and the page content are matched case-insensitively.</param>
        /// <returns>
        /// For a lid-only URL: the thumbnail link found in the downloaded page. For a URL
        /// with <c>iid</c>: an address derived from that <c>iid</c>. An empty string when
        /// neither applies or nothing is found.
        /// </returns>
        public string GetThumbnailUrlString(string url, bool ignoreCase)
        {
            RegexOptions options = ToRegexOptions(ignoreCase);
            if (Regex.IsMatch(url, PlaylistOnlyUrlPattern, options))
            {
                // lid-only URL: the thumbnail address is taken directly from the page.
                return FindThumbnailUrl(HttpCollects.GetHTMLContent(url), ignoreCase);
            }

            // URL with iid: the thumbnail address is derived from the iid value.
            return GetItemThumbnailUrl(url, options);
        }

        // Maps the public ignoreCase flag onto regex options.
        private static RegexOptions ToRegexOptions(bool ignoreCase)
        {
            return ignoreCase ? RegexOptions.IgnoreCase : RegexOptions.None;
        }

        // Builds the outside-player SWF address from the iid in the URL; returns the URL unchanged when it has no iid form.
        private static string GetItemPlayerUrl(string url)
        {
            Match match = Regex.Match(url, PlaylistItemUrlPattern, RegexOptions.IgnoreCase);
            if (!match.Success)
            {
                return url;
            }

            return string.Format("http://www.tudou.com/player/outside/player_outside.swf?iid={0}&default_skin=http://js.tudouui.com/bin/player2/outside/Skin_outside_17.swf", match.Groups["iid"].Value);
        }

        // Extracts the first player link from already-downloaded HTML; empty string when absent.
        private static string FindPlayerUrl(string htmlContent, bool ignoreCase)
        {
            if (string.IsNullOrEmpty(htmlContent))
            {
                return string.Empty;
            }

            Match match = Regex.Match(htmlContent, PagePlayerUrlPattern, ToRegexOptions(ignoreCase));
            if (!match.Success)
            {
                return string.Empty;
            }

            return string.Format("http://www.tudou.com/l/{0}", match.Groups["getcontent"].Value);
        }

        // Extracts the first thumbnail link from already-downloaded HTML; empty string when absent.
        private static string FindThumbnailUrl(string htmlContent, bool ignoreCase)
        {
            // Regex.Match throws ArgumentNullException on null input, so a failed download must be handled here.
            if (string.IsNullOrEmpty(htmlContent))
            {
                return string.Empty;
            }

            Match match = Regex.Match(htmlContent, PageThumbnailUrlPattern, ToRegexOptions(ignoreCase));
            if (!match.Success)
            {
                return string.Empty;
            }

            return string.Format("http://i01.img.tudou.com/data/imgs/i/{0}.jpg", match.Groups["getcontent"].Value);
        }

        // Derives the thumbnail address from the iid in the URL; empty string when the URL has no iid form.
        private static string GetItemThumbnailUrl(string url, RegexOptions options)
        {
            Match match = Regex.Match(url, PlaylistItemUrlPattern, options);
            if (!match.Success)
            {
                return string.Empty;
            }

            // The iid is left-padded to nine digits and split into three 3-digit path segments.
            // NOTE(review): an iid longer than nine digits loses its trailing digits here — confirm against Tudou's image layout.
            string paddedIid = match.Groups["iid"].Value.PadLeft(9, '0');
            string path = paddedIid.Substring(0, 3) + "/" + paddedIid.Substring(3, 3) + "/" + paddedIid.Substring(6, 3);
            return string.Format("http://i01.img.tudou.com/data/imgs/i/{0}/p.jpg", path);
        }
    }
}
