﻿using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;

namespace SpaceBuilder.Common
{
    /// <summary>
    /// <see cref="IUrlParser"/> implementation for Sina video pages
    /// (host <c>you.video.sina.com.cn</c>). It rewrites a page URL to the
    /// embeddable player URL and extracts title, description and thumbnail
    /// from the page HTML.
    /// </summary>
    public class SinaUrlParser : IUrlParser
    {
        /// <summary>Format of the embeddable player URL; {0} is the video id.</summary>
        private const string PlayerFormat = @"http://p.you.video.sina.com.cn/player/outer_player.swf?auto=1&vid={0}&autoplay=1";

        // Literal dots are escaped so that '.' cannot match an arbitrary character.
        private const string PlayerPagePattern = @"^http://you\.video\.sina\.com\.cn/((b/(?<getvid>\d+)-(?<getuid>\d+)\.html)|(pg/topicdetail/topicPlay\.php\?([a-zA-Z\d_=&]*)tid=(?<gettid>\d+)&uid=(?<getuid>\d+)([&t=\d]*)#(?<getvid>\d+)))$";
        private const string JpgThumbnailPattern = @"'http://(?<getcontent1>[a-zA-Z\d]+)\.v\.iask\.com/(?<getcontent2>[a-zA-Z\d\/_]+)\.jpg'";
        private const string GifThumbnailPattern = @"http://(?<getcontent3>[a-zA-Z\d]+)\.sinaimg\.com\.cn/(?<getcontent4>[a-zA-Z\d\/]+)\.gif";

        // CultureInvariant keeps case-insensitive matching independent of the
        // current thread culture (e.g. Turkish dotted/dotless 'i').
        private const RegexOptions IgnoreCaseOptions = RegexOptions.IgnoreCase | RegexOptions.CultureInvariant;

        // Regex instances are immutable, so each pattern is built once and shared
        // instead of being re-parsed on every call.
        private static readonly Regex PlayerPageRegex = new Regex(PlayerPagePattern, IgnoreCaseOptions);
        private static readonly Regex JpgThumbnailRegex = new Regex(JpgThumbnailPattern);
        private static readonly Regex JpgThumbnailRegexIgnoreCase = new Regex(JpgThumbnailPattern, IgnoreCaseOptions);
        private static readonly Regex GifThumbnailRegex = new Regex(GifThumbnailPattern);
        private static readonly Regex GifThumbnailRegexIgnoreCase = new Regex(GifThumbnailPattern, IgnoreCaseOptions);

        #region IUrlParser Members

        /// <summary>
        /// Fills <paramref name="info"/> from a Sina video page URL.
        /// <c>PlayUrl</c> and <c>MediaType</c> are always set; <c>Subject</c>,
        /// <c>Body</c> and <c>ThumbnailUrl</c> are set only when
        /// <c>HttpCollects.GetHTMLContent</c> returns a non-empty string.
        /// </summary>
        /// <param name="url">Address of the video page.</param>
        /// <param name="info">Object that receives the parsed values.</param>
        /// <exception cref="ArgumentNullException"><paramref name="url"/> is null.</exception>
        public void ParseUrl(string url, ref UrlInfo info)
        {
            // Rewrite the page URL to the player address.
            info.PlayUrl = GetPlayerUrlString(url);
            info.MediaType = MediaTypes.Video;

            string htmlContent = HttpCollects.GetHTMLContent(url);
            if (string.IsNullOrEmpty(htmlContent))
            {
                // No page content available: leave the remaining fields untouched.
                return;
            }

            // NOTE(review): the 'true' passed to GetTitle/GetDescription is presumably
            // an ignore-case flag, mirroring GetThumbnailUrlString - confirm in HttpCollects.
            info.Subject = HttpCollects.GetTitle(htmlContent, true);
            info.Body = HttpCollects.GetDescription(htmlContent, true);
            info.ThumbnailUrl = GetThumbnailUrlString(htmlContent, true);
        }

        #endregion

        /// <summary>
        /// Converts a Sina video page URL into the URL of the embeddable player.
        /// Two page layouts are recognised (matched case-insensitively): the
        /// <c>/b/{vid}-{uid}.html</c> form and the <c>topicPlay.php</c> form whose
        /// video id follows the <c>#</c> fragment marker.
        /// </summary>
        /// <param name="url">Address of the video page.</param>
        /// <returns>
        /// The player URL built from the video id when <paramref name="url"/> matches
        /// one of the recognised layouts; otherwise <paramref name="url"/> unchanged.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="url"/> is null.</exception>
        public string GetPlayerUrlString(string url)
        {
            if (url == null)
            {
                // Same exception type Regex.Match raised before, now with the right parameter name.
                throw new ArgumentNullException("url");
            }

            Match match = PlayerPageRegex.Match(url);
            if (!match.Success)
            {
                return url;
            }

            // Both alternatives of the pattern capture the video id under the same group name.
            return string.Format(PlayerFormat, match.Groups["getvid"].Value);
        }

        /// <summary>
        /// Extracts the thumbnail address from the HTML of a video page.
        /// A single-quoted <c>.jpg</c> address on a <c>v.iask.com</c> host is preferred;
        /// if none is found, the first <c>.gif</c> address on a <c>sinaimg.com.cn</c>
        /// host is used.
        /// </summary>
        /// <param name="html">HTML document of the page.</param>
        /// <param name="ignoreCase">
        /// When true, both patterns are matched case-insensitively; the fixed parts of
        /// the returned address are always emitted in lower case.
        /// </param>
        /// <returns>
        /// The thumbnail address, or <see cref="string.Empty"/> when
        /// <paramref name="html"/> is null or empty or contains no recognised address.
        /// </returns>
        public string GetThumbnailUrlString(string html, bool ignoreCase)
        {
            if (string.IsNullOrEmpty(html))
            {
                return string.Empty;
            }

            Regex jpgRegex = ignoreCase ? JpgThumbnailRegexIgnoreCase : JpgThumbnailRegex;
            Match jpgMatch = jpgRegex.Match(html);
            if (jpgMatch.Success)
            {
                return string.Format(
                    "http://{0}.v.iask.com/{1}.jpg",
                    jpgMatch.Groups["getcontent1"].Value,
                    jpgMatch.Groups["getcontent2"].Value);
            }

            // The fallback honours ignoreCase as well; previously it was always case-sensitive.
            Regex gifRegex = ignoreCase ? GifThumbnailRegexIgnoreCase : GifThumbnailRegex;
            Match gifMatch = gifRegex.Match(html);
            if (gifMatch.Success)
            {
                return string.Format(
                    "http://{0}.sinaimg.com.cn/{1}.gif",
                    gifMatch.Groups["getcontent3"].Value,
                    gifMatch.Groups["getcontent4"].Value);
            }

            return string.Empty;
        }
    }
}
