﻿using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;

namespace SpaceBuilder.Common
{
    public class SinaNewsUrlParser : IUrlParser
    {
        #region IUrlParser members
        /// <summary>
        /// Fills <paramref name="info"/> with the video details extracted from the page at <paramref name="url"/>.
        /// </summary>
        /// <param name="url">Address of the page to inspect; also passed on as the player-URL fallback.</param>
        /// <param name="info">
        /// Receives the player URL, media type, subject, body and thumbnail URL.
        /// It is left untouched when no page content is obtained.
        /// </param>
        public void ParseUrl(string url, ref UrlInfo info)
        {
            // Obtain the page markup; every field below is derived from it.
            string pageHtml = HttpCollects.GetHTMLContent(url);

            if (!string.IsNullOrEmpty(pageHtml))
            {
                // Player address embedded in the page (the page URL itself when none is found).
                info.PlayUrl = GetPlayerUrlString(pageHtml, url);
                info.MediaType = MediaTypes.Video;
                info.Subject = HttpCollects.GetTitle(pageHtml, true);
                info.Body = HttpCollects.GetDescription(pageHtml, true);
                // Thumbnail address embedded in the page, matched case-insensitively.
                info.ThumbnailUrl = GetThumbnailUrlString(pageHtml, true);
            }
        }
        #endregion

        // Matches the embeddable Sina flash player address. The dots are escaped so that only a
        // literal '.' is accepted; built once because the pattern never changes.
        private static readonly Regex PlayerUrlRegex = new Regex(
            "http://you\\.video\\.sina\\.com\\.cn/api/sinawebApi/outplayrefer\\.php/vid=[0-9]*_[0-9]*_[a-zA-Z0-9\\+]*/s\\.swf",
            RegexOptions.IgnoreCase);

        /// <summary>
        /// Extracts the Sina player (.swf) address from a page's HTML.
        /// </summary>
        /// <param name="html">HTML of the page to search; may be null or empty.</param>
        /// <param name="url">Value returned when no player address is found in <paramref name="html"/>.</param>
        /// <returns>
        /// The first player address found in <paramref name="html"/> (matched case-insensitively),
        /// otherwise <paramref name="url"/>.
        /// </returns>
        public string GetPlayerUrlString(string html, string url)
        {
            // Nothing to search: fall back to the page URL instead of letting Regex.Match throw on null.
            if (string.IsNullOrEmpty(html))
            {
                return url;
            }

            Match match = PlayerUrlRegex.Match(html);
            return match.Success ? match.Value : url;
        }

        // Thumbnail served from *.v.iask.com: captures the sub-domain and the image path
        // (digits, '/', '_' and '-'). Dots are escaped so only a literal '.' is accepted.
        private const string IaskThumbnailPattern =
            "src=\"http://(?<host>[a-zA-Z\\d]+)\\.v\\.iask\\.com/(?<path>[\\d\\/_-]+)\\.jpg\"";

        // Thumbnail served from *.sinaimg.cn: same shape, but the path may also contain letters.
        private const string SinaImgThumbnailPattern =
            "src=\"http://(?<host>[a-zA-Z\\d]+)\\.sinaimg\\.cn/(?<path>[a-zA-Z\\d\\/_-]+)\\.jpg\"";

        /// <summary>
        /// Extracts the thumbnail image address from a page's HTML.
        /// </summary>
        /// <param name="html">HTML of the page to search; may be null or empty.</param>
        /// <param name="ignoreCase">When true, the page is matched case-insensitively.</param>
        /// <returns>
        /// The first <c>*.v.iask.com</c> .jpg address referenced by a <c>src="..."</c> attribute,
        /// otherwise the first <c>*.sinaimg.cn</c> one, otherwise an empty string.
        /// </returns>
        public string GetThumbnailUrlString(string html, bool ignoreCase)
        {
            // Nothing to search: report "no thumbnail" instead of letting Regex.Match throw on null.
            if (string.IsNullOrEmpty(html))
            {
                return string.Empty;
            }

            RegexOptions options = ignoreCase ? RegexOptions.IgnoreCase : RegexOptions.None;

            // The iask host takes precedence; sinaimg is only consulted when iask yields nothing.
            string thumbnailUrl = FindThumbnailUrl(html, IaskThumbnailPattern, options, "http://{0}.v.iask.com/{1}.jpg");
            if (thumbnailUrl == null)
            {
                thumbnailUrl = FindThumbnailUrl(html, SinaImgThumbnailPattern, options, "http://{0}.sinaimg.cn/{1}.jpg");
            }

            return thumbnailUrl ?? string.Empty;
        }

        // Returns urlFormat filled with the "host" and "path" groups of the first match of pattern in html,
        // or null when the pattern does not match.
        private static string FindThumbnailUrl(string html, string pattern, RegexOptions options, string urlFormat)
        {
            // The static Regex.Match overload reuses the framework's cached regex for repeated patterns.
            Match match = Regex.Match(html, pattern, options);
            if (!match.Success)
            {
                return null;
            }

            return string.Format(urlFormat, match.Groups["host"].Value, match.Groups["path"].Value);
        }
    }
}
