﻿//------------------------------------------------------------------------------
// <copyright company="Tunynet">
// Copyright (c) Tunynet Inc. All rights reserved.
// </copyright> 
//------------------------------------------------------------------------------


using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using SpaceBuilder.News;
using Lucene.Net.Search;
using Lucene.Net.Index;

using Lucene.Net.QueryParsers;
using SpaceBuilder.Common;
using Lucene.Net.Analysis;
using Lucene.Net.Documents;
using SpaceBuilder.Utils;
using Lucene.Net.Analysis.Standard;

namespace SpaceBuilder.LuceneSearch
{
    /// <summary>
    /// Full-text search manager for news threads.
    /// </summary>
    public class NewsSearchManager : SearchManagerBase<NewsThread>
    {
        // Directory name passed to the base constructor for the news index.
        private static readonly string NewsIndexFileDirectory = "News";

        // Shared instance, created on first call to Instance(); volatile because it is read outside the lock.
        private static volatile NewsSearchManager _self = null;

        // Lock taken while the shared instance is being created.
        private static readonly object lockObject = new object();

        /// <summary>
        /// Private constructor: instances are only created by <see cref="Instance"/>.
        /// </summary>
        /// <param name="indexFileDirectory">Index directory name forwarded to the base class.</param>
        private NewsSearchManager(string indexFileDirectory)
            : base(indexFileDirectory)
        {
        }

        /// <summary>
        /// Returns the shared <see cref="NewsSearchManager"/>, creating it on first use.
        /// </summary>
        /// <returns>The single shared instance.</returns>
        public static NewsSearchManager Instance()
        {
            // Fast path: already created, no lock needed.
            if (_self != null)
                return _self;

            lock (lockObject)
            {
                // Re-check under the lock; another thread may have created it meanwhile.
                if (_self == null)
                    _self = new NewsSearchManager(NewsIndexFileDirectory);
            }

            return _self;
        }

        /// <summary>
        /// Runs a full-text search over the news index.
        /// </summary>
        /// <param name="query">
        /// Search criteria. <c>UserID</c> is applied as a filter; <c>SectionID</c>, <c>PrefixID</c>,
        /// <c>Keyword</c>, <c>TagName</c> and <c>Author</c> each add a mandatory clause when set.
        /// Note that <c>query.Keyword</c> is overwritten with its lower-cased, scrubbed form.
        /// </param>
        /// <returns>
        /// The matching page of news threads with the keyword highlighted in Title and Summary;
        /// an empty data set (never null) when the index files do not exist.
        /// </returns>
        public SearchResultDataSet<NewsThread> Search(NewsThreadFullTextQuery query)
        {
            // No index files yet: return an empty result set rather than null.
            if (!IsIndexFilesExists)
                return new SearchResultDataSet<NewsThread>();

            BooleanQuery currentQuery = new BooleanQuery();
            BooleanQuery queryForFilter = new BooleanQuery();

            // The user restriction goes into the filter, not the main query.
            // NOTE(review): if UserID is the only criterion, currentQuery stays empty;
            // presumably that matches nothing - confirm whether that is intended.
            if (query.UserID > 0)
            {
                Term userIDTerm = new Term(NewsIndexFields.UserID, query.UserID.ToString());
                queryForFilter.Add(new TermQuery(userIDTerm), BooleanClause.Occur.MUST);
            }

            if (query.SectionID > 0)
            {
                if (query.IncludeSectionDescendant)
                {
                    // Build a whitespace-separated list "id child1 child2 ..." and let the
                    // query parser turn it into a query over the SectionID field.
                    QueryParser sectionIDQueryParser = new QueryParser(CurrentLuceneVersion, NewsIndexFields.SectionID, new WhitespaceAnalyzer());

                    List<NewsSection> childSections = NewsSections.GetAllChilds(query.SectionID, false);
                    StringBuilder sectionIDs = new StringBuilder();
                    sectionIDs.Append(query.SectionID.ToString());

                    if (childSections != null)
                    {
                        foreach (NewsSection newsSection in childSections)
                        {
                            sectionIDs.Append(" " + newsSection.SectionID);
                        }
                    }

                    Query sectionIDsQuery = sectionIDQueryParser.Parse(sectionIDs.ToString());
                    currentQuery.Add(sectionIDsQuery, BooleanClause.Occur.MUST);
                }
                else
                {
                    Term sectionIDTerm = new Term(NewsIndexFields.SectionID, query.SectionID.ToString());
                    currentQuery.Add(new TermQuery(sectionIDTerm), BooleanClause.Occur.MUST);
                }
            }

            if (query.PrefixID > 0)
            {
                Term prefixIDTerm = new Term(NewsIndexFields.SubjectPrefixID, query.PrefixID.ToString());
                currentQuery.Add(new TermQuery(prefixIDTerm), BooleanClause.Occur.MUST);
            }

            if (!string.IsNullOrEmpty(query.Keyword))
            {
                // The scrubbed keyword is written back to the query object; it is reused
                // for highlighting at the end of this method.
                query.Keyword = StringUtilsForLucene.LuceneKeywordsScrubber(query.Keyword.ToLower());

                string keywordSegments = string.IsNullOrEmpty(query.Keyword)
                    ? null
                    : SegmentForQueryParser(query.Keyword);

                // Gate on the text that is actually parsed: an empty or blank string
                // must not be handed to the query parser.
                if (keywordSegments != null && keywordSegments.Trim().Length > 0)
                {
                    string[] searchFieldsForKeyword = new string[]
                    {
                        NewsIndexFields.Subject,
                        NewsIndexFields.Body,
                        NewsIndexFields.Tags,
                        NewsIndexFields.Author
                    };

                    MultiFieldQueryParser keywordParser = new MultiFieldQueryParser(CurrentLuceneVersion, searchFieldsForKeyword, GetChineseAnalyzerOfUnTokenized());
                    keywordParser.SetLowercaseExpandedTerms(true);
                    keywordParser.SetDefaultOperator(QueryParser.OR_OPERATOR);
                    Query postKeywordQuery = keywordParser.Parse(keywordSegments);

                    currentQuery.Add(postKeywordQuery, BooleanClause.Occur.MUST);
                }
            }

            if (!string.IsNullOrEmpty(query.TagName))
            {
                string[] tagSegments = SegmentForPhraseQuery(query.TagName);
                if (tagSegments != null && tagSegments.Length > 0)
                    currentQuery.Add(CreateBoostedPhraseQuery(NewsIndexFields.Tags, tagSegments), BooleanClause.Occur.MUST);
            }

            if (!string.IsNullOrEmpty(query.Author))
            {
                string[] authorSegments = SegmentForPhraseQuery(query.Author);
                if (authorSegments != null && authorSegments.Length > 0)
                    currentQuery.Add(CreateBoostedPhraseQuery(NewsIndexFields.Author, authorSegments), BooleanClause.Occur.MUST);
            }

            SortField[] sortFields;
            switch (query.SortBy)
            {
                case FullTextQueryNewsSortBy.DateCreated:
                    // Sorted on the ThreadID field in reverse order.
                    sortFields = new SortField[] { new SortField(NewsIndexFields.ThreadID, SortField.INT, true) };
                    break;
                default:
                    sortFields = new SortField[] { SortField.FIELD_SCORE };
                    break;
            }

            // Only wrap the filter query when it actually has clauses.
            Filter filter = null;
            if (queryForFilter.Clauses().Count > 0)
                filter = new QueryWrapperFilter(queryForFilter);

            SearchResultDataSet<NewsThread> pds = Search(currentQuery, filter, sortFields, query.PageIndex, query.PageSize);
            foreach (var item in pds.Records)
            {
                item.Title = HighlighterForKeyWord(item.Title, query.Keyword);
                item.Summary = HighlighterForKeyWord(item.Summary, query.Keyword);
            }
            return pds;
        }

        /// <summary>
        /// Builds a phrase query over one field from already-segmented terms, using the
        /// shared phrase slop and a boost of 3^5.
        /// </summary>
        /// <param name="fieldName">Index field the terms are matched against.</param>
        /// <param name="segments">Terms of the phrase, in order; must be non-null.</param>
        /// <returns>The configured phrase query.</returns>
        private PhraseQuery CreateBoostedPhraseQuery(string fieldName, string[] segments)
        {
            PhraseQuery phraseQuery = new PhraseQuery();
            foreach (string segment in segments)
                phraseQuery.Add(new Term(fieldName, segment));

            phraseQuery.SetSlop(PhraseQuerySlop);
            phraseQuery.SetBoost((float)Math.Pow(3, 5));
            return phraseQuery;
        }

        /// <summary>
        /// Gets news threads related to the given thread (the thread itself is excluded).
        /// Relatedness is based on the thread's tags and on its title matched against
        /// the Subject and Body fields.
        /// </summary>
        /// <param name="thread">Thread to find related news for.</param>
        /// <param name="topNumber">Maximum number of records to return.</param>
        /// <returns>
        /// Up to <paramref name="topNumber"/> related threads; an empty collection (never null)
        /// when the index files do not exist, <paramref name="thread"/> is null, or there is
        /// nothing to search on.
        /// </returns>
        public ICollection<NewsThread> GetCorrelativeThreads(NewsThread thread, int topNumber)
        {
            // No index files or no thread: return an empty list rather than null.
            if (!IsIndexFilesExists || thread == null)
                return new List<NewsThread>();

            BooleanQuery currentQuery = new BooleanQuery();

            // One optional, boosted phrase clause per tag.
            if (thread.Tags != null && thread.Tags.Count > 0)
            {
                foreach (var tag in thread.Tags)
                {
                    string[] tagSegments = SegmentForPhraseQuery(tag);
                    if (tagSegments != null && tagSegments.Length > 0)
                    {
                        PhraseQuery tagQuery = new PhraseQuery();
                        foreach (var tagSegment in tagSegments)
                            tagQuery.Add(new Term(NewsIndexFields.Tags, tagSegment));

                        tagQuery.SetSlop(PhraseQuerySlop);
                        tagQuery.SetBoost((float)Math.Pow(3, 5));
                        currentQuery.Add(tagQuery, BooleanClause.Occur.SHOULD);
                    }
                }
            }

            // Title terms are matched against both Subject and Body. An empty or blank
            // segmentation result is skipped instead of being handed to the query parser.
            string scrubbedTitle = string.IsNullOrEmpty(thread.Title)
                ? null
                : StringUtilsForLucene.LuceneKeywordsScrubber(thread.Title);
            string titleSegments = string.IsNullOrEmpty(scrubbedTitle)
                ? null
                : SegmentForQueryParser(scrubbedTitle);

            if (titleSegments != null && titleSegments.Trim().Length > 0)
            {
                QueryParser subjectQueryParser = new QueryParser(CurrentLuceneVersion, NewsIndexFields.Subject, GetChineseAnalyzerOfUnTokenized());
                currentQuery.Add(subjectQueryParser.Parse(titleSegments), BooleanClause.Occur.SHOULD);
                QueryParser bodyQueryParser = new QueryParser(CurrentLuceneVersion, NewsIndexFields.Body, GetChineseAnalyzerOfUnTokenized());
                currentQuery.Add(bodyQueryParser.Parse(titleSegments), BooleanClause.Occur.SHOULD);
            }

            // Neither tags nor title produced a clause: nothing to search for.
            if (currentQuery.Clauses().Count == 0)
                return new List<NewsThread>();

            // Ask for one extra hit because the thread itself is normally among the results.
            ICollection<NewsThread> threads = Search(currentQuery, null, null, topNumber + 1);
            if (threads == null)
                return new List<NewsThread>();

            // Drop the thread itself (every copy, should the index contain duplicates) and
            // cap at topNumber. This is safe on an empty result and keeps all hits when
            // fewer than topNumber + 1 were found.
            return threads
                .Where(n => n.ThreadID != thread.ThreadID)
                .Take(topNumber)
                .ToList();
        }

        /// <summary>
        /// Builds the news index under <paramref name="indexPath"/>, creating the directory
        /// if needed and indexing all threads returned by NewsThreads.GetThreadsForAdmin in pages.
        /// </summary>
        /// <param name="indexPath">Physical directory the index is written to.</param>
        /// <exception cref="ApplicationException">
        /// The index directory could not be created; the underlying error is the inner exception.
        /// </exception>
        public override void InitializeIndex(string indexPath)
        {
            if (!System.IO.Directory.Exists(indexPath))
            {
                try
                {
                    System.IO.Directory.CreateDirectory(indexPath);
                }
                catch (Exception ex)
                {
                    // Report the path that actually failed and keep the root cause attached.
                    throw new ApplicationException(string.Format("create Directory '{0}' failed", indexPath), ex);
                }
            }

            #region Index news threads

            int indexPageSize = 2000;
            // True only for the first batch, which is passed to Insert so it creates the index.
            bool createIndexFile = true;
            // The first page is fetched up front to learn the total record count.
            PagingDataSet<NewsThread> pds = NewsThreads.GetThreadsForAdmin(indexPageSize, 1, null, -1, -1, string.Empty);
            double tIndex = Convert.ToDouble(pds.TotalRecords) / Convert.ToDouble(indexPageSize);
            int indexPageIndex = (int)Math.Ceiling(tIndex);

            if (pds.TotalRecords > 0)
            {
                // Index in several batches, one page at a time.
                for (int i = 1; i <= indexPageIndex; i++)
                {
                    // Page 1 is already loaded above.
                    if (i != 1)
                        pds = NewsThreads.GetThreadsForAdmin(indexPageSize, i, null, -1, -1, string.Empty);
                    Insert(pds.Records, indexPath, createIndexFile);
                    if (createIndexFile)
                        createIndexFile = false;
                }
            }

            #endregion
        }

        /// <summary>
        /// Converts an index Document into a NewsThread.
        /// </summary>
        /// <param name="doc">Document read from the news index.</param>
        /// <returns>
        /// A NewsThread populated from the stored fields. Numeric fields that cannot be parsed
        /// become 0, except AuditingStatus, which falls back to AuditingStatuses.Success.
        /// </returns>
        protected override NewsThread ConvertDocumentToObj(Document doc)
        {
            NewsThread newsThread = new NewsThread();

            int userID;
            int.TryParse(doc.Get(NewsIndexFields.UserID), out userID);
            newsThread.UserID = userID;

            newsThread.Contributor = doc.Get(NewsIndexFields.Author);

            int sectionID;
            int.TryParse(doc.Get(NewsIndexFields.SectionID), out sectionID);
            newsThread.SectionID = sectionID;

            int threadID;
            int.TryParse(doc.Get(NewsIndexFields.ThreadID), out threadID);
            newsThread.ThreadID = threadID;

            newsThread.Title = doc.Get(NewsIndexFields.Subject);
            // The stored body text is exposed as the summary.
            newsThread.Summary = doc.Get(NewsIndexFields.Body);

            int prefixID;
            int.TryParse(doc.Get(NewsIndexFields.SubjectPrefixID), out prefixID);
            newsThread.PrefixID = prefixID;

            // TryParse sets its out argument to 0 on failure, so the Success default
            // has to be applied after the call, not before it.
            int auditingStatusValue;
            if (!int.TryParse(doc.Get(NewsIndexFields.AuditingStatus), out auditingStatusValue))
                auditingStatusValue = (int)AuditingStatuses.Success;
            newsThread.AuditingStatus = (AuditingStatuses)auditingStatusValue;

            try
            {
                newsThread.PostDate = DateTools.StringToDate(doc.Get(NewsIndexFields.PostDate));
            }
            catch
            {
                // Best effort: a missing or malformed stored date leaves PostDate unassigned here.
            }

            return newsThread;
        }


        /// <summary>
        /// Converts a NewsThread into a Document for storage in the index.
        /// </summary>
        /// <param name="thread">Thread to index.</param>
        /// <returns>
        /// The Document to index, or null when <paramref name="thread"/> is null or its
        /// auditing status is Pending or Fail.
        /// </returns>
        protected override Document ConvertObjToDocument(NewsThread thread)
        {
            if (thread == null)
                return null;

            // Threads pending review or rejected by review must not be added to the index.
            if (thread.AuditingStatus == AuditingStatuses.Pending || thread.AuditingStatus == AuditingStatuses.Fail)
                return null;

            // Text values are lower-cased before indexing. Missing values are indexed as
            // empty strings so that a null property does not abort indexing.
            string author = (thread.Contributor ?? string.Empty).ToLower();
            string subject = (thread.Title ?? string.Empty).ToLower();
            string body = (HtmlUtils.StripAllTags(thread.GetBody(true)) ?? string.Empty).ToLower();

            Document doc = new Document();
            Field field;

            // Identifier fields are stored and indexed as single, un-analyzed terms.
            field = new Field(NewsIndexFields.UserID, thread.UserID.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED);
            doc.Add(field);

            field = new Field(NewsIndexFields.SectionID, thread.SectionID.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED);
            doc.Add(field);

            field = new Field(NewsIndexFields.ThreadID, thread.ThreadID.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED);
            doc.Add(field);

            field = new Field(NewsIndexFields.Author, author, Field.Store.YES, Field.Index.NOT_ANALYZED);
            doc.Add(field);

            // Subject is analyzed and boosted above the body.
            field = new Field(NewsIndexFields.Subject, subject, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES);
            field.SetBoost(2.0F);
            doc.Add(field);

            // Body is indexed with HTML tags stripped.
            field = new Field(NewsIndexFields.Body, body, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES);
            doc.Add(field);

            field = new Field(NewsIndexFields.SubjectPrefixID, thread.PrefixID.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED);
            doc.Add(field);

            // One Tags field per tag; null or empty tags are skipped.
            if (thread.Tags != null)
            {
                foreach (var tag in thread.Tags)
                {
                    if (string.IsNullOrEmpty(tag))
                        continue;

                    field = new Field(NewsIndexFields.Tags, tag.ToLower(), Field.Store.YES, Field.Index.ANALYZED);
                    field.SetBoost(2.0F);
                    doc.Add(field);
                }
            }

            field = new Field(NewsIndexFields.AuditingStatus, ((int)thread.AuditingStatus).ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED);
            doc.Add(field);

            // The post date is stored at day resolution.
            field = new Field(NewsIndexFields.PostDate, DateTools.DateToString(thread.PostDate, DateTools.Resolution.DAY), Field.Store.YES, Field.Index.NOT_ANALYZED);
            doc.Add(field);

            return doc;
        }

    }
}
