﻿using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Data;
using System.Text.RegularExpressions;
/**********************************/
//作者：速分享
//地址：http://www.sulianqi.cn
/**********************************/
namespace Common
{
    public class SegmentHelper
    {
        /// <summary>
        /// Splits the input into coarse chunks before dictionary matching.
        /// Runs of ASCII word characters (A-Z, a-z, 0-9, _) and runs of all other
        /// word characters (for example Chinese) become separate chunks; every
        /// other character acts as a delimiter.
        /// </summary>
        /// <param name="inputStr">Text to split. Null or empty yields an empty list.</param>
        /// <returns>
        /// All ASCII chunks in input order, followed by all non-ASCII chunks in input order.
        /// </returns>
        private static List<string> GetWords(string inputStr)
        {
            List<string> words = new List<string>();
            if (string.IsNullOrEmpty(inputStr))
            {
                return words;
            }

            // Under RegexOptions.ECMAScript, \w only covers [A-Za-z0-9_].
            // First pass: blank out everything that is NOT an ASCII word character.
            string asciiPart = Regex.Replace(inputStr, @"\W", " ", RegexOptions.ECMAScript);
            // Second pass: blank out the ASCII word characters, keeping everything else.
            string otherPart = Regex.Replace(inputStr, @"\w", " ", RegexOptions.ECMAScript);

            // The explicit separator matters: without it, an ASCII chunk at the end of the
            // input would fuse with a non-ASCII chunk at its start ("你好world" -> "world你好").
            string combined = asciiPart + " " + otherPart;

            // This final match is NOT ECMAScript, so \w+ also matches non-ASCII word characters.
            foreach (Match match in Regex.Matches(combined, @"\w+"))
            {
                words.Add(match.Value);
            }
            return words;
        }
        // Built once and shared; constructing three Regex objects on every call is wasted work.
        private static readonly Regex LetterPattern = new Regex(@"^[A-Za-z]+$");
        private static readonly Regex LeadingDigitPattern = new Regex(@"^[0-9]");
        private static readonly Regex HanPattern = new Regex(@"^[\u4e00-\u9fa5]+$");

        /// <summary>
        /// Classifies a token by the kind of characters it contains.
        /// </summary>
        /// <param name="value">Token to classify. Null or empty is classified as "qt".</param>
        /// <returns>
        /// "zm" when the token consists only of ASCII letters;
        /// "sz" when the token starts with an ASCII digit;
        /// "hz" when the token consists only of characters in the range U+4E00..U+9FA5;
        /// "qt" for anything else.
        /// </returns>
        public static string CheckType(string value)
        {
            // An empty token is neither letters, digits nor Chinese. The previous "{0,}"
            // quantifier on the Chinese pattern let "" be reported as "hz".
            if (string.IsNullOrEmpty(value))
            {
                return "qt";
            }

            if (LetterPattern.IsMatch(value))
            {
                return "zm";
            }

            // NOTE(review): only the first character is checked, so "1080p" is "sz".
            // Kept as-is because DoSplitKey uses "sz" to keep digit-led tokens whole;
            // confirm with callers before tightening this to "^[0-9]+$".
            if (LeadingDigitPattern.IsMatch(value))
            {
                return "sz";
            }

            if (HanPattern.IsMatch(value))
            {
                return "hz";
            }

            return "qt";
        }
        /// <summary>
        /// Extracts keywords from the input text.
        /// The text is first split into coarse chunks by <c>GetWords</c>; each chunk is then
        /// passed to <c>DoSplitKey</c> and the results are concatenated in chunk order.
        /// </summary>
        /// <param name="inputStr">Text to segment. Null or empty yields an empty list.</param>
        /// <param name="startLen">Shortest candidate keyword length, in characters.</param>
        /// <param name="maxlen">Longest candidate keyword length, in characters.</param>
        /// <returns>
        /// The keywords found. The same keyword can appear more than once when it occurs
        /// in different chunks, because results are simply appended per chunk.
        /// </returns>
        public static List<string> SplitKey(string inputStr, int startLen = 2, int maxlen = 4)
        {
            List<string> result = new List<string>();
            if (string.IsNullOrEmpty(inputStr))
            {
                return result;
            }

            foreach (string word in GetWords(inputStr))
            {
                if (string.IsNullOrEmpty(word))
                {
                    continue;
                }

                // Forward the caller's startLen; it used to be replaced by a literal 2,
                // which made the parameter a no-op.
                result.AddRange(DoSplitKey(word, startLen, maxlen));
            }
            return result;
        }

        /// <summary>
        /// Turns a single chunk into keywords.
        /// Chunks classified as "zm" or "sz" by <c>CheckType</c>, and chunks shorter than three
        /// characters, are returned whole. Any other chunk is scanned for substrings
        /// that appear in the word list returned by <c>GetKeyList</c>.
        /// </summary>
        /// <param name="inputStr">Chunk to process. Null or empty yields an empty list.</param>
        /// <param name="startLen">Shortest substring length to try; values below 1 are treated as 1.</param>
        /// <param name="charLen">Longest substring length to try.</param>
        /// <returns>
        /// Either the chunk itself, or the distinct dictionary words found in it, ordered by
        /// start position and then by length.
        /// </returns>
        private static List<string> DoSplitKey(string inputStr, int startLen = 2, int charLen = 4)
        {
            List<string> list = new List<string>();
            if (string.IsNullOrEmpty(inputStr))
            {
                return list;
            }

            string type = CheckType(inputStr);
            if (type == "zm" || type == "sz" || inputStr.Length < 3)
            {
                list.Add(inputStr);
                return list;
            }

            // Every remaining chunk has length >= 3. The previous "> 3" test silently
            // dropped chunks of exactly three characters.
            // The word list is only fetched on this path, where it is actually needed.
            // NOTE(review): keyList.Contains is a linear scan; consider a HashSet if the
            // dictionary is large.
            List<string> keyList = GetKeyList();

            // Substring lengths below 1 are meaningless (and negative ones would throw).
            int shortest = Math.Max(1, startLen);
            for (int start = 0; start < inputStr.Length; start++)
            {
                // Never read past the end of the chunk.
                int longest = Math.Min(charLen, inputStr.Length - start);
                for (int len = shortest; len <= longest; len++)
                {
                    string candidate = inputStr.Substring(start, len);

                    // Check the small result list first so repeated candidates skip
                    // the much larger dictionary scan.
                    if (!list.Contains(candidate) && keyList.Contains(candidate))
                    {
                        list.Add(candidate);
                    }
                }
            }

            return list;
        }
        /// <summary>
        /// Returns the dictionary word list, reading it from the cache entry "keyList"
        /// when present and otherwise loading it from the SDict table and caching it.
        /// </summary>
        /// <returns>The values of the Word column of SDict, in query order.</returns>
        private static List<string> GetKeyList()
        {
            List<string> list = Common.CacheHelper.GetCache("keyList") as List<string>;
            if (list != null)
            {
                return list;
            }

            list = new List<string>();
            string sql = "select Word from SDict";

            // "using" releases the reader even if reading a row throws.
            using (System.Data.SQLite.SQLiteDataReader reader = SqliteHelper.ExecuteReader(sql, Common.SqliteHelper.DbName.ShareSeg, new object[] { }))
            {
                while (reader.Read())
                {
                    list.Add(reader["Word"].ToString());
                }
            }

            // Store the freshly loaded list. The cache used to be written only on a cache
            // hit, so it was never populated and every call went to the database.
            Common.CacheHelper.SetCache("keyList", list);
            return list;
        }
    }
}
