码迷,mamicode.com
首页 > 其他好文 > 详细

Finder(文件内容搜索工具)

时间:2014-07-10 00:14:50      阅读:335      评论:0      收藏:0      [点我收藏+]

标签:style   blog   http   color   文件   os   

搜索文件夹内容的小工具

Github

bubuko.com,布布扣

两种搜索模式的算法:

BoyerMooreSearch.cs

bubuko.com,布布扣
using System.Threading;
using System.Collections.Generic;
using System.Linq;

namespace Finder.Algorithms
{
    /// <summary>
    /// An implemention of Boyer-Moore algorithm.
    /// <para/>author : Ornithopter
    /// </summary>
    class BoyerMooreSearch : SearchBase
    {
        /// <summary>
        /// 
        /// </summary>
        /// <param name="source"></param>
        /// <param name="pattern"></param>
        /// <param name="token"></param>
        /// <returns>An array of matched index</returns>
        public int[] Search(string source, string pattern, CancellationToken token)
        {
            var matchIndexes = new List<int>();

            // step increasment.
            int delta;

            // prepare a map providing delta for each char in pattern string.
            var deltaMap = CreateDeltaMap(pattern);

            // start searching.
            for (var i = pattern.Length - 1; i < source.Length; i += delta)
            {
                token.ThrowIfCancellationRequested();
                // find next match and update delta.
                if (FindNext(source, pattern, i, deltaMap, token, out delta))
                {
                    // add to result list if found.
                    matchIndexes.Add(i - (pattern.Length - 1));
                }
            }
            return matchIndexes.ToArray();
        }

        private static bool Match(string source, int[] deltaMap, string pattern, CancellationToken token)
        {
            // step increasment.
            int delta;

            // start searching.
            for (var i = pattern.Length - 1; i < source.Length; i += delta)
            {
                token.ThrowIfCancellationRequested();
                // find next match and update delta.
                if (FindNext(source, pattern, i, deltaMap, token, out delta))
                {
                    return true;
                }
            }
            return false;
        }

        /// <summary>
        /// Find the next matched index and update delte at the same time.
        /// </summary>
        /// <param name="source"></param>
        /// <param name="pattern"></param>
        /// <param name="start"></param>
        /// <param name="deltaMap"></param>
        /// <param name="delta"></param>
        /// <returns>true if found one, otherwise false.</returns>
        private static bool FindNext(string source, string pattern, int start, int[] deltaMap, CancellationToken token, out int delta)
        {
            int i = pattern.Length - 1,
                index = 0;

            // start comparing from the last char in pattern.
            while (source[start - index] == pattern[i - index])
            {
                token.ThrowIfCancellationRequested();
                if (index != pattern.Length - 1)
                {
                    index++;
                }
                else
                {
                    // matchs to the end. So it‘s a search result.
                    delta = pattern.Length;
                    return true;
                }
            }

            // found one dismatched char at (start - index), get delta from map.
            var c = source[start - index];
            delta = /*c > 128 ? 0 : */deltaMap[c];

            if (delta == 0)
            {
                // this means the source[start] char is the last char in pattern
                // and only appears once. So delta should be the length of pattern.
                delta = pattern.Length;
            }
            return false;
        }

        static int[] CreateDeltaMap(string pattern)
        {
            const int alphabetSize = 0xffff;
            var patternLength = pattern.Length;
            var deltaMap = new int[alphabetSize];

            // initialize the map.
            for (var i = 0; i < alphabetSize; i++)
            {
                deltaMap[i] = patternLength;
            }

            // start from 0, which means any duplicated char will only have
            // the index nearest to the end.
            for (var i = 0; i < patternLength; i++)
            {
                var index = pattern[i];
                deltaMap[index] = patternLength - i - 1;
            }
            return deltaMap;
        }

        protected override void Build(CancellationToken token)
        {
            //throw new NotImplementedException();
        }

        public override string[] Search(string keyword, Dictionary<string, object> config, CancellationToken token)
        {
            var fileList = FileList;

            var deltaMap = CreateDeltaMap(keyword);

            return fileList.Where(filePath => Match(ReadContent(filePath), deltaMap, keyword, token)).ToArray();
        }
    }
}
View Code

Trie.cs

bubuko.com,布布扣
using System.Threading;
using System;
using System.Collections.Generic;
using System.Linq;

namespace Finder.Algorithms
{
    class Trie : SearchBase
    {
        class Node
        {
            public Dictionary<char, Node> Next { get; private set; }
            public Dictionary<int, bool> FileIndex { get; private set; }

            public Node()
            {
                FileIndex = new Dictionary<int, bool>();
                Next = new Dictionary<char, Node>();
            }
        }

        private static readonly HashSet<char> Ignorance = new HashSet<char>
        {
            [,
            (,
            {,
        };

        private static readonly HashSet<char> Spliters = new HashSet<char>
        {
             ,
            ,,
            \n,
            \r,
            ;,
            ],
            ),
            },
        };

        private Node _root;


        protected override void Build(CancellationToken token)
        {
            _root = new Node();

            var index = 0;
            foreach (var filePath in FileList)
            {
                token.ThrowIfCancellationRequested();
                Build(index++, ReadContent(filePath));
            }
        }

        private void Build(int fileIndex, string content)
        {
            var node = _root;
            foreach (var t in content)
            {
                var c = t;

                if (Ignorance.Contains(c))
                {
                    continue;
                }
                if (Spliters.Contains(c))
                {
                    node.FileIndex[fileIndex] = true;
                    node = _root;
                    continue;
                }

                /*if (!Char.IsLetter(c))
                    continue;*/

                if (Char.IsUpper(c))
                    c = Char.ToLower(c);

                Node next;
                if (!node.Next.TryGetValue(c, out next))
                {
                    next = new Node();
                    node.Next[c] = next;
                }
                node = next;
                node.FileIndex[fileIndex] = false;
            }
            node.FileIndex[fileIndex] = true;
        }

        private string[] Search(string keyword, bool matchWholeWord, CancellationToken token)
        {
            var node = _root;
            foreach (var t in keyword)
            {
                token.ThrowIfCancellationRequested();
                var c = t;
                if (Char.IsUpper(c))
                    c = Char.ToLower(c);

                Node next;
                if (!node.Next.TryGetValue(c, out next))
                {
                    return new string[0];
                }
                node = next;
            }

            if (matchWholeWord)
            {
                return node.FileIndex.Where(_ => _.Value).Select(_ => FileList[_.Key]).ToArray();
            }
            return node.FileIndex.Select(_ => FileList[_.Key]).ToArray();
        }

        public const string ConfigMatchWholeWord = "ConfigMatchWholeWord";

        public override string[] Search(string keyword, Dictionary<string, object> config, CancellationToken token)
        {
            var matchWholeWord = (bool)config[ConfigMatchWholeWord];
            return Search(keyword, matchWholeWord, token);
        }
    }
}
View Code

 

Finder(文件内容搜索工具),布布扣,bubuko.com

Finder(文件内容搜索工具)

标签:style   blog   http   color   文件   os   

原文地址:http://www.cnblogs.com/ornithopter/p/3812497.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!