TrieSearch.cs
//#define WEB using System; using System.Collections; using System.Collections.Generic; using System.Linq; using System.Text; using System.Threading.Tasks; namespace TrieCS { class MatchInfoComparer : IEqualityComparer<MatchInfo> { public bool Equals(MatchInfo x, MatchInfo y) { return x.Index.Equals(y.Index); } public int GetHashCode(MatchInfo obj) { return obj.Index; } } public class PerfInfo { public double FilterAddMs { get; internal set; } public double TotalMs { get; internal set; } } class SearchResult { public int Index { get; set; } public List<int> Positions { get; set; } public int Priority { get; set; } public string Content { get; set; } public SearchResult() { Positions = new List<int>(); } } public class MatchInfo : IEquatable<MatchInfo>, IComparable<MatchInfo> { public int Index { get; set; } public int Position { get; set; } public int Priority { get; set; } public char Data { get; set; } public override string ToString() { return String.Format("[{0}.{1}]", Index, Position); } public bool Equals(MatchInfo other) { return Index == other.Index && Position == other.Position; } public int CompareTo(MatchInfo other) { return this.Index.CompareTo(other.Index); } } class Node { public char Data { get; set; } public List<MatchInfo> Infos { get; set; } public Hashtable Next { get; set; } public Node() { Next = new Hashtable(26); Infos = new List<MatchInfo>(); } public override string ToString() { return Data.ToString(); } } class TrieSearch { const string ALPHABET = "abcdefghijklmnopqrstyvwxyz"; MatchInfoComparer _comparer = new MatchInfoComparer(); HiPerfTimer _timer = new HiPerfTimer(); public PerfInfo LastPerfInfo { get; private set; } public Node Root { get; private set; } Hashtable _nodes = new Hashtable(); public Hashtable Nodes { get { return _nodes; } } private string[] _keywords; public string[] Keywords { get { return _keywords; } set { _keywords = value; Root = new Node(); _nodes = new Hashtable(); for (int i = 0; i < _keywords.Length; i++) { BuildTree(Root, _keywords[i], i); } } } private void BuildTree(Node root, string keyword, int index) { Stack<Node> parents = new Stack<Node>(); Node node = root; bool flag_start = true; for (int j = 0; j < keyword.Length; j++) { parents.Push(node); var c = keyword[j]; var info = new MatchInfo { Index = index, Position = j, Priority = j == 0 ? 2 : 1, Data = c, }; if (flag_start) { flag_start = false; info.Priority += 1; } if (Char.IsUpper(c)) info.Priority += 1; c = Char.ToLower(c); Node newNode = node.Next[c] as Node; if (newNode == null) { #if WEB if (_nodes[c] == null) _nodes[c] = new Node(); newNode = _nodes[c] as Node; #else newNode = new Node(); #endif newNode.Data = c; node.Next[c] = newNode; } #if WEB foreach (var p in parents) { if (p == newNode) continue; p.Next[c] = newNode; } #endif newNode.Infos.Add(info); node = newNode; } } public List<MatchInfo> Search(string data) { LastPerfInfo = new PerfInfo(); var infoList = new List<MatchInfo>(); int pos = 0; bool first = true; Search(data, ref pos, Root, infoList, ref first); return infoList.ToList(); } public SearchResult[] Convert(List<MatchInfo> matchInfos) { var results = new Dictionary<int, SearchResult>(); var groups = matchInfos.GroupBy(_=>_.Data); foreach (var group in groups) { foreach (var sr in group) { SearchResult result; if (!results.TryGetValue(sr.Index, out result)) { result = new SearchResult(); result.Content = Keywords[sr.Index]; results[sr.Index] = result; } if (!result.Positions.Contains(sr.Position)) { result.Positions.Add(sr.Position); if (result.Priority < sr.Priority) result.Priority = sr.Priority; } } } var c = new Comparison<int>((a, b) => { return a.CompareTo(b); }); var continues = new Func<IEnumerable<int>, bool>(list => { int last = list.ElementAt(0); foreach (var item in list.Skip(1)) { if (item != last + 1) return false; last = item; } return true; }); foreach (var item in results.Values) { item.Positions.Sort(c); if (continues(item.Positions)) item.Priority += 2; } return results.Values.ToArray(); } #if WEB private void Search(string data, int pos, Node node, List<MatchInfo> infoList) { if (node == null) return; var lastNode = node; bool flag_first = true; for (int i = pos; i < data.Length; i++) { var c = data[i]; node = node.Next[c] as Node; if (node == null) { infoList.Clear(); break; } else { if (flag_first) { flag_first = false; infoList.AddRange(node.Infos); } else { _timer.Start(); var inter = infoList.Intersect(node.Infos, _comparer).ToLookup(_=>_.Index); infoList.AddRange(node.Infos); for (int ii = 0; ii < infoList.Count; ii++) { if (!inter.Contains(infoList[ii].Index)) infoList.RemoveAt(ii--); } _timer.Stop(); LastPerfInfo.FilterAddMs += _timer.Duration * 1000.0; } } } return; } #else private void Search(string data, ref int pos, Node node, List<MatchInfo> infoList, ref bool flag_first) { if (node == null) return; Node lastNode; for (; pos < data.Length; pos++) { var c = data[pos]; lastNode = node; node = node.Next[c] as Node; if (node == null) { foreach (var nc in ALPHABET) { if (nc == c) continue; if (lastNode.Next[nc] == null) continue; Search(data, ref pos, lastNode.Next[nc] as Node, infoList, ref flag_first); } break; } else { if (flag_first) { flag_first = false; infoList.AddRange(node.Infos); } else { _timer.Start(); var inter = infoList.Intersect(node.Infos, _comparer).ToLookup(_=>_.Index); infoList.AddRange(node.Infos); for (int ii = 0; ii < infoList.Count; ii++) { if (!inter.Contains(infoList[ii].Index)) infoList.RemoveAt(ii--); } _timer.Stop(); LastPerfInfo.FilterAddMs += _timer.Duration * 1000.0; } } } return; } #endif } }
MainWindow.cs
using System; using System.Collections; using System.Collections.Generic; using System.Linq; using System.Text; using System.Threading.Tasks; using System.Windows; using System.Windows.Controls; using System.Windows.Data; using System.Windows.Documents; using System.Windows.Input; using System.Windows.Media; using System.Windows.Media.Imaging; using System.Windows.Navigation; using System.Windows.Shapes; namespace TrieCS { /// <summary> /// MainWindow.xaml 的交互逻辑 /// </summary> public partial class MainWindow : Window { TrieSearch _trie = new TrieSearch(); HiPerfTimer _timer = new HiPerfTimer(); public MainWindow() { InitializeComponent(); Initiate(); } public void Initiate() { } private void content_TextChanged_1(object sender, TextChangedEventArgs e) { _trie.Keywords = content.Text.Replace("\r\n", " ").Replace("\r", " ").Replace("\n", " ").Split(‘ ‘); NodesListBox.ItemsSource = _trie.Nodes.OfType<DictionaryEntry>().Select(_=>_.Value).ToList(); DisplayResults(); } private void keyword_TextChanged_1(object sender, TextChangedEventArgs e) { DisplayResults(); } private void DisplayResults() { _timer.Start(); var results = _trie.Convert(_trie.Search(keyword.Text)); Array.Sort(results, new Comparison<SearchResult>((a, b) => { return b.Priority.CompareTo(a.Priority); })); _timer.Stop(); time.Text = String.Format("Total:{0:0.00000}ms, FilterAdd:{1:0.00000}ms, Total words:{2}", _timer.Duration * 1000.0, _trie.LastPerfInfo.FilterAddMs, _trie.Keywords.Length); result.ItemsSource = results; } } }
Mainwindow.xaml
<Window x:Class="TrieCS.MainWindow" xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation" xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml" Title="MainWindow" Height="350" Width="525"> <Grid> <Grid.ColumnDefinitions> <ColumnDefinition Width="*"/> <ColumnDefinition Width="170"/> </Grid.ColumnDefinitions> <Grid.RowDefinitions> <RowDefinition Height="*"/> <RowDefinition Height="Auto"/> <RowDefinition Height="*"/> <RowDefinition Height="Auto"/> </Grid.RowDefinitions> <TextBox x:Name="content" Grid.Row="0" TextChanged="content_TextChanged_1" AcceptsReturn="True"/> <TextBox x:Name="keyword" Grid.Row="1" TextChanged="keyword_TextChanged_1" Height="24"/> <ListView x:Name="result" Grid.Row="2"> <ListView.View> <GridView> <GridViewColumn Width="50" DisplayMemberBinding="{Binding Priority}" Header="匹配度" /> <GridViewColumn Width="50" DisplayMemberBinding="{Binding Index}" Header="序号" /> <!--<GridViewColumn Width="50" DisplayMemberBinding="{Binding Position}" Header="位置" />--> <GridViewColumn Width="250" DisplayMemberBinding="{Binding Content}" Header="内容" /> </GridView> </ListView.View> </ListView> <TextBlock x:Name="time" Grid.Row="3" Height="24"/> <Grid Grid.Column="1" Grid.RowSpan="3"> <Grid.RowDefinitions> <RowDefinition Height="*"/> <RowDefinition Height="*"/> <RowDefinition Height="*"/> </Grid.RowDefinitions> <ListBox x:Name="NodesListBox" DisplayMemberPath="Data"></ListBox> <ListBox x:Name="MatchInfoListBox" DataContext="{Binding Path=SelectedItem, ElementName=NodesListBox}" ItemsSource="{Binding Infos}" Grid.Row="1"/> <ListBox x:Name="NextNodesListBox" DataContext="{Binding Path=SelectedItem, ElementName=NodesListBox}" ItemsSource="{Binding Next}" DisplayMemberPath="Value" Grid.Row="2"/> </Grid> </Grid> </Window>
HiPerfTimer
/* High precision timer class * - http://www.eggheadcafe.com/articles/20021111.asp * * (Thanks to the author!) */ using System; using System.Runtime.InteropServices; using System.ComponentModel; using System.Threading; namespace TrieCS { public class HiPerfTimer { [DllImport("Kernel32.dll")] private static extern bool QueryPerformanceCounter(out long lpPerformanceCount); [DllImport("Kernel32.dll")] private static extern bool QueryPerformanceFrequency(out long lpFrequency); private long startTime; private long stopTime; private long freq; /// <summary> /// ctor /// </summary> public HiPerfTimer() { startTime = 0; stopTime = 0; freq = 0; if (QueryPerformanceFrequency(out freq) == false) { throw new Win32Exception(); // timer not supported } } /// <summary> /// Start the timer /// </summary> /// <returns>long - tick count</returns> public long Start() { QueryPerformanceCounter(out startTime); return startTime; } /// <summary> /// Stop timer /// </summary> /// <returns>long - tick count</returns> public long Stop() { QueryPerformanceCounter(out stopTime); return stopTime; } /// <summary> /// Return the duration of the timer (in seconds) /// </summary> /// <returns>double - duration</returns> public double Duration { get { return (double)(stopTime - startTime) / (double)freq; } } /// <summary> /// Frequency of timer (no counts in one second on this machine) /// </summary> ///<returns>long - Frequency</returns> public long Frequency { get { QueryPerformanceFrequency(out freq); return freq; } } } }
原文:http://www.cnblogs.com/ornithopter/p/3773119.html