首页 > 数据库技术 > 详细

Lucene.net 全文检索数据库

时间:2019-02-19 20:14:02      阅读:155      评论:0      收藏:0      [点我收藏+]
#define Search

using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Tokenattributes;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.QueryParsers;
using Lucene.Net.Search;
using Lucene.Net.Store;
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using TestApp.Bll;
using TestApp.Model;

namespace TestApp
{
    class Program
    {
        /// <summary>
        /// Console entry point. The stage that runs is selected at compile time:
        /// defining <c>CreateIndex</c> builds the index from <c>ItemBll.GetItemInfos()</c>,
        /// defining <c>Search</c> queries the "ItemName" field for a fixed keyword and
        /// prints the stored fields of every hit.
        /// </summary>
        static void Main()
        {
#if CreateIndex
            Console.WriteLine("开始创建索引");
            var bll = new ItemBll();
            CreateIndex(bll.GetItemInfos());
#endif
#if Search
            #region Keyword search
            // Upper bound on the number of hits requested from the searcher.
            const int maxHits = 20000;
            // Stored fields echoed for every hit, in output order.
            string[] storedFields = { "id", "ItemName", "Purity", "Size", "Unit", "VenderName" };

            // Index directory, opened without file locking and read through a read-only reader.
            // The reader is disposed explicitly: an IndexSearcher built from an existing
            // IndexReader does not close that reader when the searcher is disposed.
            using (Lucene.Net.Store.Directory dir_search = FSDirectory.Open(new System.IO.DirectoryInfo("ItemIndexDir"), new NoLockFactory()))
            using (IndexReader reader = IndexReader.Open(dir_search, true))
            using (IndexSearcher search = new IndexSearcher(reader))
            {
                QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "ItemName", new PanGuAnalyzer());
                Query query = parser.Parse(LuceneHelper.GetKeyWordSplid("甲醇"));

                // Run the search (no filter) and collect the matching documents.
                TopDocs ts = search.Search(query, null, maxHits);
                ScoreDoc[] docs = ts.ScoreDocs;
                Console.WriteLine(docs.Length);

                foreach (ScoreDoc hit in docs)
                {
                    Document doc = search.Doc(hit.Doc);
                    foreach (string fieldName in storedFields)
                    {
                        Console.WriteLine(doc.Get(fieldName));
                    }
                }
            }
            #endregion

#endif
        }

        // Helper that tokenizes search keywords before they are handed to the query parser.
        public static class LuceneHelper
        {
            /// <summary>
            /// Splits <paramref name="keywords"/> into terms with <c>PanGuAnalyzer</c> and
            /// returns the terms separated by spaces.
            /// </summary>
            /// <param name="keywords">Raw search text; may be null or empty.</param>
            /// <returns>
            /// Every term followed by a single space (a non-empty result therefore ends
            /// with a space), or an empty string when the input is null/empty or the
            /// analyzer produces no terms.
            /// </returns>
            public static string GetKeyWordSplid(string keywords)
            {
                // new StringReader(null) throws, so short-circuit inputs with nothing to analyze.
                if (string.IsNullOrEmpty(keywords))
                {
                    return string.Empty;
                }

                StringBuilder sb = new StringBuilder();

                // NOTE(review): the first argument of TokenStream is the field name; the
                // keyword text is passed there as in the original code. Confirm whether a
                // real field name (e.g. "ItemName") was intended.
                using (Analyzer analyzer = new PanGuAnalyzer())
                using (TokenStream stream = analyzer.TokenStream(keywords, new StringReader(keywords)))
                {
                    // The attribute instance is reused for every token, so fetch it once.
                    ITermAttribute termAttribute = stream.AddAttribute<ITermAttribute>();
                    while (stream.IncrementToken())
                    {
                        sb.Append(termAttribute.Term).Append(' ');
                    }
                }

                return sb.ToString();
            }
        }

        /// <summary>
        /// Writes one Lucene document per item into the "ItemIndexDir" index, creating
        /// the index when it does not exist yet and appending to it otherwise, then
        /// optimizes the index.
        /// </summary>
        /// <param name="list">Items to index; must not be null.</param>
        /// <exception cref="ArgumentNullException"><paramref name="list"/> is null.</exception>
        /// <remarks>
        /// Any failure is written to the console together with the number of the last
        /// field successfully added to the current document, then rethrown.
        /// NOTE(review): because an existing index is appended to, running this twice
        /// over the same items presumably stores duplicates — confirm this is intended.
        /// </remarks>
        private static void CreateIndex(List<ItemInfo> list)
        {
            if (list == null)
            {
                throw new ArgumentNullException(nameof(list));
            }

            Analyzer analyzer = new PanGuAnalyzer();
            // Number of fields added to the document currently being built (diagnostics only).
            int i = 0;

            using (Lucene.Net.Store.Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo("ItemIndexDir")))
            {
                try
                {
                    // Create a fresh index only when the directory does not already contain one.
                    bool isCreate = !IndexReader.IndexExists(dir);
                    using (IndexWriter writer = new IndexWriter(dir, analyzer, isCreate, IndexWriter.MaxFieldLength.UNLIMITED))
                    {
                        foreach (var item in list)
                        {
                            // Reset per document so a failure never reports the previous item's step.
                            i = 0;
                            Document doc = new Document();
                            if (item.ItemId % 1000 == 0)
                            {
                                Console.WriteLine($"开始写入{item.ItemId}");
                            }

                            // Field rejects null values, so missing text is indexed as an empty string.
                            doc.Add(new Field("id", item.ItemId.ToString(), Field.Store.YES, Field.Index.ANALYZED));
                            i = 1;
                            doc.Add(new Field("ItemName", item.ItemName?.ToString() ?? string.Empty, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
                            i = 2;
                            doc.Add(new Field("Purity", item.Purity?.ToString() ?? string.Empty, Field.Store.YES, Field.Index.ANALYZED));
                            i = 3;
                            doc.Add(new Field("Size", item.Size.ToString(), Field.Store.YES, Field.Index.ANALYZED));
                            i = 4;
                            doc.Add(new Field("Unit", item.Unit?.ToString() ?? string.Empty, Field.Store.YES, Field.Index.ANALYZED));
                            i = 5;
                            doc.Add(new Field("VenderName", item.VenderName?.ToString() ?? string.Empty, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
                            i = 6;
                            doc.Add(new Field("Price", item.Price.ToString(), Field.Store.YES, Field.Index.ANALYZED));
                            i = 7;

                            writer.AddDocument(doc, analyzer);
                        }

                        writer.Optimize();
                    }
                }
                catch (Exception ex)
                {
                    Console.WriteLine(ex);
                    Console.WriteLine($"error step {i}");
                    throw;
                }
            }
        }
    }
}

 

Lucene.net 全文检索数据库

原文:https://www.cnblogs.com/LTEF/p/10403114.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!