前言:
Lucene是目前最受欢迎的Java全文搜索框架。准确地说,它是一个全文检索引擎的架构,提供了完整的查询引擎和索引引擎,以及部分文本分析引擎。Lucene为开发人员提供了相当完整的工具包,可以非常方便地实现强大的全文检索功能。
本实例主要演示如何使用Lucene向索引库写入(存储)数据,并从索引库中检索(读取)数据。
代码:
pom.xml
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <!-- Project coordinates -->
  <groupId>com.zzm</groupId>
  <artifactId>lucene</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <packaging>war</packaging>
  <name>lucene Maven Webapp</name>
  <url>http://maven.apache.org</url>

  <properties>
    <!-- Single place to set the version shared by every Lucene artifact below -->
    <lucene.version>3.0.1</lucene.version>
  </properties>

  <dependencies>
    <!-- Test-only dependency -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>3.8.1</version>
      <scope>test</scope>
    </dependency>

    <!-- Lucene modules; all resolve to the same version via ${lucene.version} -->
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-core</artifactId>
      <version>${lucene.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-analyzers</artifactId>
      <version>${lucene.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-highlighter</artifactId>
      <version>${lucene.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-memory</artifactId>
      <version>${lucene.version}</version>
    </dependency>
  </dependencies>

  <build>
    <finalName>lucene</finalName>
  </build>
</project>
Article.java(POJO对象)
package com.zzm.lucene.domain; /** * @ClassName 文章类 * @author zhanmin.zheng * @CreateDate 2016/02/26 * @ModifyDate * @version 1.0 */ public class Article { private Long id;//主键 private String title;//标题 private String content;//内容 public Article() { super(); } public Article(long id, String title, String content) { super(); this.id = id; this.title = title; this.content = content; } public Long getId() { return id; } public void setId(Long id) { this.id = id; } public String getTitle() { return title; } public void setTitle(String title) { this.title = title; } public String getContent() { return content; } public void setContent(String content) { this.content = content; } @Override public String toString() { return "Article [id=" + id + ", title=" + title + ", content=" + content + "]"; } }
WeclomeService.java
package cm.zzm.lucene.service; import java.io.File; import java.io.IOException; import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; import java.util.List; import org.apache.lucene.index.IndexWriter.MaxFieldLength; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.Field.Index; import org.apache.lucene.document.Field.Store; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; import com.zzm.lucene.domain.Article; /** * @Description acticle对象持久化 * @author zzm * @CreateDate 2016/02/26 * @ModifyDate * @Version 1.0 */ public class WeclomeService { public void CreateIndex() throws IOException { /** * 1.创建一个article对象,并且把信息存放进去 * 2.创建indexWriter的api吧数据存放在索引库 * 3.关闭indexWriter对象 */ Article article = new Article(1L, "lucene全文索引", "百度,google都是搜索引擎"); /** * IndexWriter * @Param 索引库 * @Param 生成器 */ // Path path = Paths.get("./indexDir");//5.0版本使用path // Directory directory = FSDirectory.open(path);//创建一个索引库 Directory directory = FSDirectory.open(new File("./indexDir"));//创建一个索引库 Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);//创建分析器 IndexWriter indexWriter = new IndexWriter(directory, analyzer, MaxFieldLength.LIMITED);//创建indexWriter对象,指定索引库 Document document = new Document(); Field idField = new Field("id", article.getId().toString(), Store.YES, Index.NOT_ANALYZED); Field titleField = new Field("title", article.getTitle(), Store.YES, Index.NOT_ANALYZED); Field contentField = new 
Field("content", article.getContent(), Store.YES, Index.NOT_ANALYZED); document.add(idField); document.add(titleField); document.add(contentField); indexWriter.addDocument(document); indexWriter.close(); } public void searchIndex() throws IOException, ParseException { /** * 1.创建一个IndexSearcher对象 * 2.调用search方法进行检索 * 3.输出内容 */ /** * 创建一个IndexSearcher对象 */ Directory directory = FSDirectory.open(new File("./indexDir")); IndexSearcher IndexSearcher = new IndexSearcher(directory); /** * 调用search方法进行检索 */ Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30); QueryParser queryParser = new QueryParser(Version.LUCENE_30, "content", analyzer); Query query = queryParser.parse("goo");//查询的关键词 TopDocs topDocs = IndexSearcher.search(query, 2); int count = topDocs.totalHits;//根据关键词查询出来的记录数 ScoreDoc[] scoreDocs = topDocs.scoreDocs; List<Article> articleList = new ArrayList<Article>(); for (ScoreDoc scoreDoc : scoreDocs) { float score = scoreDoc.score;//关键字得分 int index = scoreDoc.doc;//索引下标 Document document = IndexSearcher.doc(index); /** * 把document转换成article对象 */ Article article = new Article(); article.setId(Long.parseLong(document.get("id")));//document.getField("id").stringValue() article.setTitle(document.get("title")); article.setContent(document.get("content")); articleList.add(article); } System.out.println("articleList is "+articleList); } }
原文:http://www.cnblogs.com/sz-zzm/p/5222048.html