创建实体类
package index; /** * 新闻实体类 * @author moonxy * */ public class News { private int id;//新闻id private String title;//新闻标题 private String content;//新闻内容 private int reply;//评论数 public News() { } public News(int id, String title, String content, int reply) { super(); this.id = id; this.title = title; this.content = content; this.reply = reply; } public int getId() { return id; } public void setId(int id) { this.id = id; } public String getTitle() { return title; } public void setTitle(String title) { this.title = title; } public String getContent() { return content; } public void setContent(String content) { this.content = content; } public int getReply() { return reply; } public void setReply(int reply) { this.reply = reply; } }
2
package index; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.util.Date; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.IntPoint; import org.apache.lucene.document.StoredField; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; /** * Lucene 创建索引 * @author moonxy * */ public class CreateIndex { public static void main(String[] args) { // 创建3个News对象 News news1 = new News(); news1.setId(1); news1.setTitle("安倍晋三本周会晤特朗普 将强调日本对美国益处"); news1.setContent("日本首相安倍晋三计划2月10日在华盛顿与美国总统特朗普举行会晤时提出加大日本在美国投资的设想"); news1.setReply(672); News news2 = new News(); news2.setId(2); news2.setTitle("北大迎4380名新生 农村学生700多人近年最多"); news2.setContent("昨天,北京大学迎来4380名来自全国各地及数十个国家的本科新生。其中,农村学生共700余名,为近年最多..."); news2.setReply(995); News news3 = new News(); news3.setId(3); news3.setTitle("特朗普宣誓(Donald Trump)就任美国第45任总统"); news3.setContent("当地时间1月20日,唐纳德·特朗普在美国国会宣誓就职,正式成为美国第45任总统。"); news3.setReply(1872); // 开始时间 Date start = new Date(); System.out.println("**********开始创建索引**********"); // 创建IK分词器 Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig icw = new IndexWriterConfig(analyzer); // CREATE 表示先清空索引再重新创建 icw.setOpenMode(OpenMode.CREATE); Directory dir = null; IndexWriter inWriter = null; // 存储索引的目录 Path indexPath = Paths.get("indexdir"); try { if (!Files.isReadable(indexPath)) { System.out.println("索引目录 ‘" + indexPath.toAbsolutePath() + "‘ 不存在或者不可读,请检查"); System.exit(1); } dir = FSDirectory.open(indexPath); inWriter = new IndexWriter(dir, icw); // 设置新闻ID索引并存储 FieldType idType = new FieldType(); idType.setIndexOptions(IndexOptions.DOCS); idType.setStored(true); // 设置新闻标题索引文档、词项频率、位移信息和偏移量,存储并词条化 FieldType titleType = new FieldType(); titleType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); titleType.setStored(true); titleType.setTokenized(true); FieldType contentType = new FieldType(); contentType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); contentType.setStored(true); contentType.setTokenized(true); contentType.setStoreTermVectors(true); contentType.setStoreTermVectorPositions(true); contentType.setStoreTermVectorOffsets(true); Document doc1 = new Document(); doc1.add(new Field("id", String.valueOf(news1.getId()), idType)); doc1.add(new Field("title", news1.getTitle(), titleType)); doc1.add(new Field("content", news1.getContent(), contentType)); doc1.add(new IntPoint("reply", news1.getReply())); doc1.add(new StoredField("reply_display", news1.getReply())); Document doc2 = new Document(); doc2.add(new Field("id", String.valueOf(news2.getId()), idType)); doc2.add(new Field("title", news2.getTitle(), titleType)); doc2.add(new Field("content", news2.getContent(), contentType)); doc2.add(new IntPoint("reply", news2.getReply())); doc2.add(new StoredField("reply_display", news2.getReply())); Document doc3 = new Document(); doc3.add(new Field("id", String.valueOf(news3.getId()), idType)); doc3.add(new Field("title", news3.getTitle(), titleType)); doc3.add(new Field("content", news3.getContent(), contentType)); doc3.add(new IntPoint("reply", news3.getReply())); doc3.add(new StoredField("reply_display", news3.getReply())); inWriter.addDocument(doc1); inWriter.addDocument(doc2); inWriter.addDocument(doc3); inWriter.commit(); inWriter.close(); dir.close(); } catch (IOException e) { e.printStackTrace(); } Date end = new Date(); System.out.println("索引文档用时:" + (end.getTime() - start.getTime()) + " milliseconds"); System.out.println("**********索引创建完成**********"); } }
3 简单查询
package query; import java.io.IOException; import java.nio.file.Path; import java.nio.file.Paths; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.queryparser.classic.QueryParser.Operator; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; /** * 单域搜索 * @author moonxy * */ public class QueryParseTest { public static void main(String[] args) throws ParseException, IOException { // 搜索单个字段 String field = "title"; // 搜索多个字段时使用数组 //String[] fields = { "title", "content" }; Path indexPath = Paths.get("indexdir"); Directory dir = FSDirectory.open(indexPath); IndexReader reader = DirectoryReader.open(dir); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new StandardAnalyzer();//最细粒度分词 QueryParser parser = new QueryParser(field, analyzer); // 多域搜索 //MultiFieldQueryParser multiParser = new MultiFieldQueryParser(fields, analyzer); // 关键字同时成立使用 AND, 默认是 OR parser.setDefaultOperator(Operator.AND); // 查询语句 // Query query = parser.parse("农村学生");//查询关键词 Query query = parser.parse("特朗普");//查询关键词 System.out.println("Query:" + query.toString()); // 返回前10条 TopDocs tds = searcher.search(query, 10); for (ScoreDoc sd : tds.scoreDocs) { // Explanation explanation = searcher.explain(query, sd.doc); // System.out.println("explain:" + explanation + "\n"); Document doc = searcher.doc(sd.doc); System.out.println("DocID:" + sd.doc); System.out.println("id:" + doc.get("id")); System.out.println("title:" + doc.get("title")); System.out.println("content:" + doc.get("content")); System.out.println("文档评分:" + sd.score); } dir.close(); reader.close(); } }
4高亮查询
package query; import java.io.IOException; import java.nio.file.Path; import java.nio.file.Paths; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.highlight.Fragmenter; import org.apache.lucene.search.highlight.Highlighter; import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; import org.apache.lucene.search.highlight.QueryScorer; import org.apache.lucene.search.highlight.SimpleHTMLFormatter; import org.apache.lucene.search.highlight.SimpleSpanFragmenter; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; /** * Lucene查询高亮 * @author moonxy * */ public class HighlighterTest { public static void main(String[] args) throws IOException, InvalidTokenOffsetsException, ParseException { String field = "title"; Path indexPath = Paths.get("indexdir"); Directory dir = FSDirectory.open(indexPath); IndexReader reader = DirectoryReader.open(dir); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new StandardAnalyzer(); QueryParser parser = new QueryParser(field, analyzer); Query query = parser.parse("北大学生"); System.out.println("Query:" + query); // 查询高亮 QueryScorer score = new QueryScorer(query, field); SimpleHTMLFormatter fors = new SimpleHTMLFormatter("<span style=\"color:red;\">", "</span>");// 定制高亮标签 Highlighter highlighter = new Highlighter(fors, score);// 高亮分析器 // 返回前10条 TopDocs tds = searcher.search(query, 10); for (ScoreDoc sd : tds.scoreDocs) { // Explanation explanation = searcher.explain(query, sd.doc); // System.out.println("explain:" + explanation + "\n"); Document doc = searcher.doc(sd.doc); System.out.println("id:" + doc.get("id")); System.out.println("title:" + doc.get("title")); Fragmenter fragment = new SimpleSpanFragmenter(score); highlighter.setTextFragmenter(fragment); String str = highlighter.getBestFragment(analyzer, field, doc.get(field));// 获取高亮的片段 System.out.println("高亮的片段:" + str); } dir.close(); reader.close(); } }
注意:如果没有索引文件夹 ,自己先创建,我的文件夹是indexdir
原文:https://www.cnblogs.com/woshuaile/p/10342827.html