Lucene is a search-engine library that can be embedded in a web backend. Roughly speaking, a search engine like Baidu crawls the web into its own storage, builds an index over that data, and then runs its query algorithms against that index rather than the raw data. Lucene plays the role of creating and maintaining such an index. This takes load off the database and is much faster than a MySQL LIKE query; the trade-off is the time it takes to build and maintain the index. Recently I wanted to embed Lucene into my own web application.
First, the pom file:
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>cn.Itheima</groupId>
    <artifactId>luceneDemo</artifactId>
    <version>1.0-SNAPSHOT</version>
    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <skipTests>true</skipTests>
    </properties>
    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>2.2.6.RELEASE</version>
    </parent>
    <dependencies>
        <!-- https://mvnrepository.com/artifact/commons-io/commons-io -->
        <dependency>
            <groupId>commons-io</groupId>
            <artifactId>commons-io</artifactId>
            <version>2.6</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-core -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>8.0.0</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-analyzers-common -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-common</artifactId>
            <version>8.0.0</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-queryparser -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queryparser</artifactId>
            <version>8.0.0</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/junit/junit -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <scope>runtime</scope>
        </dependency>
        <!-- ik-analyzer Chinese tokenizer -->
        <dependency>
            <groupId>cn.bestwu</groupId>
            <artifactId>ik-analyzers</artifactId>
            <version>5.1.0</version>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.springframework.boot/spring-boot-starter-thymeleaf -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-thymeleaf</artifactId>
            <version>2.2.6.RELEASE</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/com.alibaba/fastjson -->
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.62</version>
        </dependency>
    </dependencies>
</project>
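The test methods below sit in an ordinary JUnit test class. For reference, these are the imports they rely on (News, NewsDAO/NewsDAOImpl and CommonUtil are classes from my own project, and DirPath is simply a String constant holding the filesystem path of the index directory):

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.junit.Test;
import java.io.IOException;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;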
/**
 * Create the index
 */
@Test
public void createIndexTest() throws IOException {
    //1. Collect the data
    NewsDAO newsDAO = new NewsDAOImpl();
    List<News> news = newsDAO.queryNewsList();
    List<Document> docs = new ArrayList<>();
    for (News news1 : news) {
        //2. Create a Document object
        Document doc = new Document();
        // Create field objects and add them to the document
        /**
         * Tokenize: no, tokenizing a primary key is meaningless
         * Index: yes, the field must be indexed to be searchable
         * Store: yes, the primary key id is special; it lets us go back and update the database record
         */
        doc.add(new StringField("id", String.valueOf(news1.getId()), Field.Store.YES));
        /**
         * Tokenize: yes, tokenizing the title lets a partial title match its Doc; the title field is searched and tokenization is meaningful
         * Index: yes, needed for search, must be indexed
         * Store: yes, the title needs to be displayed
         */
        doc.add(new TextField("title", news1.getTitle(), Field.Store.YES));
        /**
         * Tokenize: no; the field is searched and displayed (if this were a range query, e.g. by price, it would have to be tokenized)
         * Index: yes; the date is converted to a String and indexed, which makes range lookups easier
         * Store: yes
         */
        doc.add(new StringField("pubDate", CommonUtil.dateToString(news1.getPubDate()), Field.Store.YES));
        /**
         * Tokenize: yes; needs to be searched and displayed, hence TextField
         * Index: yes
         * Store: yes
         */
        doc.add(new TextField("infoSoure", news1.getInfoSoure(), Field.Store.YES));
        /**
         * Tokenize: yes
         * Index: yes
         * Store: yes
         */
        doc.add(new TextField("summary", news1.getSummary(), Field.Store.YES));
        /**
         * Tokenize: no
         * Index: no
         * Store: yes
         */
        doc.add(new StoredField("sourceUrl", news1.getSourceUrl()));
        /**
         * Tokenize: no
         * Index: yes
         * Store: yes
         */
        doc.add(new StringField("updateTime", CommonUtil.dateToString(news1.getUpdateTime()), Field.Store.YES));
        docs.add(doc);
        //System.out.println(news1.toString());
    }
    //3. Create the analyzer
    Analyzer analyzer = new StandardAnalyzer();
    //4. Create the Directory object pointing at the index location
    Directory dir = FSDirectory.open(Paths.get(DirPath));
    //5. Create the IndexWriterConfig, specifying the analyzer used for tokenizing
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    //6. Create the IndexWriter, specifying the output location and the config
    IndexWriter indexWriter = new IndexWriter(dir, config);
    //7. Write the documents to the index
    for (Document d : docs) {
        indexWriter.addDocument(d);
    }
    //8. Release resources
    indexWriter.close();
}
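Building the index only pays off when we query it, and the post so far only covers creating and updating. Here is a minimal search sketch against the title field, assuming the same DirPath constant and the StandardAnalyzer used above (for Chinese text you would likely swap in the ik-analyzer pulled in by the pom). It additionally needs DirectoryReader and IndexReader from org.apache.lucene.index, IndexSearcher, Query, ScoreDoc and TopDocs from org.apache.lucene.search, and QueryParser from org.apache.lucene.queryparser.classic:

/**
 * Query the index (a sketch)
 */
@Test
public void searchIndexTest() throws Exception {
    // Open a reader on the same index directory used above
    Directory dir = FSDirectory.open(Paths.get(DirPath));
    IndexReader reader = DirectoryReader.open(dir);
    IndexSearcher searcher = new IndexSearcher(reader);
    // Parse the user input against the "title" field with the same kind of analyzer used for indexing
    QueryParser parser = new QueryParser("title", new StandardAnalyzer());
    Query query = parser.parse("keyword");
    // Take the top 10 hits and print their stored fields
    TopDocs topDocs = searcher.search(query, 10);
    for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
        Document doc = searcher.doc(scoreDoc.doc);
        System.out.println(doc.get("id") + " " + doc.get("title") + " " + doc.get("sourceUrl"));
    }
    reader.close();
}

Note that the query is parsed with the same kind of analyzer that built the index; mixing analyzers between indexing and searching is a common reason for missing hits.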
/**
 * Update the index
 */
@Test
public void updateIndexTest() throws IOException {
    // The replacement content
    Document doc = new Document();
    doc.add(new StringField("id", "567", Field.Store.YES));
    doc.add(new TextField("title", "XXXXXX unknown", Field.Store.YES));
    doc.add(new StringField("pubDate", "20200406", Field.Store.YES));
    doc.add(new TextField("infoSoure", "XXXX", Field.Store.YES));
    doc.add(new TextField("summary", "null", Field.Store.YES));
    doc.add(new StoredField("sourceUrl", "update to null"));
    doc.add(new StringField("updateTime", "20200406", Field.Store.YES));
    //3. Create the analyzer
    Analyzer analyzer = new StandardAnalyzer();
    //4. Create the Directory object pointing at the index location
    Directory dir = FSDirectory.open(Paths.get(DirPath));
    //5. Create the IndexWriterConfig, specifying the analyzer used for tokenizing
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    //6. Create the IndexWriter, specifying the output location and the config
    IndexWriter indexWriter = new IndexWriter(dir, config);
    // The first argument is the match condition: documents whose id term equals "567" are replaced
    indexWriter.updateDocument(new Term("id", "567"), doc);
    //8. Release resources
    indexWriter.close();
}
More updates to come 0-0
Original post: https://www.cnblogs.com/elward-lv/p/12810487.html