1. 准备依赖 jar 包:lucene-3.5.0.jar(加入工程的 classpath)。
2. 新建目录C:\testsource,新建目录C:\testindex。
3.在C:\testsource下新建test1.txt, test2.txt,内容分别为:“商务休闲品牌男装西裤衬衫”,“潮流休闲品牌女装裙子大衣”。
4.创建索引
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Date;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Builds a Lucene index for the plain-text files of a source directory.
 *
 * <p>Every regular file whose name ends in {@code .txt} directly under the source directory is
 * read as UTF-8 and added to the index as one document with two fields:
 * <ul>
 *   <li>{@code path} - the file path, stored but not indexed;</li>
 *   <li>{@code body} - the file content, stored, analyzed and with term vectors
 *       (positions and offsets).</li>
 * </ul>
 * The index is opened in {@code OpenMode.CREATE}, so an existing index in the target directory
 * is replaced.
 *
 * @author liam.huang@foxmail.com
 */
public class TextFileIndexer {

    /**
     * Indexes all {@code .txt} files and prints progress and the elapsed time to standard output.
     *
     * @param args unused
     * @throws Exception if the source directory cannot be listed, a file cannot be read, or the
     *     index cannot be written
     */
    public static void main(String[] args) throws Exception {
        // Directory containing the text files to index.
        File sourceDir = new File("C:\\testsource");
        File[] sourceFiles = sourceDir.listFiles();
        // listFiles() returns null when the path is not a directory or cannot be read; fail
        // here, before the existing index is overwritten by OpenMode.CREATE.
        if (sourceFiles == null) {
            throw new IOException("Cannot list source directory: " + sourceDir.getPath());
        }

        // Directory in which the index files are stored.
        File indexDir = new File("C:\\testindex");
        Directory indexFilesDir = FSDirectory.open(indexDir);
        try {
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_35);

            IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_35, analyzer);
            iwConfig.setOpenMode(OpenMode.CREATE);

            IndexWriter indexWriter = new IndexWriter(indexFilesDir, iwConfig);
            long startTime = System.currentTimeMillis();
            try {
                for (File sourceFile : sourceFiles) {
                    if (!sourceFile.isFile() || !sourceFile.getName().endsWith(".txt")) {
                        continue;
                    }
                    System.out.println("\nFile " + sourceFile.getCanonicalPath() + "正在被索引......");
                    String content = fileReaderAll(sourceFile.getCanonicalPath(), "UTF-8");
                    System.out.println(content);

                    Document document = new Document();
                    document.add(new Field("path", sourceFile.getPath(),
                            Field.Store.YES, Field.Index.NO));
                    document.add(new Field("body", content,
                            Field.Store.YES, Field.Index.ANALYZED,
                            Field.TermVector.WITH_POSITIONS_OFFSETS));
                    indexWriter.addDocument(document);
                }
            } finally {
                // Always release the writer (and its write lock), even if indexing failed.
                indexWriter.close();
            }
            long endTime = System.currentTimeMillis();
            // Report the index location (the label says "index file address"), not the source.
            System.out.println("\n花费了" + (endTime - startTime)
                    + " 毫秒把文档增加到索引里面去!索引文件地址:" + indexDir.getPath());
        } finally {
            indexFilesDir.close();
        }
    }

    /**
     * Reads the whole content of a text file.
     *
     * <p>Lines are joined with {@code '\n'} so that words on adjacent lines are not glued
     * together into a single token; no trailing line break is appended.
     *
     * @param filePath path of the file to read
     * @param charset name of the character set used to decode the file
     * @return the file content
     * @throws IOException if the file cannot be opened or read, or the charset is unsupported
     */
    private static String fileReaderAll(String filePath, String charset) throws IOException {
        FileInputStream in = new FileInputStream(filePath);
        try {
            BufferedReader reader = new BufferedReader(new InputStreamReader(in, charset));
            StringBuilder content = new StringBuilder();
            boolean firstLine = true;
            String line;
            while ((line = reader.readLine()) != null) {
                if (!firstLine) {
                    content.append('\n');
                }
                content.append(line);
                firstLine = false;
            }
            return content.toString();
        } finally {
            // Closing the underlying stream also covers a failure inside the reader constructors.
            in.close();
        }
    }
}
输出结果:
File C:\testsource\test1.txt正在被索引......
商务休闲品牌男装西裤衬衫

File C:\testsource\test2.txt正在被索引......
潮流休闲品牌女装裙子大衣

花费了569 毫秒把文档增加到索引里面去!索引文件地址:C:\testsource
5.关键字检索
import java.io.File;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Runs a keyword search against the {@code body} field of an existing Lucene index.
 *
 * <p>The query string is parsed with a {@code StandardAnalyzer} and the default operator is set
 * to AND, so every term of the query must match. At most 10 hits are requested and the number
 * of returned hits is printed to standard output.
 *
 * @author liam.huang@foxmail.com
 */
public class TextQuery {

    /**
     * Parses the query, searches the index and prints how many hits were returned.
     *
     * @param args unused
     * @throws Exception if the query cannot be parsed or the index cannot be opened or searched
     */
    public static void main(String[] args) throws Exception {
        String queryString = "休闲 装";
        // Directory in which the index files are stored.
        String indexDir = "C:\\testindex";

        // Parse first: a malformed query then fails before any index resource is opened.
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_35);
        QueryParser queryParser = new QueryParser(Version.LUCENE_35, "body", analyzer);
        queryParser.setDefaultOperator(QueryParser.AND_OPERATOR);
        Query query = queryParser.parse(queryString);

        FSDirectory directory = FSDirectory.open(new File(indexDir));
        try {
            IndexReader indexReader = IndexReader.open(directory);
            try {
                IndexSearcher indexSearcher = new IndexSearcher(indexReader);
                try {
                    // Return at most 10 records.
                    TopDocs results = indexSearcher.search(query, 10);
                    ScoreDoc[] hits = results.scoreDocs;
                    if (hits.length > 0) {
                        System.out.println("找到:" + hits.length + " 个结果!");
                    } else {
                        System.out.println("没有找到");
                    }
                } finally {
                    indexSearcher.close();
                }
            } finally {
                // A searcher built from an existing reader does not close that reader itself.
                indexReader.close();
            }
        } finally {
            directory.close();
        }
    }
}
输出结果:
找到:2 个结果!
原文:http://my.oschina.net/lock0818/blog/380083