1
2
3
4
可以查看某串字符最终被分割成什么样子,这样便于查询时深刻明白为什么有的查不到有的却能查到.
package com.isoftstone.www.tool;
import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;
/**
*展示分词后的效果
*/
public class AnalyzerTool {
/**
* 打印分词后的信息
* @param str 待分词的字符串
* @param analyzer 分词器
*/
public static void displayToken(String str,Analyzer analyzer){
try {
//将一个字符串创建成Token流
TokenStream stream = analyzer.tokenStream("content", new StringReader(str));
CharTermAttribute cta = stream.addAttribute(CharTermAttribute.class);
stream.reset();//一定要重置,不然老报错
while(stream.incrementToken()){
System.out.print("【" + cta + "】");
}
System.out.println();
} catch (IOException e) {
e.printStackTrace();
}
}
public static void main(String[] args) {
Analyzer aly1 = new StandardAnalyzer(Version.LUCENE_40);
Analyzer aly2 = new StopAnalyzer(Version.LUCENE_40);
Analyzer aly3 = new SimpleAnalyzer(Version.LUCENE_40);
Analyzer aly4 = new WhitespaceAnalyzer(Version.LUCENE_40);
String str = "Hello ,I am King,我是 中国人,my email is jianyuan_5731@qq.com";
AnalyzerTool.displayToken(str, aly1);
AnalyzerTool.displayToken(str, aly2);
AnalyzerTool.displayToken(str, aly3);
AnalyzerTool.displayToken(str, aly4);
}
}
原文:http://www.cnblogs.com/whatlonelytear/p/5009124.html