SpanQuery是按照词在文章中的位置(词距)进行匹配的一类查询,可以用来查找彼此相邻或相隔一定距离的几个词。
SpanQuery包括以下几种:
SpanTermQuery:词距查询的基础,结果和TermQuery相似,只不过是增加了查询结果中单词的距离信息。
SpanFirstQuery:只匹配出现在字段开头指定位置范围之内的词距查询结果,即被查询的词必须落在前N个位置以内。
SpanNearQuery:要求几个子查询的匹配结果之间保持着一定的距离(不超过指定的slop),并且可以指定是否必须按给定顺序出现。
SpanOrQuery:把几个词距查询的结果合并在一起(取并集)的查询。
SpanNotQuery:从一个词距查询的结果中,去除与另一个词距查询的结果相重叠的部分。
下面通过一个简单的例子来介绍:
- package com;
-
-
- import java.io.IOException;
- import java.io.StringReader;
- import java.util.ArrayList;
- import java.util.List;
- import org.apache.lucene.analysis.Analyzer;
- import org.apache.lucene.analysis.Token;
- import org.apache.lucene.analysis.TokenStream;
- import org.apache.lucene.analysis.WhitespaceAnalyzer;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.document.Field;
- import org.apache.lucene.document.Field.Index;
- import org.apache.lucene.document.Field.Store;
- import org.apache.lucene.index.IndexReader;
- import org.apache.lucene.index.IndexWriter;
- import org.apache.lucene.index.Term;
- import org.apache.lucene.search.Hits;
- import org.apache.lucene.search.IndexSearcher;
- import org.apache.lucene.search.spans.SpanFirstQuery;
- import org.apache.lucene.search.spans.SpanNearQuery;
- import org.apache.lucene.search.spans.SpanNotQuery;
- import org.apache.lucene.search.spans.SpanOrQuery;
- import org.apache.lucene.search.spans.SpanQuery;
- import org.apache.lucene.search.spans.SpanTermQuery;
- import org.apache.lucene.search.spans.Spans;
- import org.apache.lucene.store.RAMDirectory;
-
- /**
-  * Console demo of Lucene span queries.
-  *
-  * <p>{@link #index()} builds a two-document in-memory index and prepares one
-  * {@link SpanTermQuery} per interesting word; each {@code span*QueryTest} method
-  * then builds a span query and prints its matches through {@code dumpSpans}.
-  * Call {@link #close()} when finished to release the searcher and reader.
-  */
- public class SpanQueryTest {
-
-     /** Name of the single field that is indexed and queried. */
-     private static final String FIELD = "field";
-
-     private RAMDirectory directory;
-
-     private IndexSearcher indexSearcher;
-
-     private IndexReader reader;
-
-     private SpanTermQuery quick;
-
-     private SpanTermQuery brown;
-
-     private SpanTermQuery red;
-
-     private SpanTermQuery fox;
-
-     private SpanTermQuery lazy;
-
-     private SpanTermQuery sleepy;
-
-     private SpanTermQuery dog;
-
-     private SpanTermQuery cat;
-
-     private Analyzer analyzer;
-
-     /**
-      * Builds the in-memory index, creates the reusable term queries and opens
-      * the reader/searcher used by the query methods.
-      *
-      * @throws IOException if writing to or opening the index fails
-      */
-     public void index() throws IOException {
-         directory = new RAMDirectory();
-         analyzer = new WhitespaceAnalyzer();
-
-         IndexWriter writer = new IndexWriter(directory, analyzer, true);
-         try {
-             writer.addDocument(newDocument("the quick brown fox jumps over the lazy dog"));
-             writer.addDocument(newDocument("the quick red fox jumps over the sleepy cat"));
-             writer.optimize();
-         } finally {
-             // Close the writer even when adding a document fails.
-             writer.close();
-         }
-
-         quick = spanTerm("quick");
-         brown = spanTerm("brown");
-         red = spanTerm("red");
-         fox = spanTerm("fox");
-         lazy = spanTerm("lazy");
-         sleepy = spanTerm("sleepy");
-         dog = spanTerm("dog");
-         cat = spanTerm("cat");
-
-         // Share one reader between span enumeration and scoring so that the
-         // document ids reported by both always refer to the same index view.
-         reader = IndexReader.open(directory);
-         indexSearcher = new IndexSearcher(reader);
-     }
-
-     /**
-      * Releases the searcher and the reader opened by {@link #index()}.
-      * Safe to call when {@link #index()} was never run or failed part-way.
-      *
-      * @throws IOException if closing the searcher or the reader fails
-      */
-     public void close() throws IOException {
-         try {
-             if (indexSearcher != null) {
-                 indexSearcher.close();
-             }
-         } finally {
-             // The searcher was built on top of this reader, so the reader is
-             // closed here explicitly.
-             if (reader != null) {
-                 reader.close();
-             }
-         }
-     }
-
-     /** Creates a document whose only field holds the given text, stored and tokenized. */
-     private static Document newDocument(String text) {
-         Document doc = new Document();
-         doc.add(new Field(FIELD, text, Store.YES, Index.TOKENIZED));
-         return doc;
-     }
-
-     /** Creates a span term query for the given word in the demo field. */
-     private static SpanTermQuery spanTerm(String word) {
-         return new SpanTermQuery(new Term(FIELD, word));
-     }
-
-     /**
-      * Prints one line per span matched by {@code query}: the document text with
-      * the matched span wrapped in {@code <...>}, followed by the document score
-      * in parentheses.
-      *
-      * @param query the span query to run
-      * @throws IOException if reading from the index fails
-      */
-     private void dumpSpans(SpanQuery query) throws IOException {
-         Hits hits = indexSearcher.search(query);
-
-         // Scores are looked up by document id, so the array must cover every
-         // possible id in the index rather than a fixed number of documents.
-         float[] scores = new float[reader.maxDoc()];
-         for (int i = 0; i < hits.length(); i++) {
-             scores[hits.id(i)] = hits.score(i);
-         }
-
-         Spans spans = query.getSpans(reader);
-         while (spans.next()) {
-             int id = spans.doc();
-             Document doc = reader.document(id);
-             Token[] tokens = AnalyzerUtils.tokensFromAnalysis(analyzer, doc.get(FIELD));
-
-             // Assumes token index equals token position (no position gaps),
-             // which is how span start/end are compared with the loop index.
-             StringBuilder buffer = new StringBuilder();
-             for (int i = 0; i < tokens.length; i++) {
-                 if (i == spans.start()) {
-                     buffer.append("<");
-                 }
-                 buffer.append(tokens[i].termText());
-                 if (i + 1 == spans.end()) {
-                     buffer.append(">");
-                 }
-                 buffer.append(" ");
-             }
-             buffer.append("(").append(scores[id]).append(") ");
-
-             System.out.println(buffer);
-         }
-     }
-
-     /** Dumps the spans of the plain term query for "brown". */
-     public void spanTermQueryTest() throws IOException {
-         dumpSpans(brown);
-     }
-
-     /** Dumps the spans of "brown" restricted by a {@link SpanFirstQuery} with end 3. */
-     public void spanFirstQueryTest() throws IOException {
-         SpanFirstQuery firstQuery = new SpanFirstQuery(brown, 3);
-         dumpSpans(firstQuery);
-     }
-
-     /**
-      * Dumps two {@link SpanNearQuery} examples with slop 5: an in-order query for
-      * quick/brown/fox and an unordered query for quick/dog/brown.
-      */
-     public void spanNearQueryTest() throws IOException {
-         SpanNearQuery nearQuery = new SpanNearQuery(
-                 new SpanQuery[] { quick, brown, fox }, 5, true);
-         dumpSpans(nearQuery);
-
-         nearQuery = new SpanNearQuery(
-                 new SpanQuery[] { quick, dog, brown }, 5, false);
-         dumpSpans(nearQuery);
-     }
-
-     /**
-      * Dumps "quick" near "fox" (slop 1, in order), then the same query with
-      * "red" excluded through a {@link SpanNotQuery}.
-      */
-     public void spanNotQueryTest() throws IOException {
-         SpanNearQuery quickFox = new SpanNearQuery(
-                 new SpanQuery[] { quick, fox }, 1, true);
-         dumpSpans(quickFox);
-
-         SpanNotQuery quickFoxWithoutRed = new SpanNotQuery(quickFox, red);
-         dumpSpans(quickFoxWithoutRed);
-     }
-
-     /**
-      * Builds two nested near queries ("quick fox" near "lazy dog", and
-      * "quick fox" near "sleepy cat"), dumps each, then dumps their combination
-      * in a {@link SpanOrQuery}.
-      */
-     public void spanOrQueryTest() throws IOException {
-         SpanNearQuery quickFox = new SpanNearQuery(
-                 new SpanQuery[] { quick, fox }, 1, true);
-         SpanNearQuery lazyDog = new SpanNearQuery(
-                 new SpanQuery[] { lazy, dog }, 0, true);
-         SpanNearQuery sleepyCat = new SpanNearQuery(
-                 new SpanQuery[] { sleepy, cat }, 0, true);
-
-         SpanNearQuery quickFoxNearLazyDog = new SpanNearQuery(
-                 new SpanQuery[] { quickFox, lazyDog }, 3, true);
-         dumpSpans(quickFoxNearLazyDog);
-
-         SpanNearQuery quickFoxNearSleepyCat = new SpanNearQuery(
-                 new SpanQuery[] { quickFox, sleepyCat }, 3, true);
-         dumpSpans(quickFoxNearSleepyCat);
-
-         SpanOrQuery or = new SpanOrQuery(
-                 new SpanQuery[] { quickFoxNearLazyDog, quickFoxNearSleepyCat });
-         dumpSpans(or);
-     }
-
-     /** Builds the index, runs the SpanOrQuery example and releases the index resources. */
-     public static void main(String[] args) throws IOException {
-         SpanQueryTest test = new SpanQueryTest();
-         try {
-             test.index();
-             test.spanOrQueryTest();
-         } finally {
-             test.close();
-         }
-     }
- }
-
- /** Helper for running text through an {@link Analyzer} and collecting the resulting tokens. */
- class AnalyzerUtils {
-     /**
-      * Analyzes {@code text} and returns every token the analyzer produces, in order.
-      *
-      * <p>The token stream is requested under the field name {@code "contents"}.
-      *
-      * @param analyzer the analyzer used to tokenize the text
-      * @param text the text to analyze
-      * @return the tokens in stream order; an empty array when the stream yields none
-      * @throws IOException if reading from or closing the token stream fails
-      */
-     public static Token[] tokensFromAnalysis(Analyzer analyzer, String text)
-             throws IOException {
-         TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
-         List<Token> tokens = new ArrayList<Token>();
-         try {
-             // next() returns null once the stream is exhausted.
-             for (Token token = stream.next(); token != null; token = stream.next()) {
-                 tokens.add(token);
-             }
-         } finally {
-             // Release the stream (and the reader behind it) even if analysis fails.
-             stream.close();
-         }
-         return tokens.toArray(new Token[0]);
-     }
- }
lucene的多种搜索2-SpanQuery
原文:http://www.cnblogs.com/1130136248wlxk/p/5031130.html