Lucene 的最新版本为 1.9,从 Apache SVN 中 checkout 出来的源码已经包含了 Highlighter(高亮)模块。
测试一下:
package org.apache.lucene.search.highlight;
import junit.framework.TestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.store.RAMDirectory;
import java.io.IOException;
import java.io.StringReader;
/**
* @author martin
*/
public class WordsHighlighterTest extends TestCase {
private IndexReader reader;
RAMDirectory ramDirectory;
final private static String FIELD_NAME = "contents";
final private static String queryString = "索引";
String [] words = {
"1:索引内容结构:Document,以及包含于Document的多个Field索",
"2:索引内容优先性调整因子,boost(可对整个Document或Field指定).",
"3:索引的写入IndexWriter,索引的写入目标Directory,实现包括FsDirectory跟RamDirectory等",
"4:索引创建速度的调整"
};
protected void setUp() throws Exception {
ramDirectory = new RAMDirectory();
IndexWriter indexWriter = new IndexWriter(ramDirectory, new StandardAnalyzer(), true);
for (String s : words) {
addDoc(indexWriter, s);
}
indexWriter.optimize();
indexWriter.close();
reader = IndexReader.open(ramDirectory);
}
private void addDoc(IndexWriter indexWriter, String s) throws IOException {
Document doc = new Document();
doc.add(new Field(FIELD_NAME, s, Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.YES));
indexWriter.addDocument(doc);
}
public void testSimpleWords() throws Exception {
Query query = new QueryParser(FIELD_NAME, new StandardAnalyzer()).parse(queryString);
query = query.rewrite(reader);
System.out.println("Searching for: " + query.toString(FIELD_NAME));
Searcher searcher = new IndexSearcher(ramDirectory);
Hits hits = searcher.search(query);
Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<font color="red">", "</font>"), new QueryScorer(query));
highlighter.setTextFragmenter(new SimpleFragmenter(20));
Analyzer analyzer = new StandardAnalyzer();
for (int i = 0; i < hits.length(); i++) {
String text = hits.doc(i).get(FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
String result = highlighter.getBestFragments(tokenStream, text, 2, "");
System.out.println(" " + result);
}
}
protected void tearDown() throws Exception {
super.tearDown();
}
}
输出:
Searching for: "索 引"
4:索引创建速度的调整
3:索引的写入IndexWriter,索引的写入目标Directory,实现包
1:索引内容结构:Document,以索
2:索引内容优先性调整因子,boost(可对整个Document或Field指
import junit.framework.TestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.store.RAMDirectory;
import java.io.IOException;
import java.io.StringReader;
/**
* @author martin
*/
public class WordsHighlighterTest extends TestCase {
private IndexReader reader;
RAMDirectory ramDirectory;
final private static String FIELD_NAME = "contents";
final private static String queryString = "索引";
String [] words = {
"1:索引内容结构:Document,以及包含于Document的多个Field索",
"2:索引内容优先性调整因子,boost(可对整个Document或Field指定).",
"3:索引的写入IndexWriter,索引的写入目标Directory,实现包括FsDirectory跟RamDirectory等",
"4:索引创建速度的调整"
};
protected void setUp() throws Exception {
ramDirectory = new RAMDirectory();
IndexWriter indexWriter = new IndexWriter(ramDirectory, new StandardAnalyzer(), true);
for (String s : words) {
addDoc(indexWriter, s);
}
indexWriter.optimize();
indexWriter.close();
reader = IndexReader.open(ramDirectory);
}
private void addDoc(IndexWriter indexWriter, String s) throws IOException {
Document doc = new Document();
doc.add(new Field(FIELD_NAME, s, Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.YES));
indexWriter.addDocument(doc);
}
public void testSimpleWords() throws Exception {
Query query = new QueryParser(FIELD_NAME, new StandardAnalyzer()).parse(queryString);
query = query.rewrite(reader);
System.out.println("Searching for: " + query.toString(FIELD_NAME));
Searcher searcher = new IndexSearcher(ramDirectory);
Hits hits = searcher.search(query);
Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<font color="red">", "</font>"), new QueryScorer(query));
highlighter.setTextFragmenter(new SimpleFragmenter(20));
Analyzer analyzer = new StandardAnalyzer();
for (int i = 0; i < hits.length(); i++) {
String text = hits.doc(i).get(FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
String result = highlighter.getBestFragments(tokenStream, text, 2, "");
System.out.println(" " + result);
}
}
protected void tearDown() throws Exception {
super.tearDown();
}
}
输出:
Searching for: "索 引"
4:索引创建速度的调整
3:索引的写入IndexWriter,索引的写入目标Directory,实现包
1:索引内容结构:Document,以索
2:索引内容优先性调整因子,boost(可对整个Document或Field指
相关推荐
《Lucene Highlighter 3.4.0:搜索引擎中的文本高亮工具》 在信息爆炸的时代,搜索引擎成为了我们获取知识的重要途径。Apache Lucene,作为一款高性能、全文本搜索库,为开发者提供了强大的搜索功能。其中,Lucene ...
lucene-highlighter-3.5.0.jar lucene高亮包
《深入理解Lucene Highlighter 7.7.2:文本高亮的核心技术》 在信息检索领域,Lucene是一款强大的全文搜索引擎库,被广泛应用于各种信息检索系统中。而Lucene Highlighter作为其核心组件之一,是实现搜索结果高亮...
《Lucene+HighLighter高亮显示实例解析》 在信息技术领域,搜索引擎的构建与优化是至关重要的一环,其中,如何有效地对搜索结果进行高亮显示,以突出关键信息,是提升用户体验的关键因素之一。本篇文章将深入探讨...
lucene3.0-highlighter.jar lucene3.0的高亮jar包,从lucene3.0源码中导出来的
lucene-highlighter-2.4.0.jar lucene highlighter
《Apache Lucene Highlighter模块详解》 Apache Lucene是一个高性能、全文检索库,它为Java开发者提供了强大的文本分析和搜索功能。在Lucene的众多组件中,Highlighter模块扮演着重要角色,它能够帮助我们从搜索...
赠送jar包:lucene-highlighter-6.6.0.jar; 赠送原API文档:lucene-highlighter-6.6.0-javadoc.jar; 赠送源代码:lucene-highlighter-6.6.0-sources.jar; 赠送Maven依赖信息文件:lucene-highlighter-6.6.0.pom;...
赠送jar包:lucene-highlighter-7.3.1.jar; 赠送原API文档:lucene-highlighter-7.3.1-javadoc.jar; 赠送源代码:lucene-highlighter-7.3.1-sources.jar; 赠送Maven依赖信息文件:lucene-highlighter-7.3.1.pom;...
lucene-highlighter.jar lucene-highlighter.jar
赠送jar包:lucene-highlighter-7.7.0.jar; 赠送原API文档:lucene-highlighter-7.7.0-javadoc.jar; 赠送源代码:lucene-highlighter-7.7.0-sources.jar; 赠送Maven依赖信息文件:lucene-highlighter-7.7.0.pom;...
赠送jar包:lucene-highlighter-7.2.1.jar; 赠送原API文档:lucene-highlighter-7.2.1-javadoc.jar; 赠送源代码:lucene-highlighter-7.2.1-sources.jar; 赠送Maven依赖信息文件:lucene-highlighter-7.2.1.pom;...
赠送jar包:lucene-highlighter-7.3.1.jar; 赠送原API文档:lucene-highlighter-7.3.1-javadoc.jar; 赠送源代码:lucene-highlighter-7.3.1-sources.jar; 赠送Maven依赖信息文件:lucene-highlighter-7.3.1.pom;...
赠送jar包:lucene-highlighter-7.7.0.jar; 赠送原API文档:lucene-highlighter-7.7.0-javadoc.jar; 赠送源代码:lucene-highlighter-7.7.0-sources.jar; 赠送Maven依赖信息文件:lucene-highlighter-7.7.0.pom;...
赠送jar包:lucene-highlighter-7.2.1.jar; 赠送原API文档:lucene-highlighter-7.2.1-javadoc.jar; 赠送源代码:lucene-highlighter-7.2.1-sources.jar; 赠送Maven依赖信息文件:lucene-highlighter-7.2.1.pom;...
lucene-highlighter-4.3.0.jar
lucene-highlighter-2.9.0.jar
lucene-highlighter-3.0.1.jar 还在为找不到jar文件烦心吗,不用了到我空间来有你想要的,持续更新。
《Lucene 3.6.2与Highlighter:搜索引擎中的文本高亮技术》 Lucene是一个高度可定制的全文搜索引擎库,广泛应用于各种Java应用程序中,以实现强大的搜索功能。在3.6.2版本中,Lucene提供了一整套功能,包括索引构建...
实现中文分词的jar包:lucene-highlighter-3.6.1.jar