Lucene实现搜索结果命中关键字高亮显示

浏览 4285 次

锁定老帖子主题：Lucene实现搜索结果命中关键字高亮显示精华帖 (0) :: 良好帖 (0) :: 新手帖 (0) :: 隐藏帖 (0)
作者	正文
liuwei1981 等级: 性别: 文章: 158 积分: 140 来自: 太原	发表时间：2009-03-24 最后修改：2009-03-24 相关推荐: SDRplay_RSP_API-Windows-2.13.1.exe [uboot] （番外篇）uboot串口&console&stdio设备工作流程 Solaris10中级读书笔记之一:目录结构的介绍为Linux系统配置serial console 【常用操作】ubuntu系统部署说明更多相关推荐入门技术 Lucene 使用lucene实现搜索结果命中关键字高亮显示，大致流程与lucene 建立文件索引和针对索引进行搜索（lucene2.2版本）介绍的一致，只需在代码里稍作修改。 1.索引生成过程：（红色为修改部分，针对需要进行高亮显示索引内容，进行分词与关键字位置索引） package demo.example.searcher; import java.io.*; import java.util.*; import org.apache.lucene.analysis.standard.*; import org.apache.lucene.index.*; import org.apache.lucene.document.*; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; public class Indexer { private static Log log = LogFactory.getLog(Indexer.class); public static void main(String[] args) throws Exception { File indexDir = new File("C:\\index"); File dataDir = new File("C:\\lucene\\src"); long start = new Date().getTime(); int numIndexed = index(indexDir, dataDir); long end = new Date().getTime(); System.out.println("use:" + (end - start)); } public static int index(File indexDir, File dataDir) { int ret = 0; try { IndexWriter writer = new IndexWriter(indexDir, new StandardAnalyzer(), true); writer.setUseCompoundFile(false); indexDirectory(writer, dataDir); ret = writer.docCount(); writer.optimize(); writer.close(); } catch (Exception e) { e.printStackTrace(); } return ret; } public static void indexDirectory(IndexWriter writer, File dir) { try { File[] files = dir.listFiles(); for (File f : files) { if (f.isDirectory()) { indexDirectory(writer, f); } else { indexFile(writer, f); } } } catch (Exception e) { e.printStackTrace(); } } public static void indexFile(IndexWriter writer, File f) { try { System.out.println("Indexing:" + f.getCanonicalPath()); Document doc = new Document(); Reader txtReader = new FileReader(f); doc.add(new Field("contents", txtReader, Field.TermVector.WITH_POSITIONS_OFFSETS)); doc.add(new Field("filename", f.getCanonicalPath(), Field.Store.YES, Field.Index.UN_TOKENIZED)); 
writer.addDocument(doc); } catch (Exception e) { e.printStackTrace(); } } } 2.搜索过程，红色的为修改部分，需要highlight和analysis的lucene的jar文件 package demo.example.searcher; import java.util.*; import org.apache.lucene.search.highlight.*; import org.apache.lucene.analysis.*; import org.apache.lucene.search.*; import org.apache.lucene.queryParser.*; import org.apache.lucene.analysis.standard.*; import org.apache.lucene.document.*; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; public class Searcher { private static Log log = LogFactory.getLog(Searcher.class); public static void main(String[] args) { String indexDir = "C:\\index"; String q = "查询关键字"; search(indexDir, q); } public static void search(String indexDir, String q) { try { IndexSearcher is = new IndexSearcher(indexDir); QueryParser queryParser = new QueryParser("contents", new StandardAnalyzer()); Query query = queryParser.parse(q); long start = new Date().getTime(); Hits hits = is.search(query); long end = new Date().getTime(); System.out.println("use:" + (end - start)); SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<strong><font color='red'>", "</font></strong>"); SimpleFragmenter fragmenter = new SimpleFragmenter(60); Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query)); highlighter.setTextFragmenter(fragmenter); int maxNumFragmentsRequired = 10; String fragmentSeparator = ""; TermPositionVector tpv = null; TokenStream tokenstream = null; for (int i = 0; i < hits.length(); i++) { Document doc = hits.doc(i); int id = hits.id(i); System.out.println("The right file:" + doc.get("filename")); tpv = (TermPositionVector) is.getIndexReader().getTermFreqVector(id, "contents"); tokenstream = TokenSources.getTokenStream(tpv); String result = highlighter.getBestFragments(tokenstream, doc.get("contents"), maxNumFragmentsRequired, fragmentSeparator); System.out.println("The right file context is :" + result); } } catch (Exception e) { e.printStackTrace(); } } } 
在输出结果中，文件内容中命中的关键字会被 "<strong><font color='red'>" 和 "</font></strong>" 标签包裹；在网页中显示该内容时，关键字即呈现为粗体红字效果。 声明：ITeye文章版权属于作者，受法律保护。没有作者书面许可不得转载。推荐链接
返回顶楼

论坛首页 → 入门技术版

跳转论坛: