`

lucene3.6入门实例

 
阅读更多

import java.io.File;
import java.io.IOException;
import java.sql.Timestamp;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import com.wangxiaowang.article.Article;
import com.wangxiaowang.article.ArticleProvider;
/**
 * Builds a Lucene 3.6 full-text index over articles that are supplied in
 * batches by {@link ArticleProvider}.
 */
public class ArticleIndexBuilder {

    /** Filesystem directory path where the index is written. */
    private String indexPath;
    /** Analyzer used to tokenize the analyzed fields. */
    private Analyzer analyzer;
    /** Batch size requested from the ArticleProvider on each fetch. */
    private int recordCountPreTime;

    public ArticleIndexBuilder(String indexPath, Analyzer analyzer, int recordCountPreTime) {
        this.indexPath = indexPath;
        this.analyzer = analyzer;
        this.recordCountPreTime = recordCountPreTime;
    }

    /**
     * Rebuilds the index from scratch ({@code OpenMode.CREATE} discards any
     * existing index), streaming article batches and adding one document per
     * article.
     */
    public void build() {
        FSDirectory directory = null;
        IndexWriter writer = null;
        try {
            directory = FSDirectory.open(new File(indexPath));
            IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_36, analyzer);
            conf.setOpenMode(OpenMode.CREATE);
            writer = new IndexWriter(directory, conf);

            ArticleProvider articleProvider = new ArticleProvider(recordCountPreTime);
            while (articleProvider.hasNext()) {
                addDocs(writer, articleProvider.next());
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Null-check before closing: the original code dereferenced
            // writer/directory unconditionally here, which threw an NPE
            // (masking the real exception) whenever FSDirectory.open or the
            // IndexWriter constructor failed.
            try {
                if (writer != null) {
                    writer.close();
                }
            } catch (IOException e) { // CorruptIndexException is an IOException
                e.printStackTrace();
            }
            try {
                if (directory != null) {
                    directory.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /** Adds one Lucene document per article in the batch. */
    private void addDocs(IndexWriter writer, List<Article> articleList)
            throws CorruptIndexException, IOException {
        for (Article article : articleList) {
            Document doc = new Document();
            addFields(doc, article);
            writer.addDocument(doc);
            System.out.println("=========>one record ok " + article.getStr("title"));
        }
    }

    /** Copies the searchable/stored fields of an article into the document. */
    private void addFields(Document doc, Article article) {
        doc.add(getKeywordsField("id", article.getInt("id") + ""));
        doc.add(getIndexField("title", article.getStr("title")));
        // NOTE(review): the "content" field is populated from the article's
        // "keywords" column — confirm this mapping is intentional.
        doc.add(getIndexField("content", article.getStr("keywords")));
        doc.add(getKeywordsField("subject_id", article.getInt("subject_id") + ""));
        doc.add(getKeywordsField("subject_name", article.getStr("subject_name")));
        doc.add(getKeywordsField("publish_time", formatPublishTime(article.getTimestamp("publish_time"))));
    }

    /**
     * Formats a publish timestamp as {@code yyyy-MM-dd}; a null timestamp is
     * treated as "now".
     */
    private String formatPublishTime(Timestamp time) {
        if (time == null) {
            time = new Timestamp(System.currentTimeMillis());
        }
        // SimpleDateFormat is not thread-safe, so a fresh instance per call
        // is deliberate.
        DateFormat df = new SimpleDateFormat("yyyy-MM-dd");
        return df.format(time);
    }

    /** Stored, NOT analyzed: exact-match (keyword-style) field. */
    private Field getKeywordsField(String name, String value) {
        return new Field(name, value, Store.YES, Index.NOT_ANALYZED);
    }

    /** Stored AND analyzed: full-text-searchable field. */
    private Field getIndexField(String name, String value) {
        return new Field(name, value, Store.YES, Index.ANALYZED);
    }
}

 

import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import com.jfinal.plugin.activerecord.Page;
import com.wangxiaowang.article.Article;

/**
 * Searches the Lucene 3.6 article index and returns one page of results with
 * the title highlighted.
 */
public class ArticleIndexSearcher {

    /** Filesystem directory path of the index to search. */
    private String indexPath;
    /** Analyzer used for query parsing and highlight tokenization. */
    private Analyzer analyzer;

    public ArticleIndexSearcher(String indexPath, Analyzer analyzer) {
        this.indexPath = indexPath;
        this.analyzer = analyzer;
    }

    /**
     * Runs a multi-field query and returns the requested result page.
     *
     * @param queryStr raw query string, parsed by MultiFieldQueryParser
     * @param pageSize number of results per page
     * @param pageNum  1-based page number
     * @param limits   maximum number of hits collected overall
     * @return the result page, or null if an exception occurred
     */
    public Page<Article> search(String queryStr, int pageSize, int pageNum, int limits) {
        FSDirectory directory = null;
        IndexReader reader = null;
        IndexSearcher searcher = null;
        List<Article> articleList = new ArrayList<Article>();
        Page<Article> articlePage = null;
        // 0-based offset of the first hit on the requested page. The original
        // computed (pageNum-1)*pageSize + 1, which skipped the first hit of
        // every page because docs[] is 0-indexed.
        int start = (pageNum - 1) * pageSize;
        int total = 0;
        try {
            directory = FSDirectory.open(new File(indexPath));
            reader = IndexReader.open(directory);
            searcher = new IndexSearcher(reader);
            // Query the "content" field — the field the index builder actually
            // stores the keyword text under. The original queried "keywords",
            // a field that does not exist in the index, so those matches were
            // silently lost.
            QueryParser qp = new MultiFieldQueryParser(
                    Version.LUCENE_36, new String[] {"title", "content"}, analyzer);
            Query query = qp.parse(queryStr);

            // No custom sort — default relevance ordering.
            ScoreDoc[] docs = searcher.search(query, limits).scoreDocs;

            // Title highlighting. The original format string contained a stray
            // apostrophe ("<font color='#FF0000''>") that produced malformed
            // HTML.
            SimpleHTMLFormatter formatter =
                    new SimpleHTMLFormatter("<font color='#FF0000'>", "</font>");
            Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
            highlighter.setTextFragmenter(new SimpleFragmenter(1500));

            total = docs.length;
            for (int i = start; i < start + pageSize && i < total; i++) {
                Document d = searcher.doc(docs[i].doc);
                String rawTitle = d.get("title");
                if (rawTitle == null) {
                    rawTitle = "";
                }
                TokenStream tokenStream =
                        analyzer.tokenStream("title", new StringReader(rawTitle));
                String title = highlighter.getBestFragment(tokenStream, rawTitle);
                // getBestFragment returns null when the query does not match
                // the title; fall back to the unhighlighted title instead of
                // propagating null.
                if (title == null) {
                    title = rawTitle;
                }
                articleList.add(buildArticle(d.get("id"), title, d.get("content"),
                        d.get("subject_id"), d.get("subject_name"), d.get("publish_time")));
            }

            articlePage = new Page<Article>(articleList, pageNum, pageSize,
                    (total + pageSize - 1) / pageSize, total);
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        } catch (InvalidTokenOffsetsException e) {
            e.printStackTrace();
        } finally {
            // Null-check each resource independently: the original NPE'd in
            // finally whenever opening the index failed, and an exception from
            // one close() skipped the remaining ones.
            try {
                if (searcher != null) searcher.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
            try {
                if (reader != null) reader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
            try {
                if (directory != null) directory.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return articlePage;
    }

    /** Assembles an Article model object from stored index field values. */
    private Article buildArticle(String id, String title, String keywords,
            String subjectId, String subjectName, String publishTime) {
        Article article = new Article();
        article.set("id", id);
        article.set("title", title);
        article.set("content", keywords);
        article.set("subject_id", subjectId);
        article.set("subject_name", subjectName);
        // Fixed default publish date used when the stored field is missing.
        article.set("publish_time", publishTime == null ? "2012-06-01" : publishTime);
        return article;
    }
}

分享到:
评论

相关推荐

    lucene3.6入门实例教程

    lucene3.6入门实例教程 完整代码示例,lucene入门学习很好的资料

    lucene3.6 搜索例子

    《Lucene 3.6 搜索实例解析》 Apache Lucene 是一个开源全文搜索引擎库,为开发者提供了在Java应用程序中实现高效、可扩展的搜索功能的工具。在本篇文章中,我们将深入探讨Lucene 3.6版本中的搜索功能,通过实例...

    lucene3.6实例(索引和查询)

    在网上找了实例,但是发现不能使用,只能建立索引。...lucene3.6版本,能够建立索引,能搜索。IndexWriter, IndexSearcher. 其中包C下的helloworld实例能用,其余的全是网上不能用的。直接下载 可以运行

    lucene3.6的入门案例

    **Lucene 3.6 入门案例** Lucene 是一个高性能、全文本搜索库,由 Apache 软件基金会开发。它提供了完整的搜索功能,包括索引、查询、评分等,广泛应用于各种项目和产品中。在这个入门案例中,我们将深入理解如何...

    lucene3.6 模仿百度自动补全

    lucene3.6 模仿百度自动补全(lucene3.6 模仿百度自动补全(lucene3.6 模仿百度自动补全(lucene3.6 模仿百度自动补全(lucene3.6 模仿百度自动补全(lucene3.6 模仿百度自动补全(lucene3.6 模仿百度自动补全

    Lucene 3.6 学习笔记

    第一章 LUCENE基础 2 1.1 索引部分的核心类 2 1.2 分词部分的核心类 2 1.3 搜索部分的核心类 2 第二章 索引建立 3 2.1 创建Directory 3 2.2 创建Writer 3 2.3 创建文档并且添加索引 4 2.4 查询索引的基本信息 5 2.5 ...

    lucene3.6.jar

    这个是使用lucene实现全文检索的jar包 包含IkAnalyer jar这个分词器 使用非常方便

    lucene 3.6

    在 Lucene 3.6 版本中,它提供了强大的文本搜索功能,适用于Java开发人员。这个版本相对稳定,对于初学者来说是一个很好的学习起点。 首先,我们来了解一下 Lucene 的核心概念: 1. **索引(Index)**:Lucene 的...

    lucene 3.6 全文检索

    NULL 博文链接:https://yuan-bin1990.iteye.com/blog/1700272

    IKAnalyzer修复源码,Lucene3.6 Jar及使用示例

    修复IKAnalyzer2012存在的无法添加扩展的中文停用词的bug。详见:http://blog.csdn.net/kmguo/article/details/8779522

    lucene3.6 的源代码

    lucene3.6的src包,可以用于附加上去看相应的源代码

    lucene 3.6 索引格式总结

    本文档详细介绍了lucene3.6中的索引,以及每个部分对应于硬盘下的文件夹里的哪个文件。这个根据本人多年学术及编程经验总结的

    lucene3.6工程原文件

    超全的lucene3.6学习记录,实现了3中不同方法的检索,而且都很简单,容易上手,附带工程原文件

    lucene 3.6 检索文件 pdf word ppt excel txt html xml

    《Lucene 3.6 全文检索技术详解与应用》 Lucene 是一个高性能、全文本搜索引擎库,由Apache软件基金会开发。在版本3.6中,它提供了强大的文件检索功能,支持对多种文件类型的搜索,包括PDF、Word、PPT、Excel、TXT...

    基于Lucene3.6进行全文检索的小案例

    基于Lucene3.6进行全文检索的开发

    lucene3.6+IKAnalyzer2012FF_u1

    lucene3.6+IKAnalyzer2012FF_u1,配套的JAR包,google code关了好不容易才下载回来的

    lucene 3.0 入门实例

    **Lucene 3.0 入门实例** Lucene 是一个高性能、全文本搜索库,由 Apache 软件基金会开发。它提供了完整的搜索功能,包括索引、查询解析、排序以及高级的文本分析能力。在 Lucene 3.0 版本中,开发者可以利用其强大...

    第一个Lucene 3.6 (3.X) 入门实例

    NULL 博文链接:https://never-forget.iteye.com/blog/1282200

    Lucene3.0入门实例含jar包

    **Lucene 3.0 入门实例及关键知识点** Lucene 是一个开源的全文搜索引擎库,由 Apache 软件基金会开发。它为开发者提供了在应用程序中实现文本搜索功能的强大工具。本实例主要针对 Lucene 3.0 版本,这个版本虽然...

    Lucene 简单入门程序实例

    Lucene 简单入门程序实例

Global site tag (gtag.js) - Google Analytics