import java.io.File;
import java.io.IOException;
import java.sql.Timestamp;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import com.wangxiaowang.article.Article;
import com.wangxiaowang.article.ArticleProvider;
/**
 * Builds a Lucene (3.6) full-text index over all articles supplied by
 * {@link ArticleProvider}, writing the index into a filesystem directory.
 *
 * <p>The index is created from scratch on every {@link #build()} call
 * ({@code OpenMode.CREATE} discards any existing index at the path).
 */
public class ArticleIndexBuilder {
    /** Filesystem path of the directory that holds the Lucene index. */
    private String indexPath;
    /** Analyzer used to tokenize the ANALYZED fields (title/content). */
    private Analyzer analyzer;
    /** Batch size: number of article records fetched from the provider per iteration. */
    private int recordCountPreTime;

    /**
     * @param indexPath          directory the index is written to
     * @param analyzer           analyzer used for the analyzed fields
     * @param recordCountPreTime how many records the provider returns per batch
     */
    public ArticleIndexBuilder(String indexPath, Analyzer analyzer, int recordCountPreTime) {
        this.indexPath = indexPath;
        this.analyzer = analyzer;
        this.recordCountPreTime = recordCountPreTime;
    }

    /**
     * Rebuilds the whole index: opens the directory, creates a fresh writer,
     * then streams article batches from {@link ArticleProvider} into it.
     * I/O failures are logged and swallowed (best-effort batch job).
     */
    public void build() {
        FSDirectory directory = null;
        IndexWriter writer = null;
        try {
            directory = FSDirectory.open(new File(indexPath));
            IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_36, analyzer);
            // CREATE wipes any previous index so the rebuild is always complete.
            conf.setOpenMode(OpenMode.CREATE);
            writer = new IndexWriter(directory, conf);
            ArticleProvider articleProvider = new ArticleProvider(recordCountPreTime);
            while (articleProvider.hasNext()) {
                addDocs(writer, articleProvider.next());
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // BUGFIX: the original called writer.close() unconditionally, which
            // threw NullPointerException whenever FSDirectory.open or the
            // IndexWriter constructor failed, and a failing writer.close()
            // also skipped directory.close() (leaking the directory lock).
            if (writer != null) {
                try {
                    writer.close();
                } catch (IOException e) { // CorruptIndexException is a subclass of IOException
                    e.printStackTrace();
                }
            }
            if (directory != null) {
                try {
                    directory.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /** Converts each article of the batch to a Document and adds it to the writer. */
    private void addDocs(IndexWriter writer, List<Article> articleList) throws CorruptIndexException, IOException {
        for (Article article : articleList) {
            Document doc = new Document();
            addFields(doc, article);
            writer.addDocument(doc);
            System.out.println("=========>one record ok " + article.getStr("title"));
        }
    }

    /** Populates the searchable fields; "content" intentionally stores the article keywords. */
    private void addFields(Document doc, Article article) {
        doc.add(getKeywordsField("id", article.getInt("id") + ""));
        doc.add(getIndexField("title", article.getStr("title")));
        // NOTE(review): the "content" field is filled from "keywords" — the
        // searcher side relies on this mapping, so it is preserved as-is.
        doc.add(getIndexField("content", article.getStr("keywords")));
        doc.add(getKeywordsField("subject_id", article.getInt("subject_id") + ""));
        doc.add(getKeywordsField("subject_name", article.getStr("subject_name")));
        doc.add(getKeywordsField("publish_time", formatPublishTime(article.getTimestamp("publish_time"))));
    }

    /**
     * Formats a publish timestamp as {@code yyyy-MM-dd}; a {@code null}
     * timestamp falls back to the current time.
     */
    private String formatPublishTime(Timestamp time) {
        if (time == null) {
            time = new Timestamp(System.currentTimeMillis());
        }
        DateFormat df = new SimpleDateFormat("yyyy-MM-dd");
        return df.format(time);
    }

    /** Field stored verbatim and matched exactly (not tokenized). */
    private Field getKeywordsField(String name, String value) {
        return new Field(name, value, Store.YES, Index.NOT_ANALYZED);
    }

    /** Field stored and tokenized by the configured analyzer for full-text search. */
    private Field getIndexField(String name, String value) {
        return new Field(name, value, Store.YES, Index.ANALYZED);
    }
}
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import com.jfinal.plugin.activerecord.Page;
import com.wangxiaowang.article.Article;
/**
 * Searches the Lucene (3.6) article index built by the index builder,
 * returning a page of {@link Article} results with the title highlighted.
 */
public class ArticleIndexSearcher {
    /** Filesystem path of the directory that holds the Lucene index. */
    private String indexPath;
    /** Analyzer used both for query parsing and for highlight tokenization. */
    private Analyzer analyzer;

    /**
     * @param indexPath directory containing the index
     * @param analyzer  analyzer matching the one used at index time
     */
    public ArticleIndexSearcher(String indexPath, Analyzer analyzer) {
        this.indexPath = indexPath;
        this.analyzer = analyzer;
    }

    /**
     * Runs a multi-field query over "title" and "keywords" and returns one
     * page of results, with query terms in the title wrapped in a red font tag.
     *
     * @param queryStr user query string (Lucene query syntax)
     * @param pageSize number of results per page
     * @param pageNum  1-based page number
     * @param limits   maximum number of hits to retrieve overall
     * @return the requested page, or {@code null} when searching failed
     */
    public Page<Article> search(String queryStr, int pageSize, int pageNum, int limits) {
        FSDirectory directory = null;
        IndexReader reader = null;
        IndexSearcher searcher = null;
        List<Article> articleList = new ArrayList<Article>();
        Page<Article> articlePage = null;
        // BUGFIX: ScoreDoc hits are 0-based. The original computed
        // start = (pageNum-1)*pageSize + 1 and looped i <= end, which
        // skipped the first hit of every page (page 1 returned hits 1..pageSize
        // instead of 0..pageSize-1).
        int start = (pageNum - 1) * pageSize;
        int end = start + pageSize; // exclusive upper bound
        int total = 0;
        try {
            directory = FSDirectory.open(new File(indexPath));
            reader = IndexReader.open(directory);
            searcher = new IndexSearcher(reader);
            QueryParser qp = new MultiFieldQueryParser(Version.LUCENE_36, new String[] {"title", "keywords"}, analyzer);
            Query query = qp.parse(queryStr);
            // No custom sort: default relevance ordering.
            ScoreDoc[] docs = searcher.search(query, limits).scoreDocs;
            // Highlighting: wrap matched title terms in a red <font> tag.
            // BUGFIX: the original prefix had a stray apostrophe ("#FF0000''")
            // which produced broken HTML in every highlighted title.
            SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<font color='#FF0000'>", "</font>");
            Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));
            highlighter.setTextFragmenter(new SimpleFragmenter(1500));
            total = docs.length;
            for (int i = start; i < end && i < total; i++) {
                Document d = searcher.doc(docs[i].doc);
                String titleToBeHighlighted = d.get("title");
                if (titleToBeHighlighted == null) {
                    titleToBeHighlighted = "";
                }
                TokenStream tokenStream = analyzer.tokenStream("title", new StringReader(titleToBeHighlighted));
                String title = highlighter.getBestFragment(tokenStream, titleToBeHighlighted);
                Article article = buildArticle(d.get("id"), title, d.get("content"), d.get("subject_id"), d.get("subject_name"), d.get("publish_time"));
                articleList.add(article);
            }
            articlePage = new Page<Article>(articleList, pageNum, pageSize, (total + pageSize - 1) / pageSize, total);
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        } catch (InvalidTokenOffsetsException e) {
            e.printStackTrace();
        } finally {
            // BUGFIX: the original closed searcher/reader/directory without
            // null checks, throwing NullPointerException when the index could
            // not be opened; a failing close also leaked the later resources.
            if (searcher != null) {
                try {
                    searcher.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (directory != null) {
                try {
                    directory.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return articlePage;
    }

    /**
     * Maps raw stored index field values onto an {@link Article} record.
     * A missing publish_time falls back to the fixed default "2012-06-01"
     * (behavior preserved from the original implementation).
     */
    private Article buildArticle(String id, String title, String keywords, String subjectId, String subjectName, String publishTime) {
        Article article = new Article();
        article.set("id", id);
        article.set("title", title);
        article.set("content", keywords);
        article.set("subject_id", subjectId);
        article.set("subject_name", subjectName);
        article.set("publish_time", publishTime == null ? "2012-06-01" : publishTime);
        return article;
    }
}
分享到:
相关推荐
《Lucene 3.6 入门实例教程》是一份专为初学者设计的指南,旨在帮助用户快速掌握Apache Lucene 3.6版本的基本概念和应用。Lucene是一个高性能、全文检索库,广泛用于构建搜索功能强大的应用程序。这份教程通过完整的...
【标题】:“第一个Lucene 3.6 (3.X) 入门实例” ...总之,Lucene 3.6入门实例涉及从设置开发环境、创建索引到实现搜索功能的全过程。理解这些基本步骤和核心概念,是掌握Lucene并进一步构建高效检索系统的基石。
**Lucene 3.6 入门案例** Lucene 是一个高性能、全文本搜索库,由 Apache 软件基金会开发。它提供了完整的搜索功能,包括索引、查询、评分等,广泛应用于各种项目和产品中。在这个入门案例中,我们将深入理解如何...
### Lucene初级教程知识点详解 #### 1. Lucene简介 - **1.1 什么是Lucene** - Lucene是一种高性能的...以上内容涵盖了Lucene的基本概念、工作原理及如何构建索引的关键步骤,对于初学者来说是非常宝贵的入门指南。
#### 三、OpenCms快速入门 **3.1 创建一个简单页面** - **3.1.1 工作区浏览**:登录OpenCms后台管理界面,浏览工作区结构。 - **3.1.2 创建站点目录**:在工作区内创建新的站点目录。 - **3.1.3 创建站点首页**:在...
#### 三、OpenCMS快速入门 **3.1 创建一个简单页面** - **工作区浏览**:了解OpenCMS管理界面的布局。 - **创建站点目录**:规划网站结构。 - **创建站点首页**:设计网站的入口页面。 - **发布项目**:将编辑好的...
#### 三、OpenCMS快速入门 **3.1 创建一个简单页面** - **3.1.1 工作区浏览** - 登录OpenCMS管理界面; - 浏览工作区。 - **3.1.2 创建站点目录** - 在工作区中创建新的站点目录; - 设置目录权限。 - **...
7.1. Zend_Controller 快速入门 7.1.1. 简介 7.1.2. 入门 7.1.2.1. 文件系统的组织结构 7.1.2.2. 设置文件根目录 7.1.2.3. 创建URL重写规则 7.1.2.4. 创建你的bootstrap文件 7.1.2.5. 创建默认的控制器...