/**
* Project Name:docsearch
* File Name:Search.java
* Package Name:cn.tramp.docsearch.search
* Date:2014年2月27日 下午6:40:58
* Copyright (c) 2014, zhangzhaoyu0524@163.com All Rights Reserved.
*
*/
package cn.tramp.docsearch.search;
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.DateTools.Resolution;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.queryparser.classic.QueryParser.Operator;
import org.apache.lucene.queryparser.flexible.core.nodes.RangeQueryNode;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import cn.tramp.docsearch.domain.DocumentInfo;
import cn.tramp.docsearch.index.Index;
import cn.tramp.docsearch.util.IndexPropertyUtil;
import cn.tramp.docsearch.util.Page;
/**
 * ClassName: Search <br/>
 * Function: runs Lucene queries (wildcard, boolean, parsed, date-range and
 * exact-field searches) against the document index and returns paged,
 * highlighted DocumentInfo results. <br/>
 * Date: 2014年2月27日 下午6:40:58 <br/>
 * @author zhangzhaoyu
 * @version
 * @since JDK 1.7
 * @see
 */
public class Search {

    // BUGFIX: was LogFactory.getLog(Index.class) — log records were attributed to the wrong class.
    private final static Log logger = LogFactory.getLog(Search.class);

    /** Lucene version shared by every parser and analyzer in this class. */
    private static final Version LUCENE_VERSION = Version.LUCENE_43;

    /** Upper bound on hits collected when computing the total record count for a page. */
    private static final int MAX_HITS = 1000;

    /** One day in milliseconds, kept as a long so multiples (210, 365 days) never overflow int. */
    private static final long DAY_MILLIS = 24L * 60 * 60 * 1000;

    public Directory directory;
    private String indexPath;
    // Spelling intentionally matches the "docmentPath" key used in the properties file.
    private String docmentPath;

    // Shared across all instances; opened lazily by getIndexSearcher(), released by close().
    private static IndexReader reader = null;

    /**
     * Reads the index and document locations from the property file and opens
     * the index directory. A failure is logged and leaves {@code directory} null;
     * subsequent searches will then fail and return null.
     */
    public Search() {
        try {
            indexPath = IndexPropertyUtil.getKeyValueByName("indexPath");
            docmentPath = IndexPropertyUtil.getKeyValueByName("docmentPath");
            directory = FSDirectory.open(new File(indexPath));
        } catch (IOException e) {
            logger.error("failed to open index directory: " + indexPath, e);
        }
    }

    /**
     * Returns a searcher over the shared (lazily opened) index reader.
     *
     * @return a searcher, or {@code null} if the index could not be opened
     */
    public IndexSearcher getIndexSearcher() {
        try {
            if (reader == null) {
                reader = DirectoryReader.open(directory);
            }
            return new IndexSearcher(reader);
        } catch (CorruptIndexException e) {
            logger.error("index is corrupt", e);
        } catch (Exception ex) {
            logger.error("failed to open index reader", ex);
        }
        return null;
    }

    /**
     * Parses {@code searchText} against {@code field} with the standard analyzer
     * and returns one page of results.
     *
     * @param field      name of the indexed field to search
     * @param searchText raw query text (QueryParser syntax)
     * @param page       paging state; {@code totalRecord} is filled in as a side effect
     * @return the requested page of results, or {@code null} on error
     */
    public List<DocumentInfo> queryForDocumentByPage(String field, String searchText, Page<DocumentInfo> page) {
        try {
            // BUGFIX: previously opened a brand-new IndexReader per call and never
            // closed it (reader leak); now reuses the shared reader like the other methods.
            IndexSearcher searcher = getIndexSearcher();
            QueryParser parser = new QueryParser(LUCENE_VERSION, field, new StandardAnalyzer(LUCENE_VERSION));
            Query query = parser.parse(searchText);
            return searchByPage(searcher, query, page);
        } catch (Exception e) {
            logger.error("queryForDocumentByPage failed for field=" + field, e);
        }
        return null;
    }

    /**
     * Prefix-wildcard search over the document name OR the document content.
     *
     * @param doc_name document-name prefix
     * @param content  content prefix
     * @param page     paging state; {@code totalRecord} is filled in as a side effect
     * @return the requested page of results, or {@code null} on error
     */
    public List<DocumentInfo> queryForListByWildcard(String doc_name, String content, Page<DocumentInfo> page) {
        try {
            IndexSearcher searcher = getIndexSearcher();
            Query docNameQuery = new WildcardQuery(new Term("doc_name", doc_name + "*"));
            // BUGFIX: the content clause previously searched doc_name's value,
            // so the `content` parameter was silently ignored.
            Query contentQuery = new WildcardQuery(new Term("content", content + "*"));
            BooleanQuery booleanQuery = new BooleanQuery();
            booleanQuery.add(docNameQuery, Occur.SHOULD);
            booleanQuery.add(contentQuery, Occur.SHOULD);
            return searchByPage(searcher, booleanQuery, page);
        } catch (Exception e) {
            logger.error("queryForListByWildcard failed", e);
        }
        return null;
    }

    /**
     * AND-combines the given queries and returns one page of results.
     *
     * @param termQueryList sub-queries, every one of which a hit must satisfy
     * @param page          paging state; {@code totalRecord} is filled in as a side effect
     * @return the requested page of results, or {@code null} on error
     */
    public List<DocumentInfo> queryByComplexBoolean(List<Query> termQueryList, Page<DocumentInfo> page) {
        try {
            IndexSearcher searcher = getIndexSearcher();
            BooleanQuery query = new BooleanQuery();
            for (Query termQuery : termQueryList) {
                query.add(termQuery, Occur.MUST);
            }
            return searchByPage(searcher, query, page);
        } catch (Exception e) {
            logger.error("queryByComplexBoolean failed", e);
        }
        return null;
    }

    /**
     * Parses {@code searchContent} as a prefix query against both the document
     * name (Chinese analyzer) and the content (standard analyzer), OR-combined.
     *
     * @param searchContent user search text; a trailing {@code *} is appended
     * @param page          paging state; {@code totalRecord} is filled in as a side effect
     * @return the requested page of results, or {@code null} on error
     */
    public List<DocumentInfo> queryByQueryParse(String searchContent, Page<DocumentInfo> page) {
        try {
            IndexSearcher searcher = getIndexSearcher();
            QueryParser parser = new QueryParser(LUCENE_VERSION, "doc_name", new SmartChineseAnalyzer(LUCENE_VERSION));
            Query docNameQuery = parser.parse(searchContent + "*");
            parser = new QueryParser(LUCENE_VERSION, "content", new StandardAnalyzer(LUCENE_VERSION));
            Query contentQuery = parser.parse(searchContent + "*");
            BooleanQuery booleanQuery = new BooleanQuery();
            booleanQuery.add(docNameQuery, Occur.SHOULD);
            booleanQuery.add(contentQuery, Occur.SHOULD);
            // BUGFIX: highlighting previously used only the doc_name sub-query,
            // so content-only matches were never highlighted; now the full
            // boolean query drives both search and highlighting.
            return searchByPage(searcher, booleanQuery, page);
        } catch (Exception e) {
            logger.error("queryByQueryParse failed", e);
        }
        return null;
    }

    /**
     * Searches by document name, content, and exact document type, OR-combined.
     *
     * @param searchBean carries doc_name, content and doc_type criteria
     * @param page       paging state; {@code totalRecord} is filled in as a side effect
     * @return the requested page of results, or {@code null} on error
     */
    public List<DocumentInfo> queryByDocType(TypeSearchBean searchBean, Page<DocumentInfo> page) {
        try {
            IndexSearcher searcher = getIndexSearcher();
            QueryParser parser = new QueryParser(LUCENE_VERSION, "doc_name", new SmartChineseAnalyzer(LUCENE_VERSION));
            Query docNameQuery = parser.parse(searchBean.getDoc_name() + "*");
            parser = new QueryParser(LUCENE_VERSION, "content", new StandardAnalyzer(LUCENE_VERSION));
            Query contentQuery = parser.parse(searchBean.getContent() + "*");
            Query typeQuery = new TermQuery(new Term("doc_type", searchBean.getDoc_type()));
            // Removed dead call: setDefaultOperator(Operator.AND) was invoked
            // AFTER both parses, so it never affected any query.
            BooleanQuery booleanQuery = new BooleanQuery();
            booleanQuery.add(docNameQuery, Occur.SHOULD);
            booleanQuery.add(contentQuery, Occur.SHOULD);
            booleanQuery.add(typeQuery, Occur.SHOULD);
            return searchByPage(searcher, booleanQuery, page);
        } catch (Exception e) {
            logger.error("queryByDocType failed", e);
        }
        return null;
    }

    /**
     * Finds documents whose add AND modify timestamps both fall inside
     * [{@code beginDate}, {@code endDate}] (inclusive, string-ordered —
     * dates must be in the same sortable format used at index time).
     *
     * @param beginDate inclusive lower bound
     * @param endDate   inclusive upper bound
     * @param page      paging state; {@code totalRecord} is filled in as a side effect
     * @return the requested page of results, or {@code null} on error
     */
    public List<DocumentInfo> queryByDocDate(String beginDate, String endDate, Page<DocumentInfo> page) {
        try {
            IndexSearcher searcher = getIndexSearcher();
            TermRangeQuery addDateRangeQuery = TermRangeQuery.newStringRange("add_datetime", beginDate, endDate, true, true);
            TermRangeQuery modifyDateRangeQuery = TermRangeQuery.newStringRange("modify_datetime", beginDate, endDate, true, true);
            BooleanQuery booleanQuery = new BooleanQuery();
            booleanQuery.add(addDateRangeQuery, Occur.MUST);
            booleanQuery.add(modifyDateRangeQuery, Occur.MUST);
            return searchByPage(searcher, booleanQuery, page);
        } catch (Exception e) {
            logger.error("queryByDocDate failed", e);
        }
        return null;
    }

    /**
     * Accurate (all-criteria-must-match) search: name prefix, exact type,
     * fuzzy author, and an optional add-date window derived from the bean's
     * {@code limit_date} code.
     *
     * @param searchBean carries doc_name, doc_type, author and limit_date criteria
     * @param page       paging state; {@code totalRecord} is filled in as a side effect
     * @return the requested page of results, or {@code null} on error
     */
    public List<DocumentInfo> queryByAccurate(AccurateSearchBean searchBean, Page<DocumentInfo> page) {
        try {
            IndexSearcher searcher = getIndexSearcher();
            Query docNameQuery = new WildcardQuery(new Term("doc_name", searchBean.getDoc_name() + "*"));
            Query docTypeQuery = new TermQuery(new Term("doc_type", searchBean.getDoc_type()));
            // maxEdits=1, prefixLength=1: tolerate one typo after the first character.
            Query docAuthorQuery = new FuzzyQuery(new Term("author", searchBean.getAuthor()), 1, 1);
            TermRangeQuery dateRangeQuery = getTermRangeQueryByDate(searchBean.getLimit_date());
            BooleanQuery booleanQuery = new BooleanQuery();
            booleanQuery.add(docNameQuery, Occur.MUST);
            booleanQuery.add(docTypeQuery, Occur.MUST);
            booleanQuery.add(docAuthorQuery, Occur.MUST);
            if (dateRangeQuery != null) {
                booleanQuery.add(dateRangeQuery, Occur.MUST);
            }
            return searchByPage(searcher, booleanQuery, page);
        } catch (Exception e) {
            logger.error("queryByAccurate failed", e);
        }
        return null;
    }

    /**
     * Shared pagination pipeline (previously duplicated in six methods):
     * counts total hits up to {@link #MAX_HITS}, records the count on
     * {@code page}, then fetches just the requested page via searchAfter.
     *
     * @param searcher searcher to run against
     * @param query    the query to execute (also used for highlighting)
     * @param page     paging state; {@code totalRecord} is filled in as a side effect
     * @return the page of converted results
     */
    private List<DocumentInfo> searchByPage(IndexSearcher searcher, Query query, Page<DocumentInfo> page)
            throws IOException, InvalidTokenOffsetsException {
        TopDocs docs = searcher.search(query, MAX_HITS);
        int index = (page.getCurrentPage() - 1) * page.getPageSize();
        page.setTotalRecord(docs.scoreDocs.length);
        ScoreDoc scoreDoc = null;
        if (index > 0) {
            // Anchor searchAfter on the last hit of the previous page.
            scoreDoc = docs.scoreDocs[index - 1];
        }
        TopDocs hits = searcher.searchAfter(scoreDoc, query, page.getPageSize());
        return convert(hits, searcher, query);
    }

    /**
     * Converts raw Lucene hits into DocumentInfo beans, highlighting query
     * matches in the name and content fields.
     *
     * @param hits     page of hits to convert
     * @param searcher used to load stored documents
     * @param query    query whose terms are highlighted
     * @return one DocumentInfo per hit, in score order
     */
    private List<DocumentInfo> convert(TopDocs hits, IndexSearcher searcher, Query query)
            throws IOException, InvalidTokenOffsetsException {
        List<DocumentInfo> list = new ArrayList<DocumentInfo>();
        Analyzer analyzer = new SmartChineseAnalyzer(LUCENE_VERSION);
        // Hoisted out of the loop: formatter and highlighter are identical for every hit.
        SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class='label label-warning'>", "</span>");
        Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
        for (ScoreDoc doc : hits.scoreDocs) {
            Document d = searcher.doc(doc.doc);
            DocumentInfo info = new DocumentInfo();
            info.setContent(highlight(highlighter, analyzer, "content", d.get("content")));
            info.setDoc_name(highlight(highlighter, analyzer, "doc_name", d.get("doc_name")));
            info.setDoc_id(Integer.parseInt(d.get("doc_id")));
            info.setDoc_type(d.get("doc_type"));
            info.setDoc_location(d.get("doc_location"));
            // Timestamps are stored as epoch-millis strings.
            info.setAdd_datetime(new Date(Long.parseLong(d.get("add_datetime"))));
            info.setModify_datetime(new Date(Long.parseLong(d.get("modify_datetime"))));
            info.setAuthor(d.get("author"));
            info.setUpload_author(d.get("upload_author"));
            list.add(info);
        }
        return list;
    }

    /**
     * Highlights {@code text} for the given field, falling back to the raw
     * text when nothing matches; passes {@code null} through unchanged.
     */
    private String highlight(Highlighter highlighter, Analyzer analyzer, String fieldName, String text)
            throws IOException, InvalidTokenOffsetsException {
        if (text == null) {
            return null;
        }
        TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(text));
        String highlighted = highlighter.getBestFragment(tokenStream, text);
        return highlighted != null ? highlighted : text;
    }

    /**
     *
     * getTermRangeQueryByDate:<br />
     * Maps a limit-date code to an inclusive add_datetime range ending now:
     * 2 = last day, 3 = last week, 4 = last 210 days, 5 = last year.
     *
     * @author zhangzhaoyu
     * @param dateRange limit-date code from the search form
     * @return the range query, or {@code null} for any other code
     */
    private TermRangeQuery getTermRangeQueryByDate(int dateRange) {
        long days;
        switch (dateRange) {
            case 2: days = 1; break;
            case 3: days = 7; break;
            case 4: days = 210; break;
            case 5: days = 365; break;
            default: return null;
        }
        Date nowDate = new Date();
        // BUGFIX: the old int expression 24*60*60*1000*210 (and *365) overflowed
        // Integer.MAX_VALUE, yielding a garbage offset; DAY_MILLIS is a long.
        Date oldDate = new Date(nowDate.getTime() - days * DAY_MILLIS);
        return TermRangeQuery.newStringRange("add_datetime",
                DateTools.dateToString(oldDate, Resolution.DAY),
                DateTools.dateToString(nowDate, Resolution.DAY), true, true);
    }

    /**
     *
     * close:<br />
     * Closes the shared index reader, if open.
     *
     * @author zhangzhaoyu
     */
    public void close() {
        try {
            // BUGFIX: guard against NPE when the reader was never opened, and
            // reset the field so getIndexSearcher() can reopen later instead of
            // handing out a searcher over a closed reader.
            if (reader != null) {
                reader.close();
                reader = null;
            }
        } catch (IOException e) {
            logger.error("failed to close index reader", e);
        }
    }
}
分享到:
相关推荐
《Lucene搜索引擎开发权威经典》是由于天恩编著的一本深入探讨Lucene搜索引擎开发的专业书籍,这本书结合源码分析,旨在帮助读者掌握Lucene的核心技术和应用实践。Lucene是Apache软件基金会的一个开放源代码项目,它...
**Lucene搜索引擎 JSP + JAVA** Lucene是一个高性能、全文本搜索库,由Apache软件基金会开发,它提供了索引和搜索大量文本数据的能力。在这个项目中,Lucene被结合了JSP(JavaServer Pages)和JAVA技术,创建了一个...
本书基于Lucene的当前最新版本(2.1)精解了Lucene搜索引擎的相关知识,从基础知识到应用开发,精炼简洁,恰到好处。 本书包含了必要的理论,但以实践为主。所讲的理论都不是纸上谈兵,都是可以立即付诸实践进行...
《Lucene搜索-引擎开发权威经典》是一本深入解析Apache Lucene搜索引擎库的专业书籍,它为读者提供了构建高效全文搜索引擎的全面指南。Lucene是Java领域最著名的全文检索库,被广泛应用于各种信息检索系统中,包括...
《Lucene搜索引擎开发权威经典》是一本深入探讨Apache Lucene的专著,作者于天恩在书中详尽地阐述了Lucene的核心概念、工作原理以及实际应用。这本书旨在帮助读者理解如何利用Lucene构建高性能、可扩展的全文搜索...
**基于Lucene搜索引擎的Java源码详解** Lucene是一个高性能、全文检索库,它由Apache软件基金会开发并维护。此Java源码包提供了一个全面的示例,展示了如何利用Lucene进行索引创建、更新(增量索引)以及搜索操作。...
《深入理解Lucene搜索引擎项目》 Lucene是一个高性能、全文本搜索库,它为开发者提供了在Java应用程序中实现全文检索的工具集。这个名为“lucene搜索引擎项目”的资源,旨在帮助用户更好地理解和应用Lucene来构建...
**正文** ...总结,利用Lucene搜索中文PDF文档涉及多个技术层面,包括中文分词、PDF解析、索引构建、搜索执行和性能优化。通过理解这些关键技术,开发者可以构建出高效、准确的中文PDF文档检索系统。
【课程大纲】01.Lucene4入门精通实战课程-概述 共23页02.Lucene系统架构 共16页03.Lucene索引里有什么 共17页04.Lucene索引深入 共24页05.Lucene索引深入优化 共10页06.Lucene索引搜索 共13页07.Lucene搜索实战1 共4...
【课程大纲】01.Lucene4入门精通实战课程-概述 共23页02.Lucene系统架构 共16页03.Lucene索引里有什么 共17页04.Lucene索引深入 共24页05.Lucene索引深入优化 共10页06.Lucene索引搜索 共13页07.Lucene搜索实战1 共4...
【课程大纲】01.Lucene4入门精通实战课程-概述 共23页02.Lucene系统架构 共16页03.Lucene索引里有什么 共17页04.Lucene索引深入 共24页05.Lucene索引深入优化 共10页06.Lucene索引搜索 共13页07.Lucene搜索实战1 共4...
1. **DemoData.java** - 这个文件很可能是包含测试数据或者示例数据的类,用于演示Lucene搜索功能。它可能包含了创建索引所需的文档对象,以及用于搜索的关键词。 2. **MultiPhraseQueryDemo.java** - 这个文件是多...
[Lucene搜索引擎开发权威经典].于天恩著.扫描版.7z.001[Lucene搜索引擎开发权威经典].于天恩著.扫描版.7z.001
Solr、Elasticsearch和Lucene是三个在搜索引擎领域中至关重要的技术,它们共同构建了现代数据检索的基础架构。下面将分别对这三个组件进行详细解释,并探讨它们之间的关系。 **Lucene** Lucene是一个高性能、全文本...
Lucene搜索-引擎开发权威经典pdf+源码第一部分共2个
在这个"ssh+lucene搜索实例"中,我们可以理解为结合了SSH和Lucene两个技术,以实现远程服务器上的全文检索功能。例如,可能有一个需求是在多个远程服务器上存储大量数据,而这些数据需要通过关键词进行快速搜索。在...
本书基于Lucene的当前最新版本(2.1)精解了Lucene搜索引擎的相关知识,从基础知识到应用开发,精炼简洁,恰到好处。 本书包含了必要的理论,但以实践为主。所讲的理论都不是纸上谈兵,都是可以立即付诸实践进行...