/* ======================= shared fields (begin) ======================= */

/**
 * The content to search for.
 */
protected String keyword;

/**
 * ID of the owner.
 */
protected String owerId;

/**
 * Name of the owner.
 */
protected String owerName;

/**
 * Value of the unique identifier of the searched object.
 */
protected String id;

/**
 * Link to the detail page of the object once it has been found by a search.
 */
protected String link;

/**
 * Creation time.
 */
protected String createDate;

/**
 * Index type.
 */
protected String indexType;

// The setters and getters of the shared fields are omitted here.
// (This note must stay on its own line: as a trailing "//" comment on a collapsed
// line it would comment out every declaration that follows it.)

/* ======================= shared fields (end) ========================= */

/* ======================= other fields (begin) ======================== */

/**
 * Map from each field that should be returned by a search to its value.
 */
private Map<String, String> searchValues;

/**
 * The value object.
 */
private Object object;

/**
 * Returns the values of the fields that were retrieved by a search.
 *
 * @return map of field name to retrieved value, or {@code null} if it was never set
 */
public Map<String, String> getSearchValues() {
    return searchValues;
}

/**
 * Sets the values of the fields that were retrieved by a search.
 *
 * @param searchValues map of field name to retrieved value
 */
public void setSearchValues(Map<String, String> searchValues) {
    this.searchValues = searchValues;
}

/* ======================= other fields (end) ========================== */
/* ======================= abstract methods (begin) ======================= */

/**
 * Returns the names of the fields that a search should be run against.
 *
 * @return the searchable field names
 */
public abstract String[] getDoSearchFields();

/**
 * Returns the names of the fields that should be indexed.
 *
 * @return the field names to index
 */
public abstract String[] getDoIndexFields();

/**
 * Initializes the public fields of this search bean, i.e. the index fields that
 * every object must provide.
 *
 * @throws Exception if the public fields cannot be initialized
 */
public abstract void initPublicFields() throws Exception;

/**
 * Returns the index type.
 *
 * @return the index type
 */
public abstract String getIndexType();

/* ======================= abstract methods (end) ========================= */
/* ======================= public methods (begin) ======================= */

/**
 * Builds the name/value map of the fields that should be indexed.
 * <p>
 * The field names come from {@link #getDoIndexFields()} and the values are read
 * from the object registered through {@link #setObject(Object)}.
 *
 * @return map of field name to its string value; an empty map when no object has
 *         been set or when there are no fields to index
 */
public Map<String, String> getIndexFieldValues() {
    if (this.object == null) {
        logger.warn("given object is null!");
        return Collections.emptyMap();
    }

    String[] doIndexFields = this.getDoIndexFields();
    if (doIndexFields == null || doIndexFields.length < 1) {
        logger.debug("given no doIndexFields!");
        return Collections.emptyMap();
    }

    Map<String, String> extInfo = new HashMap<String, String>();
    for (String f : doIndexFields) {
        extInfo.put(f, getValue(f, this.object));
    }
    return extInfo;
}

/**
 * Reads the value of one field from an object and converts it to a string.
 *
 * @param field name of the field
 * @param obj   object to read the field from
 * @return the string form of the value; the empty string when the field name is
 *         empty, the object or the value is {@code null}, or the field cannot be accessed
 */
private String getValue(String field, Object obj) {
    if (StringUtils.isEmpty(field)) {
        logger.warn("field is empty!");
        return StringUtils.EMPTY;
    }
    if (obj == null) {
        return StringUtils.EMPTY;
    }

    try {
        // Read from the object that was passed in, not from this.object.
        Object value = ObjectUtils.getFieldValue(obj, field);
        if (value == null) {
            return StringUtils.EMPTY;
        }
        if (value instanceof String) {
            return (String) value;
        }
        // java.util.Collection (the interface) is meant here; the previous check against the
        // java.util.Collections utility class could never be true.
        if (value instanceof java.util.Collection || value instanceof Map) {
            // NOTE(review): this stringifies the owning object, exactly as the previous code did
            // for Map values. Confirm whether the container value itself was intended.
            return ToStringBuilder.reflectionToString(obj);
        }
        if (value instanceof Date) {
            return DateUtils.formatDate((Date) value);
        }
        return value.toString();
    } catch (IllegalAccessException e) {
        // The exception is passed as the last argument so that the cause is not lost.
        logger.error("can not find a value for field '{}' in object class '{}'!",
                new Object[]{field, obj.getClass(), e});
        return StringUtils.EMPTY;
    }
}

/**
 * Registers the object whose index is to be created. This must be called before
 * the index for this bean is built.
 *
 * @param object the object to create an index for
 */
public void setObject(Object object) {
    this.object = object;
}

/**
 * Returns the object whose index is to be created.
 *
 * @return the registered object, or {@code null} if none has been set
 */
public Object getObject() {
    return this.object;
}

/* ======================= public methods (end) ========================= */
package com.message.base.search.engine;

import com.message.base.pagination.PaginationSupport;
import com.message.base.search.SearchBean;

import java.util.List;

/**
 * Contract of an index engine: building, deleting and updating indexes, and searching them.
 *
 * @author sunhao(sunhao.java@gmail.com)
 * @version V1.0
 * @createTime 13-5-5 1:38 AM
 */
public interface SearchEngine {

    /**
     * Creates index entries for the given beans. Implementations should consider thread safety.
     *
     * @param searchBeans the beans to index
     * @throws Exception if the index cannot be written
     */
    void doIndex(List<SearchBean> searchBeans) throws Exception;

    /**
     * Deletes the index entry of a single bean.
     *
     * @param bean the bean whose index entry is removed
     * @throws Exception if the index cannot be modified
     */
    void deleteIndex(SearchBean bean) throws Exception;

    /**
     * Deletes the index entries of several beans.
     *
     * @param beans the beans whose index entries are removed
     * @throws Exception if the index cannot be modified
     */
    void deleteIndexs(List<SearchBean> beans) throws Exception;

    /**
     * Runs a search for a single search bean.
     *
     * @param bean          the search bean; usually only its keyword (the text to search for) needs to be set
     * @param isHighlighter whether matches should be highlighted
     * @param start         start position
     * @param num           offset (as named by the original documentation)
     * @return the paginated result
     * @throws Exception if the search fails
     */
    PaginationSupport doSearch(SearchBean bean, boolean isHighlighter, int start, int num) throws Exception;

    /**
     * Runs a search for several search beans.
     *
     * @param beans         the search beans; usually only their keyword (the text to search for) needs to be set
     * @param isHighlighter whether matches should be highlighted
     * @param start         start position
     * @param num           offset (as named by the original documentation)
     * @return the paginated result
     * @throws Exception if the search fails
     */
    PaginationSupport doSearch(List<SearchBean> beans, boolean isHighlighter, int start, int num) throws Exception;

    /**
     * Deletes every index entry of one index type, identified by its bean class.
     * Implementations should consider thread safety.
     *
     * @param clazz the bean class that identifies the index type
     * @throws Exception if the index cannot be modified
     */
    void deleteIndexsByIndexType(Class<? extends SearchBean> clazz) throws Exception;

    /**
     * Deletes every index entry of one index type, identified by its name.
     * Implementations should consider thread safety.
     *
     * @param indexType the index type
     * @throws Exception if the index cannot be modified
     */
    void deleteIndexsByIndexType(String indexType) throws Exception;

    /**
     * Deletes all index entries.
     *
     * @throws Exception if the index cannot be modified
     */
    void deleteAllIndexs() throws Exception;

    /**
     * Updates the index entry of a single bean.
     *
     * @param searchBean the bean to update
     * @throws Exception if the index cannot be written
     */
    void updateIndex(SearchBean searchBean) throws Exception;

    /**
     * Updates the index entries of several beans in one batch.
     *
     * @param searchBeans the beans to update
     * @throws Exception if the index cannot be written
     */
    void updateIndexs(List<SearchBean> searchBeans) throws Exception;
}
package com.message.base.search.engine;

import com.message.base.pagination.PaginationSupport;
import com.message.base.pagination.PaginationUtils;
import com.message.base.search.SearchBean;
import com.message.base.utils.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Collections;

/**
 * Behaviour shared by the search engine implementations.
 *
 * @author sunhao(sunhao.java@gmail.com)
 * @version V1.0
 * @createTime 13-5-8 10:53 PM
 */
public abstract class AbstractSearchEngine implements SearchEngine {
    private static final Logger logger = LoggerFactory.getLogger(AbstractSearchEngine.class);

    /**
     * Prefix of the HTML fragment used when highlighting.
     */
    private String htmlPrefix = "<p>";

    /**
     * Suffix of the HTML fragment used when highlighting.
     */
    private String htmlSuffix = "</p>";

    /**
     * @return the prefix of the HTML fragment used when highlighting
     */
    public String getHtmlPrefix() {
        return this.htmlPrefix;
    }

    /**
     * @param htmlPrefix the prefix of the HTML fragment used when highlighting
     */
    public void setHtmlPrefix(String htmlPrefix) {
        this.htmlPrefix = htmlPrefix;
    }

    /**
     * @return the suffix of the HTML fragment used when highlighting
     */
    public String getHtmlSuffix() {
        return this.htmlSuffix;
    }

    /**
     * @param htmlSuffix the suffix of the HTML fragment used when highlighting
     */
    public void setHtmlSuffix(String htmlSuffix) {
        this.htmlSuffix = htmlSuffix;
    }

    /**
     * Searches for a single bean by delegating to the list-based search with a one-element list.
     *
     * @param bean          the search bean
     * @param isHighlighter whether matches should be highlighted
     * @param start         start position
     * @param num           offset (as named by the interface documentation)
     * @return the paginated result; the "null pagination" when {@code bean} is {@code null}
     * @throws Exception if the delegated search fails
     */
    public PaginationSupport doSearch(SearchBean bean, boolean isHighlighter, int start, int num) throws Exception {
        if (bean != null) {
            return this.doSearch(Collections.singletonList(bean), isHighlighter, start, num);
        }

        logger.debug("given search bean is empty!");
        return PaginationUtils.getNullPagination();
    }

    /**
     * Resolves the index type of a bean: the bean's own index type when it is not empty,
     * otherwise the simple name of the bean's class.
     *
     * @param bean the bean to resolve the index type for
     * @return the index type
     */
    public String getIndexType(SearchBean bean) {
        if (StringUtils.isNotEmpty(bean.getIndexType())) {
            return bean.getIndexType();
        }
        return bean.getClass().getSimpleName();
    }
}
package com.message.base.search.engine;

import com.message.base.pagination.PaginationSupport;
import com.message.base.pagination.PaginationUtils;
import com.message.base.search.SearchBean;
import com.message.base.search.SearchInitException;
import com.message.base.utils.StringUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.BeanUtils;

import java.io.File;
import java.io.IOException;
import java.util.*;

/**
 * Index engine implemented on top of Lucene.
 * <p>
 * Every index type is stored in its own sub-directory of {@code indexPath}. All operations that
 * modify an index run while holding this instance's monitor. Writers, readers and searchers are
 * always released in {@code finally} blocks so that a failure cannot leave one open.
 *
 * @author sunhao(sunhao.java@gmail.com)
 * @version V1.0
 * @createTime 13-5-5 10:38 AM
 */
public class LuceneSearchEngine extends AbstractSearchEngine {
    private static final Logger logger = LoggerFactory.getLogger(LuceneSearchEngine.class);

    /**
     * Maximum number of hits requested from Lucene for one query.
     */
    private static final int MAX_HITS = 1000000;

    /**
     * Directory under which the indexes are stored.
     */
    private String indexPath;

    /**
     * Analyzer used for indexing, query parsing and highlighting.
     */
    private Analyzer analyzer = new SimpleAnalyzer();

    /**
     * Adds the given beans to the index.
     *
     * @param searchBeans the beans to index
     * @throws Exception if a bean is incomplete or the index cannot be written
     */
    public synchronized void doIndex(List<SearchBean> searchBeans) throws Exception {
        this.createOrUpdateIndex(searchBeans, true);
    }

    /**
     * Deletes the document whose {@code pkId} equals the id of the given bean.
     * Nothing happens when the bean is {@code null} or has no id.
     *
     * @param bean the bean whose index entry is removed
     * @throws Exception if the index cannot be modified
     */
    public synchronized void deleteIndex(SearchBean bean) throws Exception {
        if (bean == null) {
            logger.warn("Get search bean is empty!");
            return;
        }

        String id = bean.getId();
        if (StringUtils.isEmpty(id)) {
            logger.warn("get id and id value from bean is empty!");
            return;
        }

        IndexWriter writer = this.getWriter(this.getIndexDir(getIndexType(bean)));
        try {
            writer.deleteDocuments(new Term("pkId", id));
            writer.commit();
        } finally {
            // Always close: an open writer keeps holding the index write lock.
            this.destroy(writer);
        }
    }

    /**
     * Deletes the index entries of several beans, one after another.
     *
     * @param beans the beans whose index entries are removed; {@code null} is ignored
     * @throws Exception if the index cannot be modified
     */
    public synchronized void deleteIndexs(List<SearchBean> beans) throws Exception {
        if (beans == null) {
            logger.warn("Get beans is empty!");
            return;
        }

        for (SearchBean bean : beans) {
            this.deleteIndex(bean);
        }
    }

    /**
     * Searches the index of every given bean and collects the hits into one pagination.
     * <p>
     * {@code start} and {@code num} are applied to the hits of each bean separately. When both are
     * {@code -1}, all hits are returned. The total count is the sum of the hit counts of all beans.
     * A bean without search fields ends the search with the "null pagination". {@code null}
     * entries of the list are skipped.
     *
     * @param beans         the search beans (usually only the keyword is set)
     * @param isHighlighter whether matches should be highlighted
     * @param start         index of the first hit to return
     * @param num           number of hits to return
     * @return the paginated result
     * @throws Exception if an index cannot be read or the query cannot be parsed
     */
    @SuppressWarnings("unchecked")
    public PaginationSupport doSearch(List<SearchBean> beans, boolean isHighlighter, int start, int num) throws Exception {
        if (beans == null || beans.isEmpty()) {
            logger.debug("given search beans is empty!");
            return PaginationUtils.getNullPagination();
        }

        List<SearchBean> queryResults = new ArrayList<SearchBean>();
        int count = 0;
        for (SearchBean bean : beans) {
            if (bean == null) {
                continue;
            }

            // Checked before any reader is opened, so that the early return cannot leak one.
            String[] doSearchFields = bean.getDoSearchFields();
            if (doSearchFields == null || doSearchFields.length == 0) {
                return PaginationUtils.getNullPagination();
            }

            count += this.searchSingleBean(bean, doSearchFields, isHighlighter, start, num, queryResults);
        }

        // The parameter type of makePagination is not visible from here, so the list is handed
        // over as a raw List, exactly as before.
        List rawResults = queryResults;
        return PaginationUtils.makePagination(rawResults, count, num, start);
    }

    /**
     * Deletes every index entry of the index type that belongs to the given bean class.
     *
     * @param clazz the bean class that identifies the index type
     * @throws Exception if the index cannot be modified
     */
    public synchronized void deleteIndexsByIndexType(Class<? extends SearchBean> clazz) throws Exception {
        String indexType = getIndexType(BeanUtils.instantiate(clazz));
        this.deleteIndexsByIndexType(indexType);
    }

    /**
     * Deletes every document whose {@code indexType} field equals the given index type.
     *
     * @param indexType the index type
     * @throws Exception if the index cannot be opened or modified
     */
    public synchronized void deleteIndexsByIndexType(String indexType) throws Exception {
        // Readers are read-only by default; pass readOnly = false to be allowed to delete.
        IndexReader reader = IndexReader.open(this.getIndexDir(indexType), false);
        int result;
        try {
            result = reader.deleteDocuments(new Term("indexType", indexType));
        } finally {
            reader.close();
        }

        logger.debug("the rows of delete index is '{}'! index type is '{}'!", result, indexType);
    }

    /**
     * Deletes the index entries of every index type, i.e. of every sub-directory of {@code indexPath}.
     *
     * @throws Exception if an index cannot be opened or modified
     */
    public synchronized void deleteAllIndexs() throws Exception {
        File indexFolder = this.indexPath == null ? null : new File(this.indexPath);
        if (indexFolder == null || !indexFolder.isDirectory()) {
            // Not configured, does not exist, or is not a folder.
            logger.debug("indexPath is not a folder! indexPath: '{}'!", indexPath);
            return;
        }

        File[] children = indexFolder.listFiles();
        if (children == null) {
            // listFiles() returns null when the folder cannot be read.
            logger.debug("can not list the files of indexPath: '{}'!", indexPath);
            return;
        }

        for (File child : children) {
            if (child == null || !child.isDirectory()) {
                continue;
            }

            String indexType = child.getName();
            logger.debug("Get indexType is '{}'!", indexType);
            this.deleteIndexsByIndexType(indexType);
        }
    }

    /**
     * Updates the index entry of a single bean.
     *
     * @param searchBean the bean to update
     * @throws Exception if the bean is incomplete or the index cannot be written
     */
    public void updateIndex(SearchBean searchBean) throws Exception {
        this.updateIndexs(Collections.singletonList(searchBean));
    }

    /**
     * Updates the index entries of several beans.
     *
     * @param searchBeans the beans to update
     * @throws Exception if a bean is incomplete or the index cannot be written
     */
    public void updateIndexs(List<SearchBean> searchBeans) throws Exception {
        this.createOrUpdateIndex(searchBeans, false);
    }

    /**
     * Creates or updates the index entries of the given beans.
     * <p>
     * One writer is kept open while consecutive beans share an index type. It is optimized and
     * closed when the index type changes and once more at the end. {@code null} entries are
     * skipped. If a bean fails validation, the writer is still closed.
     *
     * @param searchBeans the beans to create or update
     * @param isCreate    {@code true} to add new documents, {@code false} to replace the documents
     *                    that have the same {@code pkId}
     * @throws Exception if a bean is incomplete or the index cannot be written
     */
    private synchronized void createOrUpdateIndex(List<SearchBean> searchBeans, boolean isCreate) throws Exception {
        if (searchBeans == null || searchBeans.isEmpty()) {
            logger.debug("do no index!");
            return;
        }

        String currentIndexType = null;
        IndexWriter writer = null;
        try {
            for (SearchBean sb : searchBeans) {
                // The null check must come before the bean is dereferenced.
                if (sb == null) {
                    logger.debug("give SearchBean is null!");
                    continue;
                }

                String indexType = getIndexType(sb);
                // Build (and validate) the document before touching the writer.
                Document doc = this.buildDocument(sb, indexType);

                if (writer == null || !indexType.equals(currentIndexType)) {
                    if (writer != null) {
                        writer.optimize();
                        this.destroy(writer);
                        writer = null;
                    }
                    writer = this.getWriter(this.getIndexDir(indexType));
                    currentIndexType = indexType;
                }

                if (isCreate) {
                    writer.addDocument(doc);
                } else {
                    writer.updateDocument(new Term("pkId", sb.getId()), doc);
                }
            }

            // Optimize once per writer instead of once per document.
            if (writer != null) {
                writer.optimize();
            }
        } finally {
            this.destroy(writer);
        }

        logger.debug("create or update index success!");
    }

    /**
     * Builds the Lucene document of one bean: the mandatory public fields plus the bean's own
     * index fields.
     *
     * @param sb        the bean to convert; its public fields are initialized first
     * @param indexType the index type stored in the document
     * @return the document
     * @throws Exception if the bean cannot be initialized or a mandatory field is empty
     */
    private Document buildDocument(SearchBean sb, String indexType) throws Exception {
        // Let the bean fill in its public fields first.
        sb.initPublicFields();

        Document doc = new Document();

        // Primary key: stored, not analyzed, not meant to be searched.
        String id = requireValue(sb.getId(), "id");
        Field idField = newField("pkId", id, Field.Index.NOT_ANALYZED);
        doc.add(idField);
        logger.debug("create id index for '{}', value is '{}'! index is '{}'!", new Object[]{"pkId", id, idField});

        doc.add(newField("owerId", requireValue(sb.getOwerId(), "owerId"), Field.Index.ANALYZED));
        doc.add(newField("owerName", requireValue(sb.getOwerName(), "owerName"), Field.Index.ANALYZED));
        doc.add(newField("link", requireValue(sb.getLink(), "link"), Field.Index.ANALYZED));
        doc.add(newField("keyword", requireValue(sb.getKeyword(), "keyword"), Field.Index.ANALYZED));
        doc.add(newField("createDate", requireValue(sb.getCreateDate(), "createDate"), Field.Index.ANALYZED));

        // Index type field.
        doc.add(newField("indexType", indexType, Field.Index.NOT_ANALYZED));

        // Fields declared by the bean itself.
        String[] doIndexFields = sb.getDoIndexFields();
        Map<String, String> indexFieldValues = sb.getIndexFieldValues();
        if (doIndexFields != null && doIndexFields.length > 0) {
            for (String field : doIndexFields) {
                String value = indexFieldValues == null ? null : indexFieldValues.get(field);
                // A Field cannot be created with a null value; index the empty string instead.
                doc.add(newField(field, value == null ? StringUtils.EMPTY : value, Field.Index.ANALYZED));
            }
        }
        return doc;
    }

    /**
     * Creates a stored field with term vectors (positions and offsets).
     */
    private static Field newField(String name, String value, Field.Index index) {
        return new Field(name, value, Field.Store.YES, index, Field.TermVector.WITH_POSITIONS_OFFSETS);
    }

    /**
     * Returns {@code value}, or fails with a {@link SearchInitException} when it is empty.
     */
    private static String requireValue(String value, String name) throws SearchInitException {
        if (StringUtils.isEmpty(value)) {
            throw new SearchInitException("you must give a " + name);
        }
        return value;
    }

    /**
     * Searches the index of one bean and appends the requested slice of hits to {@code sink}.
     *
     * @return the total number of hits of the query (not only of the returned slice)
     */
    private int searchSingleBean(SearchBean bean, String[] doSearchFields, boolean isHighlighter,
                                 int start, int num, List<SearchBean> sink) throws Exception {
        String indexType = getIndexType(bean);
        Query query = this.buildQuery(bean.getKeyword(), doSearchFields);
        logger.debug("make query string is '{}'!", query.toString());

        IndexReader reader = IndexReader.open(this.getIndexDir(indexType));
        try {
            IndexSearcher searcher = new IndexSearcher(reader);
            try {
                ScoreDoc[] scoreDocs = searcher.search(query, MAX_HITS).scoreDocs;

                boolean everything = start == -1 && num == -1;
                // First hit to return; never negative, so that scoreDocs is not indexed out of bounds.
                int begin = everything ? 0 : Math.max(start, 0);
                // One past the last hit to return.
                int end = everything ? scoreDocs.length : Math.min(begin + num, scoreDocs.length);

                Highlighter highlighter = null;
                if (isHighlighter) {
                    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(this.getHtmlPrefix(), this.getHtmlSuffix());
                    highlighter = new Highlighter(formatter, new QueryScorer(query));
                }

                for (int i = begin; i < end; i++) {
                    Document hitDoc = searcher.doc(scoreDocs[i].doc);
                    sink.add(this.toSearchBean(bean, indexType, hitDoc, doSearchFields, highlighter));
                }
                return scoreDocs.length;
            } finally {
                searcher.close();
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Builds a query that looks for the keyword in every search field (each field is optional,
     * i.e. {@code SHOULD}). An empty keyword yields a query without clauses.
     */
    private Query buildQuery(String keyword, String[] doSearchFields) throws Exception {
        List<String> fieldNames = new ArrayList<String>();                         // fields to query
        List<String> queryValues = new ArrayList<String>();                        // value queried in each field
        List<BooleanClause.Occur> flags = new ArrayList<BooleanClause.Occur>();

        if (StringUtils.isNotEmpty(keyword)) {
            for (String field : doSearchFields) {
                fieldNames.add(field);
                queryValues.add(keyword);
                flags.add(BooleanClause.Occur.SHOULD);
            }
        }

        return MultiFieldQueryParser.parse(Version.LUCENE_CURRENT,
                queryValues.toArray(new String[queryValues.size()]),
                fieldNames.toArray(new String[fieldNames.size()]),
                flags.toArray(new BooleanClause.Occur[flags.size()]), analyzer);
    }

    /**
     * Converts one hit into a new bean of the same class as {@code template}.
     */
    private SearchBean toSearchBean(SearchBean template, String indexType, Document hitDoc,
                                    String[] doSearchFields, Highlighter highlighter) throws Exception {
        SearchBean result = BeanUtils.instantiate(template.getClass());
        result.setId(hitDoc.get("pkId"));
        result.setLink(hitDoc.get("link"));
        result.setOwerId(hitDoc.get("owerId"));
        result.setOwerName(hitDoc.get("owerName"));
        result.setCreateDate(hitDoc.get("createDate"));
        result.setIndexType(indexType);
        result.setKeyword(this.highlight(highlighter, "keyword", hitDoc.get("keyword")));

        Map<String, String> extendValues = new HashMap<String, String>();
        for (String field : doSearchFields) {
            extendValues.put(field, this.highlight(highlighter, field, hitDoc.get(field)));
        }
        result.setSearchValues(extendValues);
        return result;
    }

    /**
     * Returns the best highlighted fragment of {@code stored}, or {@code stored} itself when no
     * highlighter is given, the stored value is empty, or nothing could be highlighted.
     */
    private String highlight(Highlighter highlighter, String field, String stored) throws Exception {
        // A missing stored value must not reach the highlighter.
        if (highlighter == null || StringUtils.isEmpty(stored)) {
            return stored;
        }

        String fragment = highlighter.getBestFragment(analyzer, field, stored);
        return StringUtils.isEmpty(fragment) ? stored : fragment;
    }

    /**
     * Opens the directory {@code indexPath + File.separator + suffix}.
     *
     * @param suffix name of the sub-directory, i.e. the index type
     * @return the directory
     * @throws Exception if the directory cannot be opened
     */
    public Directory getIndexDir(String suffix) throws Exception {
        return FSDirectory.open(new File(indexPath + File.separator + suffix));
    }

    /**
     * Creates a writer for the given directory that uses the configured analyzer and does not
     * limit the field length.
     *
     * @param indexDir the directory to write to
     * @return the writer; the caller must close it
     * @throws IOException if the writer cannot be created
     */
    public IndexWriter getWriter(Directory indexDir) throws IOException {
        return new IndexWriter(indexDir, analyzer, IndexWriter.MaxFieldLength.UNLIMITED);
    }

    /**
     * Closes the given writer.
     *
     * @param writer the writer to close; {@code null} is ignored
     * @throws Exception if the writer cannot be closed
     */
    public void destroy(IndexWriter writer) throws Exception {
        if (writer != null) {
            writer.close();
        }
    }

    /**
     * @param indexPath directory under which the indexes are stored
     */
    public void setIndexPath(String indexPath) {
        this.indexPath = indexPath;
    }

    /**
     * @param analyzer analyzer used for indexing, query parsing and highlighting
     */
    public void setAnalyzer(Analyzer analyzer) {
        this.analyzer = analyzer;
    }
}
FSDirectory.open(new File("D:\index\xxx"/**一个不存在的目录,或者是一个不是索引的目录**/));
在"lucene+api搜索引擎引工具源码"项目中,开发者可能已经封装了 Lucene 的核心功能,使其更易于在 C#.NET 平台上使用。源码可能会包含以下关键组件和概念: 1. **索引创建**:Lucene 允许开发者将文本数据(如文档...
