Lucene 扩展

Weich_JavaDeveloper

浏览: 100093 次
性别:
来自: 北京

最近访客更多访客>>

孔已己

hotsunshine

zcm1205

hereis00

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

LUCENE

lucene Apache Java Bean log4j

目的：在 Lucene 之上封装一层扩展，通过反射把数据库实体 Bean 转换为索引文档，提供创建索引和查询索引（支持分页与关键字高亮）的功能。
所需jar包：
lucene-core-2.4.1.jar
lucene-highlighter-2.4.1.jar
log4j-1.2.14.jar
commons-beanutils-1.5.jar
commons-collections-2.1.1.jar
commons-logging-1.0.4.jar

清单一：DocumentFactory.java

/*
 * @(#)DocumentFactory.java 2009-10-09
 */
package com.ordinov.lucene;

import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.beanutils.BeanUtils;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;

/**
 * Builds Lucene {@link Document}s from JavaBeans by reading their properties
 * through Commons BeanUtils.
 *
 * <p>Every usable declared field of the bean class becomes one stored Lucene
 * field. The first usable field is indexed without analysis (it acts as the
 * record key); all following fields are indexed with analysis.
 *
 * @author weich
 * @Date 2009-10-09
 */
public class DocumentFactory {

    /**
     * Converts a single bean into a Lucene document.
     *
     * <p>Static and synthetic fields are skipped because they are not bean
     * properties (BeanUtils would fail with {@code NoSuchMethodException} on
     * e.g. {@code serialVersionUID}). Properties whose value is {@code null}
     * are skipped as well, since a Lucene {@code Field} cannot hold a null
     * value. An empty {@code fields} array yields an empty document.
     *
     * @param <T> bean type
     * @param fields declared fields of the bean class; each name is used both
     *               as the bean property name and as the Lucene field name
     * @param obj bean to convert
     * @return document holding one stored field per non-null bean property
     * @throws java.io.FileNotFoundException kept in the signature for backward
     *         compatibility; nothing in this method raises it directly
     * @throws IllegalAccessException if the property getter is not accessible
     * @throws InvocationTargetException if the property getter throws
     * @throws NoSuchMethodException if a field has no matching getter
     */
    public static <T> Document getDataDocument(java.lang.reflect.Field[] fields, T obj)
            throws java.io.FileNotFoundException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {

        Document doc = new Document();
        /* The first usable property is the key: indexed, but not tokenized. */
        boolean keyPending = true;
        for (java.lang.reflect.Field field : fields) {
            if (field.isSynthetic() || java.lang.reflect.Modifier.isStatic(field.getModifiers())) {
                continue;
            }
            Field.Index indexMode = keyPending ? Field.Index.NOT_ANALYZED : Field.Index.ANALYZED;
            keyPending = false;

            String name = field.getName();
            String value = BeanUtils.getProperty(obj, name);
            if (value == null) {
                /* Lucene rejects null field values; leave the field out of the document. */
                continue;
            }
            doc.add(new Field(name, value, Field.Store.YES, indexMode));
        }
        return doc;
    }

    /**
     * Converts several beans of the same class into Lucene documents, one
     * document per bean, in input order.
     *
     * @param <T> bean type
     * @param cls bean class whose declared fields define the document fields
     * @param objs beans to convert; {@code null} is treated as an empty array
     * @return the converted documents; never {@code null}
     * @throws java.io.FileNotFoundException kept in the signature for backward
     *         compatibility; nothing in this method raises it directly
     * @throws IllegalAccessException if a property getter is not accessible
     * @throws InvocationTargetException if a property getter throws
     * @throws NoSuchMethodException if a field has no matching getter
     */
    public static <T> Document[] getDataDocuments(Class<T> cls, T[] objs)
            throws java.io.FileNotFoundException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {

        if (objs == null) {
            return new Document[0];
        }
        /* Reflect once and reuse the field list for every bean. */
        java.lang.reflect.Field[] fields = cls.getDeclaredFields();
        List<Document> docs = new ArrayList<Document>(objs.length);
        for (T obj : objs) {
            docs.add(getDataDocument(fields, obj));
        }
        return docs.toArray(new Document[docs.size()]);
    }
}

清单二：IndexManger.java

/*
 * @(#)IndexManger.java 2009-10-09
 */
package com.ordinov.lucene;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;

import org.apache.log4j.Logger;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.LockObtainFailedException;

/**
 * Index creation helper.<br>
 * Owns a Lucene {@link IndexWriter} and writes bean data into an on-disk index.
 *
 * <p>An instance is single-use: {@link #createIndex(Class, Object[])} closes the
 * underlying writer when it finishes, whether it succeeds or fails.
 *
 * @author weich
 * @Date 2009-10-09
 */
public class IndexManger {

	/** Logger. */
	private static final Logger logger = Logger.getLogger(IndexManger.class);
	/** Analyzer used for indexing (per the original author's note: tokenizes CJK text character by character). */
	private StandardAnalyzer analyzer = null;
	/** Writer responsible for creating the index. */
	private IndexWriter writer = null;
	/** {@code true} creates a new index, {@code false} appends to an existing one. Defaults to {@code true}. */
	private boolean isNewCreat = true;
	/** Directory in which the index files are stored. */
	private String indexPath = null;
	/** Stop words handed to the analyzer. */
	private String[] stopStrs = {};

	/**
	 * Creates a manager with default settings.
	 *
	 * <p>No index path is configured on this code path, so initialization
	 * fails with an {@link IllegalStateException}; use
	 * {@link #IndexManger(String, String[], boolean)} instead.
	 *
	 * @throws CorruptIndexException if the index is corrupt
	 * @throws LockObtainFailedException if the index write lock cannot be obtained
	 * @throws IOException on a low-level I/O error
	 * @throws IllegalStateException because no index path is set
	 */
	public IndexManger() throws CorruptIndexException, LockObtainFailedException, IOException {

		init();
	}

	/**
	 * Creates a manager for the given index directory.
	 *
	 * @param indexPath directory in which the index files are stored; must be non-empty
	 * @param stopStrs stop words for the analyzer; {@code null} or empty keeps the default (none)
	 * @param isCreat {@code true} to create a new index, {@code false} to append
	 *
	 * @throws CorruptIndexException if the index is corrupt
	 * @throws LockObtainFailedException if the index write lock cannot be obtained
	 * @throws IOException on a low-level I/O error
	 * @throws IllegalStateException if {@code indexPath} is {@code null} or empty
	 */
	public IndexManger(String indexPath, String[] stopStrs, boolean isCreat) throws CorruptIndexException, LockObtainFailedException, IOException {

		if (indexPath != null && !"".equals(indexPath)) {
			this.indexPath = indexPath;
		}
		if (stopStrs != null && stopStrs.length > 0) {
			this.stopStrs = stopStrs;
		}
		this.isNewCreat = isCreat;
		init();
	}

	/**
	 * Creates the analyzer and the index writer.
	 *
	 * @throws CorruptIndexException if the index is corrupt
	 * @throws LockObtainFailedException if the index write lock cannot be obtained
	 * @throws IOException on a low-level I/O error
	 * @throws IllegalStateException if no index path has been configured
	 */
	private void init() throws CorruptIndexException, LockObtainFailedException, IOException {

		/* Fail with a clear message instead of the NullPointerException raised by new File(null). */
		if (indexPath == null) {
			throw new IllegalStateException("Index path is not set; a non-empty indexPath is required");
		}
		analyzer = new StandardAnalyzer(stopStrs);
		writer = new IndexWriter(new File(indexPath), analyzer, this.isNewCreat, IndexWriter.MaxFieldLength.UNLIMITED);
	}

	/**
	 * Adds every document of the array to the index writer.
	 *
	 * @param docs documents to add; {@code null} or empty is a no-op
	 *
	 * @throws IOException on a low-level I/O error
	 * @throws CorruptIndexException if the index is corrupt
	 */
	private void addDocs(Document[] docs) throws CorruptIndexException, IOException {

		if (docs == null) {
			return;
		}
		for (Document doc : docs) {
			this.addDoc(doc);
		}
	}

	/**
	 * Adds a single document to the index writer.
	 *
	 * @param doc document to add
	 * @throws IOException on a low-level I/O error
	 * @throws CorruptIndexException if the index is corrupt
	 */
	private void addDoc(Document doc) throws CorruptIndexException, IOException {

		writer.addDocument(doc);
	}

	/**
	 * Optimizes the index and closes the writer.
	 *
	 * <p>The writer is closed even when optimization fails, so the index
	 * write lock is not left behind.
	 *
	 * @throws IOException on a low-level I/O error
	 * @throws CorruptIndexException if the index is corrupt
	 */
	private void close() throws CorruptIndexException, IOException {

		logger.debug("关闭索引写出对象实例...");
		try {
			writer.optimize();
		} finally {
			writer.close();
		}
	}

	/**
	 * Closes the writer after a failed indexing run without optimizing.
	 * A secondary failure is only logged so that it cannot mask the
	 * exception that caused the run to fail.
	 */
	private void closeAfterFailure() {

		try {
			writer.close();
		} catch (IOException e) {
			logger.error("Failed to close IndexWriter after an indexing error", e);
		}
	}

	/**
	 * Indexes the given beans and then closes the writer.
	 *
	 * <p>On success the index is optimized before closing. On failure the
	 * writer is still closed (without optimizing) and the original exception
	 * propagates. Because the writer is closed either way, this method can be
	 * called only once per instance.
	 *
	 * @param <T> bean type
	 * @param cls bean class whose declared fields define the document fields
	 * @param objs beans to index
	 * @throws CorruptIndexException if the index is corrupt
	 * @throws FileNotFoundException declared by the document factory
	 * @throws IOException on a low-level I/O error
	 * @throws IllegalAccessException if a bean property getter is not accessible
	 * @throws InvocationTargetException if a bean property getter throws
	 * @throws NoSuchMethodException if a bean field has no matching getter
	 */
	public <T> void createIndex(Class<T> cls, T[] objs) throws CorruptIndexException, FileNotFoundException, IOException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {

		boolean added = false;
		try {
			this.addDocs(DocumentFactory.getDataDocuments(cls, objs));
			added = true;
		} finally {
			if (added) {
				this.close();
			} else {
				closeAfterFailure();
			}
		}
	}
}

清单三：SerchIndex.java

package com.ordinov.lucene;

import java.io.StringReader;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.beanutils.BeanUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocCollector;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;

public class SerchIndex {

	/**
	 * Searches one or more indexes for a keyword and returns one page of
	 * results as beans, with the keyword highlighted in each property value.
	 *
	 * <p>The keyword is matched against every declared field name of
	 * {@code cls}. For each hit a new bean is created and every stored field
	 * is copied into the property of the same name; where the highlighter
	 * finds the keyword, the highlighted fragment (wrapped in
	 * {@code <font color='red'>}) is stored instead of the raw value.
	 *
	 * <p>If the requested page lies beyond the last hit, an empty or partial
	 * page is returned. All index readers and searchers opened by this method
	 * are closed before it returns, including when it fails.
	 *
	 * @param <T> bean type to populate
	 * @param cls bean class; must have an accessible no-arg constructor
	 * @param keyword search keyword
	 * @param rowCount number of records per page; must be positive
	 * @param current 1-based page number; must be positive
	 * @param indexPaths directories of the indexes to search
	 * @return the beans of the requested page; empty if no index path is given
	 *         or the page has no hits
	 * @throws IllegalArgumentException if {@code rowCount} or {@code current} is not positive
	 * @throws Exception on query parsing, index access, highlighting or bean population failures
	 */
	public <T> List<T> initSearch(Class<T> cls, String keyword, int rowCount, int current, String... indexPaths) throws Exception {

		/* Populated result beans. */
		List<T> objs = new ArrayList<T>();
		Field[] fields = cls.getDeclaredFields();
		/* Columns the keyword is matched against, and the keyword repeated once per column. */
		String[] colmuns = new String[fields.length];
		String[] keyWords = new String[fields.length];
		for (int i = 0; i < fields.length; i++) {
			colmuns[i] = fields[i].getName();
			keyWords[i] = keyword;
		}
		if (indexPaths == null || indexPaths.length <= 0) {
			return objs;
		}
		if (rowCount <= 0 || current <= 0) {
			throw new IllegalArgumentException("rowCount and current must be positive: rowCount=" + rowCount + ", current=" + current);
		}

		Analyzer analyzer = new StandardAnalyzer();
		IndexReader[] readers = new IndexReader[indexPaths.length];
		IndexSearcher[] searchers = new IndexSearcher[indexPaths.length];
		MultiSearcher multisearcher = null;
		try {
			for (int i = 0; i < indexPaths.length; i++) {
				readers[i] = IndexReader.open(indexPaths[i]);
				searchers[i] = new IndexSearcher(readers[i]);
			}
			multisearcher = new MultiSearcher(searchers);
			Query query = MultiFieldQueryParser.parse(keyWords, colmuns, analyzer);

			/* Collect enough top hits to reach the END of the requested page, not just one page worth. */
			int pageEnd = current * rowCount;
			TopDocCollector collector = new TopDocCollector(pageEnd);
			multisearcher.search(query, collector);
			ScoreDoc[] hits = collector.topDocs().scoreDocs;

			/* The highlighter depends only on the query, so build it once. */
			SimpleHTMLFormatter sHtmlF = new SimpleHTMLFormatter("<font color='red'>", "</font>");
			Highlighter highlighter = new Highlighter(sHtmlF, new QueryScorer(query));
			highlighter.setTextFragmenter(new SimpleFragmenter(100));

			/* Never read past the hits actually returned (short or missing last page). */
			int last = Math.min(pageEnd, hits.length);
			for (int i = (current - 1) * rowCount; i < last; i++) {
				Document doc = multisearcher.doc(hits[i].doc);
				T obj = cls.newInstance();
				for (int j = 0; j < fields.length; j++) {
					String name = fields[j].getName();
					String str = doc.get(name);
					if (str == null || "".equals(str)) {
						continue;
					}
					TokenStream tokenStream = analyzer.tokenStream(name, new StringReader(str));
					String value = highlighter.getBestFragment(tokenStream, str);
					/* Fall back to the raw value when the field does not contain the keyword. */
					if (value != null && !"".equals(value)) {
						BeanUtils.setProperty(obj, name, value);
					} else {
						BeanUtils.setProperty(obj, name, str);
					}
				}
				objs.add(obj);
			}
		} finally {
			try {
				if (multisearcher != null) {
					multisearcher.close();
				}
			} finally {
				/* Readers are closed explicitly: they were opened here and handed to the searchers. */
				for (IndexReader reader : readers) {
					if (reader != null) {
						reader.close();
					}
				}
			}
		}
		return objs;
	}
}

分享到：

Java 编程的动态性，第 1 部分: 类和类装 ... | Lucene Payload 的研究与应用

2009-12-15 10:51
浏览 1622
评论(0)
分类:编程语言
查看更多

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

Lucene 扩展

评论

发表评论

相关推荐

最近访客 更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

Lucene 扩展

评论

发表评论

相关推荐

使用 Apache Solr 实现更加灵巧的搜索，第 2 部分: 用于企业的 Solr

使用 Apache Solr 实现更加灵巧的搜索，第 1 部分: 基本特性和 Solr 模式

Lucene Payload 的研究与应用

使用 Apache Lucene 搜索文本

深入Lucene索引机制

Lucene 简介

最近访客更多访客>>