
Lucene Full-Text Search: A Worked Example and Brief Code Walkthrough

 
  Lucene is a sub-project of the Apache Software Foundation's Jakarta project: an open-source full-text search toolkit and framework. It provides a complete query engine and indexing engine, implements several common tokenization algorithms, and leaves many analyzer interfaces open for extension. This article uses the full-text search code from the myrss.easyjf.com site as an example to show, briefly, how Lucene is applied in a real project.
  Implementing full-text search with Lucene involves three main steps (a minimal generic sketch follows this list):
  1. Build the index: create Lucene index files from the existing records in the site's news database.
  2. Search the index: once the index exists, run full-text queries against it with the standard analyzer or a simpler analyzer.
  3. Maintain the index: the news database changes constantly (new, updated, and deleted records), and those changes must be propagated to the Lucene index files.
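  Before looking at the project code, here is a minimal, self-contained sketch of these steps using the same Lucene 1.x/2.x-era API that the article's code relies on. The index directory, field names, and sample text are made up for illustration only.
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
public class MiniLuceneDemo {
    public static void main(String[] args) throws Exception {
        String indexDir = "/tmp/demo-index"; // hypothetical index directory
        // Step 1: build the index (true = create a fresh index)
        IndexWriter writer = new IndexWriter(indexDir, new StandardAnalyzer(), true);
        Document doc = new Document();
        doc.add(new Field("title", "Lucene quick start", Field.Store.YES, Field.Index.TOKENIZED));
        doc.add(new Field("content", "Lucene is a full-text search toolkit.", Field.Store.YES, Field.Index.TOKENIZED));
        writer.addDocument(doc);
        writer.optimize();
        writer.close();
        // Step 2: search the index
        IndexSearcher searcher = new IndexSearcher(indexDir);
        Query query = new QueryParser("content", new StandardAnalyzer()).parse("lucene");
        Hits hits = searcher.search(query);
        for (int i = 0; i < hits.length(); i++) {
            System.out.println(hits.doc(i).get("title"));
        }
        searcher.close();
        // Step 3: maintain the index, e.g. append newly arrived documents later
        // by reopening IndexWriter with create=false, as the project code below does.
        IndexWriter appender = new IndexWriter(indexDir, new StandardAnalyzer(), false);
        appender.close();
    }
}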
The relevant code from myrss.easyjf.com follows.
1. Index management (building and maintenance)
  The index management class MyRssIndexManage builds the index from the data in the site's content database and keeps it up to date. Because indexing takes some time, the class implements Runnable so that it can be run on a separate thread.
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// dispatch by index type: "add" = incremental indexing, "init" = full rebuild
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);//false: append to the existing index
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndex<2)";//NOTE: the exact condition was garbled in the original page; assumed to select records not yet indexed
IPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<pList.getPages();p++){
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size();i++){
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);//true: create a brand-new index, discarding the old one
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<pList.getPages();p++){
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size();i++){
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
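  As noted above, the class implements Runnable so that indexing does not block the caller. A minimal usage sketch follows; the index path is made up, and in the real application the caller would typically be a scheduled job or an admin action.
package com.easyjf.lucene;
public class IndexJobLauncher {
    public static void main(String[] args) {
        MyRssIndexManage indexManage = new MyRssIndexManage();
        indexManage.setIndexDir("/path/to/WEB-INF/index"); // hypothetical index directory
        indexManage.setIndexType("add");  // "add" = incremental indexing, "init" = full rebuild
        new Thread(indexManage).start();  // run the indexing job on its own thread
    }
}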
2. Full-text search with Lucene
Below is the source of the MyRssSearch class, which uses Lucene's Searcher and QueryParser to search the index for a keyword.
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,"<font color='red'>"+key+"</font>"));//highlight markup assumed; the original tag was stripped when the article was published
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  In the code above, the search method returns an IPageList that wraps the paged query results. IPageList is the paging engine in the EasyJWeb Tools business layer; for details on how it is used, see my article "Design and Implementation of Business-Engine Paging in EasyJWeb Tools".

  To adapt Lucene's Hits result structure to that paging engine, we wrote a query adapter, HitsQuery. Its code is shown below:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i<max;i++){//reconstructed: the loop bound was garbled in the original; max is an absolute end index (see getResult below)
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
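  To make the IQuery contract more concrete, here is a rough sketch of how a paging engine such as PageList might drive HitsQuery. The actual calls PageList makes are internal to EasyJWeb Tools, so this is an assumption based on the method names (first result, max results, windowed getResult); the index path and keyword are made up.
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.search.Hits;
public class HitsQueryDemo {
    public static void main(String[] args) {
        MyRssSearch searcher = new MyRssSearch();
        searcher.setIndexDir("/path/to/WEB-INF/index"); // hypothetical index directory
        Hits hits = searcher.doSearch("lucene");        // run the Lucene query first
        HitsQuery query = new HitsQuery(hits);
        int pageSize = 10;
        int currentPage = 2;
        int rows = query.getRows("");                   // total number of hits
        int first = (currentPage - 1) * pageSize;       // index of the first document on this page
        // window [first, first + pageSize); max is an absolute end index in this IQuery implementation
        List pageDocs = query.getResult("", first, first + pageSize);
        System.out.println(rows + " hits in total; page " + currentPage + " holds " + pageDocs.size() + " documents");
    }
}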
3. Calling the search from the web layer
  Finally, let's look at how the web layer invokes the business layer's full-text search. Below is the search-related part of the Action that handles user requests:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");//re-interpret the GET parameter (decoded as ISO8859-1 by the container) as UTF-8
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPage<1)currentPage=1;//reconstructed: the original bounds checks were garbled; the default values are assumed
if(pageSize<1)pageSize=20;
SearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
The Lucene-related part of the SearchEngine class invoked above:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//the keyword is not cached yet: create it and run the search
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);

}
}
}
4. Demo
  This is what the Java news search at myrss.easyjf.com, provided on the EasyJF team's official site, looks like when running.




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
And here is the Lucene-related portion of the SearchEngine class invoked above:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//the keyword is not in the cache yet: record it and run the search
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);

}
}
}
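  The excerpt above relies on fields and helpers that are not shown: key, orderBy, pageSize, currentPage, updateStatus, result, readCache(), getResult(), getLuceneSearch() and the static getHotSearch() used by the Action. A minimal sketch of the members the excerpt presumes is given below; this is my assumption to make the excerpt readable, not code from the article, and the readCache() and getHotSearch() bodies in particular are placeholders:
//members of SearchEngine assumed by the excerpt above (sketch, not original code)
private String key;
private String orderBy;
private int pageSize;
private int currentPage;
private boolean updateStatus=true; //whether to bump the keyword's read counter
private IPageList result; //filled in by doSearchByLucene()
public SearchEngine(String key,String orderBy,int pageSize,int currentPage){
this.key=key;
this.orderBy=orderBy;
this.pageSize=pageSize;
this.currentPage=currentPage;
}
public MyRssSearch getLuceneSearch(){return luceneSearch;}
public IPageList getResult(){return result;}
private SearchKey readCache(){
//looks the keyword up in the SearchKey table; the actual finder is not shown in the article
return null;
}
public static List getHotSearch(int count){
//returns the most frequently searched keywords; implementation not shown in the article
return null;
}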
四、Running the Application
  Below is what the search looks like on myrss.easyjf.com, the Java news search service on the EasyJF team's official site. (The screenshots that originally followed are not reproduced here.)
  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i<max;i++){
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max))this.begin=begin;
if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
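The PageList engine presumably drives an IQuery implementation through setFirstResult/setMaxResults before calling getResult. The sketch below exercises HitsQuery directly under that assumption; note that in this implementation max behaves as an end index rather than a count, because getResult loops from begin up to max:
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.search.Hits;
public class HitsQueryDemo {
    // hits would come from IndexSearcher.search(query) elsewhere
    public static List secondPage(Hits hits) {
        int pageSize = 20;
        HitsQuery q = new HitsQuery(hits);
        q.setFirstResult(pageSize);      // index of the first hit on page 2
        q.setMaxResults(pageSize * 2);   // end index (exclusive) used by getResult's loop
        return q.getResult("");          // returns org.apache.lucene.document.Document objects
    }
}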
3. Web invocation
  Next, let's look at how the Web layer invokes the full-text search in the business-logic layer. Below is the search-related part of the Action that handles user requests:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPage<1)currentPage=1; // assumed guard: fall back to the first page
if(pageSize<1)pageSize=20;      // assumed guard and default page size
SearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//no matching data found
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
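A note on the character-set round-trip at the top of doSearch: the servlet container decodes the query parameter as ISO8859_1, so the Action re-encodes it to percent escapes and decodes those bytes again as UTF-8 to recover the original keyword. A self-contained sketch of the same idiom (the sample value and class name are illustrative):
import java.net.URLDecoder;
import java.net.URLEncoder;
public class CharsetRoundTrip {
    public static void main(String[] args) throws Exception {
        // "中" (UTF-8 bytes E4 B8 AD) as mis-decoded by a container using ISO8859_1:
        String raw = "\u00E4\u00B8\u00AD";
        // Re-encode each char back to its raw byte, then decode those bytes as UTF-8.
        String key = URLDecoder.decode(URLEncoder.encode(raw, "ISO8859_1"), "utf-8");
        System.out.println(key); // prints 中
    }
}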
The Lucene-related part of the SearchEngine class invoked above:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else // the keyword is not in the cache: create it and generate the search result
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);

}
}
}
4. Demo
  This is the running myrss.easyjf.com, the Java information search service provided on the EasyJF team's official website.




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
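  As a quick illustration, the sketch below drives the class above end to end. The index directory and the keyword are assumed values, and the getTitle()/getUrl() getters on SearchContent are assumed to mirror the setters used in lucene2searchObj.
// Minimal usage sketch for MyRssSearch (indexDir and keyword are illustrative assumptions)
MyRssSearch searcher=new MyRssSearch();
searcher.setIndexDir("/data/myrss/index"); // hypothetical index directory
IPageList pList=searcher.search("lucene",20,1); // 20 results per page, first page
List list=(pList==null)?null:pList.getResult();
for(int i=0;list!=null&&i<list.size();i++){
SearchContent item=(SearchContent)list.get(i);
System.out.println(item.getTitle()+" -> "+item.getUrl()); // getters assumed to match the setters above
}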
  In the code above, the search method returns an IPageList that wraps the paged query results. IPageList is the pagination engine of the EasyJWeb Tools business engine; for details on how it is used, see my article 《EasyJWeb Tools中业务引擎分页的设计实现》:

  To adapt Lucene's Hits result structure to this paging engine, we wrote a query class named HitsQuery, shown below:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i<max;i++){
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max))this.begin=begin;
if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
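  HitsQuery does nothing more than adapt Lucene's Hits to the IQuery paging contract, so a single page of raw Document objects can be pulled out directly. The sketch below assumes the Hits object comes from MyRssSearch.doSearch above and uses an assumed index path and keyword.
// Minimal paging sketch for HitsQuery (index path and keyword are illustrative assumptions)
MyRssSearch searcher=new MyRssSearch();
searcher.setIndexDir("/data/myrss/index"); // hypothetical index directory
Hits hits=searcher.doSearch("lucene"); // hypothetical keyword
HitsQuery query=new HitsQuery(hits);
query.setFirstResult(0); // offset of the page
query.setMaxResults(10); // page size
List firstPage=query.getResult(""); // a page of org.apache.lucene.document.Document objects
System.out.println("total="+query.getRows("")+", returned="+firstPage.size());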
3. Calling the Search from the Web Layer
  Now let's look at how the web layer invokes the full-text search provided by the business-logic layer. Below is the search-related part of the Action that handles user requests:
package com.easyjf.news.action;
import java.net.URLDecoder;
import java.net.URLEncoder;
// The EasyJWeb framework types used below (IWebAction, Page, WebForm, Module, CommUtil, Globals)
// and SearchEngine/IPageList keep their original imports, which are not shown in this excerpt.
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
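// the request parameter arrives decoded as ISO8859-1; the round trip below re-interprets its bytes as UTF-8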
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPage<1)currentPage=1;
if(pageSize<1)pageSize=20; // lower bounds reconstructed; the original default page size was lost when the article was posted
SearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");// no matching results
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
The Lucene-related part of the SearchEngine class invoked above (its fields, such as key, pageSize, currentPage, result and updateStatus, and the readCache() helper are not shown in this excerpt):
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else// the keyword is not cached yet: record it and run the search
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);

}
}
}
4. The Application in Action
  This is how myrss.easyjf.com, the Java news search service on the EasyJF team's official site, looks when running.




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size();i++){
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,"<font color='red'>"+key+"</font>")); // the highlight markup was stripped by HTML escaping in the original; the tag used here is an assumption
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
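The following is a minimal usage sketch of the search class above (not part of the original project). The index path and page parameters are illustrative, and SearchContent's getters are assumed to mirror the setters used in lucene2searchObj.
package com.easyjf.lucene;
import java.util.List;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
// Sketch only: run a keyword search and print one page of results.
public class MyRssSearchDemo {
    public static void main(String[] args) {
        MyRssSearch searcher = new MyRssSearch();
        searcher.setIndexDir("/var/myrss/index");           // assumed index path
        IPageList page = searcher.search("lucene", 20, 1);  // 20 hits per page, page 1
        List results = page.getResult();
        for (int i = 0; i < results.size(); i++) {
            SearchContent item = (SearchContent) results.get(i);
            // getTitle()/getUrl() are assumed to correspond to the setters in lucene2searchObj
            System.out.println(item.getTitle() + " -> " + item.getUrl());
        }
    }
}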
  In the code above, the search method returns an IPageList that wraps the paged query results. IPageList is the pagination engine of the EasyJWeb Tools business engine; for details on how to use it, see my article 《EasyJWeb Tools中业务引擎分页的设计实现》.

  For Lucene's Hits result structure we wrote a query adapter, HitsQuery, whose code is shown below:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i<max;i++){ // loop bound lost to HTML escaping; max is treated as the end index, consistent with the constructor setting max=hits.length()
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max))this.begin=begin; // partially lost to HTML escaping; reconstructed
if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
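To make the paging contract concrete, here is a sketch of how a paginator such as PageList could drive HitsQuery: ask for the total row count, position the window with setFirstResult/setMaxResults, then pull one page of Lucene Documents. The call sequence is an assumption based on the IQuery methods above, not the actual EasyJWeb Tools implementation.
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.search.Hits;
// Sketch only: assumed call order for paging over a HitsQuery.
public class HitsQueryPagingDemo {
    public static List page(Hits hits, int pageSize, int currentPage) {
        HitsQuery query = new HitsQuery(hits);   // hits obtained from MyRssSearch.doSearch(...)
        int first = (currentPage - 1) * pageSize;
        query.setFirstResult(first);             // start of the window
        query.setMaxResults(first + pageSize);   // end index, matching the constructor's hits.length()
        return query.getResult("");              // one page of org.apache.lucene.document.Document
    }
}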
3. Calling the Search from the Web Layer
  Now let's look at how the web layer invokes the full-text search provided by the business logic layer. Below is the search-related part of the Action that handles user requests:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPage<1)currentPage=1; // bounds checks lost to HTML escaping; the defaults here are assumptions
if(pageSize<1)pageSize=20;
SearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
The Lucene-related part of the SearchEngine class it calls:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);

}
}
}
4. Demo
  Shown here is myrss.easyjf.com, the Java information search service on the EasyJF team's official website, in action.




this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else// the keyword is not in the cache yet: record it and run the search
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);

}
}
}
四、Program demo
  This is the Java information search service myrss.easyjf.com, hosted on the EasyJF team's official website, in action.
  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
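// The next line undoes the container's ISO8859_1 decoding of the GET parameter: re-encode the
// mis-decoded string as ISO8859_1 to recover the raw bytes, then decode those bytes as UTF-8
// (equivalent to new String(key.getBytes("ISO8859_1"), "UTF-8")).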
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPage<1)currentPage=1;// lower-bound checks: the exact values here are assumed, the original listing is incomplete
if(pageSize<1)pageSize=20;// assumed default page size
SearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");// no matching data found
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
Here is the Lucene-related excerpt from the SearchEngine class that the Action above calls:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else// the keyword is not yet in the cache, so record it and build the search result
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);

}
}
}
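  The excerpt above leaves out SearchEngine's member declarations and constructor. Judging only from how SearchAction and doSearchByLucene use the class, the missing pieces roughly correspond to the hypothetical skeleton below (names and types are inferred, not taken from the original source):
import com.easyjf.lucene.MyRssSearch;
import com.easyjf.web.tools.IPageList;
public class SearchEngineSkeleton {
    private MyRssSearch luceneSearch = new MyRssSearch();
    private String key;                  // search keyword
    private String orderBy;              // requested sort order
    private int pageSize;                // hits per page
    private int currentPage;             // 1-based page number
    private boolean updateStatus = true; // whether to bump the keyword's read counter
    private IPageList result;            // paged search result filled in by doSearchByLucene()
    public SearchEngineSkeleton(String key, String orderBy, int pageSize, int currentPage) {
        this.key = key;
        this.orderBy = orderBy;
        this.pageSize = pageSize;
        this.currentPage = currentPage;
    }
    public MyRssSearch getLuceneSearch() { return luceneSearch; }
    public IPageList getResult() { return result; }
}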
四、Demo of the running application
  This is what the Java information search at myrss.easyjf.com, provided on the EasyJF team's official website, looks like in operation.




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i<max;i++){
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max))this.begin=begin;
if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
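HitsQuery simply adapts Lucene's Hits to the IQuery interface, so PageList can page over search results the same way it pages over database query results. The wiring is the same as in MyRssSearch.search; a minimal standalone sketch (the keyword and page values are examples):

// Sketch: page over raw Lucene hits through the IQuery adapter (values are illustrative)
Hits hits = myRssSearch.doSearch("lucene");   // myRssSearch is an already-configured MyRssSearch instance
IPageList page = new PageList(new HitsQuery(hits));
page.doList(20, 1, "", "", null);             // page size 20, first page
List docs = page.getResult();                 // Lucene Document objects for this page

MyRssSearch.search() does the same thing, then additionally converts each Document to a SearchContent and closes the reader and searcher afterwards.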
3. Calling the Search from the Web Layer
  Now let's look at how the web layer invokes the full-text search provided by the business logic layer. Below is the search-related part of the Action that handles user requests:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");// re-interpret a GET parameter the container decoded as ISO8859_1, so UTF-8 keywords (e.g. Chinese) come through intact
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPage<1)currentPage=1;// assumed guard; the original bound was lost from the listing
if(pageSize<1)pageSize=20;// assumed default page size; the original value was lost from the listing
SearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
Here is the Lucene-related part of the SearchEngine class called above:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else// the keyword is not in the cache yet: record it and run the search
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);

}
}
}
4. Demo of the Running Application
  This is what the Java information search at myrss.easyjf.com, provided on the EasyJF team's official website, looks like in operation.




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,"<font color=red>"+key+"</font>"));// highlight the keyword in the title; the original highlight markup was stripped by HTML escaping, so this tag is an assumption
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  In the code above, the search method returns an IPageList that wraps the paged query results. IPageList is the paging engine of the EasyJWeb Tools business engine; for details on how to use it, see my article 《EasyJWeb Tools中业务引擎分页的设计实现》.

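As a quick illustration of the call from client code, here is a minimal sketch of using MyRssSearch directly; the index path and the query string are example values:
import com.easyjf.lucene.MyRssSearch;
import com.easyjf.web.tools.IPageList;

public class SearchDemo {
    public static void main(String[] args) {
        MyRssSearch searcher = new MyRssSearch();
        searcher.setIndexDir("/path/to/WEB-INF/index"); // assumed example path
        // Request page 1 with 20 hits per page; each list element is a SearchContent object.
        IPageList pList = searcher.search("lucene", 20, 1);
        System.out.println("total hits: " + pList.getRowCount() + ", pages: " + pList.getPages());
    }
}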
  For Lucene's Hits result structure we wrote a query adapter, HitsQuery. The code is shown below:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i<max;i++){
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max))this.begin=begin;
if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
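HitsQuery adapts Lucene's Hits to the IQuery contract that the paging engine expects: the engine reads the total row count, narrows the window with setFirstResult/setMaxResults, and then fetches only that slice of Documents. The sketch below mimics that interaction; how PageList actually drives an IQuery is inferred from the interface, not taken from its source:
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.lucene.HitsQuery;

public class PagingDemo {
    // Illustrative only: simulates what a paging engine such as PageList presumably does.
    static List pageOf(Hits hits, int currentPage, int pageSize) {
        HitsQuery query = new HitsQuery(hits);
        int rows = query.getRows(null);              // total number of hits
        int first = (currentPage - 1) * pageSize;    // start index of the requested page
        int last = Math.min(first + pageSize, rows); // end index; HitsQuery treats max as an end index
        query.setFirstResult(first);
        query.setMaxResults(last);
        return query.getResult(null);                // Lucene Documents for this page only
    }
}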
3. Calling the Search from the Web Layer
  Next, let's look at how the web layer calls the full-text search provided by the business logic layer. Below is the search-related part of the Action that handles user requests:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPage<1)currentPage=1;
if(pageSize<1)pageSize=20;// default page size; the original default value was lost in HTML escaping, 20 is an assumption
SearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
The Lucene-related part of the SearchEngine class invoked above:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else// the keyword is not in the cache yet: create a keyword record, then run the search
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);

}
}
}
4. Demo
  This is the running result of myrss.easyjf.com, the Java news search service provided on the EasyJF team's official website.




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size();i++){
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
//原文此处将标题中的关键字替换为带高亮标记的形式,标记在HTML转义时丢失,下面的<b>标签仅为示意
searchObj.setTitle(title.replaceAll(key,"<b>"+key+"</b>"));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList。IPageList是EasyJWeb Tools业务引擎中的分页引擎,其用法请参见本人写的《EasyJWeb Tools中业务引擎分页的设计实现》一文。
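  MyRssSearch的典型调用方式大致如下(索引目录与关键字均为假设值,仅作示意):
// 示例:调用MyRssSearch做分页搜索(参数取值仅为示意)
MyRssSearch mySearch=new MyRssSearch();
mySearch.setIndexDir("/web/myrss/WEB-INF/index");//假设的索引目录
IPageList result=mySearch.search("lucene",10,1);//每页10条,取第1页
List list=result.getResult();//元素为SearchContent对象
System.out.println("共有"+result.getRowCount()+"条结果");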

  我们针对Lucene的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i<max;i++){
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max))this.begin=begin;
if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
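  HitsQuery把Lucene的Hits适配到EasyJWeb的IQuery接口上,分页引擎通过setFirstResult和setMaxResults确定取哪一段结果。单独使用时大致如下(仅为示意):
// 示例:直接用HitsQuery按区间取结果(仅为示意)
HitsQuery query=new HitsQuery(hits);//hits为doSearch返回的查询结果
query.setFirstResult(10);//从第11条开始
query.setMaxResults(20);//取到第20条为止(注意上面的实现中max是结束下标,不是条数)
List docs=query.getResult("");//返回org.apache.lucene.document.Document列表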
三、Web调用
  下面我们来看看如何在Web中调用商业逻辑层的全文检索功能。以下是处理用户请求的Action中搜索部分的源码:
package com.easyjf.news.action;
import java.net.URLDecoder;
import java.net.URLEncoder;
//说明:原文省略了其余import,下面用到的IWebAction、WebForm、Module、Page、CommUtil、Globals、IPageList、SearchEngine等类来自EasyJWeb框架及本项目
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPage<1)currentPage=1;//原文的边界检查被HTML转义截断,默认值为推测
if(pageSize<1)pageSize=20;//同上,默认每页条数为推测
SearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码如下(key、pageSize、currentPage、result、updateStatus等字段及其余方法从略):
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供Java信息搜索的myrss.easyjf.com的运行效果(原文此处为运行效果截图,图略)。
  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。




  Lucene是apache软件基金会 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包及架构,提供了完整的查询引擎和索引引擎,实现了一些通用的分词算法,预留很多词法分析器接口。本文以myrss.easyjf.com网站系统中使用Lucene实现全文检索的代码为例,简单演示Lucene在实际项目中的应用。
  使用Lucene实现全文检索,主要有下面三个步骤:
  1、建立索引库:根据网站新闻信息库中的已有的数据资料建立Lucene索引文件。
  2、通过索引库搜索:有了索引后,即可使用标准的词法分析器或直接的词法分析器实现进行全文检索。
  3、维护索引库:网站新闻信息库中的信息会不断的变动,包括新增、修改及删除等,这些信息的变动都需要进一步反映到Lucene索引文件中。
下面是myrss.easyjf.com相关代码!
一、索引管理(建立及维护)
  索引管理类MyRssIndexManage主要实现根据网站信息库中的数据建立索引,维护索引等。由于索引的过程需要消耗一定的时间,因此,索引管理类实现Runnable接口,使得我们可以在程序中开新线程来运行。
package com.easyjf.lucene;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.dbo.EasyJDB;
import com.easyjf.news.business.NewsDir;
import com.easyjf.news.business.NewsDoc;
import com.easyjf.news.business.NewsUtil;
import com.easyjf.web.tools.IPageList;
public class MyRssIndexManage implements Runnable {
private String indexDir;
private String indexType="add";
public void run() {
// TODO Auto-generated method stub
if("add".equals(indexType))
normalIndex();
else if ("init".equals(indexType)) reIndexAll();
}
public void normalIndex()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),false);
//NewsDir dir=NewsDir.readBySn();
String scope="(needIndexIPageList pList=NewsUtil.pageList(scope,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(scope,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where "+scope);
Date end = new Date();
System.out.print("新增索引"+num+"条信息,一共花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
public void reIndexAll()
{
try{
Date start = new Date();
int num=0;
IndexWriter writer=new IndexWriter(indexDir,new StandardAnalyzer(),true);
NewsDir dir=NewsDir.readBySn("easyjf");
IPageList pList=NewsUtil.pageList(dir,1,50);
for(int p=0;p<plist.getpages></plist.getpages>{
pList=NewsUtil.pageList(dir,p,100);
List list=pList.getResult();
for(int i=0;i<list.size></list.size>{
NewsDoc doc=(NewsDoc)list.get(i);
writer.addDocument(newsdoc2lucenedoc(doc));
num++;
}
}
writer.optimize();
writer.close();
EasyJDB.getInstance().execute("update NewsDoc set needIndex=2 where dirPath like 'easyjf%'");
Date end = new Date();
System.out.print("全部重新做了一次索引,一共处理了"+num+"条信息,花:"+(end.getTime() - start.getTime())/60000+"分钟!");
}
catch(Exception e)
{
e.printStackTrace();
}
}
private Document newsdoc2lucenedoc(NewsDoc doc)
{
Document lDoc=new Document();
lDoc.add(new Field("title",doc.getTitle(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("content",doc.getContent(),Field.Store.YES,Field.Index.TOKENIZED));
lDoc.add(new Field("url",doc.getRemark(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("cid",doc.getCid(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("source",doc.getSource(),Field.Store.YES,Field.Index.NO));
lDoc.add(new Field("inputTime",doc.getInputTime().toString(),Field.Store.YES,Field.Index.NO));
return lDoc;
}
public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}

public String getIndexType() {
return indexType;
}
public void setIndexType(String indexType) {
this.indexType = indexType;
}
}
二、使用Lucene实现全文搜索
下面是MyRssSearch类的源码,该类主要实现使用Lucene中Searcher及QueryParser实现从索引库中搜索关键词。
package com.easyjf.lucene;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import com.easyjf.search.MyRssUtil;
import com.easyjf.search.SearchContent;
import com.easyjf.web.tools.IPageList;
import com.easyjf.web.tools.PageList;
public class MyRssSearch {
private String indexDir;
IndexReader ir;
Searcher search;
public IPageList search(String key,int pageSize,int currentPage)
{
IPageList pList=new PageList(new HitsQuery(doSearch(key)));
pList.doList(pageSize,currentPage,"","",null);
if(pList!=null)
{
List list=pList.getResult();
if(list!=null){
for(int i=0;i<list.size></list.size>{
list.set(i,lucene2searchObj((Document)list.get(i),key));
}
}
}
try{
if(search!=null)search.close();
if(ir!=null)ir.close();
}
catch(Exception e)
{
e.printStackTrace();
}
return pList;
}
private SearchContent lucene2searchObj(Document doc,String key)
{
SearchContent searchObj=new SearchContent();
String title=doc.getField("title").stringValue();
searchObj.setTitle(title.replaceAll(key,""+key+""));
searchObj.setTvalue(doc.getField("cid").stringValue());
searchObj.setUrl(doc.getField("url").stringValue());
searchObj.setSource(doc.getField("source").stringValue());
searchObj.setLastUpdated(doc.getField("inputTime").stringValue());
searchObj.setIntro(MyRssUtil.content2intro(doc.getField("content").stringValue(),key));
return searchObj;
}
public Hits doSearch(String key)
{
Hits hits=null;
try{
ir=IndexReader.open(indexDir);
search=new IndexSearcher(ir);
String fields[]={"title","content"};
QueryParser parser=new MultiFieldQueryParser(fields,new StandardAnalyzer());
Query query=parser.parse(key);
hits=search.search(query);
}
catch(Exception e)
{
e.printStackTrace();
}
//System.out.println("搜索结果:"+hits.length());
return hits;
}

public String getIndexDir() {
return indexDir;
}
public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
}
  在上面的代码中,search方法返回一个封装了分页查询结果的IPageList,IPageList是EasyJWeb Tools业务引擎中的分页引擎,对于IPageList的使用,请看本人写的这篇文章《EasyJWeb Tools中业务引擎分页的设计实现》:

  我们针对Lucene的的查询结果Hits结构,写了一个查询器HitsQuery。代码如下所示:
package com.easyjf.lucene;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.search.Hits;
import com.easyjf.web.tools.IQuery;
public class HitsQuery implements IQuery {
private int begin=0;
private int max=0;
private Hits hits;
public HitsQuery()
{

}
public HitsQuery(Hits hits)
{
if(hits!=null)
{
this.hits=hits;
this.max=hits.length();
}
}
public int getRows(String arg0) {
// TODO Auto-generated method stub
return (hits==null?0:hits.length());
}
public List getResult(String arg0) {
// TODO Auto-generated method stub
List list=new ArrayList();
for(int i=begin;i{
try{
list.add(hits.doc(i));
}
catch(Exception e)
{
e.printStackTrace();
}
}
return list;
}
public void setFirstResult(int begin) {
// TODO Auto-generated method stub
this.begin=begin;
}
public void setMaxResults(int max) {
// TODO Auto-generated method stub
this.max=max;
}
public void setParaValues(Collection arg0) {
// TODO Auto-generated method stub

}
public List getResult(String condition, int begin, int max) {
// TODO Auto-generated method stub
if((begin>=0)&&(begin<max></max>if(!(max>hits.length()))this.max=max;
return getResult(condition);
}
}
三、Web调用
  下面我们来看看在Web中如果调用商业逻辑层的全文检索功能。下面是处理用户请请的Action中关于搜索部分的源码:
package com.easyjf.news.action;
public class SearchAction implements IWebAction {
public Page doSearch(WebForm form,Module module)throws Exception
{
String key=CommUtil.null2String(form.get("v"));
key=URLDecoder.decode(URLEncoder.encode(key,"ISO8859_1"),"utf-8");
form.set("v",key);
form.addResult("v2",URLEncoder.encode(key,"utf-8"));
if(key.getBytes().length>2){
String orderBy=CommUtil.null2String(form.get("order"));
int currentPage=CommUtil.null2Int(form.get("page"));
int pageSize=CommUtil.null2Int(form.get("pageSize"));
if(currentPageif(pageSizeSearchEngine search=new SearchEngine(key,orderBy,pageSize,currentPage);
search.getLuceneSearch().setIndexDir(Globals.APP_BASE_DIR+"/WEB-INF/index");
search.doSearchByLucene();
IPageList pList=search.getResult();
if(pList!=null && pList.getRowCount()>0){
form.addResult("list",pList.getResult());
form.addResult("pages",new Integer(pList.getPages()));
form.addResult("rows",new Integer(pList.getRowCount()));
form.addResult("page",new Integer(pList.getCurrentPage()));
form.addResult("gotoPageHTML",CommUtil.showPageHtml(pList.getCurrentPage(),pList.getPages()));
}
else
{
form.addResult("notFound","true");//找不到数据
}
}
else
form.addResult("errMsg","您输入的关键字太短!");
form.addResult("hotSearch",SearchEngine.getHotSearch(20));
return null;
}
}
其中调用的SearchEngine类中有关Lucene部分的源码:
public class SearchEngine {
private MyRssSearch luceneSearch=new MyRssSearch();
public void doSearchByLucene()
{
SearchKey keyObj=readCache();
if(keyObj!=null){
result=luceneSearch.search(key,pageSize,currentPage);
if(updateStatus){
keyObj.setReadTimes(new Integer(keyObj.getReadTimes().intValue()+1));
keyObj.update();
}
}
else//缓存中没有该关键字信息,生成关键字搜索结果
{
keyObj=new SearchKey();
keyObj.setTitle(key);
keyObj.setLastUpdated(new Date());
keyObj.setReadTimes(new Integer(1));
keyObj.setStatus(new Integer(0));
keyObj.setSequence(new Integer(1));
keyObj.setVdate(new Date());
keyObj.save();
result=luceneSearch.search(key,pageSize,currentPage);;

}
}
}
四、程序演示效果
  这是EasyJF团队官方网站上提供java信息搜索的myrss.easyjf.com的运行效果。



