lucene第二步，lucene搜索 -

zcwfeng

浏览: 106565 次
性别:
来自: 吉林

最近访客更多访客>>

NOthingAj

csqstronger

iseejun

wanterwanter

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

lucene第二步，lucene搜索

本文整理自：http://blog.csdn.net/wxwzy738/article/details/8799656

1、工程结构

2、查询语法代码

[java] view plain copy

packageorg.itat.index;
importjava.io.File;
importjava.io.IOException;
importjava.io.StringReader;
importjava.text.ParseException;
importjava.text.SimpleDateFormat;
importjava.util.Date;
importjava.util.HashMap;
importjava.util.Map;
importorg.apache.lucene.analysis.Analyzer;
importorg.apache.lucene.analysis.TokenStream;
importorg.apache.lucene.analysis.standard.StandardAnalyzer;
importorg.apache.lucene.analysis.tokenattributes.CharTermAttribute;
importorg.apache.lucene.document.Document;
importorg.apache.lucene.document.Field;
importorg.apache.lucene.document.NumericField;
importorg.apache.lucene.index.CorruptIndexException;
importorg.apache.lucene.index.IndexReader;
importorg.apache.lucene.index.IndexWriter;
importorg.apache.lucene.index.IndexWriterConfig;
importorg.apache.lucene.index.Term;
importorg.apache.lucene.queryParser.QueryParser;
importorg.apache.lucene.search.BooleanClause.Occur;
importorg.apache.lucene.search.BooleanQuery;
importorg.apache.lucene.search.FuzzyQuery;
importorg.apache.lucene.search.IndexSearcher;
importorg.apache.lucene.search.NumericRangeQuery;
importorg.apache.lucene.search.PhraseQuery;
importorg.apache.lucene.search.PrefixQuery;
importorg.apache.lucene.search.Query;
importorg.apache.lucene.search.ScoreDoc;
importorg.apache.lucene.search.TermQuery;
importorg.apache.lucene.search.TermRangeQuery;
importorg.apache.lucene.search.TopDocs;
importorg.apache.lucene.search.WildcardQuery;
importorg.apache.lucene.store.Directory;
importorg.apache.lucene.store.FSDirectory;
importorg.apache.lucene.store.LockObtainFailedException;
importorg.apache.lucene.util.Version;
importorg.wltea.analyzer.lucene.IKAnalyzer;
publicclassSearcherUtil{
privateDirectorydirectory;
privateAnalyzeranalyzer=newIKAnalyzer();
privateIndexReaderreader;
privateString[]ids={"1","2","3","4","5","6"};
privateString[]emails={"aa@itat.org","bb@itat.org","cc@cc.org","dd@sina.org","ee@zttc.edu","ff@itat.org"};
privateString[]contents={
"welcometovisitedthespace,Ilikebook",
"helloboy,Ilikepingpengball",
"mynameisccIlikegame",
"Ilikefootball",
"IlikefootballandIlikebasketballtoo",
"Ilikemovieandswim"
};
privateDate[]dates=null;
privateint[]attachs={2,3,1,4,5,5};
privateString[]names={"zhangsan","lisi","john","jetty","mike","jake"};
privateMap<String,Float>scores=newHashMap<String,Float>();
publicSearcherUtil(){
//directory=newRAMDirectory();
try{
directory=FSDirectory.open(newFile("F:\\Workspaces\\lucenes\\02_lucene_searcher\\index"));
setDates();
scores.put("itat.org",2.0f);
scores.put("zttc.edu",1.5f);
//index();
}catch(IOExceptione){
e.printStackTrace();
}
}
privatevoidsetDates(){
SimpleDateFormatsdf=newSimpleDateFormat("yyyy-MM-dd");
try{
dates=newDate[ids.length];
dates[0]=sdf.parse("2010-02-19");
dates[1]=sdf.parse("2012-01-11");
dates[2]=sdf.parse("2011-09-19");
dates[3]=sdf.parse("2010-12-22");
dates[4]=sdf.parse("2012-01-01");
dates[5]=sdf.parse("2011-05-19");
}catch(ParseExceptione){
e.printStackTrace();
}
}
publicvoidindex(){
IndexWriterwriter=null;
try{
writer=newIndexWriter(directory,newIndexWriterConfig(Version.LUCENE_35,newStandardAnalyzer(Version.LUCENE_35)));
writer.deleteAll();
Documentdoc=null;
for(inti=0;i<ids.length;i++){
doc=newDocument();
doc.add(newField("id",ids[i],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
doc.add(newField("email",emails[i],Field.Store.YES,Field.Index.NOT_ANALYZED));
doc.add(newField("content",contents[i],Field.Store.NO,Field.Index.ANALYZED));
doc.add(newField("name",names[i],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
//存储数字
doc.add(newNumericField("attach",Field.Store.YES,true).setIntValue(attachs[i]));
//存储日期
doc.add(newNumericField("date",Field.Store.YES,true).setLongValue(dates[i].getTime()));
Stringet=emails[i].substring(emails[i].lastIndexOf("@")+1);
if(scores.containsKey(et)){
doc.setBoost(scores.get(et));
}else{
doc.setBoost(0.5f);
}
writer.addDocument(doc);
}
}catch(CorruptIndexExceptione){
e.printStackTrace();
}catch(LockObtainFailedExceptione){
e.printStackTrace();
}catch(IOExceptione){
e.printStackTrace();
}finally{
try{
if(writer!=null)writer.close();
}catch(CorruptIndexExceptione){
e.printStackTrace();
}catch(IOExceptione){
e.printStackTrace();
}
}
}
publicIndexSearchergetSearcher(){
try{
if(reader==null){
reader=IndexReader.open(directory);
}else{
IndexReadertr=IndexReader.openIfChanged(reader);
if(tr!=null){
reader.close();
reader=tr;
}
}
returnnewIndexSearcher(reader);
}catch(CorruptIndexExceptione){
e.printStackTrace();
}catch(IOExceptione){
e.printStackTrace();
}
returnnull;
}
publicIndexSearchergetSearcher(Directorydirectory){
try{
if(reader==null){
reader=IndexReader.open(directory);
}else{
IndexReadertr=IndexReader.openIfChanged(reader);
if(tr!=null){
reader.close();
reader=tr;
}
}
returnnewIndexSearcher(reader);
}catch(CorruptIndexExceptione){
e.printStackTrace();
}catch(IOExceptione){
e.printStackTrace();
}
returnnull;
}
publicvoidsearchByTerm(Stringfield,Stringname,intnum){
try{
IndexSearchersearcher=getSearcher();
Queryquery=newTermQuery(newTerm(field,name));
TopDocstds=searcher.search(query,num);
printDocument(searcher,tds);
searcher.close();
}catch(CorruptIndexExceptione){
e.printStackTrace();
}catch(IOExceptione){
e.printStackTrace();
}
}
publicvoidsearchByTermToken(Stringfield,Stringname,intnum){
try{
IndexSearchersearcher=getSearcher();
//Queryquery=newTermQuery(newTerm(field,name));
//当用户输入两个关键字时，QueryParser默认它们之间的关系为“或”关系
//下面这么写的话在对用户输入进行扫描时，就会用空格分开的关键字理解为“与”，
//其实也就是构建了一个“与”关系的布尔型查询
//parser.setDefaultOperator(Operator.AND);
QueryParserparser=newQueryParser(Version.LUCENE_35,field,analyzer);
Stringk=analyzerKey(name);
Queryquery=parser.parse(name);
TopDocstds=searcher.search(query,num);
printDocument(searcher,tds);
searcher.close();
}catch(CorruptIndexExceptione){
e.printStackTrace();
}catch(Exceptione){
e.printStackTrace();
}
}
privateStringanalyzerKey(Stringkey){
//StandardAnalyzeranalyzer=newStandardAnalyzer(Version.LUCENE_35);
StringReaderreader=newStringReader(key);
TokenStreamtokenStream=analyzer.tokenStream("",reader);
CharTermAttributetermattr=tokenStream.addAttribute(CharTermAttribute.class);
StringBuildersb=newStringBuilder();
try{
while(tokenStream.incrementToken()){
Stringk=termattr.toString();
sb.append(k).append("");
}
}catch(IOExceptione){
e.printStackTrace();
}
key=sb.toString().trim();
key=key.replaceAll("\\s+","AND");
returnsb.toString();
}
publicvoidprintDocument(IndexSearchersearcher,TopDocstds){
System.out.println("共查询了【"+tds.totalHits+"】条");
for(ScoreDocsd:tds.scoreDocs){
try{
Documentdoc=searcher.doc(sd.doc);
System.out.println("filename:"+doc.get("filename"));
System.out.println("path:"+doc.get("path"));
System.out.println("date:"+doc.get("date"));
System.out.println("size:"+doc.get("size"));
System.out.println("content:"+doc.get("content"));
System.out.println("-------------------------------------------");
}catch(CorruptIndexExceptione){
e.printStackTrace();
}catch(IOExceptione){
e.printStackTrace();
}
}
}
publicvoidsearchByTermRange(Stringfield,Stringstart,Stringend,intnum){
try{
IndexSearchersearcher=getSearcher();
Queryquery=newTermRangeQuery(field,start,end,true,true);
TopDocstds=searcher.search(query,num);
printDocument(searcher,tds);
searcher.close();
}catch(CorruptIndexExceptione){
e.printStackTrace();
}catch(IOExceptione){
e.printStackTrace();
}
}
/**
*建立索引时：使用的Field，而使用NumericRangeQuery，必须使用NumericField
*@paramfield
*@paramstart
*@paramend
*@paramnum
*/
publicvoidsearchByNumricRange(Stringfield,intstart,intend,intnum){
try{
IndexSearchersearcher=getSearcher();
Queryquery=NumericRangeQuery.newIntRange(field,start,end,true,true);
//DateTools.dateToString(newDate(),null);
TopDocstds=searcher.search(query,num);
printDocument(searcher,tds);
searcher.close();
}catch(CorruptIndexExceptione){
e.printStackTrace();
}catch(IOExceptione){
e.printStackTrace();
}
}
publicvoidsearchByPrefix(Stringfield,Stringvalue,intnum){
try{
IndexSearchersearcher=getSearcher();
Queryquery=newPrefixQuery(newTerm(field,value));
TopDocstds=searcher.search(query,num);
printDocument(searcher,tds);
searcher.close();
}catch(CorruptIndexExceptione){
e.printStackTrace();
}catch(IOExceptione){
e.printStackTrace();
}
}
publicvoidsearchByWildcard(Stringfield,Stringvalue,intnum){
try{
IndexSearchersearcher=getSearcher();
//在传入的value中可以使用通配符:?和*,?表示匹配一个字符，*表示匹配任意多个字符
Queryquery=newWildcardQuery(newTerm(field,value));
TopDocstds=searcher.search(query,num);
printDocument(searcher,tds);
searcher.close();
}catch(CorruptIndexExceptione){
e.printStackTrace();
}catch(IOExceptione){
e.printStackTrace();
}
}
publicvoidsearchByBoolean(intnum){
try{
IndexSearchersearcher=getSearcher();
BooleanQueryquery=newBooleanQuery();
/*
*BooleanQuery可以连接多个子查询
*Occur.MUST表示必须出现
*Occur.SHOULD表示可以出现
*Occur.MUSE_NOT表示不能出现
*/
query.add(newTermQuery(newTerm("name","3")),Occur.MUST_NOT);
query.add(newTermQuery(newTerm("content","健壮")),Occur.SHOULD);
TopDocstds=searcher.search(query,num);
printDocument(searcher,tds);
searcher.close();
}catch(CorruptIndexExceptione){
e.printStackTrace();
}catch(IOExceptione){
e.printStackTrace();
}
}
publicvoidsearchByPhrase(intnum){
try{
IndexSearchersearcher=getSearcher();
PhraseQueryquery=newPhraseQuery();
query.setSlop(10);
query.add(newTerm("content","java"));
//第一个Term
query.add(newTerm("content","程序"));
//产生距离之后的第二个Term
//query.add(newTerm("content","football"));
TopDocstds=searcher.search(query,num);
printDocument(searcher,tds);
searcher.close();
}catch(CorruptIndexExceptione){
e.printStackTrace();
}catch(IOExceptione){
e.printStackTrace();
}
}
/**
*查询用于匹配与指定项相似的项
*默认是匹配一个有不同的，其他一样的，比如like和mike，就是距离算法的相似距离为1
*这种方式少用，影响效率
*/
publicvoidsearchByFuzzy(intnum){
try{
IndexSearchersearcher=getSearcher();
//最后两个参数为匹配率和距离
FuzzyQueryquery=newFuzzyQuery(newTerm("content","总统"),0.4f,0);
System.out.println(query.getPrefixLength());
System.out.println(query.getMinSimilarity());
TopDocstds=searcher.search(query,num);
printDocument(searcher,tds);
searcher.close();
}catch(CorruptIndexExceptione){
e.printStackTrace();
}catch(IOExceptione){
e.printStackTrace();
}
}
publicvoidsearchByQueryParse(Queryquery,intnum){
try{
IndexSearchersearcher=getSearcher();
TopDocstds=searcher.search(query,num);
System.out.println("一共查询了:"+tds.totalHits);
for(ScoreDocsd:tds.scoreDocs){
Documentdoc=searcher.doc(sd.doc);
System.out.println(doc.get("id")+"---->"+
doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id")+","+
doc.get("attach")+","+doc.get("date")+"=="+sd.score);
}
searcher.close();
}catch(CorruptIndexExceptione){
e.printStackTrace();
}catch(IOExceptione){
e.printStackTrace();
}
}
/**
*lucene3.5之前采用的是一种再查询的方式，也就是说先把全部的结果的docid查询出来，然后
*分页得到该页的docid，然后根据docid得到document信息，
*lucene官方是说他的速度已经够快，再查询不会有效率问题
*@paramquery
*@parampageIndex
*@parampageSize
*/
publicvoidsearchPage(Stringquery,intpageIndex,intpageSize){
try{
Directorydir=FileIndexUtils.getDirectory();
IndexSearchersearcher=getSearcher(dir);
QueryParserparser=newQueryParser(Version.LUCENE_35,"content",analyzer);
Queryq=parser.parse(query);
TopDocstds=searcher.search(q,500);
ScoreDoc[]sds=tds.scoreDocs;
intstart=(pageIndex-1)*pageSize;
intend=pageIndex*pageSize;
for(inti=start;i<end;i++){
Documentdoc=searcher.doc(sds[i].doc);
System.out.println("filename:"+doc.get("filename"));
System.out.println("path:"+doc.get("path"));
System.out.println("date:"+doc.get("date"));
System.out.println("size:"+doc.get("size"));
System.out.println("content:"+doc.get("content"));
System.out.println("-------------------------------------------");
}
searcher.close();
}catch(org.apache.lucene.queryParser.ParseExceptione){
e.printStackTrace();
}catch(IOExceptione){
e.printStackTrace();
}
}
/**
*目前没有办法只取当前这页的数据，而是要全部查询然后得到docid
*一种增加效率的方式是取的条数做下限制，比如不要每次都取500条，
*也是把取的条数设置为当前页的所在位置数，比如每页10条，
*取第一页数据则取10条，取第二页则取20条，取五页则去50条
*根据页码和分页大小获取上一次的最后一个ScoreDoc
*/
privateScoreDocgetLastScoreDoc(intpageIndex,intpageSize,Queryquery,IndexSearchersearcher)throwsIOException{
if(pageIndex==1)returnnull;//如果是第一页就返回空
intnum=pageSize*(pageIndex-1);//获取上一页的数量
TopDocstds=searcher.search(query,num);
returntds.scoreDocs[num-1];
}
/**
*使用这种方式的话是把上一页的最后一个元素给拿到，然后再把pagesize传入，
*就可以得到当页的数据，其实就是简便了查询，原理还是把全部的docid查询后在得到document
*@paramquery
*@parampageIndex
*@parampageSize
*/
publicvoidsearchPageByAfter(Stringquery,intpageIndex,intpageSize){
try{
Directorydir=FileIndexUtils.getDirectory();
IndexSearchersearcher=getSearcher(dir);
QueryParserparser=newQueryParser(Version.LUCENE_35,"content",analyzer);
Queryq=parser.parse(query);
//先获取上一页的最后一个元素
ScoreDoclastSd=getLastScoreDoc(pageIndex,pageSize,q,searcher);
//通过最后一个元素搜索下页的pageSize个元素
TopDocstds=searcher.searchAfter(lastSd,q,pageSize);
printDocument(searcher,tds);
searcher.close();
}catch(org.apache.lucene.queryParser.ParseExceptione){
e.printStackTrace();
}catch(IOExceptione){
e.printStackTrace();
}
}
publicvoidsearchNoPage(Stringquery){
try{
Directorydir=FileIndexUtils.getDirectory();
IndexSearchersearcher=getSearcher(dir);
QueryParserparser=newQueryParser(Version.LUCENE_35,"content",newStandardAnalyzer(Version.LUCENE_35));
Queryq=parser.parse(query);
TopDocstds=searcher.search(q,20);
ScoreDoc[]sds=tds.scoreDocs;
for(inti=0;i<sds.length;i++){
Documentdoc=searcher.doc(sds[i].doc);
System.out.println(sds[i].doc+":"+doc.get("path")+"-->"+doc.get("filename"));
}
searcher.close();
}catch(org.apache.lucene.queryParser.ParseExceptione){
e.printStackTrace();
}catch(IOExceptione){
e.printStackTrace();
}
}
}

3、查询语法的测试单元类

[java] view plain copy

packageorg.itat.test;
importjava.io.File;
importjava.io.IOException;
importorg.apache.commons.io.FileUtils;
importorg.apache.commons.io.FilenameUtils;
importorg.apache.lucene.analysis.Analyzer;
importorg.apache.lucene.analysis.standard.StandardAnalyzer;
importorg.apache.lucene.queryParser.ParseException;
importorg.apache.lucene.queryParser.QueryParser;
importorg.apache.lucene.search.Query;
importorg.apache.lucene.util.Version;
importorg.itat.index.FileIndexUtils;
importorg.itat.index.SearcherUtil;
importorg.junit.Before;
importorg.junit.Test;
importorg.wltea.analyzer.lucene.IKAnalyzer;
publicclassTestSearch{
privateSearcherUtilsu;
privateAnalyzeranalyzer=newIKAnalyzer();
@Before
publicvoidinit(){
su=newSearcherUtil();
}
@Test
publicvoidtestCopyFiles(){
try{
Filefile=newFile("F:\\Workspaces\\lucenes\\02_lucene_searcher\\resource");
for(Filef:file.listFiles()){
StringdestFileName=FilenameUtils.getFullPath(f.getAbsolutePath())+
FilenameUtils.getBaseName(f.getName())+".she";
FileUtils.copyFile(f,newFile(destFileName));
}
}catch(IOExceptione){
e.printStackTrace();
}
}
@Test
publicvoidsearchByTerm(){
//su.searchByTerm("content","",10);
su.searchByTermToken("content","头脑风暴",10);
}
@Test
publicvoidsearchByTermRange(){
//查询name以a开头和s结尾的
//su.searchByTermRange("name","a","s",10);
//由于attachs是数字类型，使用TermRange无法查询
//su.searchByTermRange("size",newNumericField("200").stringValue(),newNumericField("500").stringValue(),10);
QueryParserparser=newQueryParser(Version.LUCENE_35,"size",analyzer);
Queryquery;
try{
query=parser.parse("size:[100TO500]");
su.searchByQueryParse(query,10);
}catch(ParseExceptione){
e.printStackTrace();
}
}
@Test
publicvoidsearchByNumRange(){
//su.searchByNumricRange("attach",2,10,5);
su.searchByNumricRange("size",100,300,10);
}
@Test
publicvoidsearchByPrefix(){
su.searchByPrefix("content","人",10);
}
@Test
publicvoidsearchByWildcard(){
//匹配@itat.org结尾的所有字符
//su.searchByWildcard("email","*@itat.org",10);
//匹配j开头的有三个字符的name
//su.searchByWildcard("name","j???",10);
su.searchByWildcard("content","类?",10);
}
@Test
publicvoidsearchByBoolean(){
su.searchByBoolean(10);
}
@Test
publicvoidsearchByPhrase(){
su.searchByPhrase(10);
}
@Test
publicvoidsearchByFuzzy(){
su.searchByFuzzy(10);
}
@Test
publicvoidsearchByQueryParse()throwsParseException{
//1、创建QueryParser对象,默认搜索域为content
QueryParserparser=newQueryParser(Version.LUCENE_35,"content",newStandardAnalyzer(Version.LUCENE_35));
//改变空格的默认操作符，以下可以改成AND
//parser.setDefaultOperator(Operator.AND);
//开启第一个字符的通配符匹配，默认关闭因为效率不高
parser.setAllowLeadingWildcard(true);
//搜索content中包含有like的
Queryquery=parser.parse("like");
//有basketball或者football的，空格默认就是OR
query=parser.parse("basketballfootball");
//改变搜索域为name为mike
//query=parser.parse("content:like");
//同样可以使用*和?来进行通配符匹配
//query=parser.parse("name:j*");
//通配符默认不能放在首位
//query=parser.parse("email:*@itat.org");
//匹配name中没有mike但是content中必须有football的，+和-要放置到域说明前面
query=parser.parse("-name:mike+like");
//匹配一个区间，注意:TO必须是大写
//query=parser.parse("id:[1TO6]");
//闭区间匹配只会匹配到2
//query=parser.parse("id:{1TO3}");
//完全匹配ILikeFootball的
//query=parser.parse("\"Ilikefootball\"");
//匹配I和football之间有一个单词距离的
//query=parser.parse("\"Ifootball\"~1");
//模糊查询
//query=parser.parse("name:make~");
//没有办法匹配数字范围（自己扩展Parser）
//query=parser.parse("attach:[2TO10]");
su.searchByQueryParse(query,10);
}
@Test
publicvoidindexFile(){
FileIndexUtils.index(true);
}
@Test
publicvoidtestSearchPage01(){
su.searchPage("java",2,5);
System.out.println("-------------------------------");
//su.searchNoPage("java");
su.searchPageByAfter("java",2,2);
}
@Test
publicvoidtestSearchPage02(){
su.searchPageByAfter("java",3,20);
}
}

4、创建索引的类

[java] view plain copy

packageorg.itat.index;
importjava.io.File;
importjava.io.FileReader;
importjava.io.IOException;
importorg.apache.commons.io.FileUtils;
importorg.apache.lucene.analysis.Analyzer;
importorg.apache.lucene.analysis.standard.StandardAnalyzer;
importorg.apache.lucene.document.Document;
importorg.apache.lucene.document.Field;
importorg.apache.lucene.document.NumericField;
importorg.apache.lucene.index.CorruptIndexException;
importorg.apache.lucene.index.IndexWriter;
importorg.apache.lucene.index.IndexWriterConfig;
importorg.apache.lucene.store.Directory;
importorg.apache.lucene.store.FSDirectory;
importorg.apache.lucene.store.LockObtainFailedException;
importorg.apache.lucene.util.Version;
importorg.wltea.analyzer.lucene.IKAnalyzer;
publicclassFileIndexUtils{
privatestaticDirectorydirectory=null;
privatestaticAnalyzeranalyzer=newIKAnalyzer();
static{
try{
directory=FSDirectory.open(newFile("F:\\Workspaces\\lucenes\\02_lucene_searcher\\index"));
}catch(IOExceptione){
e.printStackTrace();
}
}
publicstaticDirectorygetDirectory(){
returndirectory;
}
publicstaticvoidindex(booleanhasNew){
IndexWriterwriter=null;
try{
writer=newIndexWriter(directory,newIndexWriterConfig(Version.LUCENE_35,analyzer));
if(hasNew){
writer.deleteAll();
}
Filefile=newFile("F:\\Workspaces\\lucenes\\02_lucene_searcher\\resource");
Documentdoc=null;
for(Filef:file.listFiles()){
doc=newDocument();
doc.add(newField("content",FileUtils.readFileToString(f),Field.Store.YES,Field.Index.ANALYZED));
doc.add(newField("filename",f.getName(),Field.Store.YES,Field.Index.ANALYZED));
doc.add(newField("path",f.getAbsolutePath(),Field.Store.YES,Field.Index.ANALYZED));
doc.add(newNumericField("date",Field.Store.YES,true).setLongValue(f.lastModified()));
doc.add(newNumericField("size",Field.Store.YES,true).setIntValue((int)(f.length())));
writer.addDocument(doc);
}
}catch(CorruptIndexExceptione){
e.printStackTrace();
}catch(LockObtainFailedExceptione){
e.printStackTrace();
}catch(IOExceptione){
e.printStackTrace();
}finally{
try{
if(writer!=null)writer.close();
}catch(CorruptIndexExceptione){
e.printStackTrace();
}catch(IOExceptione){
e.printStackTrace();
}
}
}
}

5、对索引进行操作的类

[java] view plain copy

packageorg.itat.index;
importjava.io.IOException;
importjava.text.ParseException;
importjava.text.SimpleDateFormat;
importjava.util.Date;
importjava.util.HashMap;
importjava.util.Map;
importorg.apache.lucene.analysis.standard.StandardAnalyzer;
importorg.apache.lucene.document.Document;
importorg.apache.lucene.document.Field;
importorg.apache.lucene.document.NumericField;
importorg.apache.lucene.index.CorruptIndexException;
importorg.apache.lucene.index.IndexReader;
importorg.apache.lucene.index.IndexWriter;
importorg.apache.lucene.index.IndexWriterConfig;
importorg.apache.lucene.index.StaleReaderException;
importorg.apache.lucene.index.Term;
importorg.apache.lucene.store.Directory;
importorg.apache.lucene.store.LockObtainFailedException;
importorg.apache.lucene.store.RAMDirectory;
importorg.apache.lucene.util.Version;
publicclassIndexUtil{
privateString[]ids={"1","2","3","4","5","6"};
privateString[]emails={"aa@itat.org","bb@itat.org","cc@cc.org","dd@sina.org","ee@zttc.edu","ff@itat.org"};
privateString[]contents={
"welcometovisitedthespace,Ilikebook",
"helloboy,Ilikepingpengball",
"mynameisccIlikegame",
"Ilikefootball",
"IlikefootballandIlikebasketballtoo",
"Ilikemovieandswim"
};
privateDate[]dates=null;
privateint[]attachs={2,3,1,4,5,5};
privateString[]names={"zhangsan","lisi","john","jetty","mike","jake"};
privateDirectorydirectory=null;
privateMap<String,Float>scores=newHashMap<String,Float>();
publicIndexUtil(){
setDates();
scores.put("itat.org",2.0f);
scores.put("zttc.edu",1.5f);
directory=newRAMDirectory();
index();
}
privatevoidsetDates(){
SimpleDateFormatsdf=newSimpleDateFormat("yyyy-MM-dd");
try{
dates=newDate[ids.length];
dates[0]=sdf.parse("2010-02-19");
dates[1]=sdf.parse("2012-01-11");
dates[2]=sdf.parse("2011-09-19");
dates[3]=sdf.parse("2010-12-22");
dates[4]=sdf.parse("2012-01-01");
dates[5]=sdf.parse("2011-05-19");
}catch(ParseExceptione){
e.printStackTrace();
}
}
publicvoidundelete(){
//使用IndexReader进行恢复
try{
IndexReaderreader=IndexReader.open(directory,false);
//恢复时，必须把IndexReader的只读(readOnly)设置为false
reader.undeleteAll();
reader.close();
}catch(CorruptIndexExceptione){
e.printStackTrace();
}catch(StaleReaderExceptione){
e.printStackTrace();
}catch(LockObtainFailedExceptione){
e.printStackTrace();
}catch(IOExceptione){
e.printStackTrace();
}
}
publicvoidmerge(){
IndexWriterwriter=null;
try{
writer=newIndexWriter(directory,
newIndexWriterConfig(Version.LUCENE_35,newStandardAnalyzer(Version.LUCENE_35)));
//会将索引合并为2段，这两段中的被删除的数据会被清空
//特别注意：此处Lucene在3.5之后不建议使用，因为会消耗大量的开销，
//Lucene会根据情况自动处理的
writer.forceMerge(2);
}catch(CorruptIndexExceptione){
e.printStackTrace();
}catch(LockObtainFailedExceptione){
e.printStackTrace();
}catch(IOExceptione){
e.printStackTrace();
}finally{
try{
if(writer!=null)writer.close();
}catch(CorruptIndexExceptione){
e.printStackTrace();
}catch(IOExceptione){
e.printStackTrace();
}
}
}
publicvoidforceDelete(){
IndexWriterwriter=null;
try{
writer=newIndexWriter(directory,
newIndexWriterConfig(Version.LUCENE_35,newStandardAnalyzer(Version.LUCENE_35)));
writer.forceMergeDeletes();
}catch(CorruptIndexExceptione){
e.printStackTrace();
}catch(LockObtainFailedExceptione){
e.printStackTrace();
}catch(IOExceptione){
e.printStackTrace();
}finally{
try{
if(writer!=null)writer.close();
}catch(CorruptIndexExceptione){
e.printStackTrace();
}catch(IOExceptione){
e.printStackTrace();
}
}
}
publicvoiddelete(){
IndexWriterwriter=null;
try{
writer=newIndexWriter(directory,
newIndexWriterConfig(Version.LUCENE_35,newStandardAnalyzer(Version.LUCENE_35)));
//参数是一个选项，可以是一个Query，也可以是一个term，term是一个精确查找的值
//此时删除的文档并不会被完全删除，而是存储在一个回收站中的，可以恢复
writer.deleteDocuments(newTerm("id","1"));
writer.commit();
}catch(CorruptIndexExceptione){
e.printStackTrace();
}catch(LockObtainFailedExceptione){
e.printStackTrace();
}catch(IOExceptione){
e.printStackTrace();
}finally{
try{
if(writer!=null)writer.close();
}catch(CorruptIndexExceptione){
e.printStackTrace();
}catch(IOExceptione){
e.printStackTrace();
}
}
}
publicvoidupdate(){
IndexWriterwriter=null;
try{
writer=newIndexWriter(directory,
newIndexWriterConfig(Version.LUCENE_35,newStandardAnalyzer(Version.LUCENE_35)));
/*
*Lucene并没有提供更新，这里的更新操作其实是如下两个操作的合集
*先删除之后再添加
*/
Documentdoc=newDocument();
doc.add(newField("id","11",Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
doc.add(newField("email",emails[0],Field.Store.YES,Field.Index.NOT_ANALYZED));
doc.add(newField("content",contents[0],Field.Store.NO,Field.Index.ANALYZED));
doc.add(newField("name",names[0],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
writer.updateDocument(newTerm("id","1"),doc);
}catch(CorruptIndexExceptione){
e.printStackTrace();
}catch(LockObtainFailedExceptione){
e.printStackTrace();
}catch(IOExceptione){
e.printStackTrace();
}finally{
try{
if(writer!=null)writer.close();
}catch(CorruptIndexExceptione){
e.printStackTrace();
}catch(IOExceptione){
e.printStackTrace();
}
}
}
publicvoidquery(){
try{
IndexReaderreader=IndexReader.open(directory);
//通过reader可以有效的获取到文档的数量
System.out.println("numDocs:"+reader.numDocs());
System.out.println("maxDocs:"+reader.maxDoc());
System.out.println("deleteDocs:"+reader.numDeletedDocs());
reader.close();
}catch(CorruptIndexExceptione){
e.printStackTrace();
}catch(IOExceptione){
e.printStackTrace();
}
}
publicvoidindex(){
IndexWriterwriter=null;
try{
writer=newIndexWriter(directory,newIndexWriterConfig(Version.LUCENE_35,newStandardAnalyzer(Version.LUCENE_35)));
writer.deleteAll();
Documentdoc=null;
for(inti=0;i<ids.length;i++){
doc=newDocument();
doc.add(newField("id",ids[i],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
doc.add(newField("email",emails[i],Field.Store.YES,Field.Index.NOT_ANALYZED));
doc.add(newField("content",contents[i],Field.Store.NO,Field.Index.ANALYZED));
doc.add(newField("name",names[i],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
//存储数字
doc.add(newNumericField("attach",Field.Store.YES,true).setIntValue(attachs[i]));
//存储日期
doc.add(newNumericField("date",Field.Store.YES,true).setLongValue(dates[i].getTime()));
Stringet=emails[i].substring(emails[i].lastIndexOf("@")+1);
System.out.println(et);
if(scores.containsKey(et)){
doc.setBoost(scores.get(et));
}else{
doc.setBoost(0.5f);
}
writer.addDocument(doc);
}
}catch(CorruptIndexExceptione){
e.printStackTrace();
}catch(LockObtainFailedExceptione){
e.printStackTrace();
}catch(IOExceptione){
e.printStackTrace();
}finally{
try{
if(writer!=null)writer.close();
}catch(CorruptIndexExceptione){
e.printStackTrace();
}catch(IOExceptione){
e.printStackTrace();
}
}
}
}