通过updateDocument更新索引

ttitfly

浏览: 623883 次
性别:
来自: 杭州

最近访客更多访客>>

xubbsun

xuwenyan

u011158808

zws

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

搜索技术

lucene Apache junit

package com.lucene;

import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;

/**
 * Small demo of replacing a document in a Lucene index with
 * {@code IndexWriter.updateDocument(Term, Document)}.
 *
 * <p>{@link #addIndex()} creates the index with one document,
 * {@link #updateIndex()} replaces that document (looked up by its
 * {@code id} term) and {@link #search(String)} prints every hit for a
 * user name.
 */
public class UpdateDocument {
	
	/** File-system directory that holds the index. */
	private static final String path = "d:/index";
	
	
	/**
	 * Replaces the indexed document and then searches for two user names.
	 * Enable the {@code addIndex()} call on the first run so that an index
	 * exists before {@code updateIndex()} opens it.
	 */
	public static void main(String[] args){
//		addIndex();
		updateIndex();
		search("李四");
		search("王五");
	}
	
	/**
	 * Creates the index (the writer is opened with {@code create == true})
	 * and stores a single document with the fields {@code id},
	 * {@code userName} and {@code comefrom}.
	 * I/O failures are printed to stderr and not rethrown.
	 */
	public static void addIndex(){
		IndexWriter write = null;
		try {
			write = new IndexWriter(path,new StandardAnalyzer(),true);
			
			Document doc = new Document();
			// The id is indexed as one un-analyzed term so that it can be matched exactly.
			doc.add(new Field("id","123456",Field.Store.YES,Field.Index.UN_TOKENIZED));
			doc.add(new Field("userName","张三",Field.Store.YES,Field.Index.TOKENIZED));
			doc.add(new Field("comefrom","北京",Field.Store.YES,Field.Index.TOKENIZED));
			
			write.addDocument(doc);
			
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			// Close on every path, otherwise a failed add leaves the writer open.
			closeWriter(write);
		}
	}
	
	
	/**
	 * Replaces the document whose {@code id} term is "123456" with a new
	 * document (opened with {@code create == false}, i.e. on the existing index).
	 *
	 * <p>{@code updateDocument} first looks up the documents matching the
	 * given term. If any exist they are replaced (when several match, only
	 * the single new document remains afterwards); if none exist the new
	 * document is simply added.
	 *
	 * <p>Unlike a database update, which can change a single column, Lucene
	 * can only replace the whole "row": the new document here carries no
	 * {@code comefrom} field, so that field is gone after the update.
	 * I/O failures are printed to stderr and not rethrown.
	 */
	public static void updateIndex(){
		IndexWriter write = null;
		try {
			
			write = new IndexWriter(path,new StandardAnalyzer(),false);
			Document docNew = new Document();
			docNew.add(new Field("id","123456",Field.Store.YES,Field.Index.UN_TOKENIZED));
			docNew.add(new Field("userName","王五",Field.Store.YES,Field.Index.TOKENIZED));
			Term term = new Term("id","123456");
			write.updateDocument(term, docNew);
			
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			closeWriter(write);
		}
	}

	/**
	 * Parses {@code str} into a query against the {@code userName} field.
	 *
	 * @param str query text
	 * @return the parsed query, or {@code null} if parsing failed (the
	 *         failure is printed to stderr)
	 */
	public static Query queryParser(String str){
		QueryParser queryParser = new QueryParser("userName", new StandardAnalyzer());
		try {
			return queryParser.parse(str);
		} catch (Exception e) {
			e.printStackTrace();
		}
		return null;
	}
	
	/**
	 * Searches the {@code userName} field for {@code str} and prints every
	 * hit (Lucene document number plus the stored fields) to stdout.
	 * Failures are printed to stderr and not rethrown.
	 *
	 * @param str user name to search for
	 */
	public static void search(String str){
		Query query = queryParser(str);
		if(query == null){
			// Parsing failed and was already reported; do not hand null to the searcher.
			return;
		}
		IndexSearcher search = null;
		try {
			search = new IndexSearcher(path);
			
			Hits hits = search.search(query);
			if(hits==null){
				return;
			}
			if(hits.length() == 0){
				System.out.println(" 没有搜索到'" + str+"'");
				return;
			}
			for (int i = 0; i < hits.length(); i++) {
				Document doc = hits.doc(i);
				System.out.println("id = "+hits.id(i));
				System.out.println("own id = " + doc.get("id"));
				System.out.println("userName = "+doc.get("userName"));
				System.out.println("come from  = "+doc.get("comefrom"));
				System.out.println("");
			}
			
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			// The searcher holds open index files; release them on every path.
			if(search != null){
				try {
					search.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
	}

	/** Closes {@code write} if it was opened, printing any failure to stderr. */
	private static void closeWriter(IndexWriter write){
		if(write == null){
			return;
		}
		try {
			write.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

}

分享到：

hibernate之one-to-many详细 | 通过addIndexes将内存中的索引加入到磁盘索 ...

2008-09-20 15:57
浏览 4874
评论(3)
查看更多

3 楼 TonyLian 2010-02-02

有没有发现：更新后，下次再打开索引时，
indexReader.maxDoc()的数量翻番了，
而且索引目录所占用的磁盘空间也翻番了（旧的索引文件还在，新的索引文件大小和旧的一样大）。

只有在 writer.close();
之前调用 writer.optimize();
才可以避免此问题。

但是，writer.optimize(); 是一个很耗时、耗资源的操作。单单空闲磁盘空间的需求，就至少要有翻番后的索引文件大小的 2 倍。

如果经常要updateDocument的话，每次都writer.optimize();会大大影响性能，不知道有没有两全其美的好方法？

2 楼 hqman 2008-09-21

package com.wangkai.lucene;

import java.io.IOException;

import junit.framework.TestCase;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;

/**
 * Variant of the article's demo, written as a JUnit 3 test case, that checks
 * what happens when a document is "updated" with a plain
 * {@code addDocument} call instead of {@code updateDocument}.
 *
 * <p>{@link #updateIndex()} deliberately adds the same document a second
 * time, so after {@link #testUpdate()} a search for the user name is
 * expected to print two hits with the same {@code id} value.
 */
public class UpdateDocumentTests extends TestCase {

    /** File-system directory that holds the index. */
    private static final String path = "/home/hqman/shell/index";

    /**
     * Runs the add-instead-of-update step and searches for two user names.
     * Enable the {@code addIndex()} call on the first run so that an index
     * exists before {@code updateIndex()} opens it.
     */
    public static void main(String[] args) {
        // addIndex();
        updateIndex();
        search("李四");
        search("王五");
    }

    /**
     * Creates the index (writer opened with {@code create == true}) and
     * stores one document with the fields {@code id}, {@code userName} and
     * {@code comefrom}. I/O failures are printed to stderr and not rethrown.
     */
    public static void addIndex() {
        IndexWriter write = null;
        try {
            write = new IndexWriter(path, new StandardAnalyzer(), true);
            write.addDocument(newUserDocument());
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Close on every path, otherwise a failed add leaves the writer open.
            closeWriter(write);
        }
    }

    /**
     * Opens the existing index (writer opened with {@code create == false})
     * and adds the same document again with {@code addDocument}.
     *
     * <p>This is the experiment: nothing is looked up or removed first, so
     * the earlier document with id "123456" stays in the index. The
     * alternative under comparison is
     * {@code write.updateDocument(new Term("id", "123456"), doc)}, which the
     * article describes as replacing the documents that match the term, or
     * adding the document when none match. Lucene replaces whole documents;
     * it cannot update a single field the way a database updates one column.
     * I/O failures are printed to stderr and not rethrown.
     */
    public static void updateIndex() {
        IndexWriter write = null;
        try {
            write = new IndexWriter(path, new StandardAnalyzer(), false);
            write.addDocument(newUserDocument());
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeWriter(write);
        }
    }

    /**
     * Parses {@code str} into a query against the {@code userName} field.
     *
     * @param str query text
     * @return the parsed query, or {@code null} if parsing failed (the
     *         failure is printed to stderr)
     */
    public static Query queryParser(String str) {
        QueryParser queryParser = new QueryParser("userName",
                new StandardAnalyzer());
        try {
            return queryParser.parse(str);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Searches the {@code userName} field for {@code str} and prints the hit
     * count followed by every hit (Lucene document number plus the stored
     * fields) to stdout. Failures are printed to stderr and not rethrown.
     *
     * @param str user name to search for
     */
    public static void search(String str) {
        Query query = queryParser(str);
        if (query == null) {
            // Parsing failed and was already reported; do not hand null to the searcher.
            return;
        }
        IndexSearcher search = null;
        try {
            search = new IndexSearcher(path);

            Hits hits = search.search(query);
            if (hits == null) {
                return;
            }
            if (hits.length() == 0) {
                System.out.println(" 没有搜索到'" + str + "'");
                return;
            }
            System.out.println(" 搜索到:" + hits.length() + "'");

            for (int i = 0; i < hits.length(); i++) {
                Document doc = hits.doc(i);
                System.out.println("id = " + hits.id(i));
                System.out.println("own id = " + doc.get("id"));
                System.out.println("userName = " + doc.get("userName"));
                System.out.println("come from = " + doc.get("comefrom"));
                System.out.println("");
            }

        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // The searcher holds open index files; release them on every path.
            if (search != null) {
                try {
                    search.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Rebuilds the index, adds the duplicate document and prints the hits
     * for the user name; makes no assertions, the output is inspected by hand.
     */
    public void testUpdate() {
        addIndex();
        updateIndex();
        search("张三");
    }

    /** Builds the one sample document that both index methods store. */
    private static Document newUserDocument() {
        Document doc = new Document();
        // The id is indexed as one un-analyzed term so that it can be matched exactly.
        doc.add(new Field("id", "123456", Field.Store.YES,
                Field.Index.UN_TOKENIZED));
        doc.add(new Field("userName", "张三", Field.Store.YES,
                Field.Index.TOKENIZED));
        doc.add(new Field("comefrom", "北京", Field.Store.YES,
                Field.Index.TOKENIZED));
        return doc;
    }

    /** Closes {@code write} if it was opened, printing any failure to stderr. */
    private static void closeWriter(IndexWriter write) {
        if (write == null) {
            return;
        }
        try {
            write.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

测试的结果是 2 条：

搜索到:2'
id = 0
own id = 123456
userName = 张三
come from = 北京

id = 1
own id = 123456
userName = 张三
come from = 北京

1 楼 hqman 2008-09-21

奇怪我直接 writer.addDocument(doc); 也可以完成更新索引

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论