【Lucene】基本索引操作

Mr_Tank_

浏览: 22589 次
性别:

最近访客更多访客>>

wukun_kevin

javacold

w592376568

lu3180

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

lucene

lucene

最近在学Lucene，参考书籍为《Lucene in action 中文版》，这里的代码例子也是参考里面的【有些出入，不过不是很大】，欢迎各位大神们拍砖；至于一些介绍什么的大家可以参考一下前面说的参考书；程序用到的包我是在官网那里下的，也把它的参考文档下了【英文版】，看文档给写代码提供了不少帮助；

1、Lucene重要的类：

（1）Document；（2）Field；（3）Store；（4）Index；

（5）IndexWriter；（6）Directory；（7）IndexReader；（8）Query；等

2、向索引添加文档；添加文档的方法有两个：

（1）addDocument(Document);（2）addDocument(Document，Analyzer);

3、更新索引中的文档：

IndexWriter提供了两个方法用于更新索引中的文档：

（1）updateDocument(Term，Document);

（2）updateDocument(Term，Document，Analyzer);

4、删除索引中的文档

5、下面是示例代码【人比较懒只写了添加和更新两个操作】：

package com.tan.code;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;

/**
 * Small Lucene 4.4 demo that builds, queries and updates an on-disk index.
 *
 * <p>
 * Every public method opens its own {@link Directory} on the index path and
 * closes it again before returning, so no file handles or write locks stay
 * open between calls.
 */
public class IndexingTest {

	// Sample data: parallel arrays, one entry per document.
	protected String[] ids = { "1", "2" };
	protected String[] unindexed = { "Netherlands", "Italy" };
	protected String[] unstored = { "Amsterdam has lots of bridges",
			"Venice has lots of cancals" };
	protected String[] text = { "Amsterdam", "Venice" };
	private String indexpath = "C:/index";
	File file = new File(indexpath);
	private Directory directory;

	/**
	 * Indexes the sample documents (one per entry of {@link #ids}) into the
	 * on-disk index.
	 *
	 * <p>
	 * NOTE(review): the index is not cleared first, so calling this more than
	 * once against the same index path presumably appends the same documents
	 * again - confirm against the IndexWriterConfig default open mode.
	 *
	 * @throws IOException
	 *             if the index cannot be opened or written
	 */
	public void setUp() throws IOException {
		// The index lives on disk; a RAMDirectory could be used instead to
		// keep it in memory.
		directory = new SimpleFSDirectory(file);
		try {
			IndexWriter indexWriter = getIndexWriter();
			try {
				for (int i = 0; i < ids.length; i++) {
					indexWriter.addDocument(buildDocument(ids[i],
							unindexed[i], unstored[i], text[i]));
				}
			} finally {
				// Always close the writer so the index write lock is
				// released even when adding a document fails.
				indexWriter.close();
			}
		} finally {
			directory.close();
		}
	}

	/**
	 * Creates a writer on the current {@link #directory} using a
	 * {@link CJKAnalyzer}.
	 *
	 * @return a new writer; the caller is responsible for closing it
	 * @throws IOException
	 *             if the writer cannot be created
	 */
	private IndexWriter getIndexWriter() throws IOException {
		return new IndexWriter(directory, new IndexWriterConfig(
				Version.LUCENE_44, new CJKAnalyzer(Version.LUCENE_44)));
	}

	/**
	 * Builds one document with the four demo fields. All fields are indexed
	 * as single un-analyzed terms, so only exact, case-sensitive term queries
	 * on the whole value match; only "id" and "city" are stored.
	 *
	 * @param id
	 *            value of the "id" field
	 * @param country
	 *            value of the "country" field
	 * @param contents
	 *            value of the "contents" field
	 * @param city
	 *            value of the "city" field
	 * @return the populated document
	 */
	@SuppressWarnings("deprecation")
	private static Document buildDocument(String id, String country,
			String contents, String city) {
		Document document = new Document();
		document.add(new Field("id", id, Field.Store.YES,
				Field.Index.NOT_ANALYZED));
		document.add(new Field("country", country, Field.Store.NO,
				Field.Index.NOT_ANALYZED));
		document.add(new Field("contents", contents, Field.Store.NO,
				Field.Index.NOT_ANALYZED));
		document.add(new Field("city", city, Field.Store.YES,
				Field.Index.NOT_ANALYZED));
		return document;
	}

	/**
	 * Counts the documents whose field contains exactly the given term.
	 *
	 * @param filedname
	 *            name of the field to search
	 * @param searchString
	 *            exact term text to look for
	 * @return the total number of matching documents
	 * @throws IOException
	 *             if the index cannot be opened or read
	 */
	@SuppressWarnings("deprecation")
	public int getHitsCount(String filedname, String searchString)
			throws IOException {

		directory = new SimpleFSDirectory(file);
		try {
			IndexReader indexReader = IndexReader.open(directory);
			try {
				IndexSearcher indexSearcher = new IndexSearcher(indexReader);
				// Build a term query for the exact field/value pair.
				Query query = new TermQuery(new Term(filedname, searchString));
				// totalHits is the full match count; the length of the
				// returned ScoreDoc array would be capped by the requested
				// number of top documents.
				TopDocs topDocs = indexSearcher.search(query, 1);
				return topDocs.totalHits;
			} finally {
				indexReader.close();
			}
		} finally {
			directory.close();
		}
	}

	/**
	 * Replaces every document whose "id" is "1" with a new document
	 * describing Beijing.
	 *
	 * @throws IOException
	 *             if the index cannot be opened or written
	 */
	public void updateIndex() throws IOException {

		directory = new SimpleFSDirectory(file);
		try {
			IndexWriter indexWriter = getIndexWriter();
			try {
				Document document = buildDocument("1", "China",
						"Beijing has lots of people", "Beijing");
				// updateDocument deletes the documents matching the term and
				// then adds the replacement.
				indexWriter.updateDocument(new Term("id", "1"), document);
			} finally {
				// Always close the writer so the index write lock is
				// released even when the update fails.
				indexWriter.close();
			}
		} finally {
			directory.close();
		}
	}

}

6、测试代码：

package com.tan.test;

import static org.junit.Assert.*;

import java.io.IOException;

import org.junit.Test;

import com.tan.code.IndexingTest;

/**
 * Checks that {@link IndexingTest#updateIndex()} replaces the document with
 * id "1" instead of adding a second one.
 */
public class MyTest {

	/**
	 * Indexes the sample documents, runs the update and verifies the result
	 * through term-query hit counts.
	 *
	 * @throws IOException
	 *             if the index cannot be read or written
	 */
	@Test
	public void test() throws IOException {
		IndexingTest indexingTest = new IndexingTest();
		// Index the sample documents first so the update below has an
		// existing id "1" document to replace.
		indexingTest.setUp();

		indexingTest.updateIndex();
		// Confirm the old document has been deleted.
		assertEquals(0, indexingTest.getHitsCount("city", "Amsterdam"));
		// Confirm the new document has been indexed.
		assertEquals(1, indexingTest.getHitsCount("city", "Beijing"));
		// Exactly one document may carry id "1": the update must replace the
		// old document rather than append next to it.
		assertEquals(1, indexingTest.getHitsCount("id", "1"));
	}

}

7、源代码已经上传到了我的资源里面，有兴趣可以去下载，下载链接：http://download.csdn.net/detail/mr_tank_/6017929

【敬请批评指正】

分享到：

一个很简单的Spring MVC 入门实例 | 【Jsoup】“广西师范大学”校园快讯和公告 ...

2013-08-27 10:37
浏览 382
评论(0)
分类:开源软件
查看更多

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论