package org.se.lucene;
//主类
import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
public class lucene_index {
private String[] ids={"1","2","3","4","5","6"};
private String[] emails={"aa@aa.org","cc@cc.org","dd@@dd.org","bb@bb.org","ee@ee.org","ff@ff.org"};
private String[] contents={"welcometotyu","hellowboy","higirl","howareyou","googluck","badgosh"};
private int[] attachs={1,2,3,4,5,6};
private String[] names={"liwu","zhangsan","xiaoqinag","laona","dabao","lisi"};
private Directory directory=null;
public lucene_index()
{
try {
directory=FSDirectory.open(new File("f:/lucene/index02"));
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
public void quary()
{
try {
IndexReader reader=IndexReader.open(directory);
System.out.println("numdocs"+reader.numDocs());
System.out.println("maxDocs"+reader.maxDoc());
System.out.println("detelemaxDocs"+reader.numDeletedDocs());
reader.close();
} catch (CorruptIndexException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
@SuppressWarnings("deprecation")
public void undelete()
{
try {
//回复时必须把reader的只读设为false
IndexReader reader=IndexReader.open(directory,false);
reader.undeleteAll();
reader.close();
} catch (CorruptIndexException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
//清空回收站,强制优化
public void forceDelete()
{
IndexWriter writer=null;
try {
writer=new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_36,
new StandardAnalyzer(Version.LUCENE_36)));
//参数十一个选项,可以是一个query,也可以是一个term term就是一个精确查找的值
//此时删除的文档并未完全删除,而是存储在回收站中,可以恢复的
writer.forceMergeDeletes();
} catch (CorruptIndexException e) {
e.printStackTrace();
} catch (LockObtainFailedException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
finally{
if (writer!=null) {
try {
writer.close();
} catch (CorruptIndexException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
}
public void merge()
{
IndexWriter writer=null;
try {
writer=new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_36,
new StandardAnalyzer(Version.LUCENE_36)));
writer.forceMerge(2);
} catch (CorruptIndexException e) {
e.printStackTrace();
} catch (LockObtainFailedException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
finally{
if (writer!=null) {
try {
writer.close();
} catch (CorruptIndexException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
}
public void delete()
{
IndexWriter writer=null;
try {
writer=new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_36,
new StandardAnalyzer(Version.LUCENE_36)));
//参数十一个选项,可以是一个query,也可以是一个term term就是一个精确查找的值
//此时删除的文档并未完全删除,而是存储在回收站中,可以恢复的
writer.deleteDocuments(new Term("id","1"));
} catch (CorruptIndexException e) {
e.printStackTrace();
} catch (LockObtainFailedException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
finally{
if (writer!=null) {
try {
writer.close();
} catch (CorruptIndexException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
}
public void index()
{
IndexWriter writer=null;
Document doc=null;
try {
writer =new IndexWriter(directory,new IndexWriterConfig(Version.LUCENE_36,
new StandardAnalyzer(Version.LUCENE_36)));
//writer.deleteAll();
for(int i=0;i<ids.length;i++)
{
doc=new Document();
doc.add(new Field("id",ids[i],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
doc.add(new Field("emails",emails[i],Field.Store.YES,Field.Index.NOT_ANALYZED));
doc.add(new Field("contents",contents[i],Field.Store.YES,Field.Index.ANALYZED));
doc.add(new Field("name",names[i],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
writer.addDocument(doc);
}
} catch (CorruptIndexException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (LockObtainFailedException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
finally{
if(writer!=null)
{
try {
writer.close();
} catch (CorruptIndexException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
}
//更新索引
public void update()
{
/*lucene本身不支持更新
*
* 通过删除索引然后再建立索引来更新
*/
IndexWriter writer=null;
Document doc=null;
try {
writer =new IndexWriter(directory,new IndexWriterConfig(Version.LUCENE_36,
new StandardAnalyzer(Version.LUCENE_36)));
writer.deleteAll();
for(int i=0;i<ids.length;i++)
{
doc=new Document();
doc.add(new Field("id",ids[i],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
doc.add(new Field("emails",emails[i],Field.Store.YES,Field.Index.NOT_ANALYZED));
doc.add(new Field("contents",contents[i],Field.Store.YES,Field.Index.ANALYZED));
doc.add(new Field("name",names[i],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
writer.updateDocument(new Term("id","1"), doc);
}
} catch (CorruptIndexException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (LockObtainFailedException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
finally{
if(writer!=null)
{
try {
writer.close();
} catch (CorruptIndexException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
}
}
//测试类
package org.se.lucene;
import org.junit.Test;
/**
 * JUnit entry points for {@code lucene_index}. Every test builds its own
 * {@code lucene_index} instance and invokes exactly one of its operations.
 * There are no assertions: a test fails only if the invoked call throws.
 */
public class test {

    /** Creates the fresh helper instance each test operates on. */
    private static lucene_index newIndex() {
        return new lucene_index();
    }

    /** Runs {@code index()}. */
    @Test
    public void testIndex() {
        newIndex().index();
    }

    /** Runs {@code quary()}. */
    @Test
    public void testquary() {
        newIndex().quary();
    }

    /** Runs {@code delete()}. */
    @Test
    public void testDelete() {
        newIndex().delete();
    }

    /** Runs {@code undelete()}. */
    @Test
    public void testunDelete() {
        newIndex().undelete();
    }

    /** Runs {@code forceDelete()}. */
    @Test
    public void testForceDelete() {
        newIndex().forceDelete();
    }

    /** Runs {@code merge()}. */
    @Test
    public void testmerge() {
        newIndex().merge();
    }

    /** Runs {@code update()}. */
    @Test
    public void upDate() {
        newIndex().update();
    }
}
分享到:
相关推荐
4. **更新和删除**:了解如何在索引中添加、更新或删除文档,以及维护索引的一致性和性能。 5. **多字段搜索**:掌握如何在多个字段之间执行跨字段搜索,以及如何对不同字段设置权重。 6. **内存缓存和持久化**:...
索引删除策略规定了旧索引何时被清除,防止了磁盘空间的浪费。 #### SpellCheck/DidYouMean 拼写检查和“您是不是想说”功能提高了用户体验,即使用户输入错误也能找到相关结果。 ### OSEM - Object/Search Engine...
首先,我们需要了解的是Elasticsearch中的关键概念——倒排索引(Inverted Index)。在传统的正排索引方式中,我们通常是通过文档找到它所包含的关键词,而在倒排索引中,我们是通过关键词找到包含该关键词的文档及其出现位置。这种索引...
3. **文档(Document)**:索引中的基本单位,通常表示为JSON对象,每个文档都有唯一的标识符——`_id`。 4. **类型(Type)**:在旧版本中,索引内的分类,新版本中已被映射为`_doc`。 5. **分片(Shard)**:为了提高...
- **删除数据**: DELETE 方法根据索引、类型和 ID 删除文档。 **Logstash 和 Kibana** - **Logstash**: 作为数据收集器,可以从不同来源(如日志文件)收集数据,进行预处理,并发送至 Elasticsearch。 - **Kibana...
- **数据实时处理**:如订单处理、库存更新等需要实时反馈。 - **高性能架构设计**:采用微服务、分布式存储等技术提高性能。 - **安全性保障**:确保交易安全、用户信息安全。 #### 二、开源全文检索技术概述 ...
3. **RESTful API**:Elasticsearch 提供了简单易用的 RESTful API,允许开发者通过 HTTP 请求进行索引、搜索、更新和删除等操作。 4. **文档模型**:Elasticsearch 以 JSON 文档作为数据存储单位,易于理解和操作...
Jackrabbit的事件机制允许开发者监听并响应节点或属性上的创建、更新、删除等操作。这种机制极大地增强了系统的灵活性,使得开发人员可以根据不同的业务场景定制相应的逻辑。 总结起来,`jackrabbit-core`是...
5. **搜索功能**: 可能基于全文索引和关键词匹配,如使用Lucene.NET等开源搜索引擎库,提高查找效率。 6. **权限管理**: 实现用户角色划分,通过角色权限分配来控制对文档的操作,如读、写、删除等。 7. **版本...