浏览 1749 次
精华帖 (0) :: 良好帖 (0) :: 新手帖 (0) :: 隐藏帖 (0)
|
|
---|---|
作者 | 正文 |
发表时间:2009-06-01
最后修改:2009-06-02
package chapter5; import java.io.IOException; import java.util.Date; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.SimpleAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexCommitPoint; import org.apache.lucene.store.LockObtainFailedException; public class LuceneIndexTest { /** * @param args */ private static String dest_Index_Path = "D:\\workshop\\TextIndex"; static protected String[] keywords = { "001", "002", "003" }; static protected String[] textdetail = { "记录一", "记录二", "记录三" }; public static void main(String[] args) { Date start = new Date(); Analyzer textAnalyzer = new SimpleAnalyzer(); try { IndexWriter textIndex = new IndexWriter(dest_Index_Path, textAnalyzer, true);//true or false menus create or update for (int i = 0; i < 3; i++) { Document document = new Document(); Field field_id = new Field("id", keywords[i], Field.Store.YES, Field.Index.UN_TOKENIZED ); document.add(field_id); Field field_content = new Field("content", textdetail[i], Field.Store.YES, Field.Index.TOKENIZED ); document.add(field_content); } textIndex.optimize();// 不关闭索引只保存在内存里面。 textIndex.close(); Date end = new Date(); long index_tm = end.getTime() - start.getTime(); System.out.println("Total Time :(ms)"); System.out.println(index_tm); } catch (CorruptIndexException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (LockObtainFailedException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } System.out.println("Index Success!"); } } 注意,测试代码的版本是lucene2.3.jar A IndexWriter textIndex = new IndexWriter(dest_Index_Path, textAnalyzer, true);//true or false menus create or update 
表示的是索引创建器,3个参数是,路径,分析器,是否重建。第3个参数为true,表示重新建立索引(假若存在则删除原文件),假若为false,那么在原来的基础上更改,这就是创建增量索引。 还有一个构造函数参数是一个目录,可以用以下方法取得。
Directory dir=FSDirectory.getDirectory(dir_name);
B Analyzer textAnalyzer = new SimpleAnalyzer(); Analyzer是索引分析器,每个域的数据在添加时都会使用它来进行分析。
C
Document document = new Document(); Field field_id = new Field("id", keywords[i], Field.Store.YES, Field.Index.UN_TOKENIZED );
这里的Document并不是真正意义上的文档，而是一个抽象的概念，可以理解为一个要被索引的内容的容器，由不同的Field组成的Document。
D 另外就是索引管理器IndexReader ,用来管理索引的强大工具。可以用它来删除索引 File indexDir=new File("D:\\workshop\\TextIndex"); try { IndexReader indexReader=IndexReader.open(indexDir); //准备索引文件的目录,生成对象读取索引内容 Term term=new Term("name","xx.txt");//创建要删除的对象的索引项的表示(Term) indexReader.deleteDocuments (term);//删除符合索引项的文档 indexReader.close ();//关闭,实现物力删除 } catch (CorruptIndexException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } 下面贴一个给文本文件建立索引的代码,一共参考: package chapter5; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.io.InputStream; import java.util.Date; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; public class LunceneIndexManager { /** * @param args */ private static String dest_Index_Path = "D:\\workshop\\TextIndex"; private static String text_File_Path = "D:\\largeData\\xx.txt"; public static void main(String[] args) throws IOException { Date start=new Date(); File file=new File(text_File_Path); try { FileReader fileReader=new FileReader(file); String dir_name=dest_Index_Path; Directory dir=FSDirectory.getDirectory(dir_name); Analyzer textAnalyzer=new StandardAnalyzer(); IndexWriter indexWriter=new IndexWriter(dir,textAnalyzer,false); Document document=new Document(); Field field_name=new Field("name",file.getName(),Field.Store.YES,Field.Index.UN_TOKENIZED); document.add(field_name); InputStream inputStream=new FileInputStream(file); int leng=inputStream.available(); byte[] by=new byte[leng]; inputStream.read(by); inputStream.close(); String content=new String(by); Field field_content=new 
Field("content",content,Field.Store.YES,Field.Index.TOKENIZED); document.add(field_content); indexWriter.addDocument(document); indexWriter.optimize(); indexWriter.close(); Date end=new Date(); long ss=end.getTime()-start.getTime(); System.out.println("Total Time:(ms)"); System.out.println(ss); } catch (FileNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } System.out.println("Index Sucess!"); } }
声明:ITeye文章版权属于作者,受法律保护。没有作者书面许可不得转载。
推荐链接
|
|
返回顶楼 | |