建立索引，搜索关键字

qzxfl008

浏览: 80144 次
性别:
来自: 浙江

最近访客更多访客>>

sgq0085

malie0

mazhongxing_jay

chenjun296

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

lucene学习笔记

lucene Apache junit 单元测试

这里使用 JUnit 单元测试来演示索引的建立与关键字搜索，
使用的 Lucene 版本是 3.1.0

import java.io.File;
import java.io.IOException;
import java.util.Date;


import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
import org.xfl.lucene.utils.LuceneUtils;

/**
 * JUnit-driven demo that builds a Lucene 3.1 index from the files in a folder
 * and then runs a keyword search against that index.
 */
public class LuceneIndexSearch{
	
	// The "resource" folder under the current project holds the files to be indexed;
	// the "luceneIndex" folder receives the index files.
	String filePath = "resource";
	String indexDir = "luceneIndex";
	
	// Shared by indexing and searching so both sides tokenize text the same way.
	Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);

	/**
	 * Creates the index: opens {@link #indexDir}, hands the writer to
	 * {@link LuceneUtils#file2Document} to add the files found under
	 * {@link #filePath}, then prints the document count and elapsed time.
	 *
	 * @throws IOException if the index directory cannot be opened or written
	 * @throws CorruptIndexException if the existing index is corrupt
	 */
	@Test
	public void createIndex() throws CorruptIndexException, IOException
	{
		Date startTime = new Date();
		
		Directory dir = FSDirectory.open(new File(indexDir));
		try {
			IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31, analyzer);
			IndexWriter indexWriter = new IndexWriter(dir, iwc);
			try {
				// Add the files to the index.
				LuceneUtils.file2Document(indexWriter, filePath);
				Date endTime = new Date();
				System.out.println("共有"+indexWriter.numDocs()+"个索引");
				System.out.println("总共花了"+(endTime.getTime()-startTime.getTime())+"毫秒时间");
			} finally {
				// Always close the writer, even when adding documents fails,
				// so it is not left open after an exception.
				indexWriter.close();
			}
		} finally {
			dir.close();
		}
	}
	
	/**
	 * Searches the "filename" and "content" fields of the index for a fixed
	 * query string and prints every matching document.
	 *
	 * @throws IOException if the index cannot be opened or read
	 * @throws Exception if the query string cannot be parsed
	 */
	@Test
	public void search() throws IOException, Exception
	{
		// The text to search for.
		String queryString = "distribution";
		Directory dir = FSDirectory.open(new File(indexDir));
		try {
			// 1. Parse the search text into a Query object spanning both fields.
			String[] fields = {"filename","content"};
			QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_31, fields, analyzer);
			Query query = queryParser.parse(queryString);

			// 2. Run the query.
			IndexSearcher indexSearcher = new IndexSearcher(dir);
			try {
				Filter filter = null;
				// TopDocs carries a scoreDocs[] array holding the matching document numbers.
				TopDocs hits = indexSearcher.search(query, filter, 10000);
				
				System.out.println("找到了"+hits.totalHits+"条记录");

				// 3. Print the results.
				for (ScoreDoc hit : hits.scoreDocs)
				{
					// Fetch the stored document by its document number.
					Document doc = indexSearcher.doc(hit.doc);
					LuceneUtils.printDocunentInfo(doc);
				}
			} finally {
				// The searcher was never closed in the original code.
				indexSearcher.close();
			}
		} finally {
			dir.close();
		}
	}
}

********************************************
Lucene工具类

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;

/**
 * Static helpers for turning files into Lucene documents and for printing
 * the stored fields of a document.
 */
public class LuceneUtils {

	/**
	 * Adds one document to the index for every regular file directly inside
	 * {@code fileDir}. Each document gets four fields: "filename" and
	 * "content" (stored, analyzed), "size" (stored, not analyzed) and
	 * "path" (stored only, not indexed).
	 *
	 * @param writer  the index writer that receives the documents
	 * @param fileDir path of the directory whose files are indexed
	 * @throws FileNotFoundException if {@code fileDir} cannot be listed as a directory
	 * @throws CorruptIndexException if the index is corrupt
	 * @throws IOException if writing to the index fails
	 */
	public static void file2Document(IndexWriter writer,String fileDir) throws CorruptIndexException, IOException
	{
		File[] files = new File(fileDir).listFiles();
		if (files == null) {
			// listFiles() returns null when the path is missing, is not a
			// directory, or cannot be read; fail with a clear message
			// instead of a NullPointerException.
			throw new FileNotFoundException("Cannot list directory: " + fileDir);
		}
		
		for (File file : files) {
			if (!file.isFile()) {
				// Sub-directories cannot be opened as a file stream; skip them.
				continue;
			}
			Document doc = new Document();
			doc.add(new Field("filename",file.getName(),Field.Store.YES,Field.Index.ANALYZED));
			doc.add(new Field("content",readFileContent(file),Field.Store.YES,Field.Index.ANALYZED));
			doc.add(new Field("size",String.valueOf(file.length()),Field.Store.YES,Field.Index.NOT_ANALYZED));
			doc.add(new Field("path",file.getAbsolutePath(),Store.YES,Index.NO));
			writer.addDocument(doc);
		}
	}
	
	/**
	 * Reads a file line by line using the platform default charset and
	 * returns the text with every line terminated by "\n".
	 * Reading is best-effort: on an I/O error the stack trace is printed and
	 * whatever was read so far (possibly the empty string) is returned.
	 *
	 * @param file the file to read
	 * @return the file content, never {@code null}
	 */
	private static String readFileContent(File file) {
		// Created up front so a failure to open the file yields "" rather
		// than a NullPointerException on return.
		StringBuilder content = new StringBuilder();
		BufferedReader reader = null;
		try {
			reader = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
			String line;
			while ((line = reader.readLine()) != null) {
				content.append(line).append("\n");
			}
		} catch (IOException e) {
			// Also covers FileNotFoundException, which is an IOException.
			e.printStackTrace();
		} finally {
			if (reader != null) {
				try {
					// Closing the reader also closes the underlying file stream.
					reader.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
		
		return content.toString();
	}

	/**
	 * Placeholder for the reverse conversion (document to file); currently
	 * does nothing.
	 *
	 * @param doc the document to convert
	 */
	public static void document2File(Document doc)
	{
		
	}

	/**
	 * Prints the stored "filename", "content", "size" and "path" values of a
	 * document to standard output.
	 * <pre>
	 * A stored value can be fetched in two ways:
	 * 1. Field field = doc.getField("filename"); field.stringValue();
	 * 2. doc.get("filename");
	 * </pre>
	 * This method uses the second form.
	 *
	 * @param doc the document whose stored fields are printed
	 */
	public static void printDocunentInfo(Document doc)
	{
		System.out.println("filename   "+doc.get("filename"));   
		System.out.println("content    "+doc.get("content"));   
		System.out.println("size       "+doc.get("size"));   
		System.out.println("path       "+doc.get("path"));   
	}
}

分享到：

lucene3.1.0 简单分词实例 | lucene ——全文检索

2011-05-01 18:06
浏览 1190
评论(0)
分类:互联网
查看更多

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

建立索引，搜索关键字

评论

发表评论

相关推荐

最近访客 更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

建立索引，搜索关键字

评论

发表评论

相关推荐

页面高亮显示问题

lucene3.1.0 简单分词实例

lucene ——全文检索

lucene 索引过程的核心类

2lucene如何创建一个索引

如何使用lucene3.1.0，创建一个应用的步骤

最近访客更多访客>>